From 83bcd1bc1f69be184e223a93514601417917fe3f Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Fri, 5 Mar 2010 18:24:54 +0000 Subject: [PATCH] ICU-7444 Created ICU4J 4.4 release branch from trunk r27783. X-SVN-Rev: 27784 --- .gitattributes | 357 + .gitignore | 22 + APIChangeReport.html | 591 + build.properties | 6 + build.xml | 1330 + demos/.classpath | 9 + demos/.project | 20 + demos/.settings/org.eclipse.jdt.core.prefs | 330 + demos/.settings/org.eclipse.jdt.ui.prefs | 6 + demos/build.properties | 5 + demos/build.xml | 29 + demos/demos-build.launch | 21 + demos/manifest.stub | 13 + demos/src/com/ibm/icu/dev/demo/Launcher.java | 192 + .../icu/dev/demo/calendar/CalendarApp.java | 37 + .../icu/dev/demo/calendar/CalendarCalc.java | 595 + .../icu/dev/demo/calendar/CalendarFrame.java | 442 + .../icu/dev/demo/calendar/CalendarPanel.java | 365 + .../ibm/icu/dev/demo/calendar/package.html | 12 + .../dev/demo/charsetdet/DetectingViewer.java | 421 + .../dev/demo/holiday/HolidayBorderPanel.java | 552 + .../dev/demo/holiday/HolidayCalendarDemo.java | 744 + .../com/ibm/icu/dev/demo/holiday/package.html | 12 + .../ibm/icu/dev/demo/impl/AppletFrame.java | 149 + .../com/ibm/icu/dev/demo/impl/DemoApplet.java | 80 + .../ibm/icu/dev/demo/impl/DemoTextBox.java | 96 + .../ibm/icu/dev/demo/impl/DemoUtility.java | 136 + .../icu/dev/demo/impl/DumbTextComponent.java | 827 + .../com/ibm/icu/dev/demo/impl/Selection.java | 161 + .../com/ibm/icu/dev/demo/impl/package.html | 12 + .../ibm/icu/dev/demo/number/CurrencyDemo.java | 114 + .../com/ibm/icu/dev/demo/rbnf/RbnfDemo.java | 580 + .../icu/dev/demo/rbnf/RbnfSampleRuleSets.java | 1941 + .../com/ibm/icu/dev/demo/rbnf/package.html | 12 + .../ibm/icu/dev/demo/timescale/PivotDemo.java | 78 + .../dev/demo/translit/AnyTransliterator.java | 308 + .../icu/dev/demo/translit/CaseIterator.java | 560 + .../com/ibm/icu/dev/demo/translit/Demo.java | 1417 + .../ibm/icu/dev/demo/translit/DemoApplet.java | 73 + .../ibm/icu/dev/demo/translit/InfoDialog.java 
| 66 + .../dev/demo/translit/Test_Arabic-Latin.txt | 24 + .../dev/demo/translit/Test_Greek-Latin.txt | 73 + .../icu/dev/demo/translit/Test_Han-Latin.txt | 26 + .../dev/demo/translit/Test_Hebrew-Latin.txt | 26 + .../dev/demo/translit/Test_Instructions.html | 154 + .../icu/dev/demo/translit/Test_Thai-Latin.txt | 69 + .../TransliteratingTextComponent.java | 257 + .../demo/translit/TransliterationChart.java | 294 + .../com/ibm/icu/dev/demo/translit/demo.bat | 13 + .../com/ibm/icu/dev/demo/translit/demo.html | 34 + .../ibm/icu/dev/demo/translit/package.html | 12 + .../resources/Transliterator_Han_Pinyin.txt | 20365 ++ .../Transliterator_Kanji_English.txt | 6366 + .../Transliterator_Kanji_OnRomaji.txt | 6216 + .../ibm/icu/dev/demo/translit/thai_test.txt | 55 + main/classes/charset/.classpath | 7 + .../copy-data-charset.launch | 25 + main/classes/charset/.project | 29 + .../.settings/org.eclipse.jdt.core.prefs | 343 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/charset/build.properties | 6 + main/classes/charset/build.xml | 38 + main/classes/charset/charset-build.launch | 21 + main/classes/charset/manifest.stub | 11 + .../java.nio.charset.spi.CharsetProvider | 3 + .../src/com/ibm/icu/charset/Charset88591.java | 128 + .../src/com/ibm/icu/charset/CharsetASCII.java | 357 + .../src/com/ibm/icu/charset/CharsetBOCU1.java | 1063 + .../src/com/ibm/icu/charset/CharsetCESU8.java | 26 + .../com/ibm/icu/charset/CharsetCallback.java | 408 + .../ibm/icu/charset/CharsetDecoderICU.java | 725 + .../ibm/icu/charset/CharsetEncoderICU.java | 920 + .../src/com/ibm/icu/charset/CharsetHZ.java | 385 + .../src/com/ibm/icu/charset/CharsetICU.java | 378 + .../src/com/ibm/icu/charset/CharsetISCII.java | 1458 + .../com/ibm/icu/charset/CharsetISO2022.java | 2992 + .../src/com/ibm/icu/charset/CharsetLMBCS.java | 1109 + .../src/com/ibm/icu/charset/CharsetMBCS.java | 5126 + .../ibm/icu/charset/CharsetProviderICU.java | 334 + .../src/com/ibm/icu/charset/CharsetSCSU.java | 1267 + 
.../com/ibm/icu/charset/CharsetSelector.java | 215 + .../src/com/ibm/icu/charset/CharsetUTF16.java | 288 + .../com/ibm/icu/charset/CharsetUTF16BE.java | 17 + .../com/ibm/icu/charset/CharsetUTF16LE.java | 17 + .../src/com/ibm/icu/charset/CharsetUTF32.java | 251 + .../com/ibm/icu/charset/CharsetUTF32BE.java | 17 + .../com/ibm/icu/charset/CharsetUTF32LE.java | 17 + .../src/com/ibm/icu/charset/CharsetUTF7.java | 756 + .../src/com/ibm/icu/charset/CharsetUTF8.java | 694 + .../com/ibm/icu/charset/UConverterAlias.java | 831 + .../charset/UConverterAliasDataReader.java | 221 + .../ibm/icu/charset/UConverterConstants.java | 169 + .../ibm/icu/charset/UConverterDataReader.java | 612 + .../ibm/icu/charset/UConverterSharedData.java | 448 + .../ibm/icu/charset/UConverterStaticData.java | 61 + .../src/com/ibm/icu/charset/package.html | 15 + main/classes/collate/.classpath | 7 + .../copy-data-collate.launch | 25 + main/classes/collate/.project | 29 + .../.settings/org.eclipse.jdt.core.prefs | 345 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/collate/build.properties | 6 + main/classes/collate/build.xml | 35 + main/classes/collate/collate-build.launch | 26 + main/classes/collate/manifest.stub | 11 + .../icu/text/CollationElementIterator.java | 2856 + .../src/com/ibm/icu/text/CollationKey.java | 624 + .../icu/text/CollationParsedRuleBuilder.java | 4234 + .../com/ibm/icu/text/CollationRuleParser.java | 2110 + .../src/com/ibm/icu/text/Collator.java | 1097 + .../src/com/ibm/icu/text/CollatorReader.java | 681 + .../com/ibm/icu/text/CollatorServiceShim.java | 145 + .../src/com/ibm/icu/text/IndexCharacters.java | 288 + .../src/com/ibm/icu/text/RawCollationKey.java | 102 + .../ibm/icu/text/RbnfScannerProviderImpl.java | 273 + .../com/ibm/icu/text/RuleBasedCollator.java | 4679 + .../src/com/ibm/icu/text/StringSearch.java | 3177 + .../icu/util/GlobalizationPreferences.java | 1514 + main/classes/core/.classpath | 6 + .../copy-data-core.launch | 25 + main/classes/core/.project | 
28 + .../core/.settings/org.eclipse.jdt.core.prefs | 345 + .../core/.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/core/build.properties | 6 + main/classes/core/build.xml | 42 + main/classes/core/core-build.launch | 20 + main/classes/core/manifest.stub | 11 + .../core/src/com/ibm/icu/ICUConfig.properties | 23 + .../core/src/com/ibm/icu/impl/Assert.java | 23 + .../core/src/com/ibm/icu/impl/BMPSet.java | 500 + .../core/src/com/ibm/icu/impl/BOCU.java | 378 + .../core/src/com/ibm/icu/impl/CacheBase.java | 39 + .../com/ibm/icu/impl/CalendarAstronomer.java | 1666 + .../src/com/ibm/icu/impl/CalendarCache.java | 127 + .../src/com/ibm/icu/impl/CalendarData.java | 167 + .../src/com/ibm/icu/impl/CalendarUtil.java | 100 + .../core/src/com/ibm/icu/impl/CharTrie.java | 357 + .../icu/impl/CharacterIteratorWrapper.java | 148 + .../src/com/ibm/icu/impl/CurrencyData.java | 152 + .../com/ibm/icu/impl/DateNumberFormat.java | 209 + .../core/src/com/ibm/icu/impl/Differ.java | 171 + .../core/src/com/ibm/icu/impl/Grego.java | 213 + .../core/src/com/ibm/icu/impl/ICUBinary.java | 157 + .../src/com/ibm/icu/impl/ICUBinaryStream.java | 61 + .../core/src/com/ibm/icu/impl/ICUCache.java | 21 + .../core/src/com/ibm/icu/impl/ICUConfig.java | 77 + .../core/src/com/ibm/icu/impl/ICUData.java | 113 + .../src/com/ibm/icu/impl/ICUDataVersion.java | 89 + .../core/src/com/ibm/icu/impl/ICUDebug.java | 129 + .../com/ibm/icu/impl/ICULocaleService.java | 615 + .../core/src/com/ibm/icu/impl/ICULogger.java | 190 + .../src/com/ibm/icu/impl/ICUNotifier.java | 169 + .../core/src/com/ibm/icu/impl/ICURWLock.java | 297 + .../com/ibm/icu/impl/ICUResourceBundle.java | 1422 + .../ibm/icu/impl/ICUResourceBundleImpl.java | 213 + .../ibm/icu/impl/ICUResourceBundleReader.java | 857 + .../ibm/icu/impl/ICUResourceTableAccess.java | 103 + .../core/src/com/ibm/icu/impl/ICUService.java | 985 + .../icu/impl/IllegalIcuArgumentException.java | 32 + .../com/ibm/icu/impl/ImplicitCEGenerator.java | 389 + 
.../core/src/com/ibm/icu/impl/IntTrie.java | 333 + .../src/com/ibm/icu/impl/IntTrieBuilder.java | 792 + .../ibm/icu/impl/InvalidFormatException.java | 21 + .../com/ibm/icu/impl/IterableComparator.java | 59 + .../src/com/ibm/icu/impl/JavaTimeZone.java | 193 + .../ibm/icu/impl/LocaleDisplayNamesImpl.java | 332 + .../src/com/ibm/icu/impl/LocaleIDParser.java | 741 + .../core/src/com/ibm/icu/impl/LocaleIDs.java | 536 + .../src/com/ibm/icu/impl/LocaleUtility.java | 132 + .../src/com/ibm/icu/impl/MultiComparator.java | 36 + .../src/com/ibm/icu/impl/Norm2AllModes.java | 367 + .../src/com/ibm/icu/impl/Normalizer2Impl.java | 2005 + .../src/com/ibm/icu/impl/OlsonTimeZone.java | 1153 + .../icu/impl/PVecToTrieCompactHandler.java | 40 + .../com/ibm/icu/impl/PatternTokenizer.java | 390 + .../com/ibm/icu/impl/PluralRulesLoader.java | 209 + .../src/com/ibm/icu/impl/PropsVectors.java | 559 + .../core/src/com/ibm/icu/impl/Punycode.java | 476 + .../com/ibm/icu/impl/RelativeDateFormat.java | 258 + .../impl/ReplaceableUCharacterIterator.java | 203 + .../ibm/icu/impl/ResourceBundleWrapper.java | 231 + .../core/src/com/ibm/icu/impl/Row.java | 184 + .../ibm/icu/impl/RuleCharacterIterator.java | 346 + .../src/com/ibm/icu/impl/SimpleCache.java | 73 + .../core/src/com/ibm/icu/impl/SoftCache.java | 99 + .../com/ibm/icu/impl/SortedSetRelation.java | 180 + .../ibm/icu/impl/StringPrepDataReader.java | 100 + .../icu/impl/StringUCharacterIterator.java | 226 + .../src/com/ibm/icu/impl/TextTrieMap.java | 275 + .../src/com/ibm/icu/impl/TimeZoneAdapter.java | 147 + .../core/src/com/ibm/icu/impl/Trie.java | 460 + .../core/src/com/ibm/icu/impl/Trie2.java | 1051 + .../src/com/ibm/icu/impl/Trie2Writable.java | 1217 + .../core/src/com/ibm/icu/impl/Trie2_16.java | 255 + .../core/src/com/ibm/icu/impl/Trie2_32.java | 254 + .../src/com/ibm/icu/impl/TrieBuilder.java | 261 + .../src/com/ibm/icu/impl/TrieIterator.java | 530 + .../core/src/com/ibm/icu/impl/UBiDiProps.java | 353 + 
.../core/src/com/ibm/icu/impl/UCaseProps.java | 1490 + .../com/ibm/icu/impl/UCharArrayIterator.java | 84 + .../icu/impl/UCharacterIteratorWrapper.java | 141 + .../src/com/ibm/icu/impl/UCharacterName.java | 1674 + .../ibm/icu/impl/UCharacterNameChoice.java | 31 + .../ibm/icu/impl/UCharacterNameReader.java | 210 + .../com/ibm/icu/impl/UCharacterProperty.java | 1094 + .../icu/impl/UCharacterPropertyReader.java | 162 + .../com/ibm/icu/impl/UCharacterUtility.java | 194 + .../com/ibm/icu/impl/UPropertyAliases.java | 673 + .../core/src/com/ibm/icu/impl/URLHandler.java | 244 + .../src/com/ibm/icu/impl/USerializedSet.java | 185 + .../src/com/ibm/icu/impl/UnicodeRegex.java | 387 + .../ibm/icu/impl/UnicodeSetStringSpan.java | 932 + .../core/src/com/ibm/icu/impl/Utility.java | 1845 + .../core/src/com/ibm/icu/impl/ZoneMeta.java | 932 + .../com/ibm/icu/impl/ZoneStringFormat.java | 1096 + .../ibm/icu/impl/data/BreakIteratorRules.java | 36 + .../icu/impl/data/BreakIteratorRules_th.java | 41 + .../com/ibm/icu/impl/data/HolidayBundle.java | 28 + .../ibm/icu/impl/data/HolidayBundle_da.java | 30 + .../icu/impl/data/HolidayBundle_da_DK.java | 38 + .../ibm/icu/impl/data/HolidayBundle_de.java | 67 + .../icu/impl/data/HolidayBundle_de_AT.java | 43 + .../icu/impl/data/HolidayBundle_de_DE.java | 38 + .../ibm/icu/impl/data/HolidayBundle_el.java | 29 + .../icu/impl/data/HolidayBundle_el_GR.java | 39 + .../ibm/icu/impl/data/HolidayBundle_en.java | 28 + .../icu/impl/data/HolidayBundle_en_CA.java | 40 + .../icu/impl/data/HolidayBundle_en_GB.java | 36 + .../icu/impl/data/HolidayBundle_en_US.java | 46 + .../ibm/icu/impl/data/HolidayBundle_es.java | 50 + .../icu/impl/data/HolidayBundle_es_MX.java | 34 + .../ibm/icu/impl/data/HolidayBundle_fr.java | 43 + .../icu/impl/data/HolidayBundle_fr_CA.java | 37 + .../icu/impl/data/HolidayBundle_fr_FR.java | 36 + .../ibm/icu/impl/data/HolidayBundle_it.java | 35 + .../icu/impl/data/HolidayBundle_it_IT.java | 35 + .../ibm/icu/impl/data/HolidayBundle_iw.java | 20 
+ .../icu/impl/data/HolidayBundle_iw_IL.java | 28 + .../icu/impl/data/HolidayBundle_ja_JP.java | 22 + .../com/ibm/icu/impl/data/ResourceReader.java | 242 + .../com/ibm/icu/impl/data/TokenIterator.java | 159 + .../src/com/ibm/icu/impl/data/package.html | 12 + .../impl/duration/BasicDurationFormat.java | 177 + .../impl/duration/BasicDurationFormatter.java | 118 + .../BasicDurationFormatterFactory.java | 246 + .../duration/BasicPeriodBuilderFactory.java | 514 + .../impl/duration/BasicPeriodFormatter.java | 192 + .../duration/BasicPeriodFormatterFactory.java | 234 + .../duration/BasicPeriodFormatterService.java | 62 + .../ibm/icu/impl/duration/DateFormatter.java | 50 + .../icu/impl/duration/DurationFormatter.java | 76 + .../duration/DurationFormatterFactory.java | 79 + .../src/com/ibm/icu/impl/duration/Period.java | 373 + .../ibm/icu/impl/duration/PeriodBuilder.java | 53 + .../impl/duration/PeriodBuilderFactory.java | 131 + .../icu/impl/duration/PeriodFormatter.java | 41 + .../impl/duration/PeriodFormatterFactory.java | 74 + .../impl/duration/PeriodFormatterService.java | 46 + .../com/ibm/icu/impl/duration/TimeUnit.java | 85 + .../icu/impl/duration/TimeUnitConstants.java | 37 + .../icu/impl/duration/impl/DataRecord.java | 311 + .../duration/impl/PeriodFormatterData.java | 661 + .../impl/PeriodFormatterDataService.java | 31 + .../icu/impl/duration/impl/RecordReader.java | 23 + .../icu/impl/duration/impl/RecordWriter.java | 23 + ...sourceBasedPeriodFormatterDataService.java | 162 + .../com/ibm/icu/impl/duration/impl/Utils.java | 224 + .../impl/duration/impl/XMLRecordReader.java | 306 + .../impl/duration/impl/XMLRecordWriter.java | 250 + .../impl/duration/impl/YMDDateFormatter.java | 98 + .../ibm/icu/impl/duration/impl/data/index.txt | 19 + .../icu/impl/duration/impl/data/pfd_ar_EG.xml | 118 + .../duration/impl/data/pfd_ar_EG.xml.escaped | 118 + .../icu/impl/duration/impl/data/pfd_en.xml | 128 + .../icu/impl/duration/impl/data/pfd_es.xml | 150 + 
.../icu/impl/duration/impl/data/pfd_fr.xml | 105 + .../icu/impl/duration/impl/data/pfd_he_IL.xml | 129 + .../icu/impl/duration/impl/data/pfd_hi.xml | 105 + .../icu/impl/duration/impl/data/pfd_it.xml | 175 + .../icu/impl/duration/impl/data/pfd_ja.xml | 128 + .../icu/impl/duration/impl/data/pfd_ko.xml | 93 + .../icu/impl/duration/impl/data/pfd_ru.xml | 143 + .../icu/impl/duration/impl/data/pfd_th.xml | 118 + .../impl/duration/impl/data/pfd_zh_Hans.xml | 132 + .../duration/impl/data/pfd_zh_Hans_SG.xml | 130 + .../impl/duration/impl/data/pfd_zh_Hant.xml | 130 + .../duration/impl/data/pfd_zh_Hant_HK.xml | 116 + .../com/ibm/icu/impl/locale/AsciiUtil.java | 180 + .../com/ibm/icu/impl/locale/BaseLocale.java | 221 + .../com/ibm/icu/impl/locale/Extension.java | 114 + .../impl/locale/InternalLocaleBuilder.java | 284 + .../com/ibm/icu/impl/locale/LanguageTag.java | 897 + .../ibm/icu/impl/locale/LocaleExtensions.java | 190 + .../icu/impl/locale/LocaleObjectCache.java | 78 + .../impl/locale/LocaleSyntaxException.java | 27 + .../icu/impl/locale/PrivateuseExtension.java | 53 + .../icu/impl/locale/StringTokenIterator.java | 93 + .../impl/locale/UnicodeLocaleExtension.java | 207 + .../core/src/com/ibm/icu/lang/UCharacter.java | 6404 + .../com/ibm/icu/lang/UCharacterCategory.java | 112 + .../com/ibm/icu/lang/UCharacterDirection.java | 84 + .../src/com/ibm/icu/lang/UCharacterEnums.java | 491 + .../ibm/icu/lang/UCharacterNameIterator.java | 336 + .../ibm/icu/lang/UCharacterTypeIterator.java | 62 + .../core/src/com/ibm/icu/lang/UProperty.java | 878 + .../core/src/com/ibm/icu/lang/UScript.java | 910 + .../core/src/com/ibm/icu/lang/UScriptRun.java | 628 + .../core/src/com/ibm/icu/lang/package.html | 16 + .../core/src/com/ibm/icu/math/BigDecimal.java | 3882 + .../src/com/ibm/icu/math/MathContext.java | 601 + .../core/src/com/ibm/icu/math/package.html | 16 + .../src/com/ibm/icu/text/ArabicShaping.java | 1941 + .../ibm/icu/text/ArabicShapingException.java | 26 + 
.../core/src/com/ibm/icu/text/Bidi.java | 4851 + .../src/com/ibm/icu/text/BidiClassifier.java | 96 + .../core/src/com/ibm/icu/text/BidiLine.java | 1236 + .../core/src/com/ibm/icu/text/BidiRun.java | 153 + .../core/src/com/ibm/icu/text/BidiWriter.java | 430 + .../com/ibm/icu/text/BreakCTDictionary.java | 255 + .../src/com/ibm/icu/text/BreakDictionary.java | 306 + .../src/com/ibm/icu/text/BreakIterator.java | 891 + .../ibm/icu/text/BreakIteratorFactory.java | 175 + .../com/ibm/icu/text/CanonicalIterator.java | 529 + .../src/com/ibm/icu/text/CharsetDetector.java | 525 + .../src/com/ibm/icu/text/CharsetMatch.java | 262 + .../com/ibm/icu/text/CharsetRecog_2022.java | 164 + .../com/ibm/icu/text/CharsetRecog_UTF8.java | 97 + .../ibm/icu/text/CharsetRecog_Unicode.java | 153 + .../com/ibm/icu/text/CharsetRecog_mbcs.java | 540 + .../com/ibm/icu/text/CharsetRecog_sbcs.java | 1277 + .../com/ibm/icu/text/CharsetRecognizer.java | 53 + .../com/ibm/icu/text/ChineseDateFormat.java | 283 + .../icu/text/ChineseDateFormatSymbols.java | 105 + .../com/ibm/icu/text/ComposedCharIter.java | 164 + .../ibm/icu/text/CurrencyDisplayNames.java | 112 + .../src/com/ibm/icu/text/CurrencyFormat.java | 62 + .../com/ibm/icu/text/CurrencyMetaInfo.java | 520 + .../com/ibm/icu/text/CurrencyPluralInfo.java | 304 + .../core/src/com/ibm/icu/text/DateFormat.java | 1810 + .../com/ibm/icu/text/DateFormatSymbols.java | 1705 + .../com/ibm/icu/text/DateIntervalFormat.java | 1615 + .../com/ibm/icu/text/DateIntervalInfo.java | 937 + .../icu/text/DateTimePatternGenerator.java | 2035 + .../src/com/ibm/icu/text/DecimalFormat.java | 5519 + .../ibm/icu/text/DecimalFormatSymbols.java | 1178 + .../core/src/com/ibm/icu/text/DecompData.java | 746 + .../text/DictionaryBasedBreakIterator.java | 564 + .../core/src/com/ibm/icu/text/DigitList.java | 862 + .../src/com/ibm/icu/text/DurationFormat.java | 114 + .../com/ibm/icu/text/FilteredNormalizer2.java | 263 + .../core/src/com/ibm/icu/text/IDNA.java | 948 + 
.../com/ibm/icu/text/LocaleDisplayNames.java | 190 + .../src/com/ibm/icu/text/MeasureFormat.java | 56 + .../src/com/ibm/icu/text/MessageFormat.java | 2359 + .../core/src/com/ibm/icu/text/NFRule.java | 1347 + .../core/src/com/ibm/icu/text/NFRuleSet.java | 793 + .../src/com/ibm/icu/text/NFSubstitution.java | 1867 + .../core/src/com/ibm/icu/text/Normalizer.java | 2462 + .../src/com/ibm/icu/text/Normalizer2.java | 311 + .../src/com/ibm/icu/text/NumberFormat.java | 1797 + .../ibm/icu/text/NumberFormatServiceShim.java | 112 + .../src/com/ibm/icu/text/NumberingSystem.java | 285 + .../src/com/ibm/icu/text/PluralFormat.java | 644 + .../src/com/ibm/icu/text/PluralRules.java | 814 + .../core/src/com/ibm/icu/text/Quantifier.java | 115 + .../src/com/ibm/icu/text/RBBIDataWrapper.java | 514 + .../core/src/com/ibm/icu/text/RBBINode.java | 355 + .../src/com/ibm/icu/text/RBBIRuleBuilder.java | 334 + .../com/ibm/icu/text/RBBIRuleParseTable.java | 176 + .../src/com/ibm/icu/text/RBBIRuleScanner.java | 1072 + .../src/com/ibm/icu/text/RBBISetBuilder.java | 561 + .../src/com/ibm/icu/text/RBBISymbolTable.java | 201 + .../com/ibm/icu/text/RBBITableBuilder.java | 1013 + .../icu/text/RBNFChinesePostProcessor.java | 159 + .../com/ibm/icu/text/RBNFPostProcessor.java | 29 + .../com/ibm/icu/text/RbnfLenientScanner.java | 58 + .../icu/text/RbnfLenientScannerProvider.java | 30 + .../src/com/ibm/icu/text/Replaceable.java | 186 + .../icu/text/ReplaceableContextIterator.java | 196 + .../com/ibm/icu/text/ReplaceableString.java | 197 + .../ibm/icu/text/RuleBasedBreakIterator.java | 1373 + .../ibm/icu/text/RuleBasedNumberFormat.java | 1800 + .../core/src/com/ibm/icu/text/SCSU.java | 184 + .../src/com/ibm/icu/text/SearchIterator.java | 788 + .../src/com/ibm/icu/text/SelectFormat.java | 550 + .../com/ibm/icu/text/SimpleDateFormat.java | 3189 + .../ibm/icu/text/StringCharacterIterator.java | 281 + .../core/src/com/ibm/icu/text/StringPrep.java | 620 + .../icu/text/StringPrepParseException.java | 220 + 
.../src/com/ibm/icu/text/StringTransform.java | 24 + .../src/com/ibm/icu/text/SymbolTable.java | 86 + .../com/ibm/icu/text/ThaiBreakIterator.java | 382 + .../src/com/ibm/icu/text/TimeUnitFormat.java | 507 + .../core/src/com/ibm/icu/text/Transform.java | 28 + .../com/ibm/icu/text/UCharacterIterator.java | 409 + .../core/src/com/ibm/icu/text/UFormat.java | 108 + .../icu/text/UForwardCharacterIterator.java | 91 + .../core/src/com/ibm/icu/text/UTF16.java | 2678 + .../com/ibm/icu/text/UnicodeCompressor.java | 1006 + .../com/ibm/icu/text/UnicodeDecompressor.java | 557 + .../src/com/ibm/icu/text/UnicodeFilter.java | 71 + .../src/com/ibm/icu/text/UnicodeMatcher.java | 138 + .../src/com/ibm/icu/text/UnicodeReplacer.java | 64 + .../core/src/com/ibm/icu/text/UnicodeSet.java | 4581 + .../com/ibm/icu/text/UnicodeSetIterator.java | 266 + .../core/src/com/ibm/icu/text/package.html | 25 + .../com/ibm/icu/util/AnnualTimeZoneRule.java | 264 + .../src/com/ibm/icu/util/BasicTimeZone.java | 585 + .../com/ibm/icu/util/BuddhistCalendar.java | 246 + .../com/ibm/icu/util/ByteArrayWrapper.java | 283 + .../core/src/com/ibm/icu/util/CECalendar.java | 272 + .../core/src/com/ibm/icu/util/Calendar.java | 5565 + .../com/ibm/icu/util/CalendarServiceShim.java | 107 + .../ibm/icu/util/CaseInsensitiveString.java | 100 + .../src/com/ibm/icu/util/ChineseCalendar.java | 978 + .../com/ibm/icu/util/CompactByteArray.java | 384 + .../com/ibm/icu/util/CompactCharArray.java | 410 + .../src/com/ibm/icu/util/CopticCalendar.java | 321 + .../core/src/com/ibm/icu/util/Currency.java | 820 + .../src/com/ibm/icu/util/CurrencyAmount.java | 53 + .../com/ibm/icu/util/CurrencyServiceShim.java | 75 + .../src/com/ibm/icu/util/DateInterval.java | 85 + .../core/src/com/ibm/icu/util/DateRule.java | 80 + .../src/com/ibm/icu/util/DateTimeRule.java | 327 + .../src/com/ibm/icu/util/EasterHoliday.java | 290 + .../com/ibm/icu/util/EthiopicCalendar.java | 389 + .../core/src/com/ibm/icu/util/Freezable.java | 321 + 
.../com/ibm/icu/util/GregorianCalendar.java | 905 + .../src/com/ibm/icu/util/HebrewCalendar.java | 877 + .../src/com/ibm/icu/util/HebrewHoliday.java | 185 + .../core/src/com/ibm/icu/util/Holiday.java | 211 + .../icu/util/IllformedLocaleException.java | 56 + .../src/com/ibm/icu/util/IndianCalendar.java | 574 + .../com/ibm/icu/util/InitialTimeZoneRule.java | 103 + .../src/com/ibm/icu/util/IslamicCalendar.java | 667 + .../com/ibm/icu/util/JapaneseCalendar.java | 664 + .../core/src/com/ibm/icu/util/LocaleData.java | 347 + .../src/com/ibm/icu/util/LocaleMatcher.java | 673 + .../com/ibm/icu/util/LocalePriorityList.java | 366 + .../core/src/com/ibm/icu/util/Measure.java | 120 + .../src/com/ibm/icu/util/MeasureUnit.java | 27 + .../src/com/ibm/icu/util/OverlayBundle.java | 176 + .../src/com/ibm/icu/util/RangeDateRule.java | 168 + .../com/ibm/icu/util/RangeValueIterator.java | 110 + .../com/ibm/icu/util/RuleBasedTimeZone.java | 690 + .../core/src/com/ibm/icu/util/STZInfo.java | 85 + .../src/com/ibm/icu/util/SimpleDateRule.java | 229 + .../src/com/ibm/icu/util/SimpleHoliday.java | 265 + .../src/com/ibm/icu/util/SimpleTimeZone.java | 1343 + .../src/com/ibm/icu/util/StringTokenizer.java | 689 + .../src/com/ibm/icu/util/TaiwanCalendar.java | 252 + .../ibm/icu/util/TimeArrayTimeZoneRule.java | 182 + .../core/src/com/ibm/icu/util/TimeUnit.java | 60 + .../src/com/ibm/icu/util/TimeUnitAmount.java | 42 + .../core/src/com/ibm/icu/util/TimeZone.java | 952 + .../src/com/ibm/icu/util/TimeZoneRule.java | 184 + .../com/ibm/icu/util/TimeZoneTransition.java | 83 + .../core/src/com/ibm/icu/util/ULocale.java | 3314 + .../src/com/ibm/icu/util/UResourceBundle.java | 1078 + .../ibm/icu/util/UResourceBundleIterator.java | 95 + .../util/UResourceTypeMismatchException.java | 28 + .../com/ibm/icu/util/UniversalTimeScale.java | 653 + .../core/src/com/ibm/icu/util/VTimeZone.java | 2022 + .../src/com/ibm/icu/util/ValueIterator.java | 109 + .../src/com/ibm/icu/util/VersionInfo.java | 499 + 
.../core/src/com/ibm/icu/util/package.html | 12 + main/classes/currdata/.classpath | 7 + .../copy-data-currdata.launch | 25 + main/classes/currdata/.project | 29 + .../.settings/org.eclipse.jdt.core.prefs | 345 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/currdata/build.properties | 6 + main/classes/currdata/build.xml | 35 + main/classes/currdata/currdata-build.launch | 20 + main/classes/currdata/manifest.stub | 11 + .../impl/ICUCurrencyDisplayInfoProvider.java | 279 + .../com/ibm/icu/impl/ICUCurrencyMetaInfo.java | 252 + main/classes/langdata/.classpath | 7 + .../copy-data-langdata.launch | 25 + main/classes/langdata/.project | 29 + .../.settings/org.eclipse.jdt.core.prefs | 345 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/langdata/build.properties | 6 + main/classes/langdata/build.xml | 35 + main/classes/langdata/langdata-build.launch | 20 + main/classes/langdata/manifest.stub | 11 + .../com/ibm/icu/impl/ICULangDataTables.java | 13 + main/classes/localespi/.classpath | 8 + main/classes/localespi/.project | 20 + .../.settings/org.eclipse.jdt.core.prefs | 330 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/localespi/build.properties | 9 + main/classes/localespi/build.xml | 29 + main/classes/localespi/localespi-build.launch | 20 + main/classes/localespi/manifest.stub | 11 + main/classes/localespi/readme.html | 280 + .../java.text.spi.BreakIteratorProvider | 3 + .../services/java.text.spi.CollatorProvider | 3 + .../services/java.text.spi.DateFormatProvider | 3 + .../java.text.spi.DateFormatSymbolsProvider | 3 + ...java.text.spi.DecimalFormatSymbolsProvider | 3 + .../java.text.spi.NumberFormatProvider | 3 + .../java.util.spi.CurrencyNameProvider | 3 + .../services/java.util.spi.LocaleNameProvider | 3 + .../java.util.spi.TimeZoneNameProvider | 3 + .../icu/impl/icuadapter/NumberFormatJDK.java | 283 + .../ibm/icu/impl/icuadapter/TimeZoneJDK.java | 174 + .../javaspi/ICULocaleServiceProvider.java | 183 + 
.../ICULocaleServiceProviderConfig.properties | 30 + .../text/BreakIteratorProviderICU.java | 51 + .../javaspi/text/CollatorProviderICU.java | 30 + .../javaspi/text/DateFormatProviderICU.java | 68 + .../text/DateFormatSymbolsProviderICU.java | 30 + .../text/DecimalFormatSymbolsProviderICU.java | 31 + .../javaspi/text/NumberFormatProviderICU.java | 88 + .../javaspi/util/CurrencyNameProviderICU.java | 32 + .../javaspi/util/LocaleNameProviderICU.java | 47 + .../javaspi/util/TimeZoneNameProviderICU.java | 57 + .../icu/impl/jdkadapter/BreakIteratorICU.java | 100 + .../ibm/icu/impl/jdkadapter/CalendarICU.java | 329 + .../icu/impl/jdkadapter/CollationKeyICU.java | 62 + .../ibm/icu/impl/jdkadapter/CollatorICU.java | 158 + .../impl/jdkadapter/DateFormatSymbolsICU.java | 131 + .../icu/impl/jdkadapter/DecimalFormatICU.java | 449 + .../jdkadapter/DecimalFormatSymbolsICU.java | 212 + .../icu/impl/jdkadapter/NumberFormatICU.java | 227 + .../impl/jdkadapter/SimpleDateFormatICU.java | 373 + .../ibm/icu/impl/jdkadapter/TimeZoneICU.java | 105 + main/classes/regiondata/.classpath | 7 + .../copy-data-regiondata.launch | 25 + main/classes/regiondata/.project | 29 + .../.settings/org.eclipse.jdt.core.prefs | 345 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/regiondata/build.properties | 6 + main/classes/regiondata/build.xml | 35 + main/classes/regiondata/manifest.stub | 11 + .../regiondata/regiondata-build.launch | 20 + .../com/ibm/icu/impl/ICURegionDataTables.java | 13 + main/classes/translit/.classpath | 7 + .../copy-data-translit.launch | 25 + main/classes/translit/.project | 29 + .../.settings/org.eclipse.jdt.core.prefs | 345 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/classes/translit/build.properties | 6 + main/classes/translit/build.xml | 35 + main/classes/translit/manifest.stub | 11 + .../com/ibm/icu/impl/UtilityExtensions.java | 113 + .../com/ibm/icu/text/AnyTransliterator.java | 406 + .../com/ibm/icu/text/BreakTransliterator.java | 391 + 
.../ibm/icu/text/CaseFoldTransliterator.java | 111 + .../ibm/icu/text/CompoundTransliterator.java | 537 + .../ibm/icu/text/EscapeTransliterator.java | 200 + .../com/ibm/icu/text/FunctionReplacer.java | 84 + .../ibm/icu/text/LowercaseTransliterator.java | 118 + .../icu/text/NameUnicodeTransliterator.java | 171 + .../icu/text/NormalizationTransliterator.java | 132 + .../com/ibm/icu/text/NullTransliterator.java | 33 + .../ibm/icu/text/RemoveTransliterator.java | 52 + .../ibm/icu/text/RuleBasedTransliterator.java | 483 + .../src/com/ibm/icu/text/StringMatcher.java | 289 + .../src/com/ibm/icu/text/StringReplacer.java | 333 + .../ibm/icu/text/TitlecaseTransliterator.java | 156 + .../ibm/icu/text/TransformTransliterator.java | 127 + .../com/ibm/icu/text/TransliterationRule.java | 572 + .../ibm/icu/text/TransliterationRuleSet.java | 257 + .../src/com/ibm/icu/text/Transliterator.java | 1917 + .../ibm/icu/text/TransliteratorIDParser.java | 759 + .../ibm/icu/text/TransliteratorParser.java | 1558 + .../ibm/icu/text/TransliteratorRegistry.java | 917 + .../ibm/icu/text/UnescapeTransliterator.java | 251 + .../icu/text/UnicodeNameTransliterator.java | 73 + .../ibm/icu/text/UppercaseTransliterator.java | 114 + main/classes/translit/translit-build.launch | 26 + main/shared/.project | 11 + main/shared/build/common-targets.xml | 256 + main/shared/build/common.properties | 56 + .../shared/build/locations-eclipse.properties | 27 + main/shared/build/locations.properties | 26 + .../Transliterator_Han_Latin_Definition.txt | 55798 ++++++ .../data/Transliterator_Han_Latin_EDICT.txt | 116016 +++++++++++ main/shared/data/icudata.jar | 3 + main/shared/data/security.policy | 52 + main/shared/data/testdata.jar | 3 + main/shared/licenses/license.html | 51 + main/shared/licenses/unicode-license.txt | 48 + main/tests/charset/.classpath | 9 + .../copy-charset-test-data.launch | 25 + main/tests/charset/.project | 31 + .../.settings/org.eclipse.jdt.core.prefs | 330 + 
.../.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/charset/build.properties | 5 + main/tests/charset/build.xml | 38 + main/tests/charset/charset-tests-build.launch | 20 + main/tests/charset/manifest.stub | 11 + .../com/ibm/icu/dev/test/charset/TestAll.java | 34 + .../ibm/icu/dev/test/charset/TestCharset.java | 5595 + .../icu/dev/test/charset/TestConversion.java | 1147 + .../icu/dev/test/charset/TestSelection.java | 950 + main/tests/collate/.classpath | 13 + .../copy-collate-test-data.launch | 25 + main/tests/collate/.project | 35 + .../.settings/org.eclipse.jdt.core.prefs | 332 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/collate/build.properties | 5 + main/tests/collate/build.xml | 43 + main/tests/collate/collate-tests-build.launch | 20 + main/tests/collate/manifest.stub | 11 + .../CollationTest_NON_IGNORABLE_SHORT.txt | 152859 +++++++++++++++ .../dev/data/CollationTest_SHIFTED_SHORT.txt | 152859 +++++++++++++++ .../src/com/ibm/icu/dev/data/riwords.txt | 32943 ++++ .../com/ibm/icu/dev/test/TestAllCollate.java | 35 + .../dev/test/collator/CollationAPITest.java | 1254 + .../test/collator/CollationChineseTest.java | 47 + .../collator/CollationCreationMethodTest.java | 122 + .../test/collator/CollationCurrencyTest.java | 190 + .../dev/test/collator/CollationDummyTest.java | 517 + .../test/collator/CollationEnglishTest.java | 424 + .../test/collator/CollationFinnishTest.java | 172 + .../test/collator/CollationFrenchTest.java | 281 + .../test/collator/CollationGermanTest.java | 225 + .../test/collator/CollationIteratorTest.java | 650 + .../dev/test/collator/CollationKanaTest.java | 286 + .../dev/test/collator/CollationMiscTest.java | 2647 + .../test/collator/CollationMonkeyTest.java | 320 + .../collator/CollationRegressionTest.java | 1196 + .../test/collator/CollationServiceTest.java | 512 + .../test/collator/CollationSpanishTest.java | 183 + .../icu/dev/test/collator/CollationTest.java | 466 + .../dev/test/collator/CollationThaiTest.java | 361 + 
.../test/collator/CollationThreadTest.java | 243 + .../test/collator/CollationTurkishTest.java | 190 + .../dev/test/collator/G7CollationTest.java | 295 + .../test/collator/IndexCharactersTest.java | 93 + .../collator/LotusCollationKoreanTest.java | 149 + .../ibm/icu/dev/test/collator/TestAll.java | 56 + .../icu/dev/test/collator/TestComparator.java | 74 + .../dev/test/collator/UCAConformanceTest.java | 243 + .../format/GlobalizationPreferencesTest.java | 1567 + .../test/format/RbnfLenientScannerTest.java | 223 + .../ibm/icu/dev/test/search/SearchTest.java | 2017 + .../com/ibm/icu/dev/test/search/package.html | 12 + .../util/ICUResourceBundleCollationTest.java | 178 + .../test/util/LocaleAliasCollationTest.java | 72 + .../dev/test/util/ULocaleCollationTest.java | 223 + main/tests/core/.classpath | 11 + .../copy-test-data.launch | 25 + main/tests/core/.project | 33 + .../core/.settings/org.eclipse.jdt.core.prefs | 330 + .../core/.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/core/build.properties | 5 + main/tests/core/build.xml | 39 + main/tests/core/core-tests-build.launch | 20 + main/tests/core/manifest.stub | 11 + .../com/ibm/icu/dev/data/IDNATestInput.txt | 1266 + .../dev/data/TestDataElements_testtypes.java | 140 + .../com/ibm/icu/dev/data/rbbi/english.dict | Bin 0 -> 1123 bytes .../src/com/ibm/icu/dev/data/rbbi/words.txt | 2990 + .../dev/data/resources/TestDataElements.java | 34 + .../resources/TestDataElements_en.properties | 10 + .../TestDataElements_en_Latn.properties | 9 + .../TestDataElements_en_Latn_US.java | 21 + .../resources/TestDataElements_en_US.java | 23 + .../TestDataElements_fr_Latn_FR.java | 24 + .../data/resources/TestDataElements_te.java | 21 + .../icu/dev/data/resources/TestMessages.java | 20 + .../data/resources/testmessages.properties | 5 + .../core/src/com/ibm/icu/dev/data/thai6.ucs | Bin 0 -> 363306 bytes .../data/unicode/CompositionExclusions.txt | 197 + .../data/unicode/NormalizationCorrections.txt | 48 + 
.../data/unicode/NormalizationTest-3.2.0.txt | 17042 ++ .../dev/data/unicode/NormalizationTest.txt | 18116 ++ .../icu/dev/data/unicode/SpecialCasing.txt | 274 + .../ibm/icu/dev/data/unicode/UnicodeData.txt | 21829 ++ .../com/ibm/icu/dev/data/unicode/ucdterms.txt | 45 + .../src/com/ibm/icu/dev/test/TestAllCore.java | 43 + .../com/ibm/icu/dev/test/bidi/BidiTest.java | 494 + .../com/ibm/icu/dev/test/bidi/TestAll.java | 40 + .../com/ibm/icu/dev/test/bidi/TestBidi.java | 533 + .../dev/test/bidi/TestCharFromDirProp.java | 44 + .../icu/dev/test/bidi/TestClassOverride.java | 144 + .../icu/dev/test/bidi/TestCompatibility.java | 296 + .../com/ibm/icu/dev/test/bidi/TestData.java | 258 + .../dev/test/bidi/TestFailureRecovery.java | 127 + .../ibm/icu/dev/test/bidi/TestInverse.java | 273 + .../dev/test/bidi/TestMultipleParagraphs.java | 462 + .../ibm/icu/dev/test/bidi/TestReorder.java | 261 + .../dev/test/bidi/TestReorderRunsOnly.java | 190 + .../icu/dev/test/bidi/TestReorderingMode.java | 713 + .../ibm/icu/dev/test/bidi/TestStreaming.java | 149 + .../icu/dev/test/bigdec/DiagBigDecimal.java | 5621 + .../ibm/icu/dev/test/calendar/AstroTest.java | 386 + .../dev/test/calendar/CalendarRegression.java | 2158 + .../icu/dev/test/calendar/CalendarTest.java | 445 + .../icu/dev/test/calendar/ChineseTest.java | 691 + .../dev/test/calendar/ChineseTestCase.java | 54 + .../dev/test/calendar/CompatibilityTest.java | 1126 + .../ibm/icu/dev/test/calendar/CopticTest.java | 421 + .../test/calendar/DataDrivenCalendarTest.java | 296 + .../icu/dev/test/calendar/EthiopicTest.java | 478 + .../ibm/icu/dev/test/calendar/HebrewTest.java | 499 + .../icu/dev/test/calendar/HolidayTest.java | 226 + .../dev/test/calendar/IBMCalendarTest.java | 1038 + .../ibm/icu/dev/test/calendar/IndianTest.java | 243 + .../icu/dev/test/calendar/IslamicTest.java | 267 + .../icu/dev/test/calendar/JapaneseTest.java | 370 + .../ibm/icu/dev/test/calendar/TestAll.java | 40 + .../ibm/icu/dev/test/calendar/TestCase.java | 242 + 
.../ibm/icu/dev/test/calendar/package.html | 12 + .../test/charsetdet/CharsetDetectionTests.xml | 551 + .../test/charsetdet/TestCharsetDetector.java | 634 + .../com/ibm/icu/dev/test/cldr/TestAll.java | 29 + .../ibm/icu/dev/test/cldr/TestCLDRVsICU.java | 639 + .../test/compression/DecompressionTest.java | 237 + .../dev/test/compression/ExhaustiveTest.java | 522 + .../ibm/icu/dev/test/compression/TestAll.java | 27 + .../ibm/icu/dev/test/compression/package.html | 12 + .../dev/test/duration/DataReadWriteTest.java | 354 + .../dev/test/duration/ICUDurationTest.java | 264 + .../dev/test/duration/LanguageTestRoot.java | 805 + .../duration/PeriodBuilderFactoryTest.java | 90 + .../dev/test/duration/PeriodBuilderTest.java | 22 + .../ibm/icu/dev/test/duration/PeriodTest.java | 108 + .../icu/dev/test/duration/RegressionTest.java | 73 + ...ceBasedPeriodFormatterDataServiceTest.java | 38 + .../ibm/icu/dev/test/duration/TestAll.java | 38 + .../dev/test/duration/languages/TestAll.java | 44 + .../test/duration/languages/Test_ar_EG.java | 33 + .../dev/test/duration/languages/Test_en.java | 29 + .../dev/test/duration/languages/Test_es.java | 32 + .../dev/test/duration/languages/Test_fr.java | 32 + .../test/duration/languages/Test_he_IL.java | 32 + .../dev/test/duration/languages/Test_hi.java | 64 + .../dev/test/duration/languages/Test_it.java | 32 + .../dev/test/duration/languages/Test_ja.java | 68 + .../dev/test/duration/languages/Test_ko.java | 32 + .../dev/test/duration/languages/Test_ru.java | 33 + .../test/duration/languages/Test_zh_Hans.java | 33 + .../duration/languages/Test_zh_Hans_SG.java | 32 + .../test/duration/languages/Test_zh_Hant.java | 33 + .../duration/languages/Test_zh_Hant_HK.java | 33 + .../test/duration/testdata/testdata_ar_EG.txt | 401 + .../test/duration/testdata/testdata_en.txt | 401 + .../test/duration/testdata/testdata_es.txt | 401 + .../test/duration/testdata/testdata_fr.txt | 401 + .../test/duration/testdata/testdata_he_IL.txt | 401 + 
.../test/duration/testdata/testdata_hi.txt | 401 + .../test/duration/testdata/testdata_it.txt | 401 + .../test/duration/testdata/testdata_ja.txt | 401 + .../test/duration/testdata/testdata_ko.txt | 401 + .../test/duration/testdata/testdata_ru.txt | 401 + .../test/duration/testdata/testdata_th.txt | 401 + .../duration/testdata/testdata_zh_Hans.txt | 401 + .../duration/testdata/testdata_zh_Hans_SG.txt | 401 + .../duration/testdata/testdata_zh_Hant.txt | 401 + .../duration/testdata/testdata_zh_Hant_HK.txt | 401 + .../dev/test/format/BigNumberFormatTest.java | 450 + .../dev/test/format/DataDrivenFormatTest.java | 180 + .../dev/test/format/DateFormatMiscTests.java | 155 + .../test/format/DateFormatRegressionTest.java | 1140 + .../format/DateFormatRegressionTestJ.java | 291 + .../test/format/DateFormatRoundTripTest.java | 275 + .../icu/dev/test/format/DateFormatTest.java | 3680 + .../test/format/DateIntervalFormatTest.java | 1211 + .../test/format/DateTimeGeneratorTest.java | 1105 + .../dev/test/format/IntlTestDateFormat.java | 271 + .../test/format/IntlTestDateFormatAPI.java | 223 + .../test/format/IntlTestDateFormatAPIC.java | 160 + .../format/IntlTestDateFormatSymbols.java | 480 + .../test/format/IntlTestDecimalFormatAPI.java | 393 + .../format/IntlTestDecimalFormatAPIC.java | 513 + .../format/IntlTestDecimalFormatSymbols.java | 184 + .../format/IntlTestDecimalFormatSymbolsC.java | 143 + .../dev/test/format/IntlTestNumberFormat.java | 289 + .../test/format/IntlTestNumberFormatAPI.java | 226 + .../format/IntlTestSimpleDateFormatAPI.java | 201 + .../dev/test/format/MessageRegression.java | 839 + .../format/NumberFormatRegistrationTest.java | 117 + .../format/NumberFormatRegressionTest.java | 329 + .../format/NumberFormatRoundTripTest.java | 236 + .../format/NumberFormatSerialTestData.java | 306 + .../icu/dev/test/format/NumberFormatTest.java | 2611 + .../dev/test/format/NumberFormatTestCases.txt | 122 + .../icu/dev/test/format/NumberRegression.java | 1865 + 
.../icu/dev/test/format/PluralFormatTest.java | 267 + .../dev/test/format/PluralFormatUnitTest.java | 253 + .../icu/dev/test/format/PluralRulesTest.java | 233 + .../icu/dev/test/format/RBNFParseTest.java | 159 + .../dev/test/format/RbnfRoundTripTest.java | 219 + .../com/ibm/icu/dev/test/format/RbnfTest.java | 1280 + .../dev/test/format/SelectFormatAPITest.java | 212 + .../dev/test/format/SelectFormatUnitTest.java | 176 + .../com/ibm/icu/dev/test/format/TestAll.java | 130 + .../dev/test/format/TestMessageFormat.java | 1685 + .../ibm/icu/dev/test/format/TimeUnitTest.java | 209 + .../icu/dev/test/format/TimeZoneAliases.txt | 165 + .../dev/test/format/TimeZoneFormatTest.java | 358 + .../WriteNumberFormatSerialTestData.java | 92 + .../com/ibm/icu/dev/test/impl/TestAll.java | 33 + .../test/iterator/TestUCharacterIterator.java | 485 + .../com/ibm/icu/dev/test/lang/TestAll.java | 32 + .../ibm/icu/dev/test/lang/TestCharacter.java | 30 + .../ibm/icu/dev/test/lang/TestUScript.java | 419 + .../ibm/icu/dev/test/lang/TestUScriptRun.java | 430 + .../icu/dev/test/lang/UCharacterCaseTest.java | 952 + .../dev/test/lang/UCharacterCategoryTest.java | 86 + .../icu/dev/test/lang/UCharacterCompare.java | 309 + .../test/lang/UCharacterDirectionTest.java | 80 + .../test/lang/UCharacterSurrogateTest.java | 422 + .../ibm/icu/dev/test/lang/UCharacterTest.java | 3293 + .../dev/test/lang/UCharacterThreadTest.java | 88 + .../dev/test/lang/UPropertyAliasesTest.java | 128 + .../com/ibm/icu/dev/test/lang/UTF16Test.java | 1655 + .../test/lang/UnicodeSetStringSpanTest.java | 1131 + .../ibm/icu/dev/test/lang/UnicodeSetTest.java | 2375 + .../icu/dev/test/normalizer/BasicTest.java | 2938 + .../dev/test/normalizer/ConformanceTest.java | 517 + .../icu/dev/test/normalizer/IntHashtable.java | 38 + .../test/normalizer/IntStringHashtable.java | 38 + .../dev/test/normalizer/LongHashtable.java | 40 + .../normalizer/NormalizationMonkeyTest.java | 110 + .../test/normalizer/NormalizerBuilder.java | 507 + 
.../dev/test/normalizer/NormalizerData.java | 137 + .../normalizer/NormalizerRegressionTests.java | 46 + .../ibm/icu/dev/test/normalizer/TestAll.java | 32 + .../normalizer/TestCanonicalIterator.java | 270 + .../TestDeprecatedNormalizerAPI.java | 171 + .../test/normalizer/UnicodeNormalizer.java | 187 + .../UnicodeNormalizerConformanceTest.java | 270 + .../ibm/icu/dev/test/normalizer/package.html | 12 + .../dev/test/rbbi/BreakIteratorRegTest.java | 122 + .../rbbi/BreakIteratorRules_en_US_TEST.java | 219 + .../icu/dev/test/rbbi/BreakIteratorTest.java | 899 + .../ibm/icu/dev/test/rbbi/RBBIAPITest.java | 419 + .../com/ibm/icu/dev/test/rbbi/RBBITest.java | 830 + .../icu/dev/test/rbbi/RBBITestExtended.java | 493 + .../ibm/icu/dev/test/rbbi/RBBITestMonkey.java | 1998 + .../ibm/icu/dev/test/rbbi/SimpleBITest.java | 264 + .../com/ibm/icu/dev/test/rbbi/TestAll.java | 36 + .../com/ibm/icu/dev/test/rbbi/package.html | 12 + .../src/com/ibm/icu/dev/test/rbbi/rbbitst.txt | 582 + .../dev/test/serializable/CalendarTests.java | 226 + .../test/serializable/CompatibilityTest.java | 273 + .../dev/test/serializable/CoverageTest.java | 211 + .../dev/test/serializable/ExceptionTests.java | 157 + .../dev/test/serializable/FormatTests.java | 2193 + .../test/serializable/SerializableTest.java | 724 + .../test/serializable/SerializableWriter.java | 61 + .../com.ibm.icu.impl.OlsonTimeZone.dat | 3 + .../com.ibm.icu.impl.TimeZoneAdapter.dat | 3 + .../ICU_3.6/com.ibm.icu.math.BigDecimal.dat | 3 + .../ICU_3.6/com.ibm.icu.math.MathContext.dat | 3 + ...om.ibm.icu.text.ArabicShapingException.dat | 3 + .../com.ibm.icu.text.ChineseDateFormat.dat | 3 + ....ibm.icu.text.ChineseDateFormatSymbols.dat | 3 + .../ICU_3.6/com.ibm.icu.text.DateFormat.dat | 3 + .../com.ibm.icu.text.DateFormatSymbols.dat | 3 + .../com.ibm.icu.text.DecimalFormat.dat | 3 + .../com.ibm.icu.text.DecimalFormatSymbols.dat | 3 + .../com.ibm.icu.text.MessageFormat.dat | 3 + .../ICU_3.6/com.ibm.icu.text.NumberFormat.dat | 3 + 
...com.ibm.icu.text.RuleBasedNumberFormat.dat | 3 + .../com.ibm.icu.text.SimpleDateFormat.dat | 3 + ....ibm.icu.text.StringPrepParseException.dat | 3 + .../com.ibm.icu.util.BuddhistCalendar.dat | 3 + .../ICU_3.6/com.ibm.icu.util.Calendar.dat | 3 + .../com.ibm.icu.util.ChineseCalendar.dat | 3 + .../com.ibm.icu.util.CopticCalendar.dat | 3 + .../ICU_3.6/com.ibm.icu.util.Currency.dat | 3 + .../com.ibm.icu.util.EthiopicCalendar.dat | 3 + .../com.ibm.icu.util.GregorianCalendar.dat | 3 + .../com.ibm.icu.util.HebrewCalendar.dat | 3 + .../com.ibm.icu.util.IslamicCalendar.dat | 3 + .../com.ibm.icu.util.JapaneseCalendar.dat | 3 + .../com.ibm.icu.util.SimpleTimeZone.dat | 3 + .../ICU_3.6/com.ibm.icu.util.TimeZone.dat | 3 + .../data/ICU_3.6/com.ibm.icu.util.ULocale.dat | 3 + ...cu.util.UResourceTypeMismatchException.dat | 3 + .../com.ibm.icu.impl.DateNumberFormat.dat | 3 + ...om.ibm.icu.impl.InvalidFormatException.dat | 3 + .../com.ibm.icu.impl.OlsonTimeZone.dat | 3 + .../com.ibm.icu.impl.RelativeDateFormat.dat | 3 + .../com.ibm.icu.impl.TimeZoneAdapter.dat | 3 + ....icu.impl.duration.BasicDurationFormat.dat | 3 + .../ICU_3.8.1/com.ibm.icu.math.BigDecimal.dat | 3 + .../com.ibm.icu.math.MathContext.dat | 3 + ...om.ibm.icu.text.ArabicShapingException.dat | 3 + ...m.ibm.icu.text.ChineseDateFormat$Field.dat | 3 + .../com.ibm.icu.text.ChineseDateFormat.dat | 3 + ....ibm.icu.text.ChineseDateFormatSymbols.dat | 3 + .../com.ibm.icu.text.DateFormat$Field.dat | 3 + .../ICU_3.8.1/com.ibm.icu.text.DateFormat.dat | 3 + .../com.ibm.icu.text.DateFormatSymbols.dat | 3 + .../com.ibm.icu.text.DecimalFormat.dat | 3 + .../com.ibm.icu.text.DecimalFormatSymbols.dat | 3 + .../com.ibm.icu.text.MessageFormat$Field.dat | 3 + .../com.ibm.icu.text.MessageFormat.dat | 3 + .../com.ibm.icu.text.NumberFormat$Field.dat | 3 + .../com.ibm.icu.text.NumberFormat.dat | 3 + .../com.ibm.icu.text.PluralFormat.dat | 3 + .../com.ibm.icu.text.PluralRules.dat | 3 + ...com.ibm.icu.text.RuleBasedNumberFormat.dat | 3 + 
.../com.ibm.icu.text.SimpleDateFormat.dat | 3 + ....ibm.icu.text.StringPrepParseException.dat | 3 + .../com.ibm.icu.util.AnnualTimeZoneRule.dat | 3 + .../com.ibm.icu.util.BuddhistCalendar.dat | 3 + .../ICU_3.8.1/com.ibm.icu.util.Calendar.dat | 3 + .../com.ibm.icu.util.ChineseCalendar.dat | 3 + .../com.ibm.icu.util.CopticCalendar.dat | 3 + .../ICU_3.8.1/com.ibm.icu.util.Currency.dat | 3 + .../com.ibm.icu.util.DateTimeRule.dat | 3 + .../com.ibm.icu.util.EthiopicCalendar.dat | 3 + .../com.ibm.icu.util.GregorianCalendar.dat | 3 + .../com.ibm.icu.util.HebrewCalendar.dat | 3 + .../com.ibm.icu.util.IndianCalendar.dat | 3 + .../com.ibm.icu.util.InitialTimeZoneRule.dat | 3 + .../com.ibm.icu.util.IslamicCalendar.dat | 3 + .../com.ibm.icu.util.JapaneseCalendar.dat | 3 + .../com.ibm.icu.util.RuleBasedTimeZone.dat | 3 + .../com.ibm.icu.util.SimpleTimeZone.dat | 3 + .../com.ibm.icu.util.TaiwanCalendar.dat | 3 + ...com.ibm.icu.util.TimeArrayTimeZoneRule.dat | 3 + .../ICU_3.8.1/com.ibm.icu.util.TimeZone.dat | 3 + .../ICU_3.8.1/com.ibm.icu.util.ULocale.dat | 3 + ...cu.util.UResourceTypeMismatchException.dat | 3 + .../ICU_3.8.1/com.ibm.icu.util.VTimeZone.dat | 3 + .../com.ibm.icu.impl.DateNumberFormat.dat | 3 + ...om.ibm.icu.impl.InvalidFormatException.dat | 3 + .../ICU_4.0/com.ibm.icu.impl.JavaTimeZone.dat | 3 + .../com.ibm.icu.impl.OlsonTimeZone.dat | 3 + .../com.ibm.icu.impl.RelativeDateFormat.dat | 3 + .../com.ibm.icu.impl.TimeZoneAdapter.dat | 3 + ....icu.impl.duration.BasicDurationFormat.dat | 3 + .../ICU_4.0/com.ibm.icu.math.BigDecimal.dat | 3 + .../ICU_4.0/com.ibm.icu.math.MathContext.dat | 3 + ...om.ibm.icu.text.ArabicShapingException.dat | 3 + ...m.ibm.icu.text.ChineseDateFormat$Field.dat | 3 + .../com.ibm.icu.text.ChineseDateFormat.dat | 3 + ....ibm.icu.text.ChineseDateFormatSymbols.dat | 3 + .../com.ibm.icu.text.DateFormat$Field.dat | 3 + .../ICU_4.0/com.ibm.icu.text.DateFormat.dat | 3 + .../com.ibm.icu.text.DateFormatSymbols.dat | 3 + 
.../com.ibm.icu.text.DateIntervalFormat.dat | 3 + ....icu.text.DateIntervalInfo$PatternInfo.dat | 3 + .../com.ibm.icu.text.DateIntervalInfo.dat | 3 + .../com.ibm.icu.text.DecimalFormat.dat | 3 + .../com.ibm.icu.text.DecimalFormatSymbols.dat | 3 + .../com.ibm.icu.text.MessageFormat$Field.dat | 3 + .../com.ibm.icu.text.MessageFormat.dat | 3 + .../com.ibm.icu.text.NumberFormat$Field.dat | 3 + .../ICU_4.0/com.ibm.icu.text.NumberFormat.dat | 3 + .../ICU_4.0/com.ibm.icu.text.PluralFormat.dat | 3 + .../ICU_4.0/com.ibm.icu.text.PluralRules.dat | 3 + ...com.ibm.icu.text.RuleBasedNumberFormat.dat | 3 + .../com.ibm.icu.text.SimpleDateFormat.dat | 3 + ....ibm.icu.text.StringPrepParseException.dat | 3 + .../com.ibm.icu.text.TimeUnitFormat.dat | 3 + .../com.ibm.icu.util.AnnualTimeZoneRule.dat | 3 + .../com.ibm.icu.util.BuddhistCalendar.dat | 3 + .../ICU_4.0/com.ibm.icu.util.Calendar.dat | 3 + .../com.ibm.icu.util.ChineseCalendar.dat | 3 + .../com.ibm.icu.util.CopticCalendar.dat | 3 + .../ICU_4.0/com.ibm.icu.util.Currency.dat | 3 + .../ICU_4.0/com.ibm.icu.util.DateInterval.dat | 3 + .../ICU_4.0/com.ibm.icu.util.DateTimeRule.dat | 3 + .../com.ibm.icu.util.EthiopicCalendar.dat | 3 + .../com.ibm.icu.util.GregorianCalendar.dat | 3 + .../com.ibm.icu.util.HebrewCalendar.dat | 3 + .../com.ibm.icu.util.IndianCalendar.dat | 3 + .../com.ibm.icu.util.InitialTimeZoneRule.dat | 3 + .../com.ibm.icu.util.IslamicCalendar.dat | 3 + .../com.ibm.icu.util.JapaneseCalendar.dat | 3 + .../com.ibm.icu.util.RuleBasedTimeZone.dat | 3 + .../com.ibm.icu.util.SimpleTimeZone.dat | 3 + .../com.ibm.icu.util.TaiwanCalendar.dat | 3 + ...com.ibm.icu.util.TimeArrayTimeZoneRule.dat | 3 + .../ICU_4.0/com.ibm.icu.util.TimeZone.dat | 3 + .../data/ICU_4.0/com.ibm.icu.util.ULocale.dat | 3 + ...cu.util.UResourceTypeMismatchException.dat | 3 + .../ICU_4.0/com.ibm.icu.util.VTimeZone.dat | 3 + .../com.ibm.icu.impl.DateNumberFormat.dat | 3 + ...m.icu.impl.IllegalIcuArgumentException.dat | 3 + 
...om.ibm.icu.impl.InvalidFormatException.dat | 3 + .../com.ibm.icu.impl.JavaTimeZone.dat | 3 + .../com.ibm.icu.impl.OlsonTimeZone.dat | 3 + .../com.ibm.icu.impl.RelativeDateFormat.dat | 3 + .../com.ibm.icu.impl.TimeZoneAdapter.dat | 3 + ....icu.impl.duration.BasicDurationFormat.dat | 3 + ....icu.impl.locale.LocaleSyntaxException.dat | 3 + .../ICU_4.2.1/com.ibm.icu.math.BigDecimal.dat | 3 + .../com.ibm.icu.math.MathContext.dat | 3 + ...om.ibm.icu.text.ArabicShapingException.dat | 3 + ...m.ibm.icu.text.ChineseDateFormat$Field.dat | 3 + .../com.ibm.icu.text.ChineseDateFormat.dat | 3 + ....ibm.icu.text.ChineseDateFormatSymbols.dat | 3 + .../com.ibm.icu.text.CurrencyPluralInfo.dat | 3 + .../com.ibm.icu.text.DateFormat$Field.dat | 3 + .../ICU_4.2.1/com.ibm.icu.text.DateFormat.dat | 3 + .../com.ibm.icu.text.DateFormatSymbols.dat | 3 + .../com.ibm.icu.text.DateIntervalFormat.dat | 3 + ....icu.text.DateIntervalInfo$PatternInfo.dat | 3 + .../com.ibm.icu.text.DateIntervalInfo.dat | 3 + .../com.ibm.icu.text.DecimalFormat.dat | 3 + .../com.ibm.icu.text.DecimalFormatSymbols.dat | 3 + .../com.ibm.icu.text.MessageFormat$Field.dat | 3 + .../com.ibm.icu.text.MessageFormat.dat | 3 + .../com.ibm.icu.text.NumberFormat$Field.dat | 3 + .../com.ibm.icu.text.NumberFormat.dat | 3 + .../com.ibm.icu.text.PluralFormat.dat | 3 + .../com.ibm.icu.text.PluralRules.dat | 3 + ...com.ibm.icu.text.RuleBasedNumberFormat.dat | 3 + .../com.ibm.icu.text.SimpleDateFormat.dat | 3 + ....ibm.icu.text.StringPrepParseException.dat | 3 + .../com.ibm.icu.text.TimeUnitFormat.dat | 3 + .../com.ibm.icu.util.AnnualTimeZoneRule.dat | 3 + .../com.ibm.icu.util.BuddhistCalendar.dat | 3 + .../ICU_4.2.1/com.ibm.icu.util.Calendar.dat | 3 + .../com.ibm.icu.util.ChineseCalendar.dat | 3 + .../com.ibm.icu.util.CopticCalendar.dat | 3 + .../ICU_4.2.1/com.ibm.icu.util.Currency.dat | 3 + .../com.ibm.icu.util.DateInterval.dat | 3 + .../com.ibm.icu.util.DateTimeRule.dat | 3 + .../com.ibm.icu.util.EthiopicCalendar.dat | 3 + 
.../com.ibm.icu.util.GregorianCalendar.dat | 3 + .../com.ibm.icu.util.HebrewCalendar.dat | 3 + ....ibm.icu.util.IllformedLocaleException.dat | 3 + .../com.ibm.icu.util.IndianCalendar.dat | 3 + .../com.ibm.icu.util.InitialTimeZoneRule.dat | 3 + .../com.ibm.icu.util.IslamicCalendar.dat | 3 + .../com.ibm.icu.util.JapaneseCalendar.dat | 3 + .../com.ibm.icu.util.RuleBasedTimeZone.dat | 3 + .../com.ibm.icu.util.SimpleTimeZone.dat | 3 + .../com.ibm.icu.util.TaiwanCalendar.dat | 3 + ...com.ibm.icu.util.TimeArrayTimeZoneRule.dat | 3 + .../ICU_4.2.1/com.ibm.icu.util.TimeZone.dat | 3 + .../ICU_4.2.1/com.ibm.icu.util.ULocale.dat | 3 + ...cu.util.UResourceTypeMismatchException.dat | 3 + .../ICU_4.2.1/com.ibm.icu.util.VTimeZone.dat | 3 + .../test/shaping/ArabicShapingRegTest.java | 619 + .../dev/test/shaping/ArabicShapingTest.java | 286 + .../test/stringprep/IDNAConformanceTest.java | 311 + .../dev/test/stringprep/IDNAReference.java | 433 + .../dev/test/stringprep/NFS4StringPrep.java | 150 + .../test/stringprep/NamePrepTransform.java | 215 + .../test/stringprep/PunycodeReference.java | 382 + .../ibm/icu/dev/test/stringprep/TestAll.java | 37 + .../ibm/icu/dev/test/stringprep/TestData.java | 633 + .../ibm/icu/dev/test/stringprep/TestIDNA.java | 1061 + .../icu/dev/test/stringprep/TestIDNARef.java | 666 + .../stringprep/TestInputDataStructure.java | 141 + .../dev/test/stringprep/TestStringPrep.java | 326 + .../stringprep/TestStringPrepProfiles.java | 170 + .../ibm/icu/dev/test/timescale/TestAll.java | 34 + .../dev/test/timescale/TimeScaleAPITest.java | 356 + .../dev/test/timescale/TimeScaleDataTest.java | 259 + .../test/timescale/TimeScaleMonkeyTest.java | 106 + .../ibm/icu/dev/test/timezone/TestAll.java | 31 + .../dev/test/timezone/TimeZoneAliasTest.java | 422 + .../test/timezone/TimeZoneBoundaryTest.java | 853 + .../timezone/TimeZoneOffsetLocalTest.java | 240 + .../dev/test/timezone/TimeZoneRegression.java | 1186 + .../dev/test/timezone/TimeZoneRuleTest.java | 1662 + 
.../icu/dev/test/timezone/TimeZoneTest.java | 1627 + .../icu/dev/test/util/ArrayComparator.java | 76 + .../icu/dev/test/util/CalendarFieldsSet.java | 58 + .../icu/dev/test/util/CompactArrayTest.java | 159 + .../ibm/icu/dev/test/util/CurrencyTest.java | 317 + .../icu/dev/test/util/DateTimeStyleSet.java | 65 + .../ibm/icu/dev/test/util/DebugUtilities.java | 77 + .../icu/dev/test/util/DebugUtilitiesData.java | 222 + .../icu/dev/test/util/DebugUtilitiesTest.java | 47 + .../icu/dev/test/util/DisplayNameTest.java | 399 + .../ibm/icu/dev/test/util/ElapsedTimer.java | 107 + .../com/ibm/icu/dev/test/util/Equator.java | 27 + .../com/ibm/icu/dev/test/util/FieldsSet.java | 241 + .../ibm/icu/dev/test/util/ICUBinaryTest.java | 115 + .../dev/test/util/ICUResourceBundleTest.java | 1067 + .../ibm/icu/dev/test/util/ICUServiceTest.java | 1007 + .../dev/test/util/ICUServiceTestSample.java | 216 + .../dev/test/util/ICUServiceThreadTest.java | 482 + .../icu/dev/test/util/LocaleAliasTest.java | 214 + .../icu/dev/test/util/LocaleBuilderTest.java | 151 + .../ibm/icu/dev/test/util/LocaleDataTest.java | 303 + .../icu/dev/test/util/LocaleMatcherTest.java | 70 + .../dev/test/util/LocalePriorityListTest.java | 56 + .../com/ibm/icu/dev/test/util/Relation.java | 291 + .../com/ibm/icu/dev/test/util/SortedBag.java | 142 + .../dev/test/util/StringTokenizerTest.java | 1052 + .../com/ibm/icu/dev/test/util/TestAll.java | 44 + .../com/ibm/icu/dev/test/util/TestData_en.jpp | 39 + .../test/util/TestDefaultPackageLoading.jpp | 58 + .../icu/dev/test/util/TextTrieMapTest.java | 183 + .../com/ibm/icu/dev/test/util/Trie2Test.java | 761 + .../test/util/Trie2Test.setRanges1.16.tri2 | Bin 0 -> 6616 bytes .../test/util/Trie2Test.setRanges1.32.tri2 | Bin 0 -> 7776 bytes .../test/util/Trie2Test.setRanges2.16.tri2 | Bin 0 -> 5952 bytes .../test/util/Trie2Test.setRanges2.32.tri2 | Bin 0 -> 7088 bytes .../test/util/Trie2Test.setRanges3.16.tri2 | Bin 0 -> 5392 bytes .../test/util/Trie2Test.setRanges3.32.tri2 | Bin 0 -> 
6544 bytes .../util/Trie2Test.setRangesEmpty.16.tri2 | Bin 0 -> 4768 bytes .../util/Trie2Test.setRangesEmpty.32.tri2 | Bin 0 -> 5296 bytes .../Trie2Test.setRangesSingleValue.16.tri2 | Bin 0 -> 4896 bytes .../Trie2Test.setRangesSingleValue.32.tri2 | Bin 0 -> 5552 bytes .../com/ibm/icu/dev/test/util/TrieTest.java | 566 + .../ibm/icu/dev/test/util/ULocaleTest.java | 3886 + .../dev/test/util/UnicodePropertySource.java | 321 + .../ibm/icu/dev/test/util/UtilityTest.java | 230 + .../icu/dev/test/util/VariableReplacer.java | 42 + .../icu/dev/test/util/VersionInfoTest.java | 345 + .../icu/dev/test/util/XEquivalenceClass.java | 337 + .../icu/dev/test/util/XEquivalenceMap.java | 81 + .../icu/dev/test/util/manifest.cldrutil.stub | 14 + main/tests/framework/.classpath | 7 + main/tests/framework/.project | 19 + .../.settings/org.eclipse.jdt.core.prefs | 326 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/framework/build.properties | 5 + main/tests/framework/build.xml | 29 + main/tests/framework/manifest.stub | 11 + .../com/ibm/icu/dev/test/AbstractTestLog.java | 107 + .../src/com/ibm/icu/dev/test/ModuleTest.java | 169 + .../com/ibm/icu/dev/test/ResourceModule.java | 404 + .../src/com/ibm/icu/dev/test/TestAll.java | 32 + .../com/ibm/icu/dev/test/TestBoilerplate.java | 192 + .../com/ibm/icu/dev/test/TestDataModule.java | 107 + .../src/com/ibm/icu/dev/test/TestFmwk.java | 1885 + .../src/com/ibm/icu/dev/test/TestLog.java | 38 + .../com/ibm/icu/dev/test/TestLogWriter.java | 43 + .../src/com/ibm/icu/dev/test/TestUtil.java | 211 + .../src/com/ibm/icu/dev/test/UTF16Util.java | 377 + .../com/ibm/icu/dev/test/manifest.test.stub | 14 + .../src/com/ibm/icu/dev/test/package.html | 12 + .../icu/dev/test/sample/ModuleTestSample.java | 169 + .../dev/test/sample/ModuleTestSampleData.java | 170 + .../framework/test-framework-build.launch | 20 + main/tests/localespi/.classpath | 10 + main/tests/localespi/.project | 24 + .../.settings/org.eclipse.jdt.core.prefs | 330 + 
.../.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/localespi/build.properties | 8 + main/tests/localespi/build.xml | 29 + .../localespi/localespi-tests-build.launch | 20 + main/tests/localespi/manifest.stub | 11 + .../dev/test/localespi/BreakIteratorTest.java | 224 + .../icu/dev/test/localespi/CollatorTest.java | 132 + .../dev/test/localespi/CurrencyNameTest.java | 86 + .../test/localespi/DateFormatSymbolsTest.java | 183 + .../dev/test/localespi/DateFormatTest.java | 196 + .../localespi/DecimalFormatSymbolsTest.java | 159 + .../dev/test/localespi/LocaleNameTest.java | 181 + .../dev/test/localespi/NumberFormatTest.java | 291 + .../ibm/icu/dev/test/localespi/TestAll.java | 30 + .../ibm/icu/dev/test/localespi/TestUtil.java | 102 + .../dev/test/localespi/TimeZoneNameTest.java | 123 + main/tests/packaging/.classpath | 8 + .../copy-test-data.launch | 25 + main/tests/packaging/.project | 30 + .../.settings/org.eclipse.jdt.core.prefs | 330 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/packaging/build.properties | 5 + main/tests/packaging/build.xml | 31 + main/tests/packaging/manifest.stub | 11 + .../packaging/packaging-tests-build.launch | 20 + .../icu/dev/test/TestLocaleNamePackaging.java | 187 + .../com/ibm/icu/dev/test/TestPackaging.java | 26 + main/tests/translit/.classpath | 9 + .../copy-translit-test-data.launch | 25 + main/tests/translit/.project | 31 + .../.settings/org.eclipse.jdt.core.prefs | 332 + .../.settings/org.eclipse.jdt.ui.prefs | 10 + main/tests/translit/build.properties | 5 + main/tests/translit/build.xml | 36 + main/tests/translit/manifest.stub | 11 + .../com/ibm/icu/dev/test/TestAllTranslit.java | 32 + .../icu/dev/test/translit/AnyScriptTest.java | 150 + .../translit/CompoundTransliteratorTest.java | 272 + .../ibm/icu/dev/test/translit/ErrorTest.java | 243 + .../ibm/icu/dev/test/translit/JamoTest.java | 510 + .../dev/test/translit/PrettyPrinterTest.java | 38 + .../dev/test/translit/RegexUtilitiesTest.java | 217 + 
.../dev/test/translit/ReplaceableTest.java | 192 + .../icu/dev/test/translit/RoundTripTest.java | 1767 + .../ibm/icu/dev/test/translit/TestAll.java | 35 + .../icu/dev/test/translit/TestUtility.java | 445 + .../dev/test/translit/TransliteratorTest.java | 3746 + .../icu/dev/test/translit/UnicodeMapTest.java | 229 + .../icu/dev/test/translit/WriteCharts.java | 393 + .../icu/dev/test/translit/langtagRegex.txt | 87 + .../ibm/icu/dev/test/translit/package.html | 12 + .../src/com/ibm/icu/dev/test/util/BNF.java | 329 + .../ibm/icu/dev/test/util/BagFormatter.java | 1145 + .../ibm/icu/dev/test/util/CaseIterator.java | 562 + .../dev/test/util/CollectionUtilities.java | 450 + .../dev/test/util/DataInputCompressor.java | 229 + .../dev/test/util/DataOutputCompressor.java | 207 + .../ibm/icu/dev/test/util/FileUtilities.java | 83 + .../icu/dev/test/util/ICUPropertyFactory.java | 532 + .../ibm/icu/dev/test/util/ImmutableEntry.java | 48 + .../src/com/ibm/icu/dev/test/util/Pick.java | 796 + .../ibm/icu/dev/test/util/PrettyPrinter.java | 290 + .../src/com/ibm/icu/dev/test/util/Quoter.java | 65 + .../src/com/ibm/icu/dev/test/util/Tabber.java | 239 + .../com/ibm/icu/dev/test/util/TestBNF.java | 240 + .../icu/dev/test/util/TestBagFormatter.java | 243 + .../ibm/icu/dev/test/util/TestUtilities.java | 508 + .../com/ibm/icu/dev/test/util/Tokenizer.java | 320 + .../test/util/TransliteratorUtilities.java | 149 + .../ibm/icu/dev/test/util/UnicodeLabel.java | 56 + .../com/ibm/icu/dev/test/util/UnicodeMap.java | 1058 + .../icu/dev/test/util/UnicodeMapIterator.java | 246 + .../icu/dev/test/util/UnicodeProperty.java | 1374 + .../com/ibm/icu/dev/test/util/Visitor.java | 133 + .../translit/translit-tests-build.launch | 20 + perf-tests/Dataset.pm | 86 + perf-tests/collationperf.pl | 126 + perf-tests/converterperf.pl | 510 + perf-tests/data/collation/TestNames_Asian.txt | 10008 + .../data/collation/TestNames_Chinese.txt | 10902 + .../data/collation/TestNames_Japanese.txt | 22833 +++ 
.../data/collation/TestNames_Japanese_h.txt | 22749 +++ .../data/collation/TestNames_Japanese_k.txt | 22749 +++ .../data/collation/TestNames_Korean.txt | 49795 +++++ perf-tests/data/collation/TestNames_Latin.txt | 7611 + .../data/collation/TestNames_Russian.txt | 1310 + .../data/collation/TestNames_SerbianSH.txt | 61254 ++++++ .../data/collation/TestNames_SerbianSR.txt | 61254 ++++++ .../TestNames_Simplified_Chinese.txt | 10902 + perf-tests/data/collation/TestNames_Thai.txt | 10843 + perf-tests/data/conversion/arabic.txt | 18 + perf-tests/data/conversion/english.txt | 20 + perf-tests/data/conversion/french.txt | 20 + perf-tests/data/conversion/greek.txt | 20 + perf-tests/data/conversion/hebrew.txt | 20 + perf-tests/data/conversion/hindi.txt | 20 + perf-tests/data/conversion/japanese.txt | 20 + perf-tests/data/conversion/korean.txt | 20 + perf-tests/data/conversion/s-chinese.txt | 20 + perf-tests/dateformatperf.pl | 496 + perf-tests/decimalformatperf.pl | 491 + perf-tests/normalizationperf_r_b.pl | 83 + perf-tests/normalizationperf_r_l.pl | 83 + perf-tests/normperf.pl | 546 + perf-tests/perldriver/Dataset.pm | 139 + perf-tests/perldriver/Format.pm | 166 + perf-tests/perldriver/Output.pm | 389 + perf-tests/perldriver/PerfFramework4j.pm | 415 + perf-tests/rbbiperf_r.pl | 88 + perf-tests/resourcebundleperf.pl | 61 + .../perf/BreakIteratorPerformanceTest.java | 194 + .../test/perf/CollationPerformanceTest.java | 1262 + .../test/perf/ConverterPerformanceTest.java | 318 + .../test/perf/DateFormatPerformanceTest.java | 115 + .../perf/DecimalFormatPerformanceTest.java | 119 + .../test/perf/NormalizerPerformanceTest.java | 702 + .../com/ibm/icu/dev/test/perf/PerfTest.java | 932 + .../com/ibm/icu/dev/test/perf/RBBIPerf.java | 157 + .../icu/dev/test/perf/ResourceBundlePerf.java | 372 + .../ibm/icu/dev/test/perf/UCharacterPerf.java | 533 + .../ibm/icu/dev/test/perf/UnicodeSetPerf.java | 161 + perf-tests/ucharacterperf.pl | 493 + perf-tests/ucharacterperf_r.pl | 61 + 
perf-tests/unicodesetperf.pl | 483 + perf-tests/unicodesetperf_r.pl | 59 + readme.html | 1076 + tools/build/.classpath | 6 + tools/build/.project | 17 + .../.settings/org.eclipse.jdt.core.prefs | 330 + .../build/.settings/org.eclipse.jdt.ui.prefs | 10 + tools/build/README.txt | 95 + tools/build/build-tools-build.launch | 23 + tools/build/build.properties | 5 + tools/build/build.xml | 29 + tools/build/icu4j28.api.gz | 3 + tools/build/icu4j30.api.gz | 3 + tools/build/icu4j32.api.gz | 3 + tools/build/icu4j34.api.gz | 3 + tools/build/icu4j341.api.gz | 3 + tools/build/icu4j342.api.gz | 3 + tools/build/icu4j343.api.gz | 3 + tools/build/icu4j36.api.gz | 3 + tools/build/icu4j38.api.gz | 3 + tools/build/icu4j381.api.gz | 3 + tools/build/icu4j400.api.gz | 3 + tools/build/icu4j401.api.gz | 3 + tools/build/icu4j42.api.gz | 3 + tools/build/icu4j421.api.gz | 3 + tools/build/manifest.stub | 21 + .../com/ibm/icu/dev/tool/docs/APIData.java | 158 + .../com/ibm/icu/dev/tool/docs/APIInfo.java | 547 + .../com/ibm/icu/dev/tool/docs/CheckAPI.java | 1050 + .../com/ibm/icu/dev/tool/docs/CheckTags.java | 477 + .../ibm/icu/dev/tool/docs/CodeMangler.java | 783 + .../com/ibm/icu/dev/tool/docs/Deprecator.java | 194 + .../ibm/icu/dev/tool/docs/GatherAPIData.java | 497 + .../ibm/icu/dev/tool/docs/ICUJDKCompare.java | 802 + .../com/ibm/icu/dev/tool/docs/ICUTaglet.java | 353 + .../com/ibm/icu/dev/tool/docs/ReportAPI.java | 500 + .../ibm/icu/dev/tool/docs/SwatDeprecated.java | 345 + .../icu/dev/tool/index/IndexGenerator.java | 99 + tools/misc/.classpath | 11 + tools/misc/.project | 21 + .../misc/.settings/org.eclipse.jdt.core.prefs | 330 + tools/misc/.settings/org.eclipse.jdt.ui.prefs | 10 + tools/misc/build.properties | 5 + tools/misc/build.xml | 29 + tools/misc/manifest.stub | 11 + .../src/com/ibm/icu/dev/tool/UOption.java | 275 + .../dev/tool/charsetdet/mbcs/BIG5Tool.java | 327 + .../icu/dev/tool/charsetdet/mbcs/EUCTool.java | 353 + .../icu/dev/tool/charsetdet/sbcs/Checker.java | 184 + 
.../dev/tool/charsetdet/sbcs/InputFile.java | 173 + .../dev/tool/charsetdet/sbcs/NGramList.java | 142 + .../dev/tool/charsetdet/sbcs/NGramParser.java | 161 + .../tool/charsetdet/sbcs/StatisticsTool.java | 434 + .../icu/dev/tool/cldr/CheckSystemFonts.java | 651 + .../CompressionTableGenerator.java | 223 + .../ibm/icu/dev/tool/compression/package.html | 12 + .../src/com/ibm/icu/dev/tool/ime/IMETest.java | 36 + .../indic/BengaliInputMethodDescriptor.java | 216 + .../DevanagariInputMethodDescriptor.java | 246 + .../tool/ime/indic/DisplayNames.properties | 18 + .../indic/GujaratiInputMethodDescriptor.java | 171 + .../indic/GurmukhiInputMethodDescriptor.java | 202 + .../dev/tool/ime/indic/IndicIMDescriptor.java | 57 + .../dev/tool/ime/indic/IndicInputMethod.java | 92 + .../tool/ime/indic/IndicInputMethodImpl.java | 438 + .../indic/KannadaInputMethodDescriptor.java | 170 + .../indic/MalayalamInputMethodDescriptor.java | 166 + .../ime/indic/OriyaInputMethodDescriptor.java | 213 + .../ime/indic/TamilInputMethodDescriptor.java | 167 + .../indic/TeluguInputMethodDescriptor.java | 169 + .../ibm/icu/dev/tool/ime/indic/manifest.stub | 14 + .../java.awt.im.spi.InputMethodDescriptor | 9 + .../ime/translit/Transliterator.properties | 20 + .../translit/TransliteratorInputMethod.java | 718 + .../TransliteratorInputMethodDescriptor.java | 91 + .../icu/dev/tool/ime/translit/manifest.stub | 11 + .../java.awt.im.spi.InputMethodDescriptor | 7 + .../dev/tool/layout/ArabicCharacterData.java | 114 + .../icu/dev/tool/layout/ArabicShaping.java | 123 + .../dev/tool/layout/BuildMirroringTables.java | 97 + .../icu/dev/tool/layout/CanonGSUBBuilder.java | 523 + .../tool/layout/CanonicalCharacterData.java | 217 + .../ibm/icu/dev/tool/layout/ClassTable.java | 237 + .../ibm/icu/dev/tool/layout/DecompTable.java | 185 + .../com/ibm/icu/dev/tool/layout/Feature.java | 59 + .../ibm/icu/dev/tool/layout/FeatureList.java | 77 + .../ibm/icu/dev/tool/layout/GDEFWriter.java | 67 + 
.../ibm/icu/dev/tool/layout/GSUBWriter.java | 64 + .../ibm/icu/dev/tool/layout/LanguageData.java | 186 + .../icu/dev/tool/layout/LigatureEntry.java | 39 + .../dev/tool/layout/LigatureModuleWriter.java | 25 + .../ibm/icu/dev/tool/layout/LigatureTree.java | 247 + .../dev/tool/layout/LigatureTreeWalker.java | 144 + .../com/ibm/icu/dev/tool/layout/Lookup.java | 89 + .../ibm/icu/dev/tool/layout/LookupList.java | 57 + .../icu/dev/tool/layout/LookupSubtable.java | 16 + .../ibm/icu/dev/tool/layout/ModuleWriter.java | 170 + .../dev/tool/layout/OpenTypeTableWriter.java | 128 + .../dev/tool/layout/OpenTypeTagBuilder.java | 235 + .../icu/dev/tool/layout/ScriptAndLanguages | 28 + .../ibm/icu/dev/tool/layout/ScriptData.java | 246 + .../dev/tool/layout/ScriptIDModuleWriter.java | 304 + .../ibm/icu/dev/tool/layout/ScriptList.java | 200 + .../dev/tool/layout/ScriptModuleWriter.java | 22 + .../dev/tool/layout/ScriptNameBuilder.java | 37 + .../tool/layout/ScriptRunModuleWriter.java | 115 + .../tool/layout/ScriptTagModuleWriter.java | 126 + .../dev/tool/layout/ShapingTypeBuilder.java | 73 + .../ibm/icu/dev/tool/layout/TagUtilities.java | 73 + .../ibm/icu/dev/tool/layout/TagValueData.java | 23 + .../ibm/icu/dev/tool/layout/TaggedRecord.java | 79 + .../dev/tool/layout/ThaiCharacterClasses.java | 165 + .../icu/dev/tool/layout/ThaiStateTable.java | 205 + .../tool/layout/ThaiStateTableBuilder.java | 26 + .../layout/ThaiStateTableModuleWriter.java | 31 + .../dev/tool/layout/ThaiStateTransition.java | 66 + .../ibm/icu/dev/tool/layout/TreeWalker.java | 19 + .../tool/localeconverter/CalculateCRC32.java | 86 + .../localeconverter/XLIFF2ICUConverter.java | 1260 + .../dev/tool/localeconverter/manifest.stub | 13 + .../dev/tool/rbbi/BuildDictionaryFile.java | 880 + .../src/com/ibm/icu/dev/tool/rbbi/readme.html | 65 + .../serializable/SerializableChecker.java | 206 + .../dev/tool/timescale/CalculateLimits.java | 102 + .../icu/dev/tool/timescale/EpochOffsets.java | 104 + 
.../timescale/GenerateCTimeScaleData.java | 108 + .../ibm/icu/dev/tool/timezone/ICUZDump.java | 361 + .../ibm/icu/dev/tool/translit/SourceSet.java | 115 + .../com/ibm/icu/dev/tool/translit/Trans.java | 147 + .../tool/translit/UnicodeSetCloseOver.java | 471 + .../dev/tool/translit/UnicodeSetClosure.java | 306 + .../dev/tool/translit/WriteIndicCharts.java | 362 + .../icu/dev/tool/translit/dumpICUrules.bat | 483 + .../icu/dev/tool/translit/genIndexFilters.bat | 121 + .../dev/tool/translit/genIndexFilters.java | 70 + .../com/ibm/icu/dev/tool/translit/indic.bat | 21 + .../com/ibm/icu/dev/tool/translit/indic.pl | 662 + .../icu/dev/tool/translit/indicExceptions.txt | 377 + .../ibm/icu/dev/tool/translit/rbtTemplate.txt | 26 + .../com/ibm/icu/dev/tool/translit/varsub.bat | 81 + tools/misc/tools-build.launch | 23 + 1401 files changed, 1329801 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 APIChangeReport.html create mode 100644 build.properties create mode 100644 build.xml create mode 100644 demos/.classpath create mode 100644 demos/.project create mode 100644 demos/.settings/org.eclipse.jdt.core.prefs create mode 100644 demos/.settings/org.eclipse.jdt.ui.prefs create mode 100644 demos/build.properties create mode 100644 demos/build.xml create mode 100644 demos/demos-build.launch create mode 100644 demos/manifest.stub create mode 100644 demos/src/com/ibm/icu/dev/demo/Launcher.java create mode 100644 demos/src/com/ibm/icu/dev/demo/calendar/CalendarApp.java create mode 100644 demos/src/com/ibm/icu/dev/demo/calendar/CalendarCalc.java create mode 100644 demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java create mode 100644 demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java create mode 100644 demos/src/com/ibm/icu/dev/demo/calendar/package.html create mode 100644 demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java create mode 100644 demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java create 
mode 100644 demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java create mode 100644 demos/src/com/ibm/icu/dev/demo/holiday/package.html create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/Selection.java create mode 100644 demos/src/com/ibm/icu/dev/demo/impl/package.html create mode 100644 demos/src/com/ibm/icu/dev/demo/number/CurrencyDemo.java create mode 100644 demos/src/com/ibm/icu/dev/demo/rbnf/RbnfDemo.java create mode 100644 demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java create mode 100644 demos/src/com/ibm/icu/dev/demo/rbnf/package.html create mode 100644 demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Demo.java create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java create mode 100644 
demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java create mode 100755 demos/src/com/ibm/icu/dev/demo/translit/demo.bat create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/demo.html create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/package.html create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt create mode 100644 demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt create mode 100644 main/classes/charset/.classpath create mode 100644 main/classes/charset/.externalToolBuilders/copy-data-charset.launch create mode 100644 main/classes/charset/.project create mode 100644 main/classes/charset/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/charset/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/charset/build.properties create mode 100644 main/classes/charset/build.xml create mode 100644 main/classes/charset/charset-build.launch create mode 100644 main/classes/charset/manifest.stub create mode 100644 main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider create mode 100644 main/classes/charset/src/com/ibm/icu/charset/Charset88591.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java create mode 100644 
main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetLMBCS.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java create mode 100644 main/classes/charset/src/com/ibm/icu/charset/package.html create mode 100644 main/classes/collate/.classpath create mode 100644 
main/classes/collate/.externalToolBuilders/copy-data-collate.launch create mode 100644 main/classes/collate/.project create mode 100644 main/classes/collate/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/collate/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/collate/build.properties create mode 100644 main/classes/collate/build.xml create mode 100644 main/classes/collate/collate-build.launch create mode 100644 main/classes/collate/manifest.stub create mode 100644 main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/CollationKey.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/Collator.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/CollatorReader.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/IndexCharacters.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java create mode 100644 main/classes/collate/src/com/ibm/icu/text/StringSearch.java create mode 100644 main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java create mode 100644 main/classes/core/.classpath create mode 100644 main/classes/core/.externalToolBuilders/copy-data-core.launch create mode 100644 main/classes/core/.project create mode 100644 main/classes/core/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/core/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/core/build.properties create mode 100644 
main/classes/core/build.xml create mode 100644 main/classes/core/core-build.launch create mode 100644 main/classes/core/manifest.stub create mode 100644 main/classes/core/src/com/ibm/icu/ICUConfig.properties create mode 100644 main/classes/core/src/com/ibm/icu/impl/Assert.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/BMPSet.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/BOCU.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CacheBase.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CalendarCache.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CalendarData.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CharTrie.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/CurrencyData.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Differ.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Grego.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUBinary.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUCache.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUConfig.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUData.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUDebug.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICULogger.java create mode 100644 
main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICURWLock.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ICUService.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/IntTrie.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/IterableComparator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/MultiComparator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/PVecToTrieCompactHandler.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/PatternTokenizer.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/PluralRulesLoader.java create mode 
100644 main/classes/core/src/com/ibm/icu/impl/PropsVectors.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Punycode.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/RelativeDateFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Row.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/SimpleCache.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/SoftCache.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/SortedSetRelation.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/StringUCharacterIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Trie.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Trie2.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Trie2_16.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Trie2_32.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/TrieBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/TrieIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCaseProps.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharArrayIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java create mode 100644 
main/classes/core/src/com/ibm/icu/impl/UCharacterName.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharacterNameChoice.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharacterPropertyReader.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UCharacterUtility.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/URLHandler.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/USerializedSet.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/Utility.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ZoneMeta.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/ZoneStringFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules_th.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java create mode 100644 
main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/data/package.html create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java create mode 100644 
main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/DateFormatter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatterFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/Period.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilderFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterService.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/TimeUnitConstants.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/DataRecord.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterData.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterDataService.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/RecordReader.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/RecordWriter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/Utils.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordReader.java create mode 100644 
main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/YMDDateFormatter.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/index.txt create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml.escaped create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_en.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_es.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_fr.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_he_IL.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_hi.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_it.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ja.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ko.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ru.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_th.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans_SG.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant_HK.xml create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/Extension.java create mode 100644 
main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/LocaleObjectCache.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/PrivateuseExtension.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/StringTokenIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UCharacter.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UCharacterCategory.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UCharacterDirection.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UCharacterEnums.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UCharacterTypeIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UProperty.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UScript.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/UScriptRun.java create mode 100644 main/classes/core/src/com/ibm/icu/lang/package.html create mode 100644 main/classes/core/src/com/ibm/icu/math/BigDecimal.java create mode 100644 main/classes/core/src/com/ibm/icu/math/MathContext.java create mode 100644 main/classes/core/src/com/ibm/icu/math/package.html create mode 100644 main/classes/core/src/com/ibm/icu/text/ArabicShaping.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ArabicShapingException.java create mode 100644 main/classes/core/src/com/ibm/icu/text/Bidi.java create mode 100644 
main/classes/core/src/com/ibm/icu/text/BidiClassifier.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BidiLine.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BidiRun.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BidiWriter.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BreakCTDictionary.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BreakDictionary.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BreakIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CanonicalIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetDetector.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetMatch.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetRecog_2022.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetRecog_UTF8.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetRecog_Unicode.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetRecog_mbcs.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CharsetRecognizer.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ChineseDateFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ChineseDateFormatSymbols.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ComposedCharIter.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CurrencyDisplayNames.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CurrencyFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CurrencyMetaInfo.java create mode 100644 main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DateFormat.java create mode 100644 
main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DateIntervalFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DateTimePatternGenerator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DecimalFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DecompData.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DictionaryBasedBreakIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DigitList.java create mode 100644 main/classes/core/src/com/ibm/icu/text/DurationFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/FilteredNormalizer2.java create mode 100644 main/classes/core/src/com/ibm/icu/text/IDNA.java create mode 100644 main/classes/core/src/com/ibm/icu/text/LocaleDisplayNames.java create mode 100644 main/classes/core/src/com/ibm/icu/text/MeasureFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/MessageFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/NFRule.java create mode 100644 main/classes/core/src/com/ibm/icu/text/NFRuleSet.java create mode 100644 main/classes/core/src/com/ibm/icu/text/NFSubstitution.java create mode 100644 main/classes/core/src/com/ibm/icu/text/Normalizer.java create mode 100644 main/classes/core/src/com/ibm/icu/text/Normalizer2.java create mode 100644 main/classes/core/src/com/ibm/icu/text/NumberFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/NumberFormatServiceShim.java create mode 100644 main/classes/core/src/com/ibm/icu/text/NumberingSystem.java create mode 100644 main/classes/core/src/com/ibm/icu/text/PluralFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/PluralRules.java create mode 100644 
main/classes/core/src/com/ibm/icu/text/Quantifier.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBINode.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBIRuleParseTable.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBISymbolTable.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBNFChinesePostProcessor.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RBNFPostProcessor.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RbnfLenientScanner.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RbnfLenientScannerProvider.java create mode 100644 main/classes/core/src/com/ibm/icu/text/Replaceable.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ReplaceableContextIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ReplaceableString.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/SCSU.java create mode 100644 main/classes/core/src/com/ibm/icu/text/SearchIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/SelectFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/StringCharacterIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/StringPrep.java create mode 100644 main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java create 
mode 100644 main/classes/core/src/com/ibm/icu/text/StringTransform.java create mode 100644 main/classes/core/src/com/ibm/icu/text/SymbolTable.java create mode 100644 main/classes/core/src/com/ibm/icu/text/ThaiBreakIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/Transform.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UFormat.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UForwardCharacterIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UTF16.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeCompressor.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeDecompressor.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeFilter.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeMatcher.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeReplacer.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeSet.java create mode 100644 main/classes/core/src/com/ibm/icu/text/UnicodeSetIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/text/package.html create mode 100644 main/classes/core/src/com/ibm/icu/util/AnnualTimeZoneRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/BasicTimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/util/BuddhistCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CECalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/Calendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CalendarServiceShim.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java create mode 100644 
main/classes/core/src/com/ibm/icu/util/ChineseCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CompactByteArray.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CompactCharArray.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CopticCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/Currency.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CurrencyAmount.java create mode 100644 main/classes/core/src/com/ibm/icu/util/CurrencyServiceShim.java create mode 100644 main/classes/core/src/com/ibm/icu/util/DateInterval.java create mode 100644 main/classes/core/src/com/ibm/icu/util/DateRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/DateTimeRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/EasterHoliday.java create mode 100644 main/classes/core/src/com/ibm/icu/util/EthiopicCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/Freezable.java create mode 100644 main/classes/core/src/com/ibm/icu/util/GregorianCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/HebrewCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/HebrewHoliday.java create mode 100644 main/classes/core/src/com/ibm/icu/util/Holiday.java create mode 100644 main/classes/core/src/com/ibm/icu/util/IllformedLocaleException.java create mode 100644 main/classes/core/src/com/ibm/icu/util/IndianCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/InitialTimeZoneRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/IslamicCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/LocaleData.java create mode 100644 main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java create mode 100644 main/classes/core/src/com/ibm/icu/util/LocalePriorityList.java create mode 100644 
main/classes/core/src/com/ibm/icu/util/Measure.java create mode 100644 main/classes/core/src/com/ibm/icu/util/MeasureUnit.java create mode 100644 main/classes/core/src/com/ibm/icu/util/OverlayBundle.java create mode 100644 main/classes/core/src/com/ibm/icu/util/RangeDateRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/RangeValueIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/util/RuleBasedTimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/util/STZInfo.java create mode 100644 main/classes/core/src/com/ibm/icu/util/SimpleDateRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/SimpleHoliday.java create mode 100644 main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/util/StringTokenizer.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TaiwanCalendar.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TimeArrayTimeZoneRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TimeUnit.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TimeUnitAmount.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TimeZoneRule.java create mode 100644 main/classes/core/src/com/ibm/icu/util/TimeZoneTransition.java create mode 100644 main/classes/core/src/com/ibm/icu/util/ULocale.java create mode 100644 main/classes/core/src/com/ibm/icu/util/UResourceBundle.java create mode 100644 main/classes/core/src/com/ibm/icu/util/UResourceBundleIterator.java create mode 100644 main/classes/core/src/com/ibm/icu/util/UResourceTypeMismatchException.java create mode 100644 main/classes/core/src/com/ibm/icu/util/UniversalTimeScale.java create mode 100644 main/classes/core/src/com/ibm/icu/util/VTimeZone.java create mode 100644 main/classes/core/src/com/ibm/icu/util/ValueIterator.java create mode 100644 
main/classes/core/src/com/ibm/icu/util/VersionInfo.java create mode 100644 main/classes/core/src/com/ibm/icu/util/package.html create mode 100644 main/classes/currdata/.classpath create mode 100644 main/classes/currdata/.externalToolBuilders/copy-data-currdata.launch create mode 100644 main/classes/currdata/.project create mode 100644 main/classes/currdata/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/currdata/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/currdata/build.properties create mode 100644 main/classes/currdata/build.xml create mode 100644 main/classes/currdata/currdata-build.launch create mode 100644 main/classes/currdata/manifest.stub create mode 100644 main/classes/currdata/src/com/ibm/icu/impl/ICUCurrencyDisplayInfoProvider.java create mode 100644 main/classes/currdata/src/com/ibm/icu/impl/ICUCurrencyMetaInfo.java create mode 100644 main/classes/langdata/.classpath create mode 100644 main/classes/langdata/.externalToolBuilders/copy-data-langdata.launch create mode 100644 main/classes/langdata/.project create mode 100644 main/classes/langdata/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/langdata/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/langdata/build.properties create mode 100644 main/classes/langdata/build.xml create mode 100644 main/classes/langdata/langdata-build.launch create mode 100644 main/classes/langdata/manifest.stub create mode 100644 main/classes/langdata/src/com/ibm/icu/impl/ICULangDataTables.java create mode 100644 main/classes/localespi/.classpath create mode 100644 main/classes/localespi/.project create mode 100644 main/classes/localespi/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/localespi/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/localespi/build.properties create mode 100644 main/classes/localespi/build.xml create mode 100644 main/classes/localespi/localespi-build.launch create mode 100644 
main/classes/localespi/manifest.stub create mode 100644 main/classes/localespi/readme.html create mode 100644 main/classes/localespi/src/META-INF/services/java.text.spi.BreakIteratorProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.text.spi.CollatorProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.text.spi.DateFormatProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.text.spi.DateFormatSymbolsProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.text.spi.DecimalFormatSymbolsProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.text.spi.NumberFormatProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.util.spi.CurrencyNameProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.util.spi.LocaleNameProvider create mode 100644 main/classes/localespi/src/META-INF/services/java.util.spi.TimeZoneNameProvider create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/icuadapter/NumberFormatJDK.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/icuadapter/TimeZoneJDK.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/ICULocaleServiceProvider.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/ICULocaleServiceProviderConfig.properties create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/text/BreakIteratorProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/text/CollatorProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/text/DateFormatProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/text/DateFormatSymbolsProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/text/DecimalFormatSymbolsProviderICU.java create mode 100644 
main/classes/localespi/src/com/ibm/icu/impl/javaspi/text/NumberFormatProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/CurrencyNameProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/LocaleNameProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/TimeZoneNameProviderICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/BreakIteratorICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CalendarICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CollationKeyICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CollatorICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/DateFormatSymbolsICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/DecimalFormatICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/DecimalFormatSymbolsICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/NumberFormatICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/SimpleDateFormatICU.java create mode 100644 main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/TimeZoneICU.java create mode 100644 main/classes/regiondata/.classpath create mode 100644 main/classes/regiondata/.externalToolBuilders/copy-data-regiondata.launch create mode 100644 main/classes/regiondata/.project create mode 100644 main/classes/regiondata/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/regiondata/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/regiondata/build.properties create mode 100644 main/classes/regiondata/build.xml create mode 100644 main/classes/regiondata/manifest.stub create mode 100644 main/classes/regiondata/regiondata-build.launch create mode 100644 
main/classes/regiondata/src/com/ibm/icu/impl/ICURegionDataTables.java create mode 100644 main/classes/translit/.classpath create mode 100644 main/classes/translit/.externalToolBuilders/copy-data-translit.launch create mode 100644 main/classes/translit/.project create mode 100644 main/classes/translit/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/classes/translit/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/classes/translit/build.properties create mode 100644 main/classes/translit/build.xml create mode 100644 main/classes/translit/manifest.stub create mode 100644 main/classes/translit/src/com/ibm/icu/impl/UtilityExtensions.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/CaseFoldTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/CompoundTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/EscapeTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/FunctionReplacer.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/NameUnicodeTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/NullTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/RemoveTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/StringMatcher.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/StringReplacer.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java create mode 
100644 main/classes/translit/src/com/ibm/icu/text/TransformTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/TransliterationRule.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/TransliterationRuleSet.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/Transliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/TransliteratorIDParser.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/UnescapeTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/UnicodeNameTransliterator.java create mode 100644 main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java create mode 100644 main/classes/translit/translit-build.launch create mode 100644 main/shared/.project create mode 100644 main/shared/build/common-targets.xml create mode 100644 main/shared/build/common.properties create mode 100644 main/shared/build/locations-eclipse.properties create mode 100644 main/shared/build/locations.properties create mode 100644 main/shared/data/Transliterator_Han_Latin_Definition.txt create mode 100644 main/shared/data/Transliterator_Han_Latin_EDICT.txt create mode 100755 main/shared/data/icudata.jar create mode 100644 main/shared/data/security.policy create mode 100755 main/shared/data/testdata.jar create mode 100644 main/shared/licenses/license.html create mode 100644 main/shared/licenses/unicode-license.txt create mode 100644 main/tests/charset/.classpath create mode 100644 main/tests/charset/.externalToolBuilders/copy-charset-test-data.launch create mode 100644 main/tests/charset/.project create mode 100644 main/tests/charset/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/tests/charset/.settings/org.eclipse.jdt.ui.prefs create mode 100644 
main/tests/charset/build.properties create mode 100644 main/tests/charset/build.xml create mode 100644 main/tests/charset/charset-tests-build.launch create mode 100644 main/tests/charset/manifest.stub create mode 100644 main/tests/charset/src/com/ibm/icu/dev/test/charset/TestAll.java create mode 100644 main/tests/charset/src/com/ibm/icu/dev/test/charset/TestCharset.java create mode 100644 main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java create mode 100644 main/tests/charset/src/com/ibm/icu/dev/test/charset/TestSelection.java create mode 100644 main/tests/collate/.classpath create mode 100644 main/tests/collate/.externalToolBuilders/copy-collate-test-data.launch create mode 100644 main/tests/collate/.project create mode 100644 main/tests/collate/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/tests/collate/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/tests/collate/build.properties create mode 100644 main/tests/collate/build.xml create mode 100644 main/tests/collate/collate-tests-build.launch create mode 100644 main/tests/collate/manifest.stub create mode 100644 main/tests/collate/src/com/ibm/icu/dev/data/CollationTest_NON_IGNORABLE_SHORT.txt create mode 100644 main/tests/collate/src/com/ibm/icu/dev/data/CollationTest_SHIFTED_SHORT.txt create mode 100644 main/tests/collate/src/com/ibm/icu/dev/data/riwords.txt create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/TestAllCollate.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationAPITest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationChineseTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationCreationMethodTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationCurrencyTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationDummyTest.java create mode 100644 
main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationEnglishTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationFinnishTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationFrenchTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationGermanTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationIteratorTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationKanaTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationMonkeyTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationRegressionTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationServiceTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationSpanishTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationThaiTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationThreadTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationTurkishTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/G7CollationTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/IndexCharactersTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/LotusCollationKoreanTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/TestAll.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/TestComparator.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/collator/UCAConformanceTest.java 
create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/format/RbnfLenientScannerTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/search/SearchTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/search/package.html create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/util/ICUResourceBundleCollationTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/util/LocaleAliasCollationTest.java create mode 100644 main/tests/collate/src/com/ibm/icu/dev/test/util/ULocaleCollationTest.java create mode 100644 main/tests/core/.classpath create mode 100644 main/tests/core/.externalToolBuilders/copy-test-data.launch create mode 100644 main/tests/core/.project create mode 100644 main/tests/core/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/tests/core/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/tests/core/build.properties create mode 100644 main/tests/core/build.xml create mode 100644 main/tests/core/core-tests-build.launch create mode 100644 main/tests/core/manifest.stub create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/IDNATestInput.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/TestDataElements_testtypes.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/rbbi/english.dict create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/rbbi/words.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements_en.properties create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements_en_Latn.properties create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements_en_Latn_US.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements_en_US.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements_fr_Latn_FR.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestDataElements_te.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/TestMessages.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/resources/testmessages.properties create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/CompositionExclusions.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/NormalizationCorrections.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/NormalizationTest-3.2.0.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/NormalizationTest.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/SpecialCasing.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/UnicodeData.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/data/unicode/ucdterms.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/TestAllCore.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/BidiTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestCharFromDirProp.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestClassOverride.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestCompatibility.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestData.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestFailureRecovery.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestInverse.java create mode 
100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestMultipleParagraphs.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestReorder.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestReorderRunsOnly.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestReorderingMode.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bidi/TestStreaming.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/bigdec/DiagBigDecimal.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/AstroTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegression.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/ChineseTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/ChineseTestCase.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/CompatibilityTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/CopticTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/DataDrivenCalendarTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/EthiopicTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/HebrewTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/HolidayTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/IndianTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/IslamicTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/JapaneseTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/TestCase.java 
create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/calendar/package.html create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/charsetdet/CharsetDetectionTests.xml create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/cldr/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/cldr/TestCLDRVsICU.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/compression/DecompressionTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/compression/ExhaustiveTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/compression/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/compression/package.html create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/DataReadWriteTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/ICUDurationTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/LanguageTestRoot.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/PeriodBuilderFactoryTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/PeriodBuilderTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/PeriodTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/RegressionTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/ResourceBasedPeriodFormatterDataServiceTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_ar_EG.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_en.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_es.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_fr.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_he_IL.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_hi.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_it.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_ja.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_ko.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_ru.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hans.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hans_SG.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hant.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hant_HK.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_ar_EG.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_en.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_es.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_fr.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_he_IL.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_hi.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_it.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_ja.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_ko.txt create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_ru.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_th.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hans.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hans_SG.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hant.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hant_HK.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/BigNumberFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DataDrivenFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatMiscTests.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatRegressionTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatRegressionTestJ.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatRoundTripTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateIntervalFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormat.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormatAPI.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormatAPIC.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormatSymbols.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatAPI.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatAPIC.java create mode 
100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbolsC.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestNumberFormat.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestNumberFormatAPI.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestSimpleDateFormatAPI.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/MessageRegression.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatRegistrationTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatRegressionTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatRoundTripTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatSerialTestData.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTestCases.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/NumberRegression.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/PluralFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/PluralFormatUnitTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/PluralRulesTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/RbnfRoundTripTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/RbnfTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/SelectFormatAPITest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/SelectFormatUnitTest.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/format/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/TestMessageFormat.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/TimeUnitTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/TimeZoneAliases.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/TimeZoneFormatTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/format/WriteNumberFormatSerialTestData.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/impl/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/iterator/TestUCharacterIterator.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/TestCharacter.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/TestUScript.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/TestUScriptRun.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCategoryTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterDirectionTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterSurrogateTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterThreadTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UPropertyAliasesTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UTF16Test.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetStringSpanTest.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/ConformanceTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/IntHashtable.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/IntStringHashtable.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/LongHashtable.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/NormalizationMonkeyTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/NormalizerBuilder.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/NormalizerData.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/NormalizerRegressionTests.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestCanonicalIterator.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/TestDeprecatedNormalizerAPI.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/UnicodeNormalizer.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/UnicodeNormalizerConformanceTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/normalizer/package.html create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRegTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/BreakIteratorTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBIAPITest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestExtended.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/package.html create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/CalendarTests.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/CompatibilityTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/CoverageTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/ExceptionTests.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/FormatTests.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableWriter.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.MathContext.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ArabicShapingException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormatSymbols.dat create 
mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.MessageFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.NumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.RuleBasedNumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.SimpleDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.StringPrepParseException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.BuddhistCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Calendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ChineseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.CopticCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Currency.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.EthiopicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.GregorianCalendar.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.HebrewCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.IslamicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.JapaneseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.SimpleTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.TimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ULocale.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.UResourceTypeMismatchException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.DateNumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.InvalidFormatException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.OlsonTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.RelativeDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.TimeZoneAdapter.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.duration.BasicDurationFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.math.BigDecimal.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.math.MathContext.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ArabicShapingException.dat create mode 
100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DecimalFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DecimalFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.MessageFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.MessageFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.NumberFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.NumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.PluralFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.PluralRules.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.RuleBasedNumberFormat.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.SimpleDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.StringPrepParseException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.AnnualTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.BuddhistCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.Calendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.ChineseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.CopticCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.Currency.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.DateTimeRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.EthiopicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.GregorianCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.HebrewCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.IndianCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.InitialTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.IslamicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.JapaneseCalendar.dat create mode 
100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.RuleBasedTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.SimpleTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TaiwanCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TimeArrayTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.ULocale.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.UResourceTypeMismatchException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.VTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.DateNumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.InvalidFormatException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.JavaTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.OlsonTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.RelativeDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.TimeZoneAdapter.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.duration.BasicDurationFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.math.BigDecimal.dat create mode 
100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.math.MathContext.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ArabicShapingException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalInfo.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DecimalFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DecimalFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.MessageFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.MessageFormat.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.NumberFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.NumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.PluralFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.PluralRules.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.RuleBasedNumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.SimpleDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.StringPrepParseException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.TimeUnitFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.AnnualTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.BuddhistCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.Calendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.ChineseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.CopticCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.Currency.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.DateInterval.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.DateTimeRule.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.EthiopicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.GregorianCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.HebrewCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.IndianCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.InitialTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.IslamicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.JapaneseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.RuleBasedTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.SimpleTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TaiwanCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TimeArrayTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.ULocale.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.UResourceTypeMismatchException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.VTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.DateNumberFormat.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.IllegalIcuArgumentException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.InvalidFormatException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.JavaTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.OlsonTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.RelativeDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.TimeZoneAdapter.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.duration.BasicDurationFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.locale.LocaleSyntaxException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.math.BigDecimal.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.math.MathContext.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ArabicShapingException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.CurrencyPluralInfo.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalInfo.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DecimalFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DecimalFormatSymbols.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.MessageFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.MessageFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.NumberFormat$Field.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.NumberFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.PluralFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.PluralRules.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.RuleBasedNumberFormat.dat create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.SimpleDateFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.StringPrepParseException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.TimeUnitFormat.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.AnnualTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.BuddhistCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.Calendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.ChineseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.CopticCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.Currency.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.DateInterval.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.DateTimeRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.EthiopicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.GregorianCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.HebrewCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IllformedLocaleException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IndianCalendar.dat create mode 
100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.InitialTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IslamicCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.JapaneseCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.RuleBasedTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.SimpleTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TaiwanCalendar.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TimeArrayTimeZoneRule.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.ULocale.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.UResourceTypeMismatchException.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.VTimeZone.dat create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/shaping/ArabicShapingRegTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/shaping/ArabicShapingTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/IDNAConformanceTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/IDNAReference.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/NamePrepTransform.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/stringprep/PunycodeReference.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestData.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestIDNARef.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestInputDataStructure.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/stringprep/TestStringPrepProfiles.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timescale/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timescale/TimeScaleAPITest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timescale/TimeScaleDataTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timescale/TimeScaleMonkeyTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TestAll.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TimeZoneBoundaryTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TimeZoneOffsetLocalTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TimeZoneRegression.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TimeZoneRuleTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/timezone/TimeZoneTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ArrayComparator.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/CalendarFieldsSet.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/CompactArrayTest.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/util/CurrencyTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/DateTimeStyleSet.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/DebugUtilities.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/DebugUtilitiesData.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/DebugUtilitiesTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/DisplayNameTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ElapsedTimer.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Equator.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/FieldsSet.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ICUBinaryTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ICUResourceBundleTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ICUServiceTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ICUServiceTestSample.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ICUServiceThreadTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/LocaleAliasTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/LocaleDataTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/LocalePriorityListTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Relation.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/SortedBag.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/StringTokenizerTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/TestAll.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/util/TestData_en.jpp create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/TestDefaultPackageLoading.jpp create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/TextTrieMapTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges1.16.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges1.32.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges2.16.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges2.32.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges3.16.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges3.32.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesEmpty.16.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesEmpty.32.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesSingleValue.16.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesSingleValue.32.tri2 create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/TrieTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/UnicodePropertySource.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/UtilityTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/VariableReplacer.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/VersionInfoTest.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/XEquivalenceClass.java create mode 100644 main/tests/core/src/com/ibm/icu/dev/test/util/XEquivalenceMap.java create mode 100644 
main/tests/core/src/com/ibm/icu/dev/test/util/manifest.cldrutil.stub create mode 100644 main/tests/framework/.classpath create mode 100644 main/tests/framework/.project create mode 100644 main/tests/framework/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/tests/framework/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/tests/framework/build.properties create mode 100644 main/tests/framework/build.xml create mode 100644 main/tests/framework/manifest.stub create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/AbstractTestLog.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/ModuleTest.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/ResourceModule.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestAll.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestBoilerplate.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestDataModule.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestFmwk.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestLog.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestLogWriter.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/TestUtil.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/UTF16Util.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/manifest.test.stub create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/package.html create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/sample/ModuleTestSample.java create mode 100644 main/tests/framework/src/com/ibm/icu/dev/test/sample/ModuleTestSampleData.java create mode 100644 main/tests/framework/test-framework-build.launch create mode 100644 main/tests/localespi/.classpath create mode 100644 main/tests/localespi/.project create mode 100644 main/tests/localespi/.settings/org.eclipse.jdt.core.prefs create mode 
100644 main/tests/localespi/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/tests/localespi/build.properties create mode 100644 main/tests/localespi/build.xml create mode 100644 main/tests/localespi/localespi-tests-build.launch create mode 100644 main/tests/localespi/manifest.stub create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/BreakIteratorTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/CollatorTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/CurrencyNameTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/DateFormatSymbolsTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/DateFormatTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/DecimalFormatSymbolsTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/LocaleNameTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/NumberFormatTest.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/TestAll.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/TestUtil.java create mode 100644 main/tests/localespi/src/com/ibm/icu/dev/test/localespi/TimeZoneNameTest.java create mode 100644 main/tests/packaging/.classpath create mode 100644 main/tests/packaging/.externalToolBuilders/copy-test-data.launch create mode 100644 main/tests/packaging/.project create mode 100644 main/tests/packaging/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/tests/packaging/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/tests/packaging/build.properties create mode 100644 main/tests/packaging/build.xml create mode 100644 main/tests/packaging/manifest.stub create mode 100644 main/tests/packaging/packaging-tests-build.launch create mode 100644 
main/tests/packaging/src/com/ibm/icu/dev/test/TestLocaleNamePackaging.java create mode 100644 main/tests/packaging/src/com/ibm/icu/dev/test/TestPackaging.java create mode 100644 main/tests/translit/.classpath create mode 100644 main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch create mode 100644 main/tests/translit/.project create mode 100644 main/tests/translit/.settings/org.eclipse.jdt.core.prefs create mode 100644 main/tests/translit/.settings/org.eclipse.jdt.ui.prefs create mode 100644 main/tests/translit/build.properties create mode 100644 main/tests/translit/build.xml create mode 100644 main/tests/translit/manifest.stub create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/TestAllTranslit.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/AnyScriptTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/CompoundTransliteratorTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/ErrorTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/JamoTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/PrettyPrinterTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/RegexUtilitiesTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/ReplaceableTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/RoundTripTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/TestAll.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUtility.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/UnicodeMapTest.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/WriteCharts.java create mode 100644 
main/tests/translit/src/com/ibm/icu/dev/test/translit/langtagRegex.txt create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/translit/package.html create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/BNF.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/BagFormatter.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/CaseIterator.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/CollectionUtilities.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/DataInputCompressor.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/DataOutputCompressor.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/FileUtilities.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/ImmutableEntry.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/Pick.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/PrettyPrinter.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/Quoter.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/Tabber.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/TestBNF.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/TestBagFormatter.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/TestUtilities.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/Tokenizer.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/TransliteratorUtilities.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeLabel.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeMap.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeMapIterator.java 
create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeProperty.java create mode 100644 main/tests/translit/src/com/ibm/icu/dev/test/util/Visitor.java create mode 100644 main/tests/translit/translit-tests-build.launch create mode 100644 perf-tests/Dataset.pm create mode 100755 perf-tests/collationperf.pl create mode 100755 perf-tests/converterperf.pl create mode 100644 perf-tests/data/collation/TestNames_Asian.txt create mode 100644 perf-tests/data/collation/TestNames_Chinese.txt create mode 100644 perf-tests/data/collation/TestNames_Japanese.txt create mode 100644 perf-tests/data/collation/TestNames_Japanese_h.txt create mode 100644 perf-tests/data/collation/TestNames_Japanese_k.txt create mode 100644 perf-tests/data/collation/TestNames_Korean.txt create mode 100644 perf-tests/data/collation/TestNames_Latin.txt create mode 100644 perf-tests/data/collation/TestNames_Russian.txt create mode 100644 perf-tests/data/collation/TestNames_SerbianSH.txt create mode 100644 perf-tests/data/collation/TestNames_SerbianSR.txt create mode 100644 perf-tests/data/collation/TestNames_Simplified_Chinese.txt create mode 100644 perf-tests/data/collation/TestNames_Thai.txt create mode 100644 perf-tests/data/conversion/arabic.txt create mode 100644 perf-tests/data/conversion/english.txt create mode 100644 perf-tests/data/conversion/french.txt create mode 100644 perf-tests/data/conversion/greek.txt create mode 100644 perf-tests/data/conversion/hebrew.txt create mode 100644 perf-tests/data/conversion/hindi.txt create mode 100644 perf-tests/data/conversion/japanese.txt create mode 100644 perf-tests/data/conversion/korean.txt create mode 100644 perf-tests/data/conversion/s-chinese.txt create mode 100755 perf-tests/dateformatperf.pl create mode 100755 perf-tests/decimalformatperf.pl create mode 100755 perf-tests/normalizationperf_r_b.pl create mode 100755 perf-tests/normalizationperf_r_l.pl create mode 100755 perf-tests/normperf.pl create mode 100644 
perf-tests/perldriver/Dataset.pm create mode 100644 perf-tests/perldriver/Format.pm create mode 100644 perf-tests/perldriver/Output.pm create mode 100644 perf-tests/perldriver/PerfFramework4j.pm create mode 100755 perf-tests/rbbiperf_r.pl create mode 100755 perf-tests/resourcebundleperf.pl create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/BreakIteratorPerformanceTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/CollationPerformanceTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/ConverterPerformanceTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/DateFormatPerformanceTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/DecimalFormatPerformanceTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/NormalizerPerformanceTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/PerfTest.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/RBBIPerf.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/ResourceBundlePerf.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/UCharacterPerf.java create mode 100644 perf-tests/src/com/ibm/icu/dev/test/perf/UnicodeSetPerf.java create mode 100755 perf-tests/ucharacterperf.pl create mode 100755 perf-tests/ucharacterperf_r.pl create mode 100755 perf-tests/unicodesetperf.pl create mode 100755 perf-tests/unicodesetperf_r.pl create mode 100644 readme.html create mode 100644 tools/build/.classpath create mode 100644 tools/build/.project create mode 100644 tools/build/.settings/org.eclipse.jdt.core.prefs create mode 100644 tools/build/.settings/org.eclipse.jdt.ui.prefs create mode 100644 tools/build/README.txt create mode 100644 tools/build/build-tools-build.launch create mode 100644 tools/build/build.properties create mode 100644 tools/build/build.xml create mode 100644 tools/build/icu4j28.api.gz create mode 100644 tools/build/icu4j30.api.gz create mode 100644 
tools/build/icu4j32.api.gz create mode 100644 tools/build/icu4j34.api.gz create mode 100644 tools/build/icu4j341.api.gz create mode 100644 tools/build/icu4j342.api.gz create mode 100644 tools/build/icu4j343.api.gz create mode 100644 tools/build/icu4j36.api.gz create mode 100644 tools/build/icu4j38.api.gz create mode 100644 tools/build/icu4j381.api.gz create mode 100644 tools/build/icu4j400.api.gz create mode 100644 tools/build/icu4j401.api.gz create mode 100644 tools/build/icu4j42.api.gz create mode 100644 tools/build/icu4j421.api.gz create mode 100644 tools/build/manifest.stub create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/APIData.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/APIInfo.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/CheckAPI.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/CheckTags.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/CodeMangler.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/Deprecator.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/GatherAPIData.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/ICUJDKCompare.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/ICUTaglet.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/ReportAPI.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/docs/SwatDeprecated.java create mode 100644 tools/build/src/com/ibm/icu/dev/tool/index/IndexGenerator.java create mode 100644 tools/misc/.classpath create mode 100644 tools/misc/.project create mode 100644 tools/misc/.settings/org.eclipse.jdt.core.prefs create mode 100644 tools/misc/.settings/org.eclipse.jdt.ui.prefs create mode 100644 tools/misc/build.properties create mode 100644 tools/misc/build.xml create mode 100644 tools/misc/manifest.stub create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/UOption.java create mode 100644 
tools/misc/src/com/ibm/icu/dev/tool/charsetdet/mbcs/BIG5Tool.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/charsetdet/mbcs/EUCTool.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/charsetdet/sbcs/Checker.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/charsetdet/sbcs/InputFile.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/charsetdet/sbcs/NGramList.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/charsetdet/sbcs/NGramParser.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/charsetdet/sbcs/StatisticsTool.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/cldr/CheckSystemFonts.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/compression/CompressionTableGenerator.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/compression/package.html create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/IMETest.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/BengaliInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/DevanagariInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/DisplayNames.properties create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/GujaratiInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/GurmukhiInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/IndicIMDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/IndicInputMethod.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/IndicInputMethodImpl.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/KannadaInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/MalayalamInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/OriyaInputMethodDescriptor.java create mode 100644 
tools/misc/src/com/ibm/icu/dev/tool/ime/indic/TamilInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/TeluguInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/manifest.stub create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/indic/services/java.awt.im.spi.InputMethodDescriptor create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/translit/Transliterator.properties create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/translit/TransliteratorInputMethod.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/translit/TransliteratorInputMethodDescriptor.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/translit/manifest.stub create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/ime/translit/services/java.awt.im.spi.InputMethodDescriptor create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ArabicCharacterData.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ArabicShaping.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/BuildMirroringTables.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/CanonGSUBBuilder.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/CanonicalCharacterData.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ClassTable.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/DecompTable.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/Feature.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/FeatureList.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/GDEFWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/GSUBWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/LanguageData.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/LigatureEntry.java create mode 100644 
tools/misc/src/com/ibm/icu/dev/tool/layout/LigatureModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/LigatureTree.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/LigatureTreeWalker.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/Lookup.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/LookupList.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/LookupSubtable.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/OpenTypeTableWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/OpenTypeTagBuilder.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptAndLanguages create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptData.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptIDModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptList.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptNameBuilder.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptRunModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ScriptTagModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ShapingTypeBuilder.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/TagUtilities.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/TagValueData.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/TaggedRecord.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ThaiCharacterClasses.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ThaiStateTable.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ThaiStateTableBuilder.java create mode 100644 
tools/misc/src/com/ibm/icu/dev/tool/layout/ThaiStateTableModuleWriter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/ThaiStateTransition.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/layout/TreeWalker.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/localeconverter/manifest.stub create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/rbbi/BuildDictionaryFile.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/rbbi/readme.html create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/serializable/SerializableChecker.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/timescale/CalculateLimits.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/timescale/EpochOffsets.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/timescale/GenerateCTimeScaleData.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/timezone/ICUZDump.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/SourceSet.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/Trans.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/UnicodeSetCloseOver.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/UnicodeSetClosure.java create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/WriteIndicCharts.java create mode 100755 tools/misc/src/com/ibm/icu/dev/tool/translit/dumpICUrules.bat create mode 100755 tools/misc/src/com/ibm/icu/dev/tool/translit/genIndexFilters.bat create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/genIndexFilters.java create mode 100755 tools/misc/src/com/ibm/icu/dev/tool/translit/indic.bat create mode 100755 tools/misc/src/com/ibm/icu/dev/tool/translit/indic.pl create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/indicExceptions.txt 
create mode 100644 tools/misc/src/com/ibm/icu/dev/tool/translit/rbtTemplate.txt create mode 100755 tools/misc/src/com/ibm/icu/dev/tool/translit/varsub.bat create mode 100644 tools/misc/tools-build.launch diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..f0dc857c4b8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,357 @@ +* text=auto !eol + +*.c text !eol +*.cc text !eol +*.classpath text !eol +*.cpp text !eol +*.css text !eol +*.dsp text !eol +*.dsw text !eol +*.filters text !eol +*.h text !eol +*.htm text !eol +*.html text !eol +*.in text !eol +*.java text !eol +*.launch text !eol +*.mak text !eol +*.md text !eol +*.MF text !eol +*.mk text !eol +*.pl text !eol +*.pm text !eol +*.project text !eol +*.properties text !eol +*.py text !eol +*.rc text !eol +*.sh text eol=lf +*.sln text !eol +*.stub text !eol +*.txt text !eol +*.ucm text !eol +*.vcproj text !eol +*.vcxproj text !eol +*.xml text !eol +*.xsl text !eol +*.xslt text !eol +Makefile text !eol +configure text !eol +LICENSE text !eol +README text !eol + +*.bin -text +*.brk -text +*.cnv -text +*.icu -text +*.res -text +*.nrm -text +*.spp -text +*.tri2 -text + +/build.properties -text +demos/manifest.stub -text +main/classes/charset/.classpath -text +main/classes/charset/.project -text +main/classes/charset/.settings/org.eclipse.jdt.core.prefs -text +main/classes/charset/manifest.stub -text +main/classes/collate/.classpath -text +main/classes/collate/.project -text +main/classes/collate/.settings/org.eclipse.jdt.core.prefs -text +main/classes/collate/.settings/org.eclipse.jdt.ui.prefs -text +main/classes/collate/collate-build.launch -text +main/classes/core/.classpath -text +main/classes/core/.project -text +main/classes/core/.settings/org.eclipse.jdt.core.prefs -text +main/classes/core/manifest.stub -text +main/classes/core/src/com/ibm/icu/impl/BMPSet.java -text +main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java -text 
+main/classes/currdata/.externalToolBuilders/copy-data-currdata.launch -text +main/classes/currdata/.settings/org.eclipse.jdt.core.prefs -text +main/classes/currdata/.settings/org.eclipse.jdt.ui.prefs -text +main/classes/currdata/currdata-build.launch -text +main/classes/langdata/.externalToolBuilders/copy-data-langdata.launch -text +main/classes/langdata/.settings/org.eclipse.jdt.core.prefs -text +main/classes/langdata/.settings/org.eclipse.jdt.ui.prefs -text +main/classes/langdata/langdata-build.launch -text +main/classes/localespi/.classpath -text +main/classes/localespi/.project -text +main/classes/localespi/.settings/org.eclipse.jdt.core.prefs -text +main/classes/localespi/manifest.stub -text +main/classes/localespi/src/META-INF/services/java.text.spi.BreakIteratorProvider -text +main/classes/localespi/src/META-INF/services/java.text.spi.CollatorProvider -text +main/classes/localespi/src/META-INF/services/java.text.spi.DateFormatProvider -text +main/classes/localespi/src/META-INF/services/java.text.spi.DateFormatSymbolsProvider -text +main/classes/localespi/src/META-INF/services/java.text.spi.DecimalFormatSymbolsProvider -text +main/classes/localespi/src/META-INF/services/java.text.spi.NumberFormatProvider -text +main/classes/localespi/src/META-INF/services/java.util.spi.CurrencyNameProvider -text +main/classes/localespi/src/META-INF/services/java.util.spi.LocaleNameProvider -text +main/classes/localespi/src/META-INF/services/java.util.spi.TimeZoneNameProvider -text +main/classes/localespi/src/com/ibm/icu/impl/javaspi/ICULocaleServiceProviderConfig.properties -text +main/classes/regiondata/.externalToolBuilders/copy-data-regiondata.launch -text +main/classes/regiondata/.settings/org.eclipse.jdt.core.prefs -text +main/classes/regiondata/.settings/org.eclipse.jdt.ui.prefs -text +main/classes/regiondata/regiondata-build.launch -text +main/classes/translit/.externalToolBuilders/copy-data-translit.launch -text 
+main/classes/translit/.settings/org.eclipse.jdt.core.prefs -text +main/classes/translit/.settings/org.eclipse.jdt.ui.prefs -text +main/classes/translit/translit-build.launch -text +main/shared/.project -text +main/shared/data/icudata.jar -text +main/shared/data/testdata.jar -text +main/tests/charset/.classpath -text +main/tests/charset/.project -text +main/tests/charset/.settings/org.eclipse.jdt.core.prefs -text +main/tests/charset/manifest.stub -text +main/tests/collate/.classpath -text +main/tests/collate/.project -text +main/tests/collate/.settings/org.eclipse.jdt.core.prefs -text +main/tests/collate/.settings/org.eclipse.jdt.ui.prefs -text +main/tests/collate/collate-tests-build.launch -text +main/tests/core/.classpath -text +main/tests/core/.project -text +main/tests/core/.settings/org.eclipse.jdt.core.prefs -text +main/tests/core/manifest.stub -text +main/tests/core/src/com/ibm/icu/dev/data/rbbi/english.dict -text +main/tests/core/src/com/ibm/icu/dev/data/resources/testmessages.properties -text +main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs -text +main/tests/core/src/com/ibm/icu/dev/test/lang/UnicodeSetStringSpanTest.java -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.MathContext.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ArabicShapingException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.MessageFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.NumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.RuleBasedNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.SimpleDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.StringPrepParseException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.BuddhistCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Calendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ChineseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.CopticCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Currency.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.EthiopicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.GregorianCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.HebrewCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.IslamicCalendar.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.JapaneseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.SimpleTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.TimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ULocale.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.UResourceTypeMismatchException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.DateNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.InvalidFormatException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.OlsonTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.RelativeDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.TimeZoneAdapter.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.math.BigDecimal.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.math.MathContext.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ArabicShapingException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DecimalFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DecimalFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.MessageFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.MessageFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.NumberFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.NumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.PluralFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.PluralRules.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.RuleBasedNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.SimpleDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.StringPrepParseException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.AnnualTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.BuddhistCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.Calendar.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.ChineseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.CopticCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.Currency.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.DateTimeRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.EthiopicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.GregorianCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.HebrewCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.IndianCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.InitialTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.IslamicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.JapaneseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.RuleBasedTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.SimpleTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TaiwanCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.ULocale.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.UResourceTypeMismatchException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.VTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.DateNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.InvalidFormatException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.JavaTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.OlsonTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.RelativeDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.TimeZoneAdapter.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.math.BigDecimal.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.math.MathContext.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ArabicShapingException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormat.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalInfo.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DecimalFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DecimalFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.MessageFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.MessageFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.NumberFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.NumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.PluralFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.PluralRules.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.RuleBasedNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.SimpleDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.StringPrepParseException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.TimeUnitFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.AnnualTimeZoneRule.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.BuddhistCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.Calendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.ChineseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.CopticCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.Currency.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.DateInterval.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.DateTimeRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.EthiopicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.GregorianCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.HebrewCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.IndianCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.InitialTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.IslamicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.JapaneseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.RuleBasedTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.SimpleTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TaiwanCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.ULocale.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.UResourceTypeMismatchException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.VTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.DateNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.IllegalIcuArgumentException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.InvalidFormatException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.JavaTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.OlsonTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.RelativeDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.TimeZoneAdapter.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.locale.LocaleSyntaxException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.math.BigDecimal.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.math.MathContext.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ArabicShapingException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormat$Field.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.CurrencyPluralInfo.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalInfo.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DecimalFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DecimalFormatSymbols.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.MessageFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.MessageFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.NumberFormat$Field.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.NumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.PluralFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.PluralRules.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.RuleBasedNumberFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.SimpleDateFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.StringPrepParseException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.TimeUnitFormat.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.AnnualTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.BuddhistCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.Calendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.ChineseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.CopticCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.Currency.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.DateInterval.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.DateTimeRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.EthiopicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.GregorianCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.HebrewCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IllformedLocaleException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IndianCalendar.dat -text 
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.InitialTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IslamicCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.JapaneseCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.RuleBasedTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.SimpleTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TaiwanCalendar.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.ULocale.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.UResourceTypeMismatchException.dat -text +main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.VTimeZone.dat -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges1.16.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges1.32.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges2.16.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges2.32.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges3.16.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges3.32.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesEmpty.16.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesEmpty.32.tri2 -text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesSingleValue.16.tri2 
-text +main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesSingleValue.32.tri2 -text +main/tests/framework/.classpath -text +main/tests/framework/.project -text +main/tests/framework/.settings/org.eclipse.jdt.core.prefs -text +main/tests/framework/manifest.stub -text +main/tests/localespi/.classpath -text +main/tests/localespi/.project -text +main/tests/localespi/manifest.stub -text +main/tests/packaging/.externalToolBuilders/copy-test-data.launch -text +main/tests/packaging/.settings/org.eclipse.jdt.core.prefs -text +main/tests/packaging/.settings/org.eclipse.jdt.ui.prefs -text +main/tests/packaging/packaging-tests-build.launch -text +main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch -text +main/tests/translit/.settings/org.eclipse.jdt.core.prefs -text +main/tests/translit/.settings/org.eclipse.jdt.ui.prefs -text +main/tests/translit/translit-tests-build.launch -text +tools/build/icu4j28.api.gz -text +tools/build/icu4j30.api.gz -text +tools/build/icu4j32.api.gz -text +tools/build/icu4j34.api.gz -text +tools/build/icu4j341.api.gz -text +tools/build/icu4j342.api.gz -text +tools/build/icu4j343.api.gz -text +tools/build/icu4j36.api.gz -text +tools/build/icu4j38.api.gz -text +tools/build/icu4j381.api.gz -text +tools/build/icu4j400.api.gz -text +tools/build/icu4j401.api.gz -text +tools/build/icu4j42.api.gz -text +tools/build/icu4j421.api.gz -text +tools/build/manifest.stub -text +tools/misc/manifest.stub -text + +# The following file types are stored in Git-LFS. 
+*.jar filter=lfs diff=lfs merge=lfs -text +*.dat filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text + diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..110959588d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +/*.jar +/.project +demos/out +/doc +main/classes/charset/out +main/classes/collate/out +main/classes/core/out +main/classes/currdata/out +main/classes/langdata/out +main/classes/localespi/out +main/classes/regiondata/out +main/classes/translit/out +main/tests/charset/out +main/tests/collate/out +main/tests/core/out +main/tests/framework/out +main/tests/localespi/out +main/tests/packaging/out +main/tests/translit/out +/out +tools/build/out +tools/misc/out diff --git a/APIChangeReport.html b/APIChangeReport.html new file mode 100644 index 00000000000..ee0396d59d7 --- /dev/null +++ b/APIChangeReport.html @@ -0,0 +1,591 @@ + + + + +ICU4J API Comparison: ICU4J 4.2.1 with ICU4J 4.4 + + + +

ICU4J API Comparison: ICU4J 4.2.1 with ICU4J 4.4

+ +
+

Removed from ICU4J 4.2.1

+ +

Package com.ibm.icu.charset

+ + +

Package com.ibm.icu.math

+ + +

Package com.ibm.icu.text

+ + +

Package com.ibm.icu.util

+ + + +
+

Withdrawn, Deprecated, or Obsoleted in ICU4J 4.4

+ +

Package com.ibm.icu.text

+ + +

Package com.ibm.icu.util

+ + + +
+

Changed in ICU4J 4.4 (old, new)

+ +

Package com.ibm.icu.charset

+ + +

Package com.ibm.icu.text

+ + +

Package com.ibm.icu.util

+ + + +
+

Promoted to stable in ICU4J 4.4

+ +

Package com.ibm.icu.charset

+ + +

Package com.ibm.icu.text

+ + +

Package com.ibm.icu.util

+ + + +
+

Added in ICU4J 4.4

+ +

Package com.ibm.icu.charset

+ + +

Package com.ibm.icu.lang

+ + +

Package com.ibm.icu.text

+ + +

Package com.ibm.icu.util

+ + +
+

Contents generated by ReportAPI tool on Wed Mar 03 00:10:36 EST 2010
Copyright (C) 2010, International Business Machines Corporation, All Rights Reserved.

+ + diff --git a/build.properties b/build.properties new file mode 100644 index 00000000000..c03706c8596 --- /dev/null +++ b/build.properties @@ -0,0 +1,6 @@ +#******************************************************************************* +#* Copyright (C) 2009-2010, International Business Machines Corporation and * +#* others. All Rights Reserved. * +#******************************************************************************* +api.report.version = 44 +api.report.prev.version = 421 diff --git a/build.xml b/build.xml new file mode 100644 index 00000000000..82dbb710dfc --- /dev/null +++ b/build.xml @@ -0,0 +1,1330 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/demos/.classpath b/demos/.classpath new file mode 100644 index 00000000000..c13960c4471 --- /dev/null +++ b/demos/.classpath @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/demos/.project b/demos/.project new file mode 100644 index 00000000000..69e0c1ba71f --- /dev/null +++ b/demos/.project @@ -0,0 +1,20 @@ + + + icu4j-demos + + + icu4j-charset + icu4j-core + icu4j-shared + + + + org.eclipse.jdt.core.javabuilder + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/demos/.settings/org.eclipse.jdt.core.prefs b/demos/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000000..1c6961a58bd --- /dev/null +++ b/demos/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,330 @@ +#Fri Aug 28 16:05:27 EDT 2009 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.autoboxing=ignore +org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning +org.eclipse.jdt.core.compiler.problem.deadCode=ignore +org.eclipse.jdt.core.compiler.problem.deprecation=ignore +org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled +org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled +org.eclipse.jdt.core.compiler.problem.discouragedReference=warning +org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore 
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled +org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore +org.eclipse.jdt.core.compiler.problem.finalParameterBound=ignore +org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning +org.eclipse.jdt.core.compiler.problem.forbiddenReference=error +org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning +org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning +org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore +org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore +org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore +org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning +org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore +org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning +org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore +org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning +org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning +org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore +org.eclipse.jdt.core.compiler.problem.nullReference=warning +org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning +org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore +org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore +org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore +org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore +org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore +org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore +org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled 
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning +org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled +org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore +org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning +org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore +org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore +org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning +org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore +org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore +org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled +org.eclipse.jdt.core.compiler.problem.unusedImport=warning +org.eclipse.jdt.core.compiler.problem.unusedLabel=warning +org.eclipse.jdt.core.compiler.problem.unusedLocal=warning +org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore +org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled +org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=ignore +org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning +org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning +org.eclipse.jdt.core.compiler.source=1.5 +org.eclipse.jdt.core.formatter.align_type_members_on_columns=false +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_assignment=0 +org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 +org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 +org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 +org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 +org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 +org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_after_package=1 +org.eclipse.jdt.core.formatter.blank_lines_before_field=0 +org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 +org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 +org.eclipse.jdt.core.formatter.blank_lines_before_method=1 +org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 
+org.eclipse.jdt.core.formatter.blank_lines_before_package=0 +org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 +org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 +org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false +org.eclipse.jdt.core.formatter.comment.format_block_comments=true +org.eclipse.jdt.core.formatter.comment.format_header=false +org.eclipse.jdt.core.formatter.comment.format_html=true +org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true +org.eclipse.jdt.core.formatter.comment.format_line_comments=true +org.eclipse.jdt.core.formatter.comment.format_source_code=true +org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true +org.eclipse.jdt.core.formatter.comment.indent_root_tags=true +org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert +org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert +org.eclipse.jdt.core.formatter.comment.line_length=120 
+org.eclipse.jdt.core.formatter.compact_else_if=true +org.eclipse.jdt.core.formatter.continuation_indentation=2 +org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 +org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true +org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_empty_lines=false +org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true +org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false +org.eclipse.jdt.core.formatter.indentation.size=4 +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert 
+org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do 
not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert 
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert +org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert +org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert +org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false +org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false +org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false +org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false +org.eclipse.jdt.core.formatter.lineSplit=120 +org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false +org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false +org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 +org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 +org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true 
+org.eclipse.jdt.core.formatter.tabulation.char=space +org.eclipse.jdt.core.formatter.tabulation.size=4 +org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false +org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true diff --git a/demos/.settings/org.eclipse.jdt.ui.prefs b/demos/.settings/org.eclipse.jdt.ui.prefs new file mode 100644 index 00000000000..abf9d8707a9 --- /dev/null +++ b/demos/.settings/org.eclipse.jdt.ui.prefs @@ -0,0 +1,6 @@ +#Tue Jun 09 16:57:19 EDT 2009 +eclipse.preferences.version=1 +formatter_profile=_ICU4J Standard +formatter_settings_version=11 +org.eclipse.jdt.ui.javadoc=false +org.eclipse.jdt.ui.text.custom_code_templates= diff --git a/demos/build.properties b/demos/build.properties new file mode 100644 index 00000000000..f374881842f --- /dev/null +++ b/demos/build.properties @@ -0,0 +1,5 @@ +#******************************************************************************* +#* Copyright (C) 2009, International Business Machines Corporation and * +#* others. All Rights Reserved. 
* +#******************************************************************************* +shared.dir = ../main/shared diff --git a/demos/build.xml b/demos/build.xml new file mode 100644 index 00000000000..3187e793838 --- /dev/null +++ b/demos/build.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/demos/demos-build.launch b/demos/demos-build.launch new file mode 100644 index 00000000000..cead29ec957 --- /dev/null +++ b/demos/demos-build.launch @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/demos/manifest.stub b/demos/manifest.stub new file mode 100644 index 00000000000..5180077ab25 --- /dev/null +++ b/demos/manifest.stub @@ -0,0 +1,13 @@ +Manifest-Version: 1.0 +Main-Class: com.ibm.icu.dev.demo.Launcher +Class-Path: icu4j.jar + +Name: com/ibm/icu/dev/demo +Specification-Title: ICU for Java Demo +Specification-Version: @SPECVERSION@ +Specification-Vendor: ICU +Implementation-Title: ICU for Java Demo +Implementation-Version: @IMPLVERSION@ +Implementation-Vendor: IBM Corporation +Implementation-Vendor-Id: com.ibm +Copyright-Info: @COPYRIGHT@ diff --git a/demos/src/com/ibm/icu/dev/demo/Launcher.java b/demos/src/com/ibm/icu/dev/demo/Launcher.java new file mode 100644 index 00000000000..2d8804453ee --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/Launcher.java @@ -0,0 +1,192 @@ +/* + ******************************************************************************* + * Copyright (C) 2007, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.dev.demo; + +import java.awt.BorderLayout; +import java.awt.Button; +import java.awt.Color; +import java.awt.Frame; +import java.awt.GridLayout; +import java.awt.Label; +import java.awt.Panel; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + +import com.ibm.icu.dev.demo.impl.DemoApplet; +import com.ibm.icu.dev.demo.impl.DemoUtility; +import com.ibm.icu.util.VersionInfo; + + +/** + * @author srl + * Application to provide a panel of demos to launch + */ +public class Launcher extends DemoApplet { + private static final long serialVersionUID = -8054963875776183877L; + + /** + * base package of all demos + */ + public static final String demoBase = "com.ibm.icu.dev.demo"; + /** + * list of classes, relative to the demoBase. all must have a static void main(String[]) + */ + public static final String demoList[] = { + "calendar.CalendarApp", + "charsetdet.DetectingViewer", + "holiday.HolidayCalendarDemo", +// "number.CurrencyDemo", -- console +// "rbbi.DBBIDemo", +// "rbbi.RBBIDemo", +// "rbbi.TextBoundDemo", + "rbnf.RbnfDemo", +// "timescale.PivotDemo", -- console + "translit.Demo", + }; + + public class LauncherFrame extends Frame implements ActionListener { + private static final long serialVersionUID = -8054963875776183878L; + + public Button buttonList[] = new Button[demoList.length]; // one button for each demo + public Label statusLabel; + private DemoApplet applet; + + LauncherFrame(DemoApplet applet) { + init(); + this.applet = applet; + } + + public void init() { + // close down when close is clicked. + // TODO: this should be factored.. 
+ addWindowListener( + new WindowAdapter() { + public void windowClosing(WindowEvent e) { + setVisible(false); + dispose(); + + if (applet != null) { + applet.demoClosed(); + } else System.exit(0); + } + } ); + + setBackground(DemoUtility.bgColor); + setLayout(new BorderLayout()); + + Panel topPanel = new Panel(); + topPanel.setLayout(new GridLayout(5,3)); + + for(int i=0;i 0 && locales[i].getLanguage().equals(locales[i-1].getLanguage()) || + i < locales.length - 1 && + locales[i].getLanguage().equals(locales[i+1].getLanguage())) + { + localeMenu.addItem( locales[i].getDisplayName() ); + } else { + localeMenu.addItem( locales[i].getDisplayLanguage()); + } + + thisMatch = DemoUtility.compareLocales(locales[i], defaultLocale); + + if (thisMatch >= bestMatch) { + bestMatch = thisMatch; + selectMe = i; + } + } + + localeMenu.setBackground(DemoUtility.choiceColor); + localeMenu.select(selectMe); + + Label localeLabel =new Label("Display Locale"); + localeLabel.setFont(DemoUtility.labelFont); + + localePanel.add(localeLabel); + localePanel.add(localeMenu); + DemoUtility.fixGrid(localePanel,2); + + localeMenu.addItemListener( new ItemListener() { + public void itemStateChanged(ItemEvent e) { + Locale loc = locales[localeMenu.getSelectedIndex()]; + System.out.println("Change locale to " + loc.getDisplayName()); + + for (int i = 0; i < calendars.length; i++) { + calendars[i].setLocale(loc); + } + millisFormat(); + } + } ); + } + add(rollAddPanel); + add(DemoUtility.createSpacer()); + add(localePanel); + add(DemoUtility.createSpacer()); + +// COPYRIGHT + Panel copyrightPanel = new Panel(); + addWithFont (copyrightPanel,new Label(DemoUtility.copyright1, Label.LEFT), + DemoUtility.creditFont); + DemoUtility.fixGrid(copyrightPanel,1); + add(copyrightPanel); + } + + /** + * This function is called when users change the pattern text. 
+ */ + public void setFormatFromPattern() { + String timePattern = patternText.getText(); + + for (int i = 0; i < calendars.length; i++) { + calendars[i].applyPattern(timePattern); + } + + millisFormat(); + } + + /** + * This function is called when it is necessary to parse the time + * string in one of the formatted date fields + */ + public void textChanged(int index) { + String rightString = calendars[index].text.getText(); + + ParsePosition status = new ParsePosition(0); + + if (rightString.length() == 0) + { + errorText("Error: no input to parse!"); + return; + } + + try { + Date date = calendars[index].format.parse(rightString, status); + time = date.getTime(); + } + catch (Exception e) { + for (int i = 0; i < calendars.length; i++) { + if (i != index) { + calendars[i].text.setText("ERROR"); + } + } + errorText("Exception: " + e.getClass().toString() + " parsing: "+rightString); + return; + } + + int start = calendars[index].text.getSelectionStart(); + int end = calendars[index].text.getSelectionEnd(); + + millisFormat(); + + calendars[index].text.select(start,end); + } + + /** + * This function is called when it is necessary to format the time + * in the "Millis" text field. + */ + public void millisFormat() { + String out = ""; + + for (int i = 0; i < calendars.length; i++) { + try { + out = calendars[i].format.format(new Date(time)); + calendars[i].text.setText(out); + } + catch (Exception e) { + calendars[i].text.setText("ERROR"); + errorText("Exception: " + e.getClass().toString() + " formatting " + + calendars[i].name + " " + time); + } + } + } + + + /** + * This function is called when users change the pattern text. + */ + public void patternTextChanged() { + setFormatFromPattern(); + } + + /** + * This function is called when users select a new representative city. 
+ */ + public void cityChanged() { + TimeZone timeZone = TimeZone.getDefault(); + + for (int i = 0; i < calendars.length; i++) { + calendars[i].format.setTimeZone(timeZone); + } + millisFormat(); + } + + /** + * This function is called when users select a new time field + * to add or roll its value. + */ + public void dateFieldChanged(boolean isUp) { + int field = kRollAddFields[dateMenu.getSelectedIndex()].field; + + for (int i = 0; i < calendars.length; i++) + { + if (calendars[i].rollAdd.getState()) + { + Calendar c = calendars[i].calendar; + c.setTime(new Date(time)); + + if (getAdd.getState()) { + c.add(field, isUp ? 1 : -1); + } else { + c.roll(field, isUp); + } + + time = c.getTime().getTime(); + millisFormat(); + break; + } + } + } + + /** + * Print out the error message while debugging this program. + */ + public void errorText(String s) + { + if (true) { + System.out.println(s); + } + } + + /** + * Called if an action occurs in the CalendarCalcFrame object. + */ + public void actionPerformed(ActionEvent evt) + { + // *** Button events are handled here. + Object obj = evt.getSource(); + System.out.println("action " + obj); + if (obj instanceof Button) { + if (evt.getSource() == up) { + dateFieldChanged(false); + } else + if (evt.getSource() == down) { + dateFieldChanged(true); + } + } + } + + /** + * Handles the event. Returns true if the event is handled and should not + * be passed to the parent of this component. The default event handler + * calls some helper methods to make life easier on the programmer. 
+ */ + protected void processKeyEvent(KeyEvent evt) + { + System.out.println("key " + evt); + if (evt.getID() == KeyEvent.KEY_RELEASED) { + if (evt.getSource() == patternText) { + patternTextChanged(); + } + else { + for (int i = 0; i < calendars.length; i++) { + if (evt.getSource() == calendars[i].text) { + textChanged(i); + } + } + } + } + } + + protected void processWindowEvent(WindowEvent evt) + { + System.out.println("window " + evt); + if (evt.getID() == WindowEvent.WINDOW_CLOSING && + evt.getSource() == this) { + this.hide(); + this.dispose(); + + if (applet != null) { + applet.demoClosed(); + } else System.exit(0); + } + } + + /* + protected void processEvent(AWTEvent evt) + { + if (evt.getID() == AWTEvent. Event.ACTION_EVENT && evt.target == up) { + dateFieldChanged(true); + return true; + } + else if (evt.id == Event.ACTION_EVENT && evt.target == down) { + dateFieldChanged(false); + return true; + } + } + */ + + private static final int FIELD_COLUMNS = 35; + + + class CalendarRec { + public CalendarRec(String nameStr, Calendar cal) + { + name = nameStr; + calendar = cal; + rollAdd = new Checkbox(); + + text = new JTextField("",FIELD_COLUMNS); + text.setFont(DemoUtility.editFont); + + format = DateFormat.getDateInstance(cal, DateFormat.FULL, + Locale.getDefault()); + //format.applyPattern(DEFAULT_FORMAT); + } + + public void setLocale(Locale loc) { + String pattern = toPattern(); + + format = DateFormat.getDateInstance(calendar, DateFormat.FULL, + loc); + applyPattern(pattern); + } + + public void applyPattern(String pattern) { + if (format instanceof SimpleDateFormat) { + ((SimpleDateFormat)format).applyPattern(pattern); +//hey {al} - +// } else if (format instanceof java.text.SimpleDateFormat) { +// ((java.text.SimpleDateFormat)format).applyPattern(pattern); + } + } + + private String toPattern() { + if (format instanceof SimpleDateFormat) { + return ((SimpleDateFormat)format).toPattern(); +//hey {al} - +// } else if (format instanceof 
java.text.SimpleDateFormat) { +// return ((java.text.SimpleDateFormat)format).toPattern(); + } + return ""; + } + + Calendar calendar; + DateFormat format; + String name; + JTextField text; + Checkbox rollAdd; + } + + private final CalendarRec[] calendars = { + new CalendarRec("Gregorian", new GregorianCalendar()), + new CalendarRec("Hebrew", new HebrewCalendar()), + new CalendarRec("Islamic (civil)", makeIslamic(true)), + new CalendarRec("Islamic (true)", makeIslamic(false)), + new CalendarRec("Buddhist", new BuddhistCalendar()), + new CalendarRec("Japanese", new JapaneseCalendar()), +// new CalendarRec("Chinese", new ChineseCalendar()), + }; + + static private final Calendar makeIslamic(boolean civil) { + IslamicCalendar cal = new IslamicCalendar(); + cal.setCivil(civil); + return cal; + } +} + +class RollAddField { + RollAddField(int field, String name) { + this.field = field; + this.name = name; + } + int field; + String name; +} diff --git a/demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java new file mode 100644 index 00000000000..fd76ec7d17d --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java @@ -0,0 +1,442 @@ +/* + ******************************************************************************* + * Copyright (C) 1997-2007, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.dev.demo.calendar; + +import java.awt.BorderLayout; +import java.awt.Button; +import java.awt.Choice; +import java.awt.Color; +import java.awt.Component; +import java.awt.Container; +import java.awt.Dimension; +import java.awt.FlowLayout; +import java.awt.Font; +import java.awt.FontMetrics; +import java.awt.Frame; +import java.awt.Graphics; +import java.awt.GridBagConstraints; +import java.awt.GridBagLayout; +import java.awt.Label; +import java.awt.Panel; +import java.awt.Rectangle; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.ItemEvent; +import java.awt.event.ItemListener; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +import java.util.Date; +import java.util.Locale; + +import com.ibm.icu.dev.demo.impl.DemoApplet; +import com.ibm.icu.dev.demo.impl.DemoUtility; +import com.ibm.icu.text.DateFormat; +import com.ibm.icu.util.BuddhistCalendar; +import com.ibm.icu.util.Calendar; +import com.ibm.icu.util.GregorianCalendar; +import com.ibm.icu.util.HebrewCalendar; +import com.ibm.icu.util.IslamicCalendar; +import com.ibm.icu.util.JapaneseCalendar; +import com.ibm.icu.util.SimpleTimeZone; + +/** + * A Frame is a top-level window with a title. The default layout for a frame + * is BorderLayout. The CalendarFrame class defines the window layout of + * CalendarDemo. + */ +class CalendarFrame extends Frame +{ + /** + * For serialization + */ + private static final long serialVersionUID = -4289697663503820619L; + + private static final boolean DEBUG = false; + + private DemoApplet applet; + + /** + * Constructs a new CalendarFrame that is initially invisible. 
+ */ + public CalendarFrame(DemoApplet myApplet) + { + super("Calendar Demo"); + this.applet = myApplet; + init(); + + // When the window is closed, we want to shut down the applet or application + addWindowListener( + new WindowAdapter() { + public void windowClosing(WindowEvent e) { + setVisible(false); + dispose(); + + if (applet != null) { + applet.demoClosed(); + } else System.exit(0); + } + } ); + } + + private Choice displayMenu; + private Locale[] locales = DemoUtility.getG7Locales(); + + private Calendar calendars[] = new Calendar[2]; + private Choice calMenu[] = new Choice[2]; + private ColoredLabel monthLabel[] = new ColoredLabel[2]; + private DateFormat monthFormat[] = new DateFormat[2]; + + private Button prevYear; + private Button prevMonth; + private Button gotoToday; + private Button nextMonth; + private Button nextYear; + private CalendarPanel calendarPanel; + + private static void add(Container container, Component component, + GridBagLayout g, GridBagConstraints c, + int gridwidth, int weightx) + { + c.gridwidth = gridwidth; + c.weightx = weightx; + g.setConstraints(component, c); + container.add(component); + } + + /** + * Initializes the applet. You never need to call this directly, it + * is called automatically by the system once the applet is created. 
+ */ + public void init() { + setBackground(DemoUtility.bgColor); + setLayout(new BorderLayout(10,10)); + + Panel topPanel = new Panel(); + GridBagLayout g = new GridBagLayout(); + topPanel.setLayout(g); + GridBagConstraints c = new GridBagConstraints(); + c.fill = GridBagConstraints.HORIZONTAL; + + // Build the two menus for selecting which calendar is displayed, + // plus the month/year label for each calendar + for (int i = 0; i < 2; i++) { + calMenu[i] = new Choice(); + for (int j = 0; j < CALENDARS.length; j++) { + calMenu[i].addItem(CALENDARS[j].name); + } + calMenu[i].setBackground(DemoUtility.choiceColor); + calMenu[i].select(i); + calMenu[i].addItemListener(new CalMenuListener()); + + // Label for the current month name + monthLabel[i] = new ColoredLabel("", COLORS[i]); + monthLabel[i].setFont(DemoUtility.titleFont); + + // And the default calendar to use for this slot + calendars[i] = CALENDARS[i].calendar; + + add(topPanel, calMenu[i], g, c, 5, 0); + add(topPanel, monthLabel[i], g, c, GridBagConstraints.REMAINDER, 1); + } + + // Now add the next/previous year/month buttons: + prevYear = new Button("<<"); + prevYear.addActionListener(new AddAction(Calendar.YEAR, -1)); + + prevMonth = new Button("<"); + prevMonth.addActionListener(new AddAction(Calendar.MONTH, -1)); + + gotoToday = new Button("Today"); + gotoToday.addActionListener( new ActionListener() + { + public void actionPerformed(ActionEvent e) { + calendarPanel.setDate( new Date() ); + updateMonthName(); + } + } ); + + nextMonth = new Button(">"); + nextMonth.addActionListener(new AddAction(Calendar.MONTH, 1)); + + nextYear = new Button(">>"); + nextYear.addActionListener(new AddAction(Calendar.YEAR, 1)); + + c.fill = GridBagConstraints.NONE; + add(topPanel, prevYear, g, c, 1, 0); + add(topPanel, prevMonth, g, c, 1, 0); + add(topPanel, gotoToday, g, c, 1, 0); + add(topPanel, nextMonth, g, c, 1, 0); + add(topPanel, nextYear, g, c, 1, 0); + + // Now add the menu for selecting the display language + 
Panel displayPanel = new Panel(); + { + displayMenu = new Choice(); + Locale defaultLocale = Locale.getDefault(); + int bestMatch = -1, thisMatch = -1; + int selectMe = 0; + + for (int i = 0; i < locales.length; i++) { + if (i > 0 && + locales[i].getLanguage().equals(locales[i-1].getLanguage()) || + i < locales.length - 1 && + locales[i].getLanguage().equals(locales[i+1].getLanguage())) + { + displayMenu.addItem( locales[i].getDisplayName() ); + } else { + displayMenu.addItem( locales[i].getDisplayLanguage()); + } + + thisMatch = DemoUtility.compareLocales(locales[i], defaultLocale); + + if (thisMatch >= bestMatch) { + bestMatch = thisMatch; + selectMe = i; + } + } + + displayMenu.setBackground(DemoUtility.choiceColor); + displayMenu.select(selectMe); + + displayMenu.addItemListener( new ItemListener() + { + public void itemStateChanged(ItemEvent e) { + Locale loc = locales[displayMenu.getSelectedIndex()]; + calendarPanel.setLocale( loc ); + monthFormat[0] = monthFormat[1] = null; + updateMonthName(); + repaint(); + } + } ); + + Label l1 = new Label("Display Language:", Label.RIGHT); + l1.setFont(DemoUtility.labelFont); + + displayPanel.setLayout(new FlowLayout()); + displayPanel.add(l1); + displayPanel.add(displayMenu); + + } + c.fill = GridBagConstraints.NONE; + c.anchor = GridBagConstraints.EAST; + + add(topPanel, displayPanel, g, c, GridBagConstraints.REMAINDER, 0); + + // The title, buttons, etc. 
go in a panel at the top of the window + add("North", topPanel); + + // The copyright notice goes at the bottom of the window + Label copyright = new Label(DemoUtility.copyright1, Label.LEFT); + copyright.setFont(DemoUtility.creditFont); + add("South", copyright); + + // Now create the big calendar panel and stick it in the middle + calendarPanel = new CalendarPanel( locales[displayMenu.getSelectedIndex()] ); + add("Center", calendarPanel); + + for (int i = 0; i < 2; i++) { + calendarPanel.setCalendar(i, calendars[i]); + calendarPanel.setColor(i, COLORS[i]); + } + + updateMonthName(); + } + + + private void updateMonthName() + { + for (int i = 0; i < 2; i++) { + try { + if (monthFormat[i] == null) { // TODO: optimize + DateFormat f = DateFormat.getDateTimeInstance( + calendars[i], DateFormat.MEDIUM, -1, + locales[displayMenu.getSelectedIndex()]); + if (f instanceof com.ibm.icu.text.SimpleDateFormat) { + com.ibm.icu.text.SimpleDateFormat f1 = (com.ibm.icu.text.SimpleDateFormat) f; + f1.applyPattern("MMMM, yyyy G"); + f1.setTimeZone(new SimpleTimeZone(0, "UTC")); + } + monthFormat[i] = f; + } + } catch (ClassCastException e) { + //hey {lw} - there's something wrong in this routine that cuases exceptions. + System.out.println(e); + } + + monthLabel[i].setText( monthFormat[i].format( calendarPanel.firstOfMonth() )); + } + } + + /** + * CalMenuListener responds to events in the two popup menus that select + * the calendar systems to be used in the display. It figures out which + * of the two menus the event occurred in and updates the corresponding + * element of the calendars[] array to match the new selection. + */ + private class CalMenuListener implements ItemListener + { + public void itemStateChanged(ItemEvent e) + { + for (int i = 0; i < calMenu.length; i++) + { + if (e.getItemSelectable() == calMenu[i]) + { + // We found the menu that the event happened in. + // Figure out which new calendar they selected. 
+ Calendar newCal = CALENDARS[ calMenu[i].getSelectedIndex() ].calendar; + + if (newCal != calendars[i]) + { + // If any of the other menus are set to the same new calendar + // we're about to use for this menu, set them to the current + // calendar from *this* menu so we won't have two the same + for (int j = 0; j < calendars.length; j++) { + if (j != i && calendars[j] == newCal) { + calendars[j] = calendars[i]; + calendarPanel.setCalendar(j, calendars[j]); + monthFormat[j] = null; + + for (int k = 0; k < CALENDARS.length; k++) { + if (calendars[j] == CALENDARS[k].calendar) { + calMenu[j].select(k); + break; + } + } + } + } + // Now update this menu to use the new calendar the user selected + calendars[i] = newCal; + calendarPanel.setCalendar(i, newCal); + monthFormat[i] = null; + + updateMonthName(); + } + break; + } + } + } + } + + /** + * AddAction handles the next/previous year/month buttons... + */ + private class AddAction implements ActionListener { + AddAction(int field, int amount) { + this.field = field; + this.amount = amount; + } + + public void actionPerformed(ActionEvent e) { + calendarPanel.add(field, amount); + updateMonthName(); + } + + private int field, amount; + } + + /** + * ColoredLabel is similar to java.awt.Label, with two differences: + * + * - You can set its text color + * + * - It draws text using drawString rather than using a host-specific + * "Peer" object like AWT does. On 1.2, using drawString gives + * us Bidi reordering for free. 
+ */ + static private class ColoredLabel extends Component { + /** + * For serialization + */ + private static final long serialVersionUID = 5004484960341875722L; + public ColoredLabel(String label) { + text = label; + } + + public ColoredLabel(String label, Color c) { + text = label; + color = c; + } + + public void setText(String label) { + text = label; + repaint(); + } + + public void setFont(Font f) { + font = f; + repaint(); + } + + public void paint(Graphics g) { + FontMetrics fm = g.getFontMetrics(font); + + Rectangle bounds = getBounds(); + + g.setColor(color); + g.setFont(font); + g.drawString(text, fm.stringWidth("\u00a0"), + bounds.height/2 + fm.getHeight() + - fm.getAscent() + fm.getLeading()/2); + } + + public Dimension getPreferredSize() { + return getMinimumSize(); + } + + public Dimension getMinimumSize() { + FontMetrics fm = getFontMetrics(font); + + return new Dimension( fm.stringWidth(text) + 2*fm.stringWidth("\u00a0"), + fm.getHeight() + fm.getLeading()*2); + } + + String text; + Color color = Color.black; + Font font = DemoUtility.labelFont; + } + + /** + * Print out the error message while debugging this program. 
+ */ + public void errorText(String s) + { + if (DEBUG) + { + System.out.println(s); + } + } + + class CalendarRec { + public CalendarRec(String nameStr, Calendar cal) + { + name = nameStr; + calendar = cal; + } + + Calendar calendar; + String name; + } + + private final CalendarRec[] CALENDARS = { + new CalendarRec("Gregorian Calendar", new GregorianCalendar()), + new CalendarRec("Hebrew Calendar", new HebrewCalendar()), + new CalendarRec("Islamic Calendar", makeIslamic(false)), + new CalendarRec("Islamic Civil Calendar ", makeIslamic(true)), + new CalendarRec("Buddhist Calendar", new BuddhistCalendar()), + new CalendarRec("Japanese Calendar", new JapaneseCalendar()), + }; + + static private final Calendar makeIslamic(boolean civil) { + IslamicCalendar cal = new IslamicCalendar(); + cal.setCivil(civil); + return cal; + } + + static final Color[] COLORS = { Color.blue, Color.black }; +} + diff --git a/demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java new file mode 100644 index 00000000000..8ea94d3f9fa --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java @@ -0,0 +1,365 @@ +/* + ******************************************************************************* + * Copyright (C) 1997-2007, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.dev.demo.calendar; + +import java.awt.Canvas; +import java.awt.Color; +import java.awt.Dimension; +import java.awt.FontMetrics; +import java.awt.Graphics; +import java.awt.Point; +import java.util.Date; +import java.util.Locale; + +import com.ibm.icu.dev.demo.impl.DemoUtility; +import com.ibm.icu.text.DateFormatSymbols; +import com.ibm.icu.util.Calendar; +import com.ibm.icu.util.SimpleTimeZone; + +class CalendarPanel extends Canvas { + + /** + * For serialization + */ + private static final long serialVersionUID = 625400018027387141L; + + public CalendarPanel( Locale locale ) { + setLocale(locale); + } + + public void setLocale(Locale locale) { + if (fDisplayLocale == null || !fDisplayLocale.equals(locale)) { + fDisplayLocale = locale; + dirty = true; + + for (int i = 0; i < fCalendar.length; i++) { + if (fCalendar[i] != null) { + fSymbols[i] = new DateFormatSymbols(fCalendar[i], + fDisplayLocale); + } + } + String lang = locale.getLanguage(); + leftToRight = !(lang.equals("iw") || lang.equals("ar")); + + repaint(); + } + } + + public void setDate(Date date) { + fStartOfMonth = date; + dirty = true; + repaint(); + } + + public void add(int field, int delta) + { + synchronized(fCalendar) { + fCalendar[0].setTime(fStartOfMonth); + fCalendar[0].add(field, delta); + fStartOfMonth = fCalendar[0].getTime(); + } + dirty = true; + repaint(); + } + + public void setColor(int index, Color c) { + fColor[index] = c; + repaint(); + } + + public void setCalendar(int index, Calendar c) { + Date date = (fCalendar[index] == null) ? 
new Date() + : fCalendar[index].getTime(); + + fCalendar[index] = c; + fCalendar[index].setTime(date); + + fSymbols[index] = new DateFormatSymbols(c, fDisplayLocale); + dirty = true; + repaint(); + } + + public Calendar getCalendar(int index) { + return fCalendar[index]; + } + + public Locale getDisplayLocale() { + return fDisplayLocale; + } + + public Date firstOfMonth() { + return fStartOfMonth; + } + + private Date startOfMonth(Date dateInMonth) + { + synchronized(fCalendar) { + fCalendar[0].setTime(dateInMonth); + + int era = fCalendar[0].get(Calendar.ERA); + int year = fCalendar[0].get(Calendar.YEAR); + int month = fCalendar[0].get(Calendar.MONTH); + + fCalendar[0].clear(); + fCalendar[0].set(Calendar.ERA, era); + fCalendar[0].set(Calendar.YEAR, year); + fCalendar[0].set(Calendar.MONTH, month); + fCalendar[0].set(Calendar.DATE, 1); + + return fCalendar[0].getTime(); + } + } + + private void calculate() + { + // + // As a workaround for JDK 1.1.3 and below, where Calendars and time + // zones are a bit goofy, always set my calendar's time zone to UTC. + // You would think I would want to do this in the "set" function above, + // but if I do that, the program hangs when this class is loaded, + // perhaps due to some sort of static initialization ordering problem. + // So I do it here instead. 
+ // + fCalendar[0].setTimeZone(new SimpleTimeZone(0, "UTC")); + + Calendar c = (Calendar)fCalendar[0].clone(); // Temporary copy + + fStartOfMonth = startOfMonth(fStartOfMonth); + + // Stash away a few useful constants for this calendar and display + minDay = c.getMinimum(Calendar.DAY_OF_WEEK); + daysInWeek = c.getMaximum(Calendar.DAY_OF_WEEK) - minDay + 1; + + firstDayOfWeek = Calendar.getInstance(fDisplayLocale).getFirstDayOfWeek(); + + // Stash away a Date for the start of this month + + // Find the day of week of the first day in this month + c.setTime(fStartOfMonth); + firstDayInMonth = c.get(Calendar.DAY_OF_WEEK); + int firstWeek = c.get(Calendar.WEEK_OF_MONTH); + + // Now find the # of days in the month + c.roll(Calendar.DATE, false); + daysInMonth = c.get(Calendar.DATE); + + // Finally, find the end of the month, i.e. the start of the next one + c.roll(Calendar.DATE, true); + c.add(Calendar.MONTH, 1); + c.getTime(); // JDK 1.1.2 bug workaround + c.add(Calendar.SECOND, -1); + Date endOfMonth = c.getTime(); + if(endOfMonth==null){ + //do nothing + } + endOfMonth = null; + int lastWeek = c.get(Calendar.WEEK_OF_MONTH); + + // Calculate the number of full or partial weeks in this month. 
+ numWeeks = lastWeek - firstWeek + 1; + + dirty = false; + } + + static final int XINSET = 4; + static final int YINSET = 2; + + /* + * Convert from the day number within a month (1-based) + * to the cell coordinates on the calendar (0-based) + */ + private void dateToCell(int date, Point pos) + { + int cell = (date + firstDayInMonth - firstDayOfWeek - minDay); + if (firstDayInMonth < firstDayOfWeek) { + cell += daysInWeek; + } + + pos.x = cell % daysInWeek; + pos.y = cell / daysInWeek; + } + //private Point dateToCell(int date) { + // Point p = new Point(0,0); + // dateToCell(date, p); + // return p; + //} + + public void paint(Graphics g) { + + if (dirty) { + calculate(); + } + + Point cellPos = new Point(0,0); // Temporary variable + Dimension d = this.getSize(); + + g.setColor(Color.lightGray); + g.fillRect(0,0,d.width,d.height); + + // Draw the day names at the top + g.setColor(Color.black); + g.setFont(DemoUtility.labelFont); + FontMetrics fm = g.getFontMetrics(); + int labelHeight = fm.getHeight() + YINSET * 2; + + int v = fm.getAscent() + YINSET; + for (int i = 0; i < daysInWeek; i++) { + int dayNum = (i + minDay + firstDayOfWeek - 2) % daysInWeek + 1; + String dayName = fSymbols[0].getWeekdays()[dayNum]; + + + double h; + if (leftToRight) { + h = d.width*(i + 0.5) / daysInWeek; + } else { + h = d.width*(daysInWeek - i - 0.5) / daysInWeek; + } + h -= fm.stringWidth(dayName) / 2; + + g.drawString(dayName, (int)h, v); + } + + double cellHeight = (d.height - labelHeight - 1) / numWeeks; + double cellWidth = (double)(d.width - 1) / daysInWeek; + + // Draw a white background in the part of the calendar + // that displays this month. + // First figure out how much of the first week should be shaded. 
+ { + g.setColor(Color.white); + dateToCell(1, cellPos); + int width = (int)(cellPos.x*cellWidth); // Width of unshaded area + + if (leftToRight) { + g.fillRect((int)(width), labelHeight , + d.width - width, (int)cellHeight); + } else { + g.fillRect(0, labelHeight , + d.width - width, (int)cellHeight); + } + + // All of the intermediate weeks get shaded completely + g.fillRect(0, (int)(labelHeight + cellHeight), + d.width, (int)(cellHeight * (numWeeks - 2))); + + // Now figure out the last week. + dateToCell(daysInMonth, cellPos); + width = (int)((cellPos.x+1)*cellWidth); // Width of shaded area + + if (leftToRight) { + g.fillRect(0, (int)(labelHeight + (numWeeks-1) * cellHeight), + width, (int)cellHeight); + } else { + g.fillRect(d.width - width, (int)(labelHeight + (numWeeks-1) * cellHeight), + width, (int)cellHeight); + } + + } + // Draw the X/Y grid lines + g.setColor(Color.black); + for (int i = 0; i <= numWeeks; i++) { + int y = (int)(labelHeight + i * cellHeight); + g.drawLine(0, y, d.width - 1, y); + } + for (int i = 0; i <= daysInWeek; i++) { + int x = (int)(i * cellWidth); + g.drawLine(x, labelHeight, x, d.height - 1); + } + + // Now loop through all of the days in the month, figure out where + // they go in the grid, and draw the day # for each one + + // Figure out the date of the first cell in the calendar display + int cell = (1 + firstDayInMonth - firstDayOfWeek - minDay); + if (firstDayInMonth < firstDayOfWeek) { + cell += daysInWeek; + } + + Calendar c = (Calendar)fCalendar[0].clone(); + c.setTime(fStartOfMonth); + c.add(Calendar.DATE, -cell); + + StringBuffer buffer = new StringBuffer(); + + for (int row = 0; row < numWeeks; row++) { + for (int col = 0; col < daysInWeek; col++) { + + g.setFont(DemoUtility.numberFont); + g.setColor(Color.black); + fm = g.getFontMetrics(); + + int cellx; + if (leftToRight) { + cellx = (int)((col) * cellWidth); + } else { + cellx = (int)((daysInWeek - col - 1) * cellWidth); + } + + int celly = (int)(row * cellHeight 
+ labelHeight); + + for (int i = 0; i < 2; i++) { + fCalendar[i].setTime(c.getTime()); + + int date = fCalendar[i].get(Calendar.DATE); + buffer.setLength(0); + buffer.append(date); + String dayNum = buffer.toString(); + + int x; + + if (leftToRight) { + x = cellx + (int)cellWidth - XINSET - fm.stringWidth(dayNum); + } else { + x = cellx + XINSET; + } + int y = celly + + fm.getAscent() + YINSET + i * fm.getHeight(); + + if (fColor[i] != null) { + g.setColor(fColor[i]); + } + g.drawString(dayNum, x, y); + + if (date == 1 || row == 0 && col == 0) { + g.setFont(DemoUtility.numberFont); + String month = fSymbols[i].getMonths()[ + fCalendar[i].get(Calendar.MONTH)]; + + if (leftToRight) { + x = cellx + XINSET; + } else { + x = cellx + (int)cellWidth - XINSET - fm.stringWidth(month); + } + g.drawString(month, x, y); + } + } + + c.add(Calendar.DATE, 1); + } + } + } + + // Important state variables + private Calendar[] fCalendar = new Calendar[4]; + private Color[] fColor = new Color[4]; + + private Locale fDisplayLocale; + private DateFormatSymbols[] fSymbols = new DateFormatSymbols[4]; + + private Date fStartOfMonth = new Date(); // 00:00:00 on first day of month + + // Cached calculations to make drawing faster. 
+ private transient int minDay; // Minimum legal day # + private transient int daysInWeek; // # of days in a week + private transient int firstDayOfWeek; // First day to display in week + private transient int numWeeks; // # full or partial weeks in month + private transient int daysInMonth; // # days in this month + private transient int firstDayInMonth; // Day of week of first day in month + private transient boolean leftToRight; + + private transient boolean dirty = true; +} diff --git a/demos/src/com/ibm/icu/dev/demo/calendar/package.html b/demos/src/com/ibm/icu/dev/demo/calendar/package.html new file mode 100644 index 00000000000..c1bb1050957 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/calendar/package.html @@ -0,0 +1,12 @@ + + + + + + +Calendar demo applications including date/time arithmetic. + + \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java b/demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java new file mode 100644 index 00000000000..284d16d26b5 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java @@ -0,0 +1,421 @@ +/* + ************************************************************************** + * Copyright (C) 2005-2010, International Business Machines Corporation * + * and others. All Rights Reserved. 
* + ************************************************************************** + * + */ + +package com.ibm.icu.dev.demo.charsetdet; + +import java.awt.Font; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.KeyEvent; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.security.AccessControlException; + +import javax.swing.JFileChooser; +import javax.swing.JFrame; +import javax.swing.JMenu; +import javax.swing.JMenuBar; +import javax.swing.JMenuItem; +import javax.swing.JOptionPane; +import javax.swing.JScrollPane; +import javax.swing.JTextPane; +import javax.swing.KeyStroke; + +import com.ibm.icu.charset.CharsetICU; +import com.ibm.icu.dev.demo.impl.DemoApplet; +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; + +/** + * This simple application demonstrates how to use the CharsetDetector API. It + * opens a file or web page, detects the encoding, and then displays it using that + * encoding. + */ +public class DetectingViewer extends JFrame implements ActionListener +{ + + /** + * For serialization + */ + private static final long serialVersionUID = -2307065724464747775L; + private JTextPane text; + private JFileChooser fileChooser; + + /** + * @throws java.awt.HeadlessException + */ + public DetectingViewer() + { + super(); + DemoApplet.demoFrameOpened(); + + try { + fileChooser = new JFileChooser(); + } catch (AccessControlException ace) { + System.err.println("no file chooser - access control exception. Continuing without file browsing. 
"+ace.toString()); + fileChooser = null; // + } + +// setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + setSize(800, 800); + + setJMenuBar(makeMenus()); + text = new JTextPane(); + text.setContentType("text/plain"); + text.setText(""); + text.setSize(800, 800); + + Font font = new Font("Arial Unicode MS", Font.PLAIN, 24); + text.setFont(font); + + JScrollPane scrollPane = new JScrollPane(text); + + getContentPane().add(scrollPane); + setVisible(true); + + addWindowListener( + new WindowAdapter() { + public void windowClosing(WindowEvent e) { +// setVisible(false); +// dispose(); + + doQuit(); + } + } ); + + + } + + public void actionPerformed(ActionEvent event) + { + String cmd = event.getActionCommand(); + + if (cmd.equals("New...")) { + doNew(); + } else if (cmd.equals("Open File...")) { + doOpenFile(); + } else if (cmd.equals("Open URL...")) { + doOpenURL(); + } else if (cmd.equals("Quit")) { + doQuit(); + } + } + + public static void main(String[] args) + { + new DetectingViewer(); + } + + private void errorDialog(String title, String msg) + { + JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE); + } + + private BufferedInputStream openFile(File file) + { + FileInputStream fileStream = null; + + try { + fileStream = new FileInputStream(file); + } catch (Exception e) { + errorDialog("Error Opening File", e.getMessage()); + return null; + } + + return new BufferedInputStream(fileStream); + } + +// private void openFile(String directory, String filename) +// { +// openFile(new File(directory, filename)); +// } + + + private BufferedInputStream openURL(String url) + { + InputStream s = null; + + try { + URL aURL = new URL(url); + s = aURL.openStream(); + } catch (Exception e) { + errorDialog("Error Opening URL", e.getMessage()); + return null; + } + + return new BufferedInputStream(s); + } + + private String encodingName(CharsetMatch match) + { + return match.getName() + " (" + match.getLanguage() + ")"; + } + + private void 
setMatchMenu(CharsetMatch[] matches) + { + JMenu menu = getJMenuBar().getMenu(1); + JMenuItem menuItem; + + menu.removeAll(); + + for (int i = 0; i < matches.length; i += 1) { + CharsetMatch match = matches[i]; + + menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence()); + + menu.add(menuItem); + } + } + + private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'}; + private byte[] styleTag = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'}; + private static int BUFFER_SIZE = 100000; + + private boolean openTag(byte[] buffer, int offset, int length, byte[] tag) + { + int tagLen = tag.length; + int bufRem = length - offset; + int b; + + for (b = 0; b < tagLen && b < bufRem; b += 1) { + if (buffer[b + offset] != tag[b]) { + return false; + } + } + + return b == tagLen; + } + + private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag) + { + if (buffer[offset] != (byte) '/') { + return false; + } + + return openTag(buffer, offset + 1, length, tag); + } + + private byte[] filter(InputStream in) + { + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRemaining = BUFFER_SIZE; + int bufLen = 0; + + in.mark(BUFFER_SIZE); + + try { + while (bytesRemaining > 0) { + int bytesRead = in.read(buffer, bufLen, bytesRemaining); + + if (bytesRead <= 0) { + break; + } + + bufLen += bytesRead; + bytesRemaining -= bytesRead; + } + } catch (Exception e) { + // TODO: error handling? + return null; + } + + boolean inTag = false; + boolean skip = false; + int out = 0; + + for (int i = 0; i < bufLen; i += 1) { + byte b = buffer[i]; + + if (b == (byte) '<') { + inTag = true; + + if (openTag(buffer, i + 1, bufLen, scriptTag) || + openTag(buffer, i + 1, bufLen, styleTag)) { + skip = true; + } else if (closedTag(buffer, i + 1, bufLen, scriptTag) || + closedTag(buffer, i + 1, bufLen, styleTag)) { + skip = false; + } + } else if (b == (byte) '>') { + inTag = false; + } else if (! 
(inTag || skip)) { + buffer[out++] = b; + } + } + + byte[] filtered = new byte[out]; + + System.arraycopy(buffer, 0, filtered, 0, out); + return filtered; + } + + private CharsetMatch[] detect(byte[] bytes) + { + CharsetDetector det = new CharsetDetector(); + + det.setText(bytes); + + return det.detectAll(); + } + + private CharsetMatch[] detect(BufferedInputStream inputStream) + { + CharsetDetector det = new CharsetDetector(); + + try { + det.setText(inputStream); + + return det.detectAll(); + } catch (Exception e) { + // TODO: error message? + return null; + } + } + + private void show(InputStream inputStream, CharsetMatch[] matches, String title) + { + InputStreamReader isr; + char[] buffer = new char[1024]; + int bytesRead = 0; + + if (matches == null || matches.length == 0) { + errorDialog("Match Error", "No matches!"); + return; + } + + try { + StringBuffer sb = new StringBuffer(); + String encoding = matches[0].getName(); + + inputStream.reset(); + + if (encoding.startsWith("UTF-32")) { + byte[] bytes = new byte[1024]; + int offset = 0; + int chBytes = 0; + Charset utf32 = CharsetICU.forNameICU(encoding); + + while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) { + offset = bytesRead % 4; + chBytes = bytesRead - offset; + + sb.append(utf32.decode(ByteBuffer.wrap(bytes)).toString()); + + if (offset != 0) { + for (int i = 0; i < offset; i += 1) { + bytes[i] = bytes[chBytes + i]; + } + } + } + } else { + isr = new InputStreamReader(inputStream, encoding); + + while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) { + sb.append(buffer, 0, bytesRead); + } + + isr.close(); + } + + this.setTitle(title + " - " + encodingName(matches[0])); + + setMatchMenu(matches); + text.setText(sb.toString()); + } catch (IOException e) { + errorDialog("IO Error", e.getMessage()); + } catch (Exception e) { + errorDialog("Internal Error", e.getMessage()); + } + } + + private void doNew() + { + // open a new window... 
+ } + + private void doOpenFile() + { + int retVal = fileChooser.showOpenDialog(this); + + if (retVal == JFileChooser.APPROVE_OPTION) { + File file = fileChooser.getSelectedFile(); + BufferedInputStream inputStream = openFile(file); + + if (inputStream != null) { + CharsetMatch[] matches = detect(inputStream); + + show(inputStream, matches, file.getName()); + } + } + } + + private void doOpenURL() + { + String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE, + null, null, null); + + if (url != null && url.length() > 0) { + BufferedInputStream inputStream = openURL(url); + + if (inputStream != null) { + byte[] filtered = filter(inputStream); + CharsetMatch[] matches = detect(filtered); + + show(inputStream, matches, url); + } + } +} + + private void doQuit() + { + DemoApplet.demoFrameClosed(); + this.setVisible(false); + this.dispose(); + } + + private JMenuBar makeMenus() + { + JMenu menu = new JMenu("File"); + JMenuItem mi; + + mi = new JMenuItem("Open File..."); + mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK))); + mi.addActionListener(this); + menu.add(mi); + if(fileChooser == null) { + mi.setEnabled(false); // no file chooser. 
+ } + + mi = new JMenuItem("Open URL..."); + mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK))); + mi.addActionListener(this); + menu.add(mi); + + mi = new JMenuItem("Quit"); + mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK))); + mi.addActionListener(this); + menu.add(mi); + + JMenuBar mbar = new JMenuBar(); + mbar.add(menu); + + menu = new JMenu("Detected Encodings"); + mbar.add(menu); + + return mbar; + } +} diff --git a/demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java new file mode 100644 index 00000000000..cd81ef1fa74 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java @@ -0,0 +1,552 @@ +/* + ******************************************************************************* + * Copyright (C) 1997-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.demo.holiday; + +import java.awt.Color; +import java.awt.Dimension; +import java.awt.Font; +import java.awt.FontMetrics; +import java.awt.Graphics; +import java.awt.Insets; +import java.awt.Panel; + +/** + * Various graphical borders. The border itself is a Panel so that it can + * contain other Components (i.e. it borders something). You use the + * HolidayBorderPanel like any other Panel: you set the layout that you prefer and + * add Components to it. Beware that a null layout does not obey the insets + * of the panel so if you use null layouts, adjust your measurements to + * handle the border by calling insets(). + * + * @author Andy Clark, Taligent Inc. + * @version 1.0 + */ +public class HolidayBorderPanel extends Panel { + /** + * For serialization + */ + private static final long serialVersionUID = 4669213306492461159L; + // Constants + + /** Solid border. 
*/ + public final static int SOLID = 0; + /** A raised border. */ + public final static int RAISED = 1; + /** A lowered border. */ + public final static int LOWERED = 2; + /** An etched in border. */ + public final static int IN = 3; + /** An etched out border. */ + public final static int OUT = 4; + + /** Left alignment. */ + public final static int LEFT = 0; + /** Center alignment. */ + public final static int CENTER = 1; + /** Right alignment. */ + public final static int RIGHT = 2; + + /** Default style (IN). */ + public final static int DEFAULT_STYLE = IN; + /** Default thickness (10). */ + public final static int DEFAULT_THICKNESS = 10; + /** Default thickness for solid borders (4). */ + public final static int DEFAULT_SOLID_THICKNESS = 4; + /** Default thickness for raised borders (2). */ + public final static int DEFAULT_RAISED_THICKNESS = 2; + /** Default thickness for lowered borders (2). */ + public final static int DEFAULT_LOWERED_THICKNESS = 2; + /** Default thickness for etched-in borders (10). */ + public final static int DEFAULT_IN_THICKNESS = 10; + /** Default thickness for etched-out borders (10). */ + public final static int DEFAULT_OUT_THICKNESS = 10; + /** Default gap between border and contained component (5). */ + public final static int DEFAULT_GAP = 5; + /** Default color (black). Applies to SOLID and etched borders. */ + public final static Color DEFAULT_COLOR = Color.black; + + /** Default font (TimesRoman,PLAIN,14). Only applies to etched borders. */ + public final static Font DEFAULT_FONT = new Font("TimesRoman", Font.PLAIN, 14); + /** Default alignment (LEFT). Only applies to etched borders. */ + public final static int DEFAULT_ALIGNMENT = LEFT; + + // Data + private int style; + private int thickness; + private int gap; + private Color color; + + private Font font; + private String text; + private int alignment; + + /** + * Constructor. Makes default border. 
+ */ + public HolidayBorderPanel() { + + // initialize data + style = DEFAULT_STYLE; + thickness = DEFAULT_THICKNESS; + gap = DEFAULT_GAP; + color = DEFAULT_COLOR; + + text = null; + font = DEFAULT_FONT; + alignment = DEFAULT_ALIGNMENT; + + } + + /** + * Constructor. Makes an etched IN border with given text caption. + * + * @param text Text caption + */ + public HolidayBorderPanel(String text) { + this(); + + style = IN; + this.text = text; + } + + /** + * Constructor. Makes SOLID border with color and thickness given. + * + * @param color The color for the border. + * @param thickness The thickness of the border. + */ + public HolidayBorderPanel(Color color, int thickness) { + this(); + + style = SOLID; + this.color = color; + this.thickness = thickness; + } + + /** + * Constructor. Makes a border of the given style with the default + * thickness for that style. + * + * @param style The style for this border. + */ + public HolidayBorderPanel(int style) { + this(); + + // set thickness appropriate to this style + switch (style) { + case SOLID: thickness = DEFAULT_SOLID_THICKNESS; break; + case RAISED: thickness = DEFAULT_RAISED_THICKNESS; break; + case LOWERED: thickness = DEFAULT_LOWERED_THICKNESS; break; + case IN: thickness = DEFAULT_IN_THICKNESS; break; + case OUT: thickness = DEFAULT_OUT_THICKNESS; break; + default: + thickness = DEFAULT_THICKNESS; + } + + this.style = style; + } + + /** + * Constructor. Makes border with given style and thickness. + * + * @param style The style for this border. + * @param thickness The thickness for this border. + */ + public HolidayBorderPanel(int style, int thickness) { + this(); + + this.style = style; + this.thickness = thickness; + } + + /** + * Returns the insets of this panel.. 
+ */ + public Insets getInsets() { + int adjustment = 0; + + // adjust for text string + if (style == IN || style == OUT) { + if (text != null && text.length() > 0) { + try { + // set font and get info + int height = getGraphics().getFontMetrics(font).getHeight(); + if (height > thickness) + adjustment = height - thickness; + } + catch (Exception e) { + // nothing: just in case there is no graphics context + // at the beginning. + System.out.print(""); + } + } + } + + // return appropriate insets + int dist = thickness + gap; + return new Insets(dist + adjustment, dist, dist, dist); + } + + /** + * Sets the style of the border + * + * @param style The new style. + */ + public HolidayBorderPanel setStyle(int style) { + + // set the style and re-layout the panel + this.style = style; + doLayout(); + repaint(); + + return this; + } + + /** + * Gets the style of the border + */ + public int getStyle() { + + return style; + } + + /** + * Sets the thickness of the border. + * + * @param thickness The new thickness + */ + public HolidayBorderPanel setThickness(int thickness) { + + if (thickness > 0) { + this.thickness = thickness; + doLayout(); + repaint(); + } + + return this; + } + + /** + * Gets the thickness of the border. + */ + public int getThickness() { + + return thickness; + } + + /** + * Sets the gap between the border and the contained Component. + * + * @param gap The new gap, in pixels. + */ + public HolidayBorderPanel setGap(int gap) { + + if (gap > -1) { + this.gap = gap; + doLayout(); + repaint(); + } + + return this; + } + + /** + * Gets the gap between the border and the contained Component. + */ + public int getGap() { + + return gap; + } + + /** + * Sets the current color for SOLID borders and the caption text + * color for etched borders. + * + * @param color The new color. 
+ */ + public HolidayBorderPanel setColor(Color color) { + + this.color = color; + if (style == SOLID || style == IN || style == OUT) + repaint(); + + return this; + } + + /** + * Gets the current color for SOLID borders and the caption + * text color for etched borders. + */ + public Color getColor() { + + return color; + } + + /** + * Sets the font. Only applies to etched borders. + */ + public HolidayBorderPanel setTextFont(Font font) { + + // set font + if (font != null) { + this.font = font; + if (style == IN || style == OUT) { + doLayout(); + repaint(); + } + } + + return this; + } + + /** + * Gets the font of the text. Only applies to etched borders. + */ + public Font getTextFont() { + + return font; + } + + /** + * Sets the text. Only applies to etched borders. + * + * @param text The new text. + */ + public HolidayBorderPanel setText(String text) { + + this.text = text; + if (style == IN || style == OUT) { + doLayout(); + repaint(); + } + + return this; + } + + /** + * Gets the text. Only applies to etched borders. + */ + public String getText() { + + return text; + } + + /** + * Sets the text alignment. Only applies to etched borders. + * + * @param alignment The new alignment. + */ + public HolidayBorderPanel setAlignment(int alignment) { + + this.alignment = alignment; + if (style == IN || style == OUT) { + doLayout(); + repaint(); + } + + return this; + } + + /** + * Gets the text alignment. + */ + public int getAlignment() { + + return alignment; + } + + /** + * Repaints the border. + * + * @param g The graphics context. 
+     */
+    public void paint(Graphics g) {
+
+        // get current dimensions
+        Dimension size = getSize();
+        int width = size.width;
+        int height = size.height;
+
+        // set colors
+        Color light = getBackground().brighter().brighter().brighter();
+        Color dark = getBackground().darker().darker().darker();
+
+        // Draw border
+        switch (style) {
+        case RAISED:    // 3D Border (in or out)
+        case LOWERED: {
+            Color topleft;
+            Color bottomright;
+
+            // set colors
+            if (style == RAISED) {
+                topleft = light;
+                bottomright = dark;
+            }
+            else {
+                topleft = dark;
+                bottomright = light;
+            }
+
+            // draw border
+            g.setColor(topleft);
+            for (int i = 0; i < thickness; i++) {
+                g.drawLine(i, i, width - i - 2, i);
+                g.drawLine(i, i + 1, i, height - i - 1);
+            }
+            g.setColor(bottomright);
+            for (int i = 0; i < thickness; i++) {
+                g.drawLine(i + 1, height - i - 1, width - i - 1, height - i - 1);
+                g.drawLine(width - i - 1, i, width - i - 1, height - i - 2);
+            }
+            break;
+        }
+
+        case IN:    // Etched Border (in or out)
+        case OUT: {
+            int adjust1 = 0;
+            int adjust2 = 0;
+
+            // set font and get info
+            Font oldfont = g.getFont();
+            g.setFont(font);
+            FontMetrics fm = g.getFontMetrics();
+            int ascent = fm.getAscent();
+
+            // set adjustment
+            if (style == IN) {
+                adjust1 = 1;
+            } else {
+                adjust2 = 1;
+            }
+
+            // Calculate adjustment for text
+            int adjustment = 0;
+            if (text != null && text.length() > 0) {
+                if (ascent > thickness) {
+                    adjustment = (ascent - thickness) / 2;
+                }
+            }
+
+            // The adjustment is there so that we always draw the
+            // light rectangle first. Otherwise, your eye picks up
+            // the discrepancy where the light rect. passes over
+            // the darker rect.
+            int x = thickness / 2;
+            int y = thickness / 2 + adjustment;
+            int w = width - thickness - 1;
+            int h = height - thickness - 1 - adjustment;
+
+            // draw rectangles
+            g.setColor(light);
+            g.drawRect(x + adjust1, y + adjust1, w, h);
+            g.setColor(dark);
+            g.drawRect(x + adjust2, y + adjust2, w, h);
+
+            // draw text, if applicable
+            if (text != null && text.length() > 0) {
+                // calculate drawing area
+                int fontheight = fm.getHeight();
+                int strwidth = fm.stringWidth(text);
+
+                int textwidth = width - 2 * (thickness + 5);
+                if (strwidth > textwidth) {
+                    strwidth = textwidth;
+                }
+
+                // calculate offset for alignment
+                int offset;
+                switch (alignment) {
+                case CENTER:
+                    offset = (width - strwidth) / 2;
+                    break;
+                case RIGHT:
+                    offset = width - strwidth - thickness - 5;
+                    break;
+                case LEFT:
+                default: // assume left alignment if invalid
+                    offset = thickness + 5;
+                    break;
+                }
+
+                // Remember the clip before narrowing it: clipRect() only ever
+                // intersects with the current clip, so it cannot be used to
+                // widen the clip again afterwards.
+                java.awt.Shape oldClip = g.getClip();
+
+                // clear drawing area and set clipping region
+                g.clearRect(offset - 5, 0, strwidth + 10, fontheight);
+                g.clipRect(offset, 0, strwidth, fontheight);
+
+                // draw text
+                g.setColor(color);
+                g.drawString(text, offset, ascent);
+
+                // restore old clipping area
+                g.setClip(oldClip);
+            }
+
+            g.setFont(oldfont);
+            break;
+        }
+
+        case SOLID:
+        default: // assume SOLID
+            g.setColor(color);
+            for (int i = 0; i < thickness; i++) {
+                g.drawRect(i, i, width - 2 * i - 1, height - 2 * i - 1);
+            }
+        }
+
+        // Let the container render any lightweight children on top of the
+        // border; without this call they are never painted.
+        super.paint(g);
+    }
+
+    /**
+     * Returns the settings of this HolidayBorderPanel instance as a string.
+ */ + public String toString() { + StringBuffer str = new StringBuffer("HolidayBorderPanel["); + + // style + str.append("style="); + switch (style) { + case SOLID: str.append("SOLID"); break; + case RAISED: str.append("RAISED"); break; + case LOWERED: str.append("LOWERED"); break; + case IN: str.append("IN"); break; + case OUT: str.append("OUT"); break; + default: str.append("unknown"); + } + str.append(","); + + // thickness + str.append("thickness="); + str.append(thickness); + str.append(","); + + // gap + str.append("gap="); + str.append(gap); + str.append(","); + + // color + str.append(color); + str.append(","); + + // font + str.append(font); + str.append(","); + + // text + str.append("text="); + str.append(text); + str.append(","); + + // alignment + str.append("alignment="); + switch (alignment) { + case LEFT: str.append("LEFT"); break; + case CENTER: str.append("CENTER"); break; + case RIGHT: str.append("RIGHT"); break; + default: str.append("unknown"); + } + + str.append("]"); + + return str.toString(); + } + + } + diff --git a/demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java new file mode 100644 index 00000000000..5899b78cc01 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java @@ -0,0 +1,744 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2007, International Business Machines Corporation and * + * others. All Rights Reserved. 
*
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.demo.holiday;
+
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.Canvas;
+import java.awt.Choice;
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.Container;
+import java.awt.Dimension;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Frame;
+import java.awt.Graphics;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.Point;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.WindowEvent;
+import java.text.DateFormatSymbols;
+import java.util.Date;
+import java.util.Locale;
+import java.util.Vector;
+
+import com.ibm.icu.dev.demo.impl.DemoApplet;
+import com.ibm.icu.dev.demo.impl.DemoTextBox;
+import com.ibm.icu.dev.demo.impl.DemoUtility;
+import com.ibm.icu.text.SimpleDateFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.Holiday;
+import com.ibm.icu.util.SimpleTimeZone;
+
+/**
+ * HolidayCalendarDemo demonstrates how Calendar and Holiday work together:
+ * it creates a frame showing a month grid for a selectable locale, with
+ * that locale's holidays drawn into the cells they fall on.
+ */
+public class HolidayCalendarDemo extends DemoApplet
+{
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 4546085430817359372L;
+
+    /**
+     * Entry point when run as an application: creates the demo and calls
+     * its showDemo() method.
+     *
+     * @param argv command-line arguments (not used)
+     */
+    public static void main(String argv[]) {
+
+        new HolidayCalendarDemo().showDemo();
+    }
+
+    /* This creates a CalendarFrame for the demo applet. */
+    public Frame createDemoFrame(DemoApplet applet) {
+        return new CalendarFrame(applet);
+    }
+
+    /**
+     * A Frame is a top-level window with a title. The default layout for a frame
+     * is BorderLayout. The CalendarFrame class defines the window layout of
+     * CalendarDemo.
+ */ + private static class CalendarFrame extends Frame implements ActionListener, + ItemListener + { + /** + * For serialization + */ + private static final long serialVersionUID = -7023296782393042761L; + + private static final boolean DEBUG = false; + + //private Locale curLocale = Locale.US; // unused + + private DemoApplet applet; + + private static final Locale[] calendars = { + //new Locale("de","AT"), + Locale.CANADA, + Locale.CANADA_FRENCH, + Locale.FRANCE, + Locale.GERMANY, + new Locale("iw","IL"), + new Locale("el","GR"), + //new Locale("es","MX"), + Locale.UK, + Locale.US, + }; + private static final Locale[] displays = { + Locale.CANADA, + Locale.UK, + Locale.US, + Locale.FRANCE, + Locale.CANADA_FRENCH, + //new Locale("de","AT"), + Locale.GERMAN, + new Locale("el","GR"), + //new Locale("iw","IL"), + new Locale("es","MX"), + }; + + /** + * Constructs a new CalendarFrame that is initially invisible. + */ + public CalendarFrame(DemoApplet applet) + { + super("Calendar Demo"); + this.applet = applet; + init(); + start(); + enableEvents(WindowEvent.WINDOW_CLOSING); + } + + /** + * Initializes the applet. You never need to call this directly, it + * is called automatically by the system once the applet is created. + */ + public void init() + { + // Get G7 locales only for demo purpose. To get all the locales + // supported, switch to calling Calendar.getAvailableLocales(). + // commented + locales = displays; + + buildGUI(); + } + + //------------------------------------------------------------ + // package private + //------------------------------------------------------------ + void addWithFont(Container container, Component foo, Font font) { + if (font != null) + foo.setFont(font); + container.add(foo); + } + + /** + * Called to start the applet. You never need to call this method + * directly, it is called when the applet's document is visited. 
+ */ + public void start() + { + // do nothing + } + + private Choice localeMenu; + private Choice displayMenu; + private Locale[] locales; + + private Label monthLabel; + private Button prevYear; + private Button prevMonth; + private Button gotoToday; + private Button nextMonth; + private Button nextYear; + private CalendarPanel calendarPanel; + + private static final Locale kFirstLocale = Locale.US; + + private static void add(Container container, Component component, + GridBagLayout g, GridBagConstraints c) + { + g.setConstraints(component, c); + container.add(component); + } + + public void buildGUI() + { + setBackground(DemoUtility.bgColor); + setLayout(new BorderLayout(10,10)); + + // Label for the demo's title + Label titleLabel = new Label("Calendar Demo", Label.CENTER); + titleLabel.setFont(DemoUtility.titleFont); + + // Label for the current month name + monthLabel = new Label("", Label.LEFT); + monthLabel.setFont(new Font(DemoUtility.titleFont.getName(), + DemoUtility.titleFont.getStyle(), + (DemoUtility.titleFont.getSize() * 3)/2)); + + // Make the locale popup menus + localeMenu= new Choice(); + localeMenu.addItemListener(this); + int selectMe = 0; + + for (int i = 0; i < calendars.length; i++) { + if (i > 0 && + calendars[i].getCountry().equals(calendars[i-1].getCountry()) || + i < calendars.length - 1 && + calendars[i].getCountry().equals(calendars[i+1].getCountry())) + { + localeMenu.addItem(calendars[i].getDisplayCountry() + " (" + + calendars[i].getDisplayLanguage() + ")"); + } else { + localeMenu.addItem( calendars[i].getDisplayCountry() ); + } + + if (calendars[i].equals(kFirstLocale)) { + selectMe = i; + } + } + + localeMenu.setBackground(DemoUtility.choiceColor); + localeMenu.select(selectMe); + + displayMenu = new Choice(); + displayMenu.addItemListener(this); + + selectMe = 0; + for (int i = 0; i < locales.length; i++) { + if (i > 0 && + locales[i].getLanguage().equals(locales[i-1].getLanguage()) || + i < locales.length - 1 && + 
locales[i].getLanguage().equals(locales[i+1].getLanguage())) + { + displayMenu.addItem( locales[i].getDisplayName() ); + } else { + displayMenu.addItem( locales[i].getDisplayLanguage()); + } + + if (locales[i].equals(kFirstLocale)) { + selectMe = i; + } + } + + displayMenu.setBackground(DemoUtility.choiceColor); + displayMenu.select(selectMe); + + // Make all the next/previous/today buttons + prevYear = new Button("<<"); + prevYear.addActionListener(this); + prevMonth = new Button("<"); + prevMonth.addActionListener(this); + gotoToday = new Button("Today"); + gotoToday.addActionListener(this); + nextMonth = new Button(">"); + nextMonth.addActionListener(this); + nextYear = new Button(">>"); + nextYear.addActionListener(this); + + // The month name and the control buttons are bunched together + Panel monthPanel = new Panel(); + { + GridBagLayout g = new GridBagLayout(); + GridBagConstraints c = new GridBagConstraints(); + monthPanel.setLayout(g); + + c.weightx = 1; + c.weighty = 1; + + c.gridwidth = 1; + c.fill = GridBagConstraints.HORIZONTAL; + c.gridwidth = GridBagConstraints.REMAINDER; + add(monthPanel, monthLabel, g, c); + + c.gridwidth = 1; + add(monthPanel, prevYear, g, c); + add(monthPanel, prevMonth, g, c); + add(monthPanel, gotoToday, g, c); + add(monthPanel, nextMonth, g, c); + c.gridwidth = GridBagConstraints.REMAINDER; + add(monthPanel, nextYear, g, c); + } + + // Stick the menu and buttons in a little "control panel" + Panel menuPanel = new Panel(); + { + GridBagLayout g = new GridBagLayout(); + GridBagConstraints c = new GridBagConstraints(); + menuPanel.setLayout(g); + + c.weightx = 1; + c.weighty = 1; + + c.fill = GridBagConstraints.HORIZONTAL; + + c.gridwidth = GridBagConstraints.RELATIVE; + Label l1 = new Label("Holidays"); + l1.setFont(DemoUtility.labelFont); + add(menuPanel, l1, g, c); + + c.gridwidth = GridBagConstraints.REMAINDER; + add(menuPanel, localeMenu, g, c); + + c.gridwidth = GridBagConstraints.RELATIVE; + Label l2 = new 
Label("Display:"); + l2.setFont(DemoUtility.labelFont); + add(menuPanel, l2, g, c); + + c.gridwidth = GridBagConstraints.REMAINDER; + add(menuPanel, displayMenu, g, c); + } + + // The title, buttons, etc. go in a panel at the top of the window + Panel topPanel = new Panel(); + { + topPanel.setLayout(new BorderLayout()); + + //topPanel.add("North", titleLabel); + topPanel.add("Center", monthPanel); + topPanel.add("East", menuPanel); + } + add("North", topPanel); + + // The copyright notice goes at the bottom of the window + Label copyright = new Label(DemoUtility.copyright1, Label.LEFT); + copyright.setFont(DemoUtility.creditFont); + add("South", copyright); + + // Now create the big calendar panel and stick it in the middle + calendarPanel = new CalendarPanel( kFirstLocale ); + add("Center", calendarPanel); + + updateMonthName(); + } + + private void updateMonthName() + { + SimpleDateFormat f = new SimpleDateFormat("MMMM yyyyy", + calendarPanel.getDisplayLocale()); + f.setCalendar(calendarPanel.getCalendar()); + f.setTimeZone(new SimpleTimeZone(0, "UTC")); // JDK 1.1.2 workaround + monthLabel.setText( f.format( calendarPanel.firstOfMonth() )); + } + + /** + * Handles the event. Returns true if the event is handled and should not + * be passed to the parent of this component. The default event handler + * calls some helper methods to make life easier on the programmer. + */ + public void actionPerformed(ActionEvent e) + { + Object obj = e.getSource(); + + // *** Button events are handled here. 
+ if (obj instanceof Button) { + if (obj == nextMonth) { + calendarPanel.add(Calendar.MONTH, +1); + } + else + if (obj == prevMonth) { + calendarPanel.add(Calendar.MONTH, -1); + } + else + if (obj == prevYear) { + calendarPanel.add(Calendar.YEAR, -1); + } + else + if (obj == nextYear) { + calendarPanel.add(Calendar.YEAR, +1); + } + else + if (obj == gotoToday) { + calendarPanel.set( new Date() ); + } + updateMonthName(); + } + } + + public void itemStateChanged(ItemEvent e) + { + Object obj = e.getSource(); + if (obj == localeMenu) { + calendarPanel.setCalendarLocale(calendars[localeMenu.getSelectedIndex()]); + updateMonthName(); + } + else + if (obj == displayMenu) { + calendarPanel.setDisplayLocale(locales[displayMenu.getSelectedIndex()]); + updateMonthName(); + } + } + + /** + * Print out the error message while debugging this program. + */ + public void errorText(String s) + { + if (DEBUG) + { + System.out.println(s); + } + } + + protected void processWindowEvent(WindowEvent e) + { + System.out.println("event " + e); + if (e.getID() == WindowEvent.WINDOW_CLOSING) { + this.hide(); + this.dispose(); + + if (applet != null) { + applet.demoClosed(); + } else { + System.exit(0); + } + } + } + } + + + private static class CalendarPanel extends Canvas { + + /** + * For serialization + */ + private static final long serialVersionUID = 1521099412250120821L; + + public CalendarPanel( Locale locale ) { + set(locale, locale, new Date()); + } + + public void setCalendarLocale(Locale locale) { + set(locale, fDisplayLocale, fCalendar.getTime()); + } + + public void setDisplayLocale(Locale locale) { + set(fCalendarLocale, locale, fCalendar.getTime()); + } + + public void set(Date date) { + set(fCalendarLocale, fDisplayLocale, date); + } + + public void set(Locale loc, Locale display, Date date) + { + if (fCalendarLocale == null || !loc.equals(fCalendarLocale)) { + fCalendarLocale = loc; + fCalendar = Calendar.getInstance(fCalendarLocale); + fAllHolidays = 
Holiday.getHolidays(fCalendarLocale); + } + if (fDisplayLocale == null || !display.equals(fDisplayLocale)) { + fDisplayLocale = display; + fSymbols = new DateFormatSymbols(fDisplayLocale); + } + + fStartOfMonth = date; + + dirty = true; + repaint(); + } + + public void add(int field, int delta) + { + synchronized(fCalendar) { + fCalendar.setTime(fStartOfMonth); + fCalendar.add(field, delta); + fStartOfMonth = fCalendar.getTime(); + } + dirty = true; + repaint(); + } + + public com.ibm.icu.util.Calendar getCalendar() { + return fCalendar; + } + + public Locale getCalendarLocale() { + return fCalendarLocale; + } + + public Locale getDisplayLocale() { + return fDisplayLocale; + } + + + public Date firstOfMonth() { + return fStartOfMonth; + } + + private Date startOfMonth(Date dateInMonth) + { + synchronized(fCalendar) { + fCalendar.setTime(dateInMonth); // TODO: synchronization + + int era = fCalendar.get(Calendar.ERA); + int year = fCalendar.get(Calendar.YEAR); + int month = fCalendar.get(Calendar.MONTH); + + fCalendar.clear(); + fCalendar.set(Calendar.ERA, era); + fCalendar.set(Calendar.YEAR, year); + fCalendar.set(Calendar.MONTH, month); + fCalendar.set(Calendar.DATE, 1); + + return fCalendar.getTime(); + } + } + + private void calculate() + { + // + // As a workaround for JDK 1.1.3 and below, where Calendars and time + // zones are a bit goofy, always set my calendar's time zone to UTC. + // You would think I would want to do this in the "set" function above, + // but if I do that, the program hangs when this class is loaded, + // perhaps due to some sort of static initialization ordering problem. + // So I do it here instead. 
+ // + fCalendar.setTimeZone(new SimpleTimeZone(0, "UTC")); + + Calendar c = (Calendar)fCalendar.clone(); // Temporary copy + + fStartOfMonth = startOfMonth(fStartOfMonth); + + // Stash away a few useful constants for this calendar and display + minDay = c.getMinimum(Calendar.DAY_OF_WEEK); + daysInWeek = c.getMaximum(Calendar.DAY_OF_WEEK) - minDay + 1; + + firstDayOfWeek = Calendar.getInstance(fDisplayLocale).getFirstDayOfWeek(); + + // Stash away a Date for the start of this month + + // Find the day of week of the first day in this month + c.setTime(fStartOfMonth); + firstDayInMonth = c.get(Calendar.DAY_OF_WEEK); + + // Now find the # of days in the month + c.roll(Calendar.DATE, false); + daysInMonth = c.get(Calendar.DATE); + + // Finally, find the end of the month, i.e. the start of the next one + c.roll(Calendar.DATE, true); + c.add(Calendar.MONTH, 1); + c.getTime(); // JDK 1.1.2 bug workaround + c.add(Calendar.SECOND, -1); + Date endOfMonth = c.getTime(); + + // + // Calculate the number of full or partial weeks in this month. + // To do this I can just reuse the code that calculates which + // calendar cell contains a given date. 
+ // + numWeeks = dateToCell(daysInMonth).y - dateToCell(1).y + 1; + + // Remember which holidays fall on which days in this month, + // to save the trouble of having to do it later + fHolidays.setSize(0); + + for (int h = 0; h < fAllHolidays.length; h++) + { + Date d = fStartOfMonth; + while ( (d = fAllHolidays[h].firstBetween(d, endOfMonth) ) != null) + { + c.setTime(d); + fHolidays.addElement( new HolidayInfo(c.get(Calendar.DATE), + fAllHolidays[h], + fAllHolidays[h].getDisplayName(fDisplayLocale) )); + + d.setTime( d.getTime() + 1000 ); // "d++" + } + } + dirty = false; + } + + static final int INSET = 2; + + /* + * Convert from the day number within a month (1-based) + * to the cell coordinates on the calendar (0-based) + */ + private void dateToCell(int date, Point pos) + { + int cell = (date + firstDayInMonth - firstDayOfWeek - minDay); + if (firstDayInMonth < firstDayOfWeek) { + cell += daysInWeek; + } + + pos.x = cell % daysInWeek; + pos.y = cell / daysInWeek; + } + private Point dateToCell(int date) { + Point p = new Point(0,0); + dateToCell(date, p); + return p; + } + + public void paint(Graphics g) { + + if (dirty) { + calculate(); + } + + Point cellPos = new Point(0,0); // Temporary variable + Dimension d = getSize(); + + g.setColor(DemoUtility.bgColor); + g.fillRect(0,0,d.width,d.height); + + // Draw the day names at the top + g.setColor(Color.black); + g.setFont(DemoUtility.labelFont); + FontMetrics fm = g.getFontMetrics(); + int labelHeight = fm.getHeight() + INSET * 2; + + int v = fm.getAscent() + INSET; + for (int i = 0; i < daysInWeek; i++) { + int dayNum = (i + minDay + firstDayOfWeek - 2) % daysInWeek + 1; + String dayName = fSymbols.getWeekdays()[dayNum]; + + int h = (int) (d.width * (i + 0.5)) / daysInWeek; + h -= fm.stringWidth(dayName) / 2; + + g.drawString(dayName, h, v); + } + + double cellHeight = (d.height - labelHeight - 1) / numWeeks; + double cellWidth = (double)(d.width - 1) / daysInWeek; + + // Draw a white background in the part 
of the calendar + // that displays this month. + // First figure out how much of the first week should be shaded. + { + g.setColor(Color.white); + dateToCell(1, cellPos); + int width = (int)(cellPos.x*cellWidth); // Width of unshaded area + + g.fillRect((int)(width), labelHeight , + (int)(d.width - width), (int)cellHeight); + + // All of the intermediate weeks get shaded completely + g.fillRect(0, (int)(labelHeight + cellHeight), + d.width, (int)(cellHeight * (numWeeks - 2))); + + // Now figure out the last week. + dateToCell(daysInMonth, cellPos); + width = (int)((cellPos.x+1)*cellWidth); // Width of shaded area + + g.fillRect(0, (int)(labelHeight + (numWeeks-1) * cellHeight), + width, (int)(cellHeight)); + + } + // Draw the X/Y grid lines + g.setColor(Color.black); + for (int i = 0; i <= numWeeks; i++) { + int y = (int)(labelHeight + i * cellHeight); + g.drawLine(0, y, d.width - 1, y); + } + for (int i = 0; i <= daysInWeek; i++) { + int x = (int)(i * cellWidth); + g.drawLine(x, labelHeight, x, d.height - 1); + } + + // Now loop through all of the days in the month, figure out where + // they go in the grid, and draw the day # for each one + Font numberFont = new Font("Helvetica",Font.PLAIN,12); + // not used Font holidayFont = DemoUtility.creditFont; + + Calendar c = (Calendar)fCalendar.clone(); + c.setTime(fStartOfMonth); + + for (int i = 1, h = 0; i <= daysInMonth; i++) { + g.setFont(numberFont); + g.setColor(Color.black); + fm = g.getFontMetrics(); + + dateToCell(i, cellPos); + int x = (int)((cellPos.x + 1) * cellWidth); + int y = (int)(cellPos.y * cellHeight + labelHeight); + + StringBuffer buffer = new StringBuffer(); + buffer.append(i); + String dayNum = buffer.toString(); + + x = x - INSET - fm.stringWidth(dayNum); + y = y + fm.getAscent() + INSET; + + g.drawString(dayNum, x, y); + + // See if any of the holidays land on this day.... + HolidayInfo info = null; + int count = 0; + + // Coordinates of lower-left corner of cell. 
+ x = (int)((cellPos.x) * cellWidth); + y = (int)((cellPos.y+1) * cellHeight) + labelHeight; + + while (h < fHolidays.size() && + (info = (HolidayInfo)fHolidays.elementAt(h)).date <= i) + { + if (info.date == i) { + // Draw the holiday here. + g.setFont(numberFont); + g.setColor(Color.red); + + DemoTextBox box = new DemoTextBox(g, info.name, (int)(cellWidth - INSET)); + box.draw(g, x + INSET, y - INSET - box.getHeight()); + + y -= (box.getHeight() + INSET); + count++; + } + h++; + } + } + } + + // Important state variables + private Locale fCalendarLocale; // Whose calendar + private Calendar fCalendar; // Calendar for calculations + + private Locale fDisplayLocale; // How to display it + private DateFormatSymbols fSymbols; // Symbols for drawing + + private Date fStartOfMonth; // 00:00:00 on first day of month + + // Cached calculations to make drawing faster. + private transient int minDay; // Minimum legal day # + private transient int daysInWeek; // # of days in a week + private transient int firstDayOfWeek; // First day to display in week + private transient int numWeeks; // # full or partial weeks in month + private transient int daysInMonth; // # days in this month + private transient int firstDayInMonth; // Day of week of first day in month + + private transient Holiday[] fAllHolidays; + private transient Vector fHolidays = new Vector(5,5); + + private transient boolean dirty = true; + } + + private static class HolidayInfo { + public HolidayInfo(int date, Holiday holiday, String name) { + this.date = date; + this.holiday = holiday; + this.name = name; + } + + public Holiday holiday; + public int date; + public String name; + } +} + diff --git a/demos/src/com/ibm/icu/dev/demo/holiday/package.html b/demos/src/com/ibm/icu/dev/demo/holiday/package.html new file mode 100644 index 00000000000..d05e2f55ccf --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/holiday/package.html @@ -0,0 +1,12 @@ + + + + + + +Holiday demo application. 
+ + \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java b/demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java new file mode 100644 index 00000000000..d4089367562 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java @@ -0,0 +1,149 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.demo.impl; +import java.applet.Applet; +import java.applet.AppletContext; +import java.applet.AppletStub; +import java.applet.AudioClip; +import java.awt.Frame; +import java.awt.Image; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.Enumeration; +import java.util.Iterator; + +/** + *

A Frame that runs an Applet within itself, making it possible + * for an applet to run as an application. Usage: + * + *

+ * public class MyApplet extends Applet {
+ *     public static void main(String args[]) {
+ *         MyApplet applet = new MyApplet();
+ *         new AppletFrame("My Applet Running As An App", applet, 640, 480);
+ *     }
+ *     ...
+ * }
+ * 
+ *
+ * @author Alan Liu
+ */
+public class AppletFrame extends Frame implements AppletStub, AppletContext {
+
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 818828281190757725L;
+    Applet applet; // the hosted applet; the window-closing handler stops it
+
+    /**
+     * Construct a Frame running the given Applet with the default size
+     * of 640 by 480.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     */
+    public AppletFrame(String name, Applet applet) {
+        this(name, applet, 640, 480);
+    }
+
+    /**
+     * Construct a Frame running the given Applet with the given size.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     * @param width width of the Frame
+     * @param height height of the Frame
+     */
+    public AppletFrame(String name, Applet applet, int width, int height) {
+        super(name);
+        this.applet = applet;
+        applet.setStub(this);
+
+        setSize(width, height);
+        add("Center", applet);
+        setVisible(true); // replaces the deprecated show()
+        addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                AppletFrame.this.applet.stop();
+                dispose();
+                System.exit(0);
+            }
+        });
+        // No browser is present, so the applet lifecycle calls are made by hand.
+        applet.init();
+        applet.start();
+    }
+
+    // AppletStub API
+    public void appletResize(int width, int height) {
+        setSize(width, height);
+    }
+    /** This frame serves as its own applet context. */
+    public AppletContext getAppletContext() {
+        return this;
+    }
+    /** No code base is provided; always null. */
+    public URL getCodeBase() {
+        return null;
+    }
+    /** No document base is provided; always null. */
+    public URL getDocumentBase() {
+        return null;
+    }
+    /** Every parameter name yields the same placeholder string. */
+    public String getParameter(String name) {
+        return "PARAMETER";
+    }
+    /** Always reports the applet as active. */
+    public boolean isActive() {
+        return true;
+    }
+
+
+    // AppletContext API
+    public Applet getApplet(String name) { // name is ignored: there is one applet
+        return applet;
+    }
+    /** Returns an enumeration holding just the hosted applet (never null). */
+    public Enumeration getApplets() {
+        return java.util.Collections.enumeration(java.util.Collections.singletonList(applet));
+    }
+    /** Audio clips are not provided; always null. */
+    public AudioClip getAudioClip(URL url) {
+        return null;
+    }
+    /** Images are not provided; always null. */
+    public Image getImage(URL url) {
+        return null;
+    }
+    /** Both showDocument variants do nothing. */
+    public void showDocument(URL url) {}
+    public void showDocument(URL url, String target) {}
+    /** Status text is printed to standard output. */
+    public void showStatus(String status) {
+        System.out.println(status);
+    }
+    /** Streams are not stored; the call is ignored. */
+    public void setStream(String key, InputStream stream) throws IOException {
+    }
+    /** No streams are stored; always null. */
+    public InputStream getStream(String key) {
+        return null;
+    }
+    /** No stream keys are tracked; always null. */
+    public Iterator getStreamKeys() {
+        return null;
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java b/demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java
new file mode 100644
index 00000000000..339e6a76160
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java
@@ -0,0 +1,80 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.demo.impl;
+
+import java.awt.Button;
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.Frame;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+
+public abstract class DemoApplet extends java.applet.Applet {
+    private static final long serialVersionUID = -8983602961925702071L;
+    private Button   demoButton;   // opens the demo frame
+    private Frame    demoFrame;    // frame created by showDemo(); null once closed
+    private static int demoFrameCount = 0; // open frames, shared by all instances
+    /** Creates the frame that hosts the demo; showDemo() sizes and shows it. */
+    protected abstract Frame createDemoFrame(DemoApplet applet);
+    protected Dimension getDefaultFrameSize(DemoApplet applet, Frame f) {
+        return new Dimension(700, 550);
+    }
+
+    /** Creates a button that will display the demo. */
+    public void init() {
+        // Pressing the button opens the demo unless a frame is already open.
+        setBackground(Color.white);
+        demoButton = new Button("Demo");
+        demoButton.setBackground(Color.yellow);
+        add( demoButton );
+
+        demoButton.addActionListener( new ActionListener() {
+             public void actionPerformed(ActionEvent e) {
+                if (e.getID() == ActionEvent.ACTION_PERFORMED) {
+                    demoButton.setLabel("loading");
+                    // showDemo() creates the frame itself; creating one here
+                    // as well would leave an orphaned frame that is never shown.
+                    if (demoFrame == null) {
+                       showDemo();
+                    }
+
+                    demoButton.setLabel("Demo");
+                }
+             }
+        } );
+    }
+
+    /** Creates, sizes and shows the demo frame and counts it as open. */
+    public void showDemo() {
+        demoFrame = createDemoFrame(this);
+        demoFrame.doLayout();
+        Dimension d = getDefaultFrameSize(this, demoFrame);
+        demoFrame.setSize(d.width, d.height);
+        demoFrame.setVisible(true); // replaces the deprecated show()
+        demoFrameOpened();
+    }
+
+    /** Forgets the frame and counts it as closed, which may end the VM. */
+    public void demoClosed() {
+        demoFrame = null;
+        demoFrameClosed();
+    }
+
+    public static void demoFrameOpened() { // one more frame is open
+        demoFrameCount++;
+        System.err.println("DemoFrameOpened, now at:"+demoFrameCount);
+    }
+    public static void demoFrameClosed() { // exits the VM when the count reaches 0
+        if (--demoFrameCount == 0) {
+            System.err.println("DemoFrameClosed, now at:"+demoFrameCount + " - quitting");
+            System.exit(0);
+        }
+        System.err.println("DemoFrameClosed, now at:"+demoFrameCount);
+    }
+}
+
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java b/demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java
new file mode 100644
index 00000000000..a3d83499421
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java
@@ -0,0 +1,96 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+
+
+import java.awt.FontMetrics;
+import java.awt.Graphics;
+import java.text.BreakIterator;
+
+public class DemoTextBox {
+
+    /**
+     * Word-wraps text to width pixels, measured with the current font of g.
+     * The line breaks are computed once, here.
+     */
+    public DemoTextBox(Graphics g, String text, int width) {
+        this.text = text;
+        this.chars = text.toCharArray();
+        this.width = width;
+        this.metrics = g.getFontMetrics();
+
+        breakText();
+    }
+
+    /** Returns the pixel height of all wrapped lines. */
+    public int getHeight() {
+        return (nbreaks + 1) * metrics.getHeight();
+    }
+
+    /** Draws the wrapped lines; (x, y) is the top-left corner of the first line. */
+    public void draw(Graphics g, int x, int y) {
+        int index = 0;
+        y += metrics.getAscent();
+
+        for (int i = 0; i < nbreaks; i++) {
+            g.drawChars(chars, index, breakPos[i] - index, x, y);
+            index = breakPos[i];
+            y += metrics.getHeight();
+        }
+
+        // Whatever follows the last break forms the final line.
+        g.drawChars(chars, index, chars.length - index, x, y);
+    }
+
+    /** Records the word-boundary offsets at which a new line starts. */
+    private void breakText() {
+        if (metrics.charsWidth(chars, 0, chars.length) <= width) {
+            return; // everything fits on a single line
+        }
+        BreakIterator iter = BreakIterator.getWordInstance();
+        iter.setText(text);
+
+        int start = iter.first();
+        int end = start;
+        int pos;
+
+        while ((pos = iter.next()) != BreakIterator.DONE) {
+            int w = metrics.charsWidth(chars, start, pos - start);
+            if (w <= width) {
+                // Still fits: the best tentative break position so far.
+                end = pos;
+            } else if (end > start) {
+                // Too wide: break at the last position that still fit.
+                addBreak(end);
+                start = end;
+                end = pos;
+            } else {
+                // No earlier break position: let this word overflow the margin.
+                addBreak(pos);
+                start = end = pos;
+            }
+        }
+    }
+
+    /** Appends a break offset, growing the array when it is full. */
+    private void addBreak(int offset) {
+        if (nbreaks == breakPos.length) {
+            int[] grown = new int[breakPos.length * 2];
+            System.arraycopy(breakPos, 0, grown, 0, nbreaks);
+            breakPos = grown;
+        }
+        breakPos[nbreaks++] = offset;
+    }
+
+    private String          text;
+    private char[]          chars;
+    private FontMetrics     metrics;
+    private int             width;
+
+    private int[]           breakPos = new int[10]; // grown on demand by addBreak()
+    private int             nbreaks = 0;
+}
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java b/demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java
new file mode 100644
index 00000000000..c838a13ea2b
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java
@@ -0,0 +1,136 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.Container;
+import java.awt.Font;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Insets;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.TextComponent;
+import java.util.Locale;
+
+public class DemoUtility
+{
+    public static final Font titleFont = new Font("TimesRoman",Font.BOLD,18);
+    public static final Font labelFont = new Font("TimesRoman",Font.BOLD,14);
+    public static final Font choiceFont = new Font("Helvetica",Font.BOLD,12);
+    public static final Font editFont = new Font("Helvetica",Font.PLAIN,14);
+    public static final Font creditFont = new Font("Helvetica",Font.PLAIN,10);
+    public static final Font numberFont = new Font("sansserif", Font.PLAIN, 14);
+
+    public static final Color bgColor = Color.lightGray;
+    public static final Color choiceColor = Color.white;
+
+    public static final String copyright1 =
+        "Copyright (C) IBM Corp and others. 1997 - 2002 All Rights Reserved";
+
+    /**
+     * Provides an easy way to use the basic functions of GridBagLayout. After
+     * building a panel and inserting all its subcomponents, call this to lay
+     * them out in the given number of columns (columns must not be zero).
+     */
+    public static void fixGrid(Container cont, int columns) {
+        GridBagLayout gridbag = new GridBagLayout();
+        cont.setLayout(gridbag);
+
+        GridBagConstraints c = new GridBagConstraints();
+        c.fill = GridBagConstraints.VERTICAL;
+        c.weightx = 1.0;
+        c.insets = new Insets(2,2,2,2);
+
+        Component[] components = cont.getComponents();
+        for (int i = 0; i < components.length; ++i) {
+            // NOTE(review): c is reused, so a Label's anchor also applies to later components
+            c.gridwidth = 1;    // default
+            if ((i%columns) == columns - 1)
+                c.gridwidth = GridBagConstraints.REMAINDER;    // last in grid
+            if (components[i] instanceof Label) {
+                switch (((Label)components[i]).getAlignment()) {
+                case Label.CENTER: c.anchor = GridBagConstraints.CENTER; break;
+                case Label.LEFT: c.anchor = GridBagConstraints.WEST; break;
+                case Label.RIGHT: c.anchor = GridBagConstraints.EAST; break;
+                }
+            }
+            gridbag.setConstraints(components[i], c);
+        }
+
+    }
+
+    /**
+     * Provides an easy way to change the spacing around an object in a
+     * GridBagLayout. Call AFTER fixGrid, passing in the container, the
+     * component, and the new insets.
+     */
+    public static void setInsets(Container cont, Component comp, Insets insets) {
+        GridBagLayout gbl = (GridBagLayout)cont.getLayout();
+        GridBagConstraints g = gbl.getConstraints(comp);
+        g.insets = insets;
+        gbl.setConstraints(comp,g);
+    }
+
+    public static Panel createSpacer() { // a layout-free panel sized 1000 x 1
+        Panel spacer = new Panel();
+        spacer.setLayout(null);
+        spacer.setSize(1000, 1);
+        return spacer;
+    }
+
+    // to avoid goofy updates and misplaced cursors
+    public static void setText(TextComponent area, String newText) {
+        String foo = area.getText();
+        if (foo.equals(newText)) return;
+        area.setText(newText);
+    }
+
+    /**
+     * Compares two locales. Returns -1 if the languages differ, 0 if only the
+     * languages match, 1 if the countries match too, and 2 if the variants
+     * match as well.
+     */
+
+    public static int compareLocales(Locale l1, Locale l2)
+    {
+        int result = -1;
+
+        if (l1.getLanguage().equals(l2.getLanguage())) {
+            result += 1;
+
+            if (l1.getCountry().equals(l2.getCountry())) {
+                result += 1;
+
+                if (l1.getVariant().equals(l2.getVariant())) {
+                    result += 1;
+                }
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Get the G7 locale list for demos (a fresh copy on every call).
+     */
+    public static Locale[] getG7Locales() {
+        return (Locale[]) localeList.clone();
+    }
+    private static final Locale[] localeList = {
+        new Locale("DA", "DK", ""),
+        new Locale("EN", "US", ""),
+        new Locale("EN", "GB", ""),
+        new Locale("EN", "CA", ""),
+        new Locale("FR", "FR", ""),
+        new Locale("FR", "CA", ""),
+        new Locale("DE", "DE", ""),
+        new Locale("IT", "IT", ""),
+    //new Locale("JA", "JP", ""),
+    };
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java b/demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java
new file mode 100644
index 00000000000..e6147be986e
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java
@@ -0,0 +1,827 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+import java.awt.AWTEventMulticaster;
+import java.awt.Canvas;
+import java.awt.Color;
+import java.awt.Cursor;
+import java.awt.Dimension;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Graphics;
+import java.awt.Image;
+import java.awt.Point;
+import java.awt.datatransfer.Clipboard;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.datatransfer.StringSelection;
+import java.awt.datatransfer.Transferable;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.FocusEvent;
+import java.awt.event.FocusListener;
+import java.awt.event.InputEvent;
+import java.awt.event.KeyEvent;
+import java.awt.event.KeyListener;
+import java.awt.event.MouseEvent;
+import java.awt.event.MouseListener;
+import java.awt.event.MouseMotionListener;
+import java.awt.event.TextEvent;
+import java.awt.event.TextListener;
+import java.text.BreakIterator;
+
+// LIU: Changed from final to non-final
+public class DumbTextComponent extends Canvas
+  implements KeyListener, MouseListener, MouseMotionListener, FocusListener
+{
+    
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 8265547730738652151L;
+
+//    private transient static final String copyright =
+//      "Copyright \u00A9 1998, Mark Davis. All Rights Reserved.";
+    private transient static boolean DEBUG = false;
+
+    private String contents = "";
+    private Selection selection = new Selection();
+    private int activeStart = -1;
+    private boolean editable = true;
+
+    private transient Selection tempSelection = new Selection();
+    private transient boolean focus;
+    private transient BreakIterator lineBreaker = BreakIterator.getLineInstance();
+    private transient BreakIterator wordBreaker = BreakIterator.getWordInstance();
+    private transient BreakIterator charBreaker = BreakIterator.getCharacterInstance();
+    private transient int lineAscent;
+    private transient int lineHeight;
+    private transient int lineLeading;
+    private transient int lastHeight = 10;
+    private transient int lastWidth = 50;
+    private static final int MAX_LINES = 200; // LIU: Use symbolic name
+    private transient int[] lineStarts = new int[MAX_LINES]; // LIU
+    private transient int lineCount = 1;
+
+    private transient boolean valid = false;
+    private transient FontMetrics fm;
+    private transient boolean redoLines = true;
+    private transient boolean doubleClick = false;
+    private transient TextListener textListener;
+    private transient ActionListener selectionListener;
+    private transient Image cacheImage;
+    private transient Dimension mySize;
+    private transient int xInset = 5;
+    private transient int yInset = 5;
+    private transient Point startPoint = new Point();
+    private transient Point endPoint = new Point();
+    private transient Point caretPoint = new Point();
+    private transient Point activePoint = new Point();
+    
+    //private transient static String clipBoard;
+
+    private static final char CR = '\015'; // LIU
+
+    // ============================================
+
+    public DumbTextComponent() { // registers itself for its own input events
+        addMouseListener(this);
+        addMouseMotionListener(this);
+        addKeyListener(this);
+        addFocusListener(this);
+        setCursor(Cursor.getPredefinedCursor(Cursor.TEXT_CURSOR));
+
+    }
+
+// ================ Events ====================
+
+    // public boolean isFocusTraversable() { return true; }
+
+    public void addActionListener(ActionListener l) { // l is notified when the selection changes
+        selectionListener = AWTEventMulticaster.add(selectionListener, l);
+    }
+
+    public void removeActionListener(ActionListener l) { // stops selection-change notifications to l
+        selectionListener = AWTEventMulticaster.remove(selectionListener, l);
+    }
+
+    public void addTextListener(TextListener l) { // l is notified by setText2() when the text changes
+        textListener = AWTEventMulticaster.add(textListener, l);
+    }
+
+    public void removeTextListener(TextListener l) { // stops text-change notifications to l
+        textListener = AWTEventMulticaster.remove(textListener, l);
+    }
+
+    private transient boolean pressed;
+
+    public void mousePressed(MouseEvent e) {
+        if (DEBUG) System.out.println("mousePressed");
+        if (pressed) { // a button is already down: treat like a drag
+            select(e, false);
+            return;
+        }
+        doubleClick = e.getClickCount() > 1;
+        requestFocus();
+        select(e, true);
+        pressed = true;
+    }
+
+    public void mouseDragged(MouseEvent e) {
+        if (DEBUG) System.out.println("mouseDragged");
+        select(e, false); // first=false: the anchor is not reset
+    }
+
+    public void mouseReleased(MouseEvent e) {
+        if (DEBUG) System.out.println("mouseReleased");
+        pressed = false; // the next mousePressed is treated as an initial press again
+    }
+
+    public void mouseEntered(MouseEvent e) { // currently does nothing
+        //if (pressed) select(e, false);
+    }
+
+    public void mouseExited(MouseEvent e){ // currently does nothing
+        //if (pressed) select(e, false);
+    }
+
+    public void mouseClicked(MouseEvent e) {} // unused MouseListener callback
+    public void mouseMoved(MouseEvent e) {} // unused MouseMotionListener callback
+
+
+    public void focusGained(FocusEvent e) {
+        if (DEBUG) System.out.println("focusGained");
+        focus = true; // paint2() and paintRect() draw differently with focus
+        valid = false; // makes paint() redraw the cached image
+        repaint(16);
+    }
+    public void focusLost(FocusEvent e) {
+        if (DEBUG) System.out.println("focusLost");
+        focus = false; // paint2() and paintRect() draw differently without focus
+        valid = false; // makes paint() redraw the cached image
+        repaint(16);
+    }
+
+    public void select(MouseEvent e, boolean first) {
+        setKeyStart(-1);
+        point2Offset(e.getPoint(), tempSelection);
+        // An initial press without Shift anchors the selection at the caret;
+        // in every other case the anchor is left as it is.
+        if (first && (e.getModifiers() & InputEvent.SHIFT_MASK) == 0) {
+            tempSelection.anchor = tempSelection.caret;
+        }
+        // fix words
+        if (doubleClick) {
+            tempSelection.expand(wordBreaker);
+        }
+        select(tempSelection);
+    }
+    
+    public void keyPressed(KeyEvent e) {
+        int code = e.getKeyCode();
+        if (DEBUG) System.out.println("keyPressed "
+          + hex((char)code) + ", " + hex((char)e.getModifiers()));
+        boolean shift = (e.getModifiers() & InputEvent.SHIFT_MASK) != 0;
+        boolean ctrl = (e.getModifiers() & InputEvent.CTRL_MASK) != 0;
+        // Handles the Ctrl shortcuts, arrow-key navigation and the Delete key.
+        switch (code) {
+        case KeyEvent.VK_Q: // Ctrl+Q: delegates to fixHex()
+            if (!ctrl || !editable) break;
+            setKeyStart(-1);
+            fixHex();
+            break;
+        case KeyEvent.VK_V: // Ctrl+V: paste, or beep when not editable
+            if (!ctrl) break;
+            if (!editable) {
+                this.getToolkit().beep();
+            } else {
+                paste();
+            }
+            break;
+        case KeyEvent.VK_C: // Ctrl+C: copy
+            if (!ctrl) break;
+            copy();
+            break;
+        case KeyEvent.VK_X: // Ctrl+X: cut, or beep when not editable
+            if (!ctrl) break;
+            if (!editable) {
+                this.getToolkit().beep();
+            } else {
+                copy();
+                insertText("");
+            }
+            break;
+        case KeyEvent.VK_A: // Ctrl+A: select all (select() pins the range to the text)
+            if (!ctrl) break;
+            setKeyStart(-1);
+            select(Integer.MAX_VALUE, 0, false);
+            break;
+        case KeyEvent.VK_RIGHT:
+            moveCaretByBound(+1, ctrl, shift);
+            break;
+        case KeyEvent.VK_LEFT:
+            moveCaretByBound(-1, ctrl, shift);
+            break;
+        case KeyEvent.VK_UP: // LIU: Add support for up arrow
+            moveCaretByLine(-1, shift);
+            break;
+        case KeyEvent.VK_DOWN: // LIU: Add support for down arrow
+            moveCaretByLine(+1, shift);
+            break;
+        case KeyEvent.VK_DELETE: // LIU: Add delete key support
+            if (!editable) break;
+            setKeyStart(-1);
+            if (contents.length() == 0) break;
+            int start = selection.getStart();
+            int end = selection.getEnd();
+            if (start == end) {
+                // Empty selection: delete the character after the caret.
+                ++end;
+                if (end > contents.length()) {
+                    getToolkit().beep();
+                    return;
+                }
+            }
+            replaceRange("", start, end);
+            break;
+        }
+    }
+
+    /** Left/right arrows: steps by word (byWord) or character; extend goes to nextBound(). */
+    private void moveCaretByBound(int dir, boolean byWord, boolean extend) {
+        setKeyStart(-1);
+        tempSelection.set(selection);
+        tempSelection.nextBound(byWord ? wordBreaker : charBreaker, dir, extend);
+        select(tempSelection);
+    }
+
+    /** Up/down arrows: moves the caret delta lines; the anchor follows unless extend. */
+    private void moveCaretByLine(int delta, boolean extend) {
+        setKeyStart(-1);
+        tempSelection.set(selection);
+        tempSelection.caret = lineDelta(tempSelection.caret, delta);
+        if (!extend) {
+            tempSelection.anchor = tempSelection.caret;
+        }
+        select(tempSelection);
+    }
+
+    void copy() { // puts the selected text on the system clipboard
+        Clipboard systemClipboard = this.getToolkit().getSystemClipboard();
+        String picked = contents.substring(selection.getStart(), selection.getEnd());
+        StringSelection owner = new StringSelection(picked);
+        systemClipboard.setContents(owner, owner);
+    }
+    
+    void paste () { // inserts the clipboard text at the selection; beeps on failure
+        Clipboard cb = this.getToolkit().getSystemClipboard();
+        Transferable clip = cb.getContents(this);
+        if (clip != null) {
+            try {
+                insertText((String) clip.getTransferData(DataFlavor.stringFlavor));
+            } catch (Exception e) {
+                // any failure (e.g. no string flavor available) is reported by a beep
+                this.getToolkit().beep();
+            }
+        } else {
+            this.getToolkit().beep();
+        }
+    }
+
+    /**
+     * LIU: Given an offset into contents, moves up or down by lines,
+     * according to lineStarts[]. The position within the line is kept when
+     * the target line is long enough for it.
+     * @param off the offset into contents
+     * @param delta how many lines to move up (< 0) or down (> 0)
+     * @return the new offset into contents
+     */
+    private int lineDelta(int off, int delta) {
+        int fromLine = findLine(off, false);
+        int column = off - lineStarts[fromLine];
+        // Above the first line: go to its start. Below the last: end of text.
+        int toLine = fromLine + delta;
+        if (toLine < 0) {
+            toLine = column = 0;
+        } else if (toLine >= lineCount) {
+            return contents.length();
+        }
+        int target = lineStarts[toLine] + column;
+        int nextLineStart = lineStarts[toLine + 1];
+        // Clamp to the offset just before the following line starts.
+        return (target >= nextLineStart) ? nextLineStart - 1 : target;
+    }
+      
+    public void keyReleased(KeyEvent e) { // only traces the event when DEBUG is set
+        int code = e.getKeyCode();
+        if (DEBUG) System.out.println("keyReleased "
+          + hex((char)code) + ", " + hex((char)e.getModifiers()));
+    }
+
+    /**
+     * Handles typed characters. Events with Ctrl held, undefined characters
+     * and all input while the component is not editable are ignored.
+     * Backspace and delete remove the selection or, when the selection is
+     * empty, the single character before or after the caret (beeping when
+     * there is none). Any other character goes to handleKeyTyped().
+     */
+    public void keyTyped(KeyEvent e) {
+        char ch = e.getKeyChar();
+        if (DEBUG) System.out.println("keyTyped "
+          + hex((char)ch) + ", " + hex((char)e.getModifiers()));
+        // Ctrl combinations are not treated as typed text.
+        if ((e.getModifiers() & InputEvent.CTRL_MASK) != 0) {
+            return;
+        }
+        // Every remaining path needs a defined character and an editable component.
+        if (ch == KeyEvent.CHAR_UNDEFINED || !editable) {
+            return;
+        }
+
+        boolean backspace = (ch == KeyEvent.VK_BACK_SPACE);
+        boolean delete = (ch == KeyEvent.VK_DELETE);
+        if (!backspace && !delete) {
+            // LIU: Dispatch to subclass API
+            handleKeyTyped(e);
+            return;
+        }
+
+        // Nothing can be removed from empty contents.
+        if (contents.length() == 0) {
+            return;
+        }
+        int start = selection.getStart();
+        int end = selection.getEnd();
+        if (start == end) {
+            // Empty selection: widen the range by one character on the
+            // matching side, unless that would leave the text.
+            boolean outside = backspace ? (--start < 0)
+                                        : (++end > contents.length());
+            if (outside) {
+                getToolkit().beep(); // LIU: Add audio feedback of NOP
+                return;
+            }
+        }
+        replaceRange("", start, end);
+    }
+
+    // LIU: Subclass API for handling of key typing; by default inserts the typed character
+    protected void handleKeyTyped(KeyEvent e) {
+        insertText(String.valueOf(e.getKeyChar()));
+    }
+    
+    protected void setKeyStart(int keyStart) {
+        // Store the value and repaint only when it actually changes.
+        if (activeStart == keyStart) return;
+        activeStart = keyStart;
+        repaint(10);
+    }
+    
+    protected void validateKeyStart() { // keeps activeStart from lying after the selection start
+        int selectionStart = selection.getStart();
+        if (activeStart <= selectionStart) return;
+        activeStart = selectionStart;
+        repaint(10);
+    }
+    
+    protected int getKeyStart() { // -1 means no key start is set
+        return activeStart;
+    }
+
+// ===================== Control ======================
+
+    public synchronized void setEditable(boolean b) { // false blocks the editing keys handled by this class
+        editable = b;
+    }
+
+    public boolean isEditable() { // value last given to setEditable(); true initially
+        return editable;
+    }
+
+    public void select(Selection newSelection) {
+        // Pin the request to the current text, then adopt it only if it differs.
+        newSelection.pin(contents);
+        if (selection.equals(newSelection)) return;
+        selection.set(newSelection);
+        if (selectionListener != null) {
+            ActionEvent changed = new ActionEvent(
+              this, ActionEvent.ACTION_PERFORMED, "Selection Changed", 0);
+            selectionListener.actionPerformed(changed);
+        }
+        repaint(10);
+        valid = false;
+    }
+
+    public void select(int start, int end) { // same as select(start, end, false)
+        select(start, end, false);
+    }
+
+    public void select(int start, int end, boolean clickAfter) { // selects via the scratch selection
+        tempSelection.set(start, end, clickAfter);
+        select(tempSelection);
+    }
+
+    public int getSelectionStart() { // start offset of the current selection
+        return selection.getStart();
+    }
+
+    public int getSelectionEnd() { // end offset of the current selection
+        return selection.getEnd();
+    }
+
+    public void setBounds(int x, int y, int w, int h) {
+        super.setBounds(x,y,w,h);
+        redoLines = true; // paint2() then calls fixLineStarts() again
+    }
+
+    public Dimension getPreferredSize() { // size recorded by the last paint2(); 50 x 10 before that
+        return new Dimension(lastWidth,lastHeight);
+    }
+
+    public Dimension getMaximumSize() { // same value as getPreferredSize()
+        return new Dimension(lastWidth,lastHeight);
+    }
+
+    public Dimension getMinimumSize() { // NOTE(review): the width is lastHeight as well; confirm this is intended
+        return new Dimension(lastHeight,lastHeight);
+    }
+
+    public void setText(String text) { // replaces the text, then re-pins the selection to it
+        setText2(text);
+        select(tempSelection.set(selection).pin(contents));
+    }
+
+    public void setText2(String text) { // replaces the text without touching the selection
+        contents = text;
+        charBreaker.setText(text); // all three break iterators follow the new text
+        wordBreaker.setText(text);
+        lineBreaker.setText(text);
+        redoLines = true; // paint2() then calls fixLineStarts() again
+        if (textListener != null)
+            textListener.textValueChanged(
+              new TextEvent(this, TextEvent.TEXT_VALUE_CHANGED));
+        repaint(16);
+    }
+
+    public void insertText(String text) { // replaces the selected range with text
+        if (activeStart == -1) activeStart = selection.getStart(); // record the start if none is set
+        replaceRange(text, selection.getStart(), selection.getEnd());
+    }
+
+    public void replaceRange(String s, int start, int end) { // swaps [start, end) for s
+        String head = contents.substring(0, start);
+        String tail = contents.substring(end);
+        setText2(head + s + tail);
+        select(tempSelection.set(selection).fixAfterReplace(start, end, s.length()));
+        validateKeyStart();
+    }
+
+    public String getText() { // the complete current text
+        return contents;
+    }
+
+    public void setFont(Font font) {
+        super.setFont(font);
+        redoLines = true; // paint2() then calls fixLineStarts() again
+        repaint(16);
+    }
+
+    // ================== Graphics ======================
+
+    public void update(Graphics g) { // overrides Canvas.update(), which clears the background first
+        if (DEBUG) System.out.println("update");
+        paint(g);
+    }
+
+    public void paint(Graphics g) { // draws via an off-screen image that is redrawn only when stale
+        mySize = getSize();
+        if (cacheImage == null || cacheImage.getHeight(this) != mySize.height
+          || cacheImage.getWidth(this) != mySize.width) {
+            cacheImage = createImage(mySize.width, mySize.height);
+            valid = false;
+        }
+        if (cacheImage == null) return; // createImage() gives null if not displayable
+        if (!valid || redoLines) {
+            if (DEBUG) System.out.println("painting");
+            Graphics cacheGraphics = cacheImage.getGraphics();
+            try {
+                paint2(cacheGraphics);
+            } finally { cacheGraphics.dispose(); } // free the context created above
+            valid = true;
+        }
+        if (DEBUG) System.out.println("copying");
+        g.drawImage(cacheImage, 0, 0, mySize.width, mySize.height,
+          0, 0, mySize.width, mySize.height, this);
+    }
+
+    public void paint2(Graphics g) { // draws border, text lines and selection into g
+        g.clearRect(0, 0, mySize.width, mySize.height);
+        if (DEBUG) System.out.println("print");
+        if (focus) g.setColor(Color.black); // border: black with focus, gray without
+        else g.setColor(Color.gray);
+        g.drawRect(0,0,mySize.width-1,mySize.height-1);
+        g.setClip(1,1, // later drawing stays inside the border
+          mySize.width-2,mySize.height-2);
+        g.setColor(Color.black);
+        g.setFont(getFont());
+        fm = g.getFontMetrics();
+        lineAscent = fm.getAscent();
+        lineLeading = fm.getLeading();
+        lineHeight = lineAscent + fm.getDescent() + lineLeading;
+        int y = yInset + lineAscent; // baseline of the first line
+        String lastSubstring = "";
+        if (redoLines) fixLineStarts(mySize.width-xInset-xInset); // width minus both insets
+        for (int i = 0; i < lineCount; y += lineHeight, ++i) {
+            // LIU: Don't display terminating ^M characters
+            int lim = lineStarts[i+1];
+            if (lim > 0 && contents.length() > 0 &&
+                contents.charAt(lim-1) == CR) --lim;
+            lastSubstring = contents.substring(lineStarts[i],lim);
+            g.drawString(lastSubstring, xInset, y);
+        }
+        drawSelection(g, lastSubstring);
+        lastHeight = y + yInset - lineHeight + yInset; // read by the get*Size() methods
+        lastWidth = mySize.width-xInset-xInset;
+    }
+
+    /**
+     * Draws one selection rectangle: filled while the component has focus,
+     * otherwise only its outline (kept within the same bounds).
+     */
+    void paintRect(Graphics g, int x, int y, int w, int h) {
+        if (!focus) {
+            g.drawRect(x, y, w-1, h-1);
+            return;
+        }
+        g.fillRect(x, y, w, h);
+    }
+
+    /**
+     * Draws, in XOR mode, the active-start marker, the selection highlight
+     * and the caret. Updates activePoint, startPoint, endPoint and
+     * caretPoint as a side effect.
+     * @param g the graphics context to draw into
+     * @param lastSubstring not used by this method
+     */
+    public void drawSelection(Graphics g, String lastSubstring) {
+        g.setXORMode(Color.black);
+        // one-pixel-wide marker just left of the active start, if any
+        if (activeStart != -1) {
+            offset2Point(activeStart, false, activePoint);
+            g.setColor(Color.magenta);
+            int line = activePoint.x - 1;
+            g.fillRect(line, activePoint.y, 1, lineHeight);
+        }
+        if (selection.isCaret()) {
+            offset2Point(selection.caret, selection.clickAfter, caretPoint);
+        } else {
+            if (focus) g.setColor(Color.blue);
+            else g.setColor(Color.yellow);
+            offset2Point(selection.getStart(), true, startPoint);
+            offset2Point(selection.getEnd(), false, endPoint);
+            // the caret sits at whichever end of the range is the caret offset
+            if (selection.getStart() == selection.caret)
+                caretPoint.setLocation(startPoint);
+            else caretPoint.setLocation(endPoint);
+            if (startPoint.y == endPoint.y) {
+                // selection within a single line; at least one pixel wide
+                paintRect(g, startPoint.x, startPoint.y,
+                  Math.max(1,endPoint.x-startPoint.x), lineHeight);
+            } else {
+                // first line: from the start point to the right inset
+                paintRect(g, startPoint.x, startPoint.y,
+                  (mySize.width-xInset)-startPoint.x, lineHeight);
+                // full-width block for any lines strictly in between
+                if (startPoint.y + lineHeight < endPoint.y)
+                  paintRect(g, xInset, startPoint.y + lineHeight,
+                  (mySize.width-xInset)-xInset, endPoint.y - startPoint.y - lineHeight);
+                // last line: from the left inset to the end point
+                paintRect(g, xInset, endPoint.y, endPoint.x-xInset, lineHeight);
+            }
+        }
+        // caret: a vertical bar with short horizontal ticks at top and bottom;
+        // the ticks point right when clickAfter is set, left otherwise
+        if (focus || selection.isCaret()) {
+            if (focus) g.setColor(Color.green);
+            else g.setColor(Color.red);
+            int line = caretPoint.x - (selection.clickAfter ? 0 : 1);
+            g.fillRect(line, caretPoint.y, 1, lineHeight);
+            int w = lineHeight/12 + 1;
+            int braces = line - (selection.clickAfter ? -1 : w);
+            g.fillRect(braces, caretPoint.y, w, 1);
+            g.fillRect(braces, caretPoint.y + lineHeight - 1, w, 1);
+        }
+    }
+
+    /**
+     * Converts a text offset into the pixel position of the top-left corner
+     * of that offset on its display line.
+     * @param off the offset into the contents
+     * @param start passed to findLine to choose the line when off lies on a
+     * line boundary
+     * @param p the point to fill in
+     * @return p, with x clamped to the right inset
+     */
+    public Point offset2Point(int off, boolean start, Point p) {
+        int line = findLine(off, start);
+        int width = 0;
+        try {
+            width = fm.stringWidth(
+              contents.substring(lineStarts[line], off));
+        } catch (Exception e) {
+            // any failure while measuring is only reported; width stays 0
+            System.out.println(e);
+        }
+        p.x = width + xInset;
+        if (p.x > mySize.width - xInset)
+            p.x = mySize.width - xInset;
+        p.y = lineHeight * line + yInset;
+        return p;
+    }
+
+    /**
+     * Returns the index of the display line containing the given offset.
+     * @param off the offset into the contents
+     * @param start if true, an offset that sits exactly on a line boundary
+     * is attributed to the following line
+     * @return a line index in 0..lineCount-1, or lineCount when off is at
+     * the very end of the text directly after a CR character
+     */
+    private int findLine(int off, boolean start) {
+        // if it is start, then go to the next line!
+        if (start) ++off;
+        for (int i = 1; i < lineCount; ++i) {
+            // LIU: This was <= ; changed to < to make caret after
+            // final CR in line appear at START of next line.
+            if (off < lineStarts[i]) return i-1;
+        }
+        // LIU: Check for special case; after CR at end of the last line
+        if (off == lineStarts[lineCount] &&
+            off > 0 && contents.length() > 0 && contents.charAt(off-1) == CR) {
+            return lineCount;
+        }
+        return lineCount-1;
+    }
+
+    /**
+     * Converts a pixel position into the nearest text offset, storing the
+     * result in o.caret and o.clickAfter (o.anchor is left untouched).
+     * Offsets on any line will go from start,true to end,false
+     * excluding start,false and end,true.
+     * NOTE(review): divides by lineHeight, which is only assigned in
+     * paint2 -- presumably never called before the first paint; confirm.
+     * @param p the position, in component coordinates
+     * @param o the selection to fill in
+     * @return o
+     */
+    public Selection point2Offset(Point p, Selection o) {
+        // above the first line: start of text
+        if (p.y < yInset) {
+            o.caret = 0;
+            o.clickAfter = true;
+            return o;
+        }
+        int line = (p.y - yInset)/lineHeight;
+        // below the last line: end of text
+        if (line >= lineCount) {
+            o.caret = contents.length();
+            o.clickAfter = false;
+            return o;
+        }
+        int target = p.x - xInset;
+        // left of the text: start of this line
+        if (target <= 0) {
+            o.caret = lineStarts[line];
+            o.clickAfter = true;
+            return o;
+        }
+        int lowGuess = lineStarts[line];
+        int lowWidth = 0;
+        int highGuess = lineStarts[line+1];
+        int highWidth = fm.stringWidth(contents.substring(lineStarts[line],highGuess));
+        // right of the text: end of this line
+        if (target >= highWidth) {
+            o.caret = lineStarts[line+1];
+            o.clickAfter = false;
+            return o;
+        }
+        // binary search for the offset whose prefix width brackets target
+        while (lowGuess < highGuess - 1) {
+            int guess = (lowGuess + highGuess)/2;
+            int width = fm.stringWidth(contents.substring(lineStarts[line],guess));
+            if (width <= target) {
+                lowGuess = guess;
+                lowWidth = width;
+                if (width == target) break;
+            } else {
+                highGuess = guess;
+                highWidth = width;
+            }
+        }
+        // at end, either lowWidth < target < width(low+1), or lowWidth = target
+        // NOTE(review): assumes charBreaker's text is the full contents here;
+        // nextLine may hand charBreaker to findFittingBreak, which calls
+        // setText on a substring -- verify the text is reset before this runs
+        int highBound = charBreaker.following(lowGuess);
+        int lowBound = charBreaker.previous();
+        // we are now at character boundaries
+        if (lowBound != lowGuess)
+            lowWidth = fm.stringWidth(contents.substring(lineStarts[line],lowBound));
+        if (highBound != highGuess)
+            highWidth = fm.stringWidth(contents.substring(lineStarts[line],highBound));
+        // we now have the right widths
+        // pick whichever boundary is closer to the target x
+        if (target - lowWidth < highWidth - target) {
+            o.caret = lowBound;
+            o.clickAfter = true;
+        } else {
+            o.caret = highBound;
+            o.clickAfter = false;
+        }
+        // we now have the closest!
+        return o;
+    }
+
+    /**
+     * Recomputes lineStarts and lineCount by repeatedly calling nextLine.
+     * On return lineStarts[i] is the offset at which display line i begins
+     * and lineStarts[lineCount] is the end of the last line.
+     * @param width the width available for text, in pixels
+     * @throws RuntimeException if nextLine fails to advance
+     */
+    private void fixLineStarts(int width) {
+        lineCount = 1;
+        lineStarts[0] = 0;
+        // empty text is one empty line; note that this early return leaves
+        // redoLines unchanged
+        if (contents.length() == 0) {
+            lineStarts[1] = 0;
+            return;
+        }
+        int end = 0;
+        // LIU: Add check for MAX_LINES
+        for (int start = 0; start < contents.length() && lineCount < MAX_LINES;
+             start = end) {
+            end = nextLine(fm, start, width);
+            lineStarts[lineCount++] = end;
+            if (end == start) { // LIU: Assertion
+                throw new RuntimeException("nextLine broken");
+            }
+        }
+        // lineCount was used as the next free index; turn it back into a count
+        --lineCount;
+        redoLines = false;
+    }
+
+    /**
+     * Finds the offset at which the display line beginning at start ends.
+     * The line runs through the first line separator (U+000A..U+000D,
+     * U+2028 or U+2029, with CR LF treated as a single separator) or to
+     * the end of the contents. If that stretch is wider than width, it is
+     * cut back to the longest prefix that fits.
+     * LIU: Enhanced to wrap long lines.  Bug with return of start fixed.
+     * @param fMtr metrics used to measure the text
+     * @param start offset of the first character of the line
+     * @param width the width available for the line, in pixels
+     * @return the offset just past the end of the line; when not even one
+     * character fits, the unwrapped end of the line
+     */
+    public int nextLine(FontMetrics fMtr, int start, int width) {
+        int total = contents.length();
+        int len = total;
+        for (int i = start; i < total; ++i) {
+            // check for line separator
+            char ch = (contents.charAt(i));
+            if (ch >= 0x000A && ch <= 0x000D || ch == 0x2028 || ch == 0x2029) {
+                len = i + 1;
+                // Look ahead against the full length: len has just been
+                // cut to i+1, so testing i+1 < len could never succeed.
+                if (ch == 0x000D && len < total && contents.charAt(len) == 0x000A) // crlf
+                    ++len; // grab extra char
+                break;
+            }
+        }
+        String subject = contents.substring(start,len);
+        if (visibleWidth(fMtr, subject) <= width)
+          return len;
+
+        // LIU: Remainder of this method rewritten to accommodate lines
+        // longer than the component width by first trying to break
+        // into lines; then words; finally chars.
+        int n = findFittingBreak(fMtr, subject, width, lineBreaker);
+        if (n == 0) {
+            n = findFittingBreak(fMtr, subject, width, wordBreaker);
+        }
+        if (n == 0) {
+            n = findFittingBreak(fMtr, subject, width, charBreaker);
+        }
+        return n > 0 ? start + n : len;
+    }
+
+    /**
+     * LIU: Finds the longest prefix of a line that fits a given width and
+     * ends on a boundary reported by a BreakIterator.  If even the first
+     * subunit is too wide, the breaker's first boundary is returned.
+     * @param fMtr metrics to use
+     * @param line the string to be fitted into width
+     * @param width the visible width of line.substring(0, result) must
+     * be <= width
+     * @param breaker the BreakIterator that will be used to find subunits;
+     * its text is replaced by line
+     * @return maximum characters, at boundaries returned by breaker,
+     * that fit into width, or zero on failure
+     */
+    private int findFittingBreak(FontMetrics fMtr, String line, int width,
+                                 BreakIterator breaker) {
+        breaker.setText(line);
+        int fitted = breaker.first();
+        for (int candidate = breaker.next();
+             candidate != BreakIterator.DONE;
+             candidate = breaker.next()) {
+            if (visibleWidth(fMtr, line.substring(0, candidate)) > width) {
+                break;
+            }
+            fitted = candidate;
+        }
+        return fitted;
+    }
+
+    /**
+     * Measures a string while ignoring any trailing spaces and line
+     * separators (U+000A..U+000D, U+2028, U+2029).
+     * @param fMtr metrics used to measure the text
+     * @param s the string to measure
+     * @return the width of s up to its last visible character, or 0 if it
+     * has none
+     */
+    public int visibleWidth(FontMetrics fMtr, String s) {
+        int visibleEnd = s.length();
+        while (visibleEnd > 0) {
+            char ch = s.charAt(visibleEnd - 1);
+            boolean trailingBlank = ch == ' '
+                || (ch >= 0x000A && ch <= 0x000D)
+                || ch == 0x2028
+                || ch == 0x2029;
+            if (!trailingBlank) {
+                return fMtr.stringWidth(s.substring(0, visibleEnd));
+            }
+            --visibleEnd;
+        }
+        return 0;
+    }
+
+// =============== Utility ====================
+
+    /**
+     * Converts between hex notation and characters for the text that ends
+     * at the end of the selection. Up to eight characters before that point
+     * are read as hex digits; depending on what was found, the digits are
+     * replaced by the character(s) they denote, or the last character (or
+     * surrogate pair) is replaced by its hex notation. Does nothing when
+     * the selection ends at offset 0.
+     */
+    private void fixHex() {
+        if (selection.getEnd() == 0) return;
+        int store = 0;
+        int places = 1;
+        int count = 0;
+        int min = Math.min(8,selection.getEnd());
+        // accumulate digits right to left, least significant first; stop at
+        // the first character whose numeric value is not in 0..15
+        for (int i = 0; i < min; ++i) {
+            char ch = contents.charAt(selection.getEnd()-1-i);
+            int value = Character.getNumericValue(ch);
+            if (value < 0 || value > 15) break;
+            store += places * value;
+            ++count;
+            places *= 16;
+        }
+        String add = "";
+        int bottom = store & 0xFFFF;
+        // the 0xD8000000/0xDC000000 literals are negative ints; an 8-digit
+        // value wraps the same way, so the signed comparison still holds
+        if (store >= 0xD8000000 && store < 0xDC000000
+          && bottom >= 0xDC00 && bottom < 0xE000) { // surrogates
+            // eight digits spelling a high and a low surrogate
+            add = "" + (char)(store >> 16) + (char)bottom;
+        } else if (store > 0xFFFF && store <= 0x10FFFF) {
+            // a supplementary code point: emit its surrogate pair
+            store -= 0x10000;
+            add = "" + (char)(((store >> 10) & 0x3FF) + 0xD800)
+              + (char)((store & 0x3FF) + 0xDC00);
+              
+        } else if (count >= 4) {
+            // only the last four digits are consumed
+            count = 4;
+            add = ""+(char)(store & 0xFFFF);
+        } else {
+            // fewer than four digits: go the other way and replace the last
+            // character (or a whole surrogate pair) by its hex notation
+            count = 1;
+            char ch = contents.charAt(selection.getEnd()-1);
+            add = hex(ch);
+            if (ch >= 0xDC00 && ch <= 0xDFFF && selection.getEnd() > 1) {
+                ch = contents.charAt(selection.getEnd()-2);
+                if (ch >= 0xD800 && ch <= 0xDBFF) {
+                    count = 2;
+                    add = hex(ch) + add;
+                }
+            }
+        }
+        replaceRange(add, selection.getEnd()-count, selection.getEnd());
+    }
+
+    /**
+     * Formats a char as exactly four uppercase hexadecimal digits.
+     * @param ch the character to format
+     * @return the zero-padded hex value of ch, e.g. "0041" for 'A'
+     */
+    public static String hex(char ch) {
+        String digits = Integer.toHexString(ch).toUpperCase();
+        StringBuffer padded = new StringBuffer(4);
+        for (int pad = digits.length(); pad < 4; ++pad) {
+            padded.append('0');
+        }
+        return padded.append(digits).toString();
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/Selection.java b/demos/src/com/ibm/icu/dev/demo/impl/Selection.java
new file mode 100644
index 00000000000..c07b7704489
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/Selection.java
@@ -0,0 +1,161 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+import java.text.BreakIterator;
+
+/**
+ * A mutable text selection described by two offsets: the anchor and the
+ * caret. When both are equal the selection is a plain caret. All mutators
+ * return this so that calls can be chained.
+ */
+public final class Selection {
+
+    /** Offset of one end of the selection; only moved by non-extending operations. */
+    public int anchor;
+    /** Offset of the other end of the selection, where the caret is shown. */
+    public int caret;
+    /**
+     * Side flag for the caret. NOTE(review): presumably tells which side of
+     * a line boundary the caret belongs to -- confirm against the callers.
+     */
+    public boolean clickAfter;
+
+    /** Returns the smaller of anchor and caret. */
+    public int getStart() {
+        return anchor < caret ? anchor : caret;
+    }
+
+    /** Returns the larger of anchor and caret. */
+    public int getEnd() {
+        return anchor > caret ? anchor : caret;
+    }
+
+    /** Returns true if the selection is empty, i.e. anchor equals caret. */
+    public boolean isCaret() {
+        return anchor == caret;
+    }
+
+    /**
+     * Copies all three fields from another selection.
+     * @param other the selection to copy
+     * @return this
+     */
+    public Selection set(Selection other) {
+        anchor = other.anchor;
+        caret = other.caret;
+        clickAfter = other.clickAfter;
+        return this;
+    }
+
+    /**
+     * Sets all three fields.
+     * @return this
+     */
+    public Selection set(int anchor, int caret, boolean clickAfter) {
+        this.anchor = anchor;
+        this.caret = caret;
+        this.clickAfter = clickAfter;
+        return this;
+    }
+
+    /**
+     * Two selections are equal when anchor, caret and clickAfter all match.
+     * @param other the object to compare with; may be null or of any type
+     * @return true only if other is a Selection with identical fields
+     */
+    public boolean equals(Object other) {
+        if (this == other) return true;
+        // also rejects null, as the equals contract requires
+        if (!(other instanceof Selection)) return false;
+        Selection other2 = (Selection)other;
+        return anchor == other2.anchor
+          && caret == other2.caret
+          && clickAfter == other2.clickAfter;
+    }
+
+    /**
+     * Hash code consistent with equals: built from anchor, caret and
+     * clickAfter.
+     */
+    public int hashCode() {
+        int h = anchor;
+        h = 31 * h + caret;
+        h = 31 * h + (clickAfter ? 1 : 0);
+        return h;
+    }
+
+    /** Returns true if the start of this selection precedes the end of other. */
+    public boolean isLessThan(Selection other) {
+        return getStart() < other.getEnd();
+    }
+
+    /**
+     * Clamps anchor and caret into the range 0..text.length(). When the
+     * caret is clamped, clickAfter is set to true at the end of the text
+     * and to false at the beginning.
+     * @param text the text the selection must fit into
+     * @return this
+     */
+    public Selection pin(String text) {
+        if (anchor > text.length()) {
+            anchor = text.length();
+        } else if (anchor < 0) {
+            anchor = 0;
+        }
+        if (caret > text.length()) {
+            caret = text.length();
+            clickAfter = true;
+        } else if (caret < 0) {
+            caret = 0;
+            clickAfter = false;
+        }
+        return this;
+    }
+
+    /**
+     * Exchanges all three fields with another selection.
+     * @param after the selection to exchange values with
+     * @return this
+     */
+    public Selection swap(Selection after) {
+        int temp = anchor;
+        anchor = after.anchor;
+        after.anchor = temp;
+        temp = caret;
+        caret = after.caret;
+        after.caret = temp;
+        boolean b = clickAfter;
+        clickAfter = after.clickAfter;
+        after.clickAfter = b;
+        return this;
+    }
+
+    /**
+     * Adjusts the offsets after the text range [start, end) has been
+     * replaced by len characters. Offsets before start are unchanged,
+     * offsets inside the replaced range move to the end of the new text,
+     * and offsets at or after end are shifted by the change in length.
+     * @param start start of the replaced range
+     * @param end end (exclusive) of the replaced range
+     * @param len length of the replacement text
+     * @return this
+     */
+    public Selection fixAfterReplace(int start, int end, int len) {
+        if (anchor >= start) {
+            if (anchor < end) anchor = end;
+            anchor = start + len + anchor - end;
+        }
+        if (caret >= start) {
+            if (caret < end) caret = end;
+            caret = start + len + caret - end;
+        }
+        return this;
+    }
+
+        // Mac & Windows considerably different
+        // Mac: end++. If start!=end, start=end
+        //  SHIFT: move end right
+        //  CTL: no different
+        // Windows:
+        //  UNSHIFTED: if start!=end, start = end, else start=end=end+1;
+        //       anchor = tip = start
+        //  SHIFT: tip++
+        //  CTL: if start!=end, start = end = nextbound(end-1),
+        //   else start=end=nextbound(end)
+        //       anchor = tip = start
+        //  CTL/SHIFT: tip = nextbound(tip)
+
+    /**
+     * Moves the caret to the next boundary in the given direction.
+     * @param breaker supplies the boundaries
+     * @param direction positive to move forward, otherwise backward
+     * @param extend if true the anchor stays put (the selection grows or
+     * shrinks); if false the selection collapses onto the new caret
+     * @return this
+     */
+    public Selection nextBound(BreakIterator breaker,
+      int direction, boolean extend) {
+        // collapsing a non-empty selection: step back first so the caret
+        // does not travel a full unit beyond the old selection edge
+        if (!extend && anchor != caret) caret -= direction;
+        caret = next(caret, breaker, direction, true);
+        if (!extend) anchor = caret;
+        clickAfter = false;
+        return this;
+    }
+
+    /**
+     * Expands start and end outwards to boundaries of the breaker, unless
+     * they are already on one. Whichever of anchor and caret is smaller
+     * moves backward, the other forward.
+     * @param breaker supplies the boundaries, e.g. a word iterator
+     */
+    public void expand(BreakIterator breaker) {
+        if (anchor <= caret) {
+            anchor = next(anchor,breaker,-1,false);
+            caret = next(caret,breaker,1,false);
+        } else {
+            anchor = next(anchor,breaker,1,false);
+            caret = next(caret,breaker,-1,false);
+        }
+    }
+
+    /**
+     * Finds the boundary next to a position.
+     * @param position the offset to start from
+     * @param breaker supplies the boundaries
+     * @param direction positive to search forward, otherwise backward
+     * @param different false - move to next boundary, unless on one;
+     * true - move to next boundary, even if on one
+     * @return the boundary found; if the breaker rejects the offset, the
+     * position as adjusted so far
+     */
+    public static int next(int position, BreakIterator breaker,
+      int direction, boolean different) {
+        if (!different) position -= direction;
+        try {
+            if (direction > 0) {
+                position = breaker.following(position);
+            } else {
+                breaker.following(position-1);
+                position = breaker.previous();
+            }
+        } catch (Exception ignored) {
+            // deliberate best effort: an out-of-range offset leaves the
+            // position where it is instead of failing the key/mouse handler
+        }
+        return position;
+    }
+}
+
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/package.html b/demos/src/com/ibm/icu/dev/demo/impl/package.html
new file mode 100644
index 00000000000..a7e8d35a2ac
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+Shared utilities for demo applications and Applets.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/number/CurrencyDemo.java b/demos/src/com/ibm/icu/dev/demo/number/CurrencyDemo.java
new file mode 100644
index 00000000000..6e36765fa1c
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/number/CurrencyDemo.java
@@ -0,0 +1,114 @@
+/*
+**********************************************************************
+* Copyright (c) 2003-2010, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Mark Davis
+* Created: May 22 2003
+* Since: ICU 2.6
+**********************************************************************
+*/
+package com.ibm.icu.dev.demo.number;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.text.DecimalFormat;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.util.Currency;
+
+/**
+ * Demonstration code to illustrate how to obtain ICU 2.6-like currency
+ * behavior using pre-ICU 2.6 ICU4J.
+ * @author Mark Davis
+ */
+public class CurrencyDemo {
+
+    /**
+     * Runs the comparison over all locales, printing only discrepancies.
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+        testFormatHack(true);
+    }
+
+    /**
+     * Creates a currency format for the given currency and display locale.
+     * @param currency the currency to format
+     * @param displayLocale the locale whose number conventions are used
+     * @param ICU26 if true, use the ICU 2.6 API; otherwise emulate it with
+     * the hard-coded data in hackData
+     * @return the currency format
+     * @throws IllegalArgumentException if ICU26 is false and hackData has
+     * no entry for the currency
+     */
+    static NumberFormat getCurrencyFormat(Currency currency,
+                                          Locale displayLocale,
+                                          boolean ICU26) {
+        // code for ICU 2.6
+        if (ICU26) {
+            NumberFormat result = NumberFormat.getCurrencyInstance(displayLocale);
+            result.setCurrency(currency);
+            return result;
+        }
+
+        // ugly work-around for 2.4
+        DecimalFormat result = (DecimalFormat)NumberFormat.getCurrencyInstance(displayLocale);
+        HackCurrencyInfo hack = (HackCurrencyInfo)(hackData.get(currency.getCurrencyCode()));
+        if (hack == null) {
+            // the table only covers a handful of currencies; say which one
+            // is missing instead of failing with a bare NullPointerException
+            throw new IllegalArgumentException(
+                "No work-around currency data for " + currency.getCurrencyCode());
+        }
+        result.setMinimumFractionDigits(hack.decimals);
+        result.setMaximumFractionDigits(hack.decimals);
+        result.setRoundingIncrement(hack.rounding);
+        DecimalFormatSymbols symbols = result.getDecimalFormatSymbols();
+        symbols.setCurrencySymbol(hack.symbol);
+        result.setDecimalFormatSymbols(symbols);
+        return result;
+    }
+        
+    /** Work-around data, keyed by currency code; values are HackCurrencyInfo. */
+    static Map hackData = new HashMap();
+
+    /** Fraction digits, rounding increment and symbol for one currency. */
+    static class HackCurrencyInfo {
+        int decimals;
+        double rounding;
+        String symbol;
+        HackCurrencyInfo(int decimals, double rounding, String symbol) {
+            this.decimals = decimals;
+            this.rounding = rounding;
+            this.symbol = symbol;
+        }
+    }
+    static {
+        hackData.put("USD", new HackCurrencyInfo(2, 0, "$"));
+        hackData.put("GBP", new HackCurrencyInfo(2, 0, "\u00A3"));
+        hackData.put("JPY", new HackCurrencyInfo(0, 0, "\u00A5"));
+        hackData.put("EUR", new HackCurrencyInfo(2, 0, "\u20AC"));
+    }
+
+    /**
+     * Walk through all locales and compare the output of the ICU26
+     * currency format with the "hacked" currency format.
+     * @param quiet if true, only display discrepancies.  Otherwise,
+     * display all results.
+     */
+    static void testFormatHack(boolean quiet) {
+        String[] testCurrencies = {"USD","GBP","JPY","EUR"};
+        Locale[] testLocales = NumberFormat.getAvailableLocales();
+        for (int i = 0; i < testLocales.length; ++i) {
+            // since none of this should vary by country, we'll just do by language
+            if (!testLocales[i].getCountry().equals("")) continue;
+            // the locale name is printed lazily, before the first output line
+            boolean noOutput = true;
+            if (!quiet) {
+                System.out.println(testLocales[i].getDisplayName());
+                noOutput = false;
+            }
+            for (int j = 0; j < testCurrencies.length; ++j) {
+                NumberFormat nf26 = getCurrencyFormat(Currency.getInstance(testCurrencies[j]), testLocales[i], true);
+                String str26 = nf26.format(1234.567);
+                if (!quiet) {
+                    System.out.print("\t" + Utility.escape(str26));
+                }
+                NumberFormat nf24 = getCurrencyFormat(Currency.getInstance(testCurrencies[j]), testLocales[i], false);
+                String str24 = nf24.format(1234.567);
+                if (!str24.equals(str26)) {
+                    if (noOutput) {
+                        System.out.println(testLocales[i].getDisplayName());
+                        noOutput = false;
+                    }
+                    if (quiet) {
+                        System.out.print("\t" + Utility.escape(str26));
+                    }
+                    // the work-around result follows in parentheses
+                    System.out.print(" (" + Utility.escape(str24) + ")");
+                }
+            }
+            if (!noOutput) {
+                System.out.println();
+            }
+        }
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfDemo.java b/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfDemo.java
new file mode 100644
index 00000000000..76f8f01937d
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfDemo.java
@@ -0,0 +1,580 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.rbnf;
+
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.CardLayout;
+import java.awt.Checkbox;
+import java.awt.Choice;
+import java.awt.Component;
+import java.awt.Dimension;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Frame;
+import java.awt.Graphics;
+import java.awt.GridLayout;
+import java.awt.Panel;
+import java.awt.ScrollPane;
+import java.awt.TextArea;
+import java.awt.TextComponent;
+import java.awt.TextField;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.FocusAdapter;
+import java.awt.event.FocusEvent;
+import java.awt.event.FocusListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.KeyAdapter;
+import java.awt.event.KeyEvent;
+import java.awt.event.KeyListener;
+import java.awt.event.TextEvent;
+import java.awt.event.TextListener;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.text.BreakIterator;
+import java.text.DecimalFormat;
+import java.text.ParsePosition;
+import java.util.Locale;
+
+import com.ibm.icu.dev.demo.impl.DemoApplet;
+import com.ibm.icu.text.RuleBasedNumberFormat;
+
+public class RbnfDemo extends DemoApplet {
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = -9119861296873763536L;
+
+    /**
+     * Puts a copyright in the .class file
+     */
+//    private static final String copyrightNotice
+//        = "Copyright \u00a91997-1998 IBM Corp.  All rights reserved.";
+
+    /**
+     * Runs the demo as an application by creating an RbnfDemo and calling
+     * showDemo() on it.
+     * @param argv command-line arguments; not used
+     */
+    public static void main(String[] argv) {
+        new RbnfDemo().showDemo();
+    }
+
+    /**
+     * Returns the default frame size for this demo, a fixed 430 x 270.
+     * Neither argument is consulted.
+     * @param applet not used
+     * @param f not used
+     * @return a new Dimension of 430 by 270
+     */
+    protected Dimension getDefaultFrameSize(DemoApplet applet, Frame f) {
+        return new Dimension(430,270);
+    }
+
+    protected Frame createDemoFrame(DemoApplet applet) {
+        final Frame window = new Frame("Number Spellout Demo");
+        window.setSize(800, 600);
+        window.setLayout(new BorderLayout());
+
+        Panel mainPanel = new Panel();
+        mainPanel.setLayout(new GridLayout(1,2));
+
+        commentaryField = new TextArea("", 0, 0, TextArea.SCROLLBARS_VERTICAL_ONLY);
+        commentaryField.setSize(800, 50);
+        commentaryField.setText(RbnfSampleRuleSets.sampleRuleSetCommentary[0]);
+        commentaryField.setEditable(false);
+        commentaryField.setFont(new Font("Helvetica", Font.PLAIN, 14));
+
+        spelloutFormatter = new RuleBasedNumberFormat(RbnfSampleRuleSets.usEnglish, Locale.US);
+        spelloutFormatter.setLenientParseMode(lenientParse);
+        populateRuleSetMenu();
+        numberFormatter = new DecimalFormat("#,##0.##########");
+        parsePosition = new ParsePosition(0);
+        theNumber = 0;
+
+        numberField = new TextField();
+        numberField.setFont(new Font("Serif", Font.PLAIN, 24));
+        textField = new DemoTextFieldHolder();
+        textField.setFont(new Font("Serif", Font.PLAIN, 24));
+        rulesField = new DemoTextFieldHolder();
+        rulesField.setFont(new Font("Serif", Font.PLAIN, 14));
+        lenientParseButton = new Checkbox("Lenient parse", lenientParse);
+
+        numberField.addTextListener(new TextListener() {
+            public void textValueChanged(TextEvent e) {
+                if (!numberFieldHasFocus)
+                    return;
+
+                String fieldText = ((TextComponent)(e.getSource())).getText();
+                parsePosition.setIndex(0);
+                Number temp = numberFormatter.parse(fieldText, parsePosition);
+                if (temp == null || parsePosition.getIndex() == 0) {
+                    theNumber = 0;
+                    textField.setText("PARSE ERROR");
+                }
+                else {
+                    theNumber = temp.doubleValue();
+                    textField.setText(spelloutFormatter.format(theNumber, ruleSetName));
+                }
+            }
+        } );
+
+        numberField.addFocusListener(new FocusAdapter() {
+            public void focusLost(FocusEvent e) {
+                numberFieldHasFocus = false;
+                numberField.setText(numberFormatter.format(theNumber));
+            }
+
+            public void focusGained(FocusEvent e) {
+                numberFieldHasFocus = true;
+                numberField.selectAll();
+            }
+        } );
+
+        textField.addKeyListener(new KeyAdapter() {
+            public void keyTyped(KeyEvent e) {
+                if (e.getKeyChar() == '\t') {
+                    String fieldText = ((TextComponent)(e.getSource())).getText();
+                    parsePosition.setIndex(0);
+                    theNumber = spelloutFormatter.parse(fieldText, parsePosition)
+                                        .doubleValue();
+                    if (parsePosition.getIndex() == 0) {
+                        theNumber = 0;
+                        numberField.setText("PARSE ERROR");
+                        textField.selectAll();
+                    }
+                    else if (parsePosition.getIndex() < fieldText.length()) {
+                        textField.select(parsePosition.getIndex(), fieldText.length());
+                        numberField.setText(numberFormatter.format(theNumber));
+                    }
+                    else {
+                        textField.selectAll();
+                        numberField.setText(numberFormatter.format(theNumber));
+                    }
+                    e.consume();
+                }
+            }
+        } );
+
+        textField.addFocusListener(new FocusAdapter() {
+            public void focusLost(FocusEvent e) {
+                String fieldText = ((TextComponent)(e.getSource())).getText();
+                parsePosition.setIndex(0);
+                theNumber = spelloutFormatter.parse(fieldText, parsePosition)
+                                .doubleValue();
+                if (parsePosition.getIndex() == 0)
+                    numberField.setText("PARSE ERROR");
+                else
+                    numberField.setText(numberFormatter.format(theNumber));
+                textField.setText(textField.getText()); // textField.repaint() didn't work right
+            }
+
+            public void focusGained(FocusEvent e) {
+                textField.selectAll();
+            }
+        } );
+
+        rulesField.addKeyListener(new KeyAdapter() {
+            public void keyTyped(KeyEvent e) {
+                if (e.getKeyChar() == '\t') {
+                    String fieldText = ((TextComponent)(e.getSource())).getText();
+                    if (formatterMenu.getSelectedItem().equals("Custom") || !fieldText.equals(
+                                    RbnfSampleRuleSets.sampleRuleSets[formatterMenu.getSelectedIndex()])) {
+                        try {
+                            RuleBasedNumberFormat temp = new RuleBasedNumberFormat(fieldText);
+                            temp.setLenientParseMode(lenientParse);
+                            populateRuleSetMenu();
+                            spelloutFormatter = temp;
+                            customRuleSet = fieldText;
+                            formatterMenu.select("Custom");
+                            commentaryField.setText(RbnfSampleRuleSets.
+                                sampleRuleSetCommentary[RbnfSampleRuleSets.
+                                sampleRuleSetCommentary.length - 1]);
+                            redisplay();
+                        }
+                        catch (Exception x) {
+                            textField.setText(x.toString());
+                        }
+                    }
+                    e.consume();
+                }
+            }
+        } );
+
+        rulesField.addFocusListener(new FocusAdapter() {
+            public void focusLost(FocusEvent e) {
+                String fieldText = ((TextComponent)(e.getSource())).getText();
+                if (formatterMenu.getSelectedItem().equals("Custom") || !fieldText.equals(
+                                RbnfSampleRuleSets.sampleRuleSets[formatterMenu.getSelectedIndex()])) {
+                    try {
+                        RuleBasedNumberFormat temp = new RuleBasedNumberFormat(fieldText);
+                        temp.setLenientParseMode(lenientParse);
+                        populateRuleSetMenu();
+                        spelloutFormatter = temp;
+                        customRuleSet = fieldText;
+                        formatterMenu.select("Custom");
+                        redisplay();
+                    }
+                    catch (Exception x) {
+                        textField.setText(x.toString());
+                    }
+                }
+                rulesField.setText(rulesField.getText()); // rulesField.repaint() didn't work right
+            }
+        } );
+
+        lenientParseButton.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                lenientParse = lenientParseButton.getState();
+                spelloutFormatter.setLenientParseMode(lenientParse);
+            }
+        } );
+
+        numberField.setText(numberFormatter.format(theNumber));
+        numberField.selectAll();
+        textField.setText(spelloutFormatter.format(theNumber, ruleSetName));
+
+        Panel leftPanel = new Panel();
+        leftPanel.setLayout(new BorderLayout());
+        Panel panel = new Panel();
+        panel.setLayout(new BorderLayout());
+        Panel panel1 = new Panel();
+        panel1.setLayout(new GridLayout(3, 1));
+        panel1.add(new Panel());
+        panel1.add(numberField, "Center");
+        panel1.add(lenientParseButton);
+        panel.add(panel1, "Center");
+        Panel panel2 = new Panel();
+        panel2.setLayout(new GridLayout(3, 3));
+        Button button = new Button("+100");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                roll(100);
+            }
+        } );
+        panel2.add(button);
+        button = new Button("+10");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                roll(10);
+            }
+        } );
+        panel2.add(button);
+        button = new Button("+1");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                roll(1);
+            }
+        } );
+        panel2.add(button);
+        button = new Button("<");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                theNumber *= 10;
+                redisplay();
+            }
+        } );
+        panel2.add(button);
+        panel2.add(new Panel());
+        button = new Button(">");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                theNumber /= 10;
+                redisplay();
+            }
+        } );
+        panel2.add(button);
+        button = new Button("-100");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                roll(-100);
+            }
+        } );
+        panel2.add(button);
+        button = new Button("-10");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                roll(-10);
+            }
+        } );
+        panel2.add(button);
+        button = new Button("-1");
+        button.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                roll(-1);
+            }
+        } );
+        panel2.add(button);
+        panel.add(panel2, "East");
+        leftPanel.add(panel, "North");
+        leftPanel.add(textField, "Center");
+
+        Panel rightPanel = new Panel();
+        rightPanel.setLayout(new BorderLayout());
+        formatterMenu = new Choice();
+        for (int i = 0; i < RbnfSampleRuleSets.sampleRuleSetNames.length; i++)
+            formatterMenu.addItem(RbnfSampleRuleSets.sampleRuleSetNames[i]);
+        formatterMenu.addItem("Custom");
+        formatterMenu.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                Choice source = (Choice)(e.getSource());
+                int item = source.getSelectedIndex();
+                Locale locale = RbnfSampleRuleSets.sampleRuleSetLocales[item];
+
+                commentaryField.setText(RbnfSampleRuleSets.
+                                sampleRuleSetCommentary[item]);
+
+                if (locale != null && (locale.getLanguage().equals("iw")
+                        || locale.getLanguage().equals("ru") || locale.getLanguage().equals("ja")
+                        || locale.getLanguage().equals("el")
+                        || locale.getLanguage().equals("zh"))) {
+                    textField.togglePanes(false);
+                    rulesField.togglePanes(false);
+                }
+                else {
+                    textField.togglePanes(true);
+                    rulesField.togglePanes(true);
+                }
+
+                makeNewSpelloutFormatter();
+                redisplay();
+            }
+        } );
+
+        ruleSetMenu = new Choice();
+        populateRuleSetMenu();
+
+        ruleSetMenu.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                ruleSetName = ruleSetMenu.getSelectedItem();
+                redisplay();
+            }
+        } );
+
+        Panel menuPanel = new Panel();
+        menuPanel.setLayout(new GridLayout(1, 2));
+        menuPanel.add(formatterMenu);
+        menuPanel.add(ruleSetMenu);
+        rightPanel.add(menuPanel, "North");
+
+        rulesField.setText(RbnfSampleRuleSets.sampleRuleSets[formatterMenu.getSelectedIndex()]);
+        rightPanel.add(rulesField, "Center");
+
+        mainPanel.add(leftPanel);
+        mainPanel.add(rightPanel);
+
+        window.add(mainPanel, "Center");
+        window.add(commentaryField, "South");
+
+        window.doLayout();
+        window.show();
+        final DemoApplet theApplet = applet;
+        window.addWindowListener(
+                new WindowAdapter() {
+                    public void windowClosing(WindowEvent e) {
+                        setVisible(false);
+                        window.dispose();
+
+                        if (theApplet != null) {
+                            theApplet.demoClosed();
+                        } else System.exit(0);
+                    }
+                } );
+        return window;
+    }
+
+    void roll(int delta) { // adds delta to the current number; called by the +/-1, +/-10, +/-100 buttons
+        theNumber += delta;
+        redisplay(); // refresh both the numeric field and the spelled-out text
+    }
+
+    void redisplay() { // re-renders theNumber into both text views
+        numberField.setText(numberFormatter.format(theNumber)); // numeric form, via the DecimalFormat
+        textField.setText(spelloutFormatter.format(theNumber, ruleSetName)); // rule-based form, using the selected rule set
+    }
+
+    void makeNewSpelloutFormatter() { // rebuilds spelloutFormatter from the formatter menu's current selection
+        int item = formatterMenu.getSelectedIndex();
+        String formatterMenuItem = formatterMenu.getSelectedItem();
+
+        if (formatterMenuItem.equals("Custom")) { // user-supplied rules: show and use the stored custom text
+            rulesField.setText(customRuleSet);
+            spelloutFormatter = new RuleBasedNumberFormat(customRuleSet);
+        }
+        else { // built-in sample: show its rules and build the formatter with the sample's locale
+            rulesField.setText(RbnfSampleRuleSets.sampleRuleSets[item]);
+
+            Locale locale = RbnfSampleRuleSets.sampleRuleSetLocales[item];
+            if (locale == null) // a null locale entry means: use the JVM default locale
+                locale = Locale.getDefault();
+
+            spelloutFormatter = new RuleBasedNumberFormat(RbnfSampleRuleSets.
+                            sampleRuleSets[item], locale);
+        }
+        spelloutFormatter.setLenientParseMode(lenientParse); // carry the lenient-parse checkbox state over
+        populateRuleSetMenu(); // must come after the assignments above: it reads spelloutFormatter
+    }
+
+    void populateRuleSetMenu() { // syncs the rule-set chooser and ruleSetName with the current spelloutFormatter
+        String[] ruleSetNames = spelloutFormatter.getRuleSetNames();
+
+        if (ruleSetMenu != null) { // menu exists: refill it, then adopt whichever item it reports as selected
+            ruleSetMenu.removeAll();
+            for (int i = 0; i < ruleSetNames.length; i++)
+                ruleSetMenu.addItem(ruleSetNames[i]);
+
+            ruleSetName = ruleSetMenu.getSelectedItem();
+        }
+        else // menu not created yet: fall back to the first name the formatter reports
+            ruleSetName = ruleSetNames[0];
+    }
+
+//    private Frame demoWindow = null;
+
+    private TextComponent numberField;
+    private DemoTextFieldHolder textField;
+    private DemoTextFieldHolder rulesField;
+    private TextComponent commentaryField;
+    private Checkbox lenientParseButton;
+
+    private boolean numberFieldHasFocus = true;
+
+    private RuleBasedNumberFormat spelloutFormatter;
+    private DecimalFormat numberFormatter;
+    private ParsePosition parsePosition;
+
+    private boolean lenientParse = true;
+
+    private double theNumber = 0;
+//    private boolean canEdit = true;
+
+    private Choice formatterMenu;
+    private Choice ruleSetMenu;
+    private String ruleSetName;
+
+    private String customRuleSet = "NO RULES!";
+}
+
+class DemoTextField extends Component { // read-only text view that wraps its text itself when painted
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = -7947090021239472658L;
+    public DemoTextField() {
+    }
+
+    public void setText(String text) { // stores the text and requests a repaint
+        this.text = text;
+        this.repaint();
+    }
+
+    public String getText() { // may be null until setText() has been called
+        return text;
+    }
+
+    public void paint(Graphics g) { // draws the text top-down, one wrapped line per outer-loop pass
+        g.setFont(getFont()); // install the font BEFORE measuring, so metrics match what is drawn
+        FontMetrics fm = g.getFontMetrics();
+        String txt = getText();
+        if (txt == null) txt = ""; // painted before any setText(): draw nothing instead of throwing
+        BreakIterator bi = BreakIterator.getLineInstance();
+        bi.setText(txt);
+        int lineHeight = fm.getHeight();
+        int width = getSize().width;
+        int penY = fm.getAscent(); // baseline of the first line
+        int lineStart = 0;
+        int tempLineEnd = bi.first(); // next break candidate being tested
+        int lineEnd = 0; // last break candidate known to fit
+        int maxLineEnd = 0; // hard limit for the current line (next explicit newline)
+        totalHeight = 0;
+
+        while (lineStart < txt.length()) {
+            maxLineEnd = txt.indexOf('\n', lineStart);
+            if (maxLineEnd == -1) // no further newline: no hard limit
+                maxLineEnd = Integer.MAX_VALUE;
+            while (tempLineEnd != BreakIterator.DONE && fm.stringWidth(txt.substring( // extend while the run still fits
+                            lineStart, tempLineEnd)) < width) {
+                lineEnd = tempLineEnd;
+                tempLineEnd = bi.next();
+            }
+            if (lineStart >= lineEnd) { // no progress: take the next break anyway, or the rest of the text
+                if (tempLineEnd == BreakIterator.DONE)
+                    lineEnd = txt.length();
+                else
+                    lineEnd = tempLineEnd;
+            }
+            if (lineEnd > maxLineEnd) // never draw past an explicit newline
+                lineEnd = maxLineEnd;
+            g.drawString(txt.substring(lineStart, lineEnd), 0, penY);
+            penY += lineHeight;
+            totalHeight += lineHeight;
+            lineStart = lineEnd;
+            if (lineStart < txt.length() && txt.charAt(lineStart) == '\n') // skip the newline itself
+                ++lineStart;
+        }
+    }
+
+/*
+    public Dimension getPreferredSize() {
+        Dimension size = getParent().getSize();
+        return new Dimension(size.width, totalHeight);
+    }
+*/
+
+    private String text; // text to draw; null until setText() is called
+    private int totalHeight; // height of the lines drawn by the last paint()
+}
+
+class DemoTextFieldHolder extends Panel { // stacks an editable TextArea and a painted DemoTextField as two cards
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 7514498764062569858L;
+    public DemoTextFieldHolder() {
+        tf1 = new TextArea("", 0, 0, TextArea.SCROLLBARS_VERTICAL_ONLY);
+        tf2 = new DemoTextField();
+        sp = new ScrollPane(); // scrolling wrapper for the painted view
+
+        setLayout(new CardLayout());
+
+        sp.add(tf2, "TextField1");
+        sp.setVisible(false);
+        add(tf1, "TestField2"); // NOTE(review): name looks like a typo for "TextField2"; cards are only flipped with next() in this class
+        add(sp, "ScrollPane");
+    }
+
+    public void addFocusListener(FocusListener l) { // listeners go to the TextArea only
+        tf1.addFocusListener(l);
+    }
+
+    public void addKeyListener(KeyListener l) { // listeners go to the TextArea only
+        tf1.addKeyListener(l);
+    }
+
+    public void setText(String text) { // keeps both views showing the same text
+        tf1.setText(text);
+        tf2.setText(text);
+    }
+
+    public String getText() { // reads from the TextArea
+        return tf1.getText();
+    }
+
+    public void select(int start, int end) { // selection applies to the TextArea only
+        tf1.select(start, end);
+    }
+
+    public void selectAll() { // selection applies to the TextArea only
+        tf1.selectAll();
+    }
+
+    public void togglePanes(boolean canShowRealTextField) { // true: show the TextArea card; false: show the painted view
+        if (canShowRealTextField != showingRealTextField) { // flip only when the requested state differs
+            CardLayout layout = (CardLayout)(getLayout());
+            layout.next(this); // two cards were added, so next() switches to the other one
+            showingRealTextField = canShowRealTextField;
+        }
+    }
+
+    private TextArea tf1 = null; // the real, editable text component
+    private DemoTextField tf2 = null; // the custom-painted view
+    private ScrollPane sp = null; // holds tf2
+    private boolean showingRealTextField = true; // tf1 is the first card added, which CardLayout shows initially
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java b/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
new file mode 100644
index 00000000000..c03ed71a186
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
@@ -0,0 +1,1941 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2007, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.rbnf;
+
+import java.util.Locale;
+
+/**
+ * A collection of example rule sets for use with RuleBasedNumberFormat.
+ * These examples are intended to serve both as demonstrations of what can
+ * be done with this framework, and as starting points for designing new
+ * rule sets.
+ *
+ * For those that claim to represent number-spellout rules for languages
+ * other than U.S. English, we make no claims of either accuracy or
+ * completeness.  In fact, we know them to be incomplete, and suspect
+ * most have mistakes in them.  If you see something that you know is wrong,
+ * please tell us!
+ *
+ * @author Richard Gillam
+ */
+public class RbnfSampleRuleSets {
+    /**
+     * Puts a copyright in the .class file
+     */
+//    private static final String copyrightNotice
+//        = "Copyright \u00a91997-1998 IBM Corp.  All rights reserved.";
+
+    //========================================================================
+    // Spellout rules for various languages
+    //
+    // The following RuleBasedNumberFormat descriptions show the rules for
+    // spelling out numeric values in various languages.  As mentioned
+    // before, we cannot vouch for the accuracy or completeness of this
+    // data, although we believe it's pretty close.  Basically, this
+    // represents one day's worth of Web-surfing.  If you can supply the
+    // missing information in any of these rule sets, or if you find errors,
+    // or if you can supply spellout rules for languages that aren't shown
+    // here, we want to hear from you!
+    //========================================================================
+
+    /**
+     * Spellout rules for U.S. English.  This demonstration version of the
+     * U.S. English spellout rules has four variants: 1) %simplified is a
+     * set of rules showing the simple method of spelling out numbers in
+     * English: 289 is formatted as "two hundred eighty-nine".  2) %alt-teens
+     * is the same as %simplified, except that values between 1,000 and 9,999
+     * whose hundreds place isn't zero are formatted in hundreds.  For example,
+     * 1,983 is formatted as "nineteen hundred eighty-three," and 2,183 is
+     * formatted as "twenty-one hundred eighty-three," but 2,083 is still
+     * formatted as "two thousand eighty-three."  3) %ordinal formats the
+     * values as ordinal numbers in English (e.g., 289 is "two hundred eighty-
+     * ninth").  4) %default uses a more complicated algorithm to format
+     * numbers in a more natural way: 289 is formatted as "two hundred AND
+     * eighty-nine" and commas are inserted between the thousands groups for
+     * values above 100,000.
+     */
+    public static final String usEnglish =
+        // This rule set shows the normal simple formatting rules for English
+        "%simplified:\n"
+               // negative number rule.  This rule is used to format negative
+               // numbers.  The result of formatting the number's absolute
+               // value is placed where the >> is.
+        + "    -x: minus >>;\n"
+               // fraction rule.  This rule is used for formatting numbers
+               // with fractional parts.  The result of formatting the
+               // number's integral part is substituted for the <<, and
+               // the result of formatting the number's fractional part
+               // (one digit at a time, e.g., 0.123 is "zero point one two
+               // three") replaces the >>.
+        + "    x.x: << point >>;\n"
+               // the rules for the values from 0 to 19 are simply the
+               // words for those numbers
+        + "    zero; one; two; three; four; five; six; seven; eight; nine;\n"
+        + "    ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+        + "        seventeen; eighteen; nineteen;\n"
+               // beginning at 20, we use the >> to mark the position where
+               // the result of formatting the number's ones digit goes.  Thus,
+               // we only need a new rule at every multiple of 10.  Text in
+               // brackets is omitted if the value being formatted is an
+               // even multiple of 10.
+        + "    20: twenty[->>];\n"
+        + "    30: thirty[->>];\n"
+        + "    40: forty[->>];\n"
+        + "    50: fifty[->>];\n"
+        + "    60: sixty[->>];\n"
+        + "    70: seventy[->>];\n"
+        + "    80: eighty[->>];\n"
+        + "    90: ninety[->>];\n"
+               // beginning at 100, we can use << to mark the position where
+               // the result of formatting the multiple of 100 is to be
+               // inserted.  Notice also that the meaning of >> has shifted:
+               // here, it refers to both the ones place and the tens place.
+               // The meanings of the << and >> tokens depend on the base value
+               // of the rule.  A rule's divisor is (usually) the highest
+               // power of 10 that is less than or equal to the rule's base
+               // value.  The value being formatted is divided by the rule's
+               // divisor, and the integral quotient is used to get the text
+               // for <<, while the remainder is used to produce the text
+               // for >>.  Again, text in brackets is omitted if the value
+               // being formatted is an even multiple of the rule's divisor
+               // (in this case, an even multiple of 100)
+        + "    100: << hundred[ >>];\n"
+               // The rules for the higher numbers work the same way as the
+               // rule for 100: Again, the << and >> tokens depend on the
+               // rule's divisor, which for all these rules is also the rule's
+               // base value.  To group by thousand, we simply don't have any
+               // rules between 1,000 and 1,000,000.
+        + "    1000: << thousand[ >>];\n"
+        + "    1,000,000: << million[ >>];\n"
+        + "    1,000,000,000: << billion[ >>];\n"
+        + "    1,000,000,000,000: << trillion[ >>];\n"
+               // overflow rule.  This rule specifies that values of a
+               // quadrillion or more are shown in numerals rather than words.
+               // The == token means to format (with new rules) the value
+               // being formatted by this rule and place the result where
+               // the == is.  The #,##0 inside the == signs is a
+               // DecimalFormat pattern.  It specifies that the value should
+               // be formatted with a DecimalFormat object, and that it
+               // should be formatted with no decimal places, at least one
+               // digit, and a thousands separator.
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+
+        // This rule set formats numbers between 1,000 and 9,999 somewhat
+        // differently: If the hundreds digit is not zero, the first two
+        // digits are treated as a number of hundreds.  For example, 2,197
+        // would come out as "twenty-one hundred ninety-seven."
+        + "%alt-teens:\n"
+               // just use %simplified to format values below 1,000
+        + "    =%simplified=;\n"
+               // values between 1,000 and 9,999 are delegated to %%alt-hundreds
+               // for formatting.  The > after "1000" decreases the exponent
+               // of the rule's radix by one, causing the rule's divisor
+               // to be 100 instead of 1,000.  This causes the first TWO
+               // digits of the number, instead of just the first digit,
+               // to be sent to %%alt-hundreds
+        + "    1000>: <%%alt-hundreds<[ >>];\n"
+               // for values of 10,000 and more, we again just use %simplified
+        + "    10,000: =%simplified=;\n"
+        // This rule set uses some obscure voodoo of the description language
+        // to format the first two digits of a value in the thousands.
+        // The rule at 10 formats the first two digits as a multiple of 1,000
+        // and the rule at 11 formats the first two digits as a multiple of
+        // 100.  This works because of something known as the "rollback rule":
+        // if the rule applicable to the value being formatted has two
+        // substitutions, the value being formatted is an even multiple of
+        // the rule's divisor, and the rule's base value ISN'T an even multiple
+        // of the rule's divisor, then the rule that precedes this one in the
+        // list is used instead.  (The [] notation is implemented internally
+        // using this notation: a rule containing [] is split into two rules,
+        // and the right one is chosen using the rollback rule.) In this case,
+        // it means that if the first two digits are an even multiple of 10,
+        // they're formatted with the 10 rule (containing "thousand"), and if
+        // they're not, they're formatted with the 11 rule (containing
+        // "hundred").  %%empty is a hack to cause the rollback rule to be
+        // invoked: it makes the 11 rule have two substitutions, even though
+        // the second substitution (calling %%empty) doesn't actually do
+        // anything.
+        + "%%alt-hundreds:\n"
+        + "    0: SHOULD NEVER GET HERE!;\n"
+        + "    10: <%simplified< thousand;\n"
+        + "    11: =%simplified= hundred>%%empty>;\n"
+        + "%%empty:\n"
+        + "    0:;"
+
+        // this rule set is the same as %simplified, except that it formats
+        // the value as an ordinal number: 234 is formatted as "two hundred
+        // thirty-fourth".  Notice the calls to %simplified: we have to
+        // call %simplified to avoid getting "second hundred thirty-fourth."
+        + "%ordinal:\n"
+        + "    zeroth; first; second; third; fourth; fifth; sixth; seventh;\n"
+        + "        eighth; ninth;\n"
+        + "    tenth; eleventh; twelfth; thirteenth; fourteenth;\n"
+        + "        fifteenth; sixteenth; seventeenth; eighteenth;\n"
+        + "        nineteenth;\n"
+        + "    twentieth; twenty->>;\n"
+        + "    30: thirtieth; thirty->>;\n"
+        + "    40: fortieth; forty->>;\n"
+        + "    50: fiftieth; fifty->>;\n"
+        + "    60: sixtieth; sixty->>;\n"
+        + "    70: seventieth; seventy->>;\n"
+        + "    80: eightieth; eighty->>;\n"
+        + "    90: ninetieth; ninety->>;\n"
+        + "    100: <%simplified< hundredth; <%simplified< hundred >>;\n"
+        + "    1000: <%simplified< thousandth; <%simplified< thousand >>;\n"
+        + "    1,000,000: <%simplified< millionth; <%simplified< million >>;\n"
+        + "    1,000,000,000: <%simplified< billionth;\n"
+        + "        <%simplified< billion >>;\n"
+        + "    1,000,000,000,000: <%simplified< trillionth;\n"
+        + "        <%simplified< trillion >>;\n"
+        + "    1,000,000,000,000,000: =#,##0=;"
+
+        // %default is a more elaborate form of %simplified;  It is basically
+        // the same, except that it introduces "and" before the ones digit
+        // when appropriate (basically, between the tens and ones digits) and
+        // separates the thousands groups with commas in values over 100,000.
+        + "%default:\n"
+               // negative-number and fraction rules.  These are the same
+               // as those for %simplified, but have to be stated here too
+               // because this is an entry point
+        + "    -x: minus >>;\n"
+        + "    x.x: << point >>;\n"
+               // just use %simplified for values below 100
+        + "    =%simplified=;\n"
+               // for values from 100 to 99,999 use %%and to decide whether or
+               // not to interpose the "and"
+        + "    100: << hundred[ >%%and>];\n"
+        + "    1000: << thousand[ >%%and>];\n"
+               // for values of 100,000 and up, use %%commas to interpose the
+               // commas in the right places (and also to interpose the "and")
+        + "    100,000>>: << thousand[>%%commas>];\n"
+        + "    1,000,000: << million[>%%commas>];\n"
+        + "    1,000,000,000: << billion[>%%commas>];\n"
+        + "    1,000,000,000,000: << trillion[>%%commas>];\n"
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        // if the value passed to this rule set is 100 or greater, don't
+        // add the "and"; if it's less than 100, add "and" before the last
+        // digits
+        + "%%and:\n"
+        + "    and =%default=;\n"
+        + "    100: =%default=;\n"
+        // this rule set is used to place the commas
+        + "%%commas:\n"
+               // for values below 100, add "and" (the apostrophe at the
+               // beginning is ignored, but causes the space that follows it
+               // to be significant: this is necessary because the rules
+               // calling %%commas don't put a space before it)
+        + "    ' and =%default=;\n"
+               // put a comma after the thousands (or whatever preceded the
+               // hundreds)
+        + "    100: , =%default=;\n"
+               // put a comma after the millions (or whatever precedes the
+               // thousands)
+        + "    1000: , <%default< thousand, >%default>;\n"
+               // and so on...
+        + "    1,000,000: , =%default=;"
+        // %%lenient-parse isn't really a set of number formatting rules;
+        // it's a set of collation rules.  Lenient-parse mode uses a Collator
+        // object to compare fragments of the text being parsed to the text
+        // in the rules, allowing more leeway in the matching text.  This set
+        // of rules tells the formatter to ignore commas when parsing (it
+        // already ignores spaces, which is why we refer to the space; it also
+        // ignores hyphens, making "twenty one" and "twenty-one" parse
+        // identically)
+        + "%%lenient-parse:\n"
+        + "    & ' ' , ',' ;\n";
+
+    /**
+     * Spellout rules for U.K. English.  U.K. English has one significant
+     * difference from U.S. English: the names for values of 1,000,000,000
+     * and higher.  In American English, each successive "-illion" is 1,000
+     * times greater than the preceding one: 1,000,000,000 is "one billion"
+     * and 1,000,000,000,000 is "one trillion."  In British English, each
+     * successive "-illion" is one million times greater than the one before:
+     * "one billion" is 1,000,000,000,000 (or what Americans would call a
+     * "trillion"), and "one trillion" is 1,000,000,000,000,000,000.
+     * 1,000,000,000 in British English is "one thousand million."  (This
+     * value is sometimes called a "milliard," but this word seems to have
+     * fallen into disuse.)
+     */
+    public static final String ukEnglish =
+        "%simplified:\n"
+        + "    -x: minus >>;\n"
+        + "    x.x: << point >>;\n"
+        + "    zero; one; two; three; four; five; six; seven; eight; nine;\n"
+        + "    ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+        + "        seventeen; eighteen; nineteen;\n"
+        + "    20: twenty[->>];\n"
+        + "    30: thirty[->>];\n"
+        + "    40: forty[->>];\n"
+        + "    50: fifty[->>];\n"
+        + "    60: sixty[->>];\n"
+        + "    70: seventy[->>];\n"
+        + "    80: eighty[->>];\n"
+        + "    90: ninety[->>];\n"
+        + "    100: << hundred[ >>];\n"
+        + "    1000: << thousand[ >>];\n"
+        + "    1,000,000: << million[ >>];\n"
+        + "    1,000,000,000,000: << billion[ >>];\n"
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        + "%alt-teens:\n"
+        + "    =%simplified=;\n"
+        + "    1000>: <%%alt-hundreds<[ >>];\n"
+        + "    10,000: =%simplified=;\n"
+        + "    1,000,000: << million[ >%simplified>];\n"
+        + "    1,000,000,000,000: << billion[ >%simplified>];\n"
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        + "%%alt-hundreds:\n"
+        + "    0: SHOULD NEVER GET HERE!;\n"
+        + "    10: <%simplified< thousand;\n"
+        + "    11: =%simplified= hundred>%%empty>;\n"
+        + "%%empty:\n"
+        + "    0:;"
+        + "%ordinal:\n"
+        + "    zeroth; first; second; third; fourth; fifth; sixth; seventh;\n"
+        + "        eighth; ninth;\n"
+        + "    tenth; eleventh; twelfth; thirteenth; fourteenth;\n"
+        + "        fifteenth; sixteenth; seventeenth; eighteenth;\n"
+        + "        nineteenth;\n"
+        + "    twentieth; twenty->>;\n"
+        + "    30: thirtieth; thirty->>;\n"
+        + "    40: fortieth; forty->>;\n"
+        + "    50: fiftieth; fifty->>;\n"
+        + "    60: sixtieth; sixty->>;\n"
+        + "    70: seventieth; seventy->>;\n"
+        + "    80: eightieth; eighty->>;\n"
+        + "    90: ninetieth; ninety->>;\n"
+        + "    100: <%simplified< hundredth; <%simplified< hundred >>;\n"
+        + "    1000: <%simplified< thousandth; <%simplified< thousand >>;\n"
+        + "    1,000,000: <%simplified< millionth; <%simplified< million >>;\n"
+        + "    1,000,000,000,000: <%simplified< billionth;\n"
+        + "        <%simplified< billion >>;\n"
+        + "    1,000,000,000,000,000: =#,##0=;"
+        + "%default:\n"
+        + "    -x: minus >>;\n"
+        + "    x.x: << point >>;\n"
+        + "    =%simplified=;\n"
+        + "    100: << hundred[ >%%and>];\n"
+        + "    1000: << thousand[ >%%and>];\n"
+        + "    100,000>>: << thousand[>%%commas>];\n"
+        + "    1,000,000: << million[>%%commas>];\n"
+        + "    1,000,000,000,000: << billion[>%%commas>];\n"
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        + "%%and:\n"
+        + "    and =%default=;\n"
+        + "    100: =%default=;\n"
+        + "%%commas:\n"
+        + "    ' and =%default=;\n"
+        + "    100: , =%default=;\n"
+        + "    1000: , <%default< thousand, >%default>;\n"
+        + "    1,000,000: , =%default=;"
+        + "%%lenient-parse:\n"
+        + "    & ' ' , ',' ;\n";
+    // Could someone please correct me if I'm wrong about "milliard" falling
+    // into disuse, or have missed any other details of how large numbers
+    // are rendered.  Also, could someone please provide me with information
+    // on which other English-speaking countries use which system?  Right now,
+    // I'm assuming that the U.S. system is used in Canada and that all the
+    // other English-speaking countries follow the British system.  Can
+    // someone out there confirm this?
+
+    /**
+     * Spellout rules for Spanish.  The Spanish rules are quite similar to
+     * the English rules, but there are some important differences:
+     * First, we have to provide separate rules for most of the twenties
+     * because the ones digit frequently picks up an accent mark that it
+     * doesn't have when standing alone.  Second, each multiple of 100 has
+     * to be specified separately because the multiplier on 100 very often
+     * changes form in the contraction: 500 is "quinientos," not
+     * "cincocientos."  In addition, the word for 100 is "cien" when
+     * standing alone, but changes to "ciento" when followed by more digits.
+     * There are also some other differences.
+     */
+    public static final String spanish =
+        // negative-number and fraction rules
+        "-x: menos >>;\n"
+        + "x.x: << punto >>;\n"
+        // words for values from 0 to 19
+        + "cero; uno; dos; tres; cuatro; cinco; seis; siete; ocho; nueve;\n"
+        + "diez; once; doce; trece; catorce; quince; diecis\u00e9is;\n"
+        + "    diecisiete; dieciocho; diecinueve;\n"
+        // words for values from 20 to 29 (necessary because the ones digit
+        // often picks up an accent mark it doesn't have when standing alone)
+        + "veinte; veintiuno; veintid\u00f3s; veintitr\u00e9s; veinticuatro;\n"
+        + "    veinticinco; veintis\u00e9is; veintisiete; veintiocho;\n"
+        + "    veintinueve;\n"
+        // words for multiples of 10 (notice that the tens digit is separated
+        // from the ones digit by the word "y".)
+        + "30: treinta[ y >>];\n"
+        + "40: cuarenta[ y >>];\n"
+        + "50: cincuenta[ y >>];\n"
+        + "60: sesenta[ y >>];\n"
+        + "70: setenta[ y >>];\n"
+        + "80: ochenta[ y >>];\n"
+        + "90: noventa[ y >>];\n"
+        // 100 by itself is "cien," but 100 followed by something is "cineto"
+        + "100: cien;\n"
+        + "101: ciento >>;\n"
+        // words for multiples of 100 (must be stated because they're
+        // rarely simple concatenations)
+        + "200: doscientos[ >>];\n"
+        + "300: trescientos[ >>];\n"
+        + "400: cuatrocientos[ >>];\n"
+        + "500: quinientos[ >>];\n"
+        + "600: seiscientos[ >>];\n"
+        + "700: setecientos[ >>];\n"
+        + "800: ochocientos[ >>];\n"
+        + "900: novecientos[ >>];\n"
+        // for 1,000, the multiplier on "mil" is omitted: 2,000 is "dos mil,"
+        // but 1,000 is just "mil."
+        + "1000: mil[ >>];\n"
+        + "2000: << mil[ >>];\n"
+        // 1,000,000 is "un millon," not "uno millon"
+        + "1,000,000: un mill\u00f3n[ >>];\n"
+        + "2,000,000: << mill\u00f3n[ >>];\n"
+        // overflow rule
+        + "1,000,000,000: =#,##0= (incomplete data);";
+    // The Spanish rules are incomplete.  I'm missing information on negative
+    // numbers and numbers with fractional parts.  I also don't have
+    // information on numbers higher than the millions
+
+    /**
+     * Spellout rules for French.  French adds some interesting quirks of its
+     * own: 1) The word "et" is interposed between the tens and ones digits,
+     * but only if the ones digit is 1: 20 is "vingt," and 22 is "vingt-deux,"
+     * but 21 is "vingt-et-un."  2)  There are no words for 70, 80, or 90.
+     * "quatre-vingts" ("four twenties") is used for 80, and values proceed
+     * by score from 60 to 99 (e.g., 73 is "soixante-treize" ["sixty-thirteen"]).
+     * Numbers from 1,100 to 1,199 are rendered as hundreds rather than
+     * thousands: 1,100 is "onze cents" ("eleven hundred"), rather than
+     * "mille cent" ("one thousand one hundred")
+     */
+    public static final String french =
+        // the main rule set
+        "%main:\n"
+               // negative-number and fraction rules
+        + "    -x: moins >>;\n"
+        + "    x.x: << virgule >>;\n"
+               // words for numbers from 0 to 19
+        + "    z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
+        + "    dix; onze; douze; treize; quatorze; quinze; seize;\n"
+        + "        dix-sept; dix-huit; dix-neuf;\n"
+               // words for the multiples of 10: %%alt-ones inserts "et"
+               // when needed
+        + "    20: vingt[->%%alt-ones>];\n"
+        + "    30: trente[->%%alt-ones>];\n"
+        + "    40: quarante[->%%alt-ones>];\n"
+        + "    50: cinquante[->%%alt-ones>];\n"
+               // rule for 60.  The /20 causes this rule's multiplier to be
+               // 20 rather than 10, allowing us to recurse for all values
+               // from 60 to 79...
+        + "    60/20: soixante[->%%alt-ones>];\n"
+               // ...except for 71, which must be special-cased
+        + "    71: soixante et onze;\n"
+               // at 72, we have to repeat the rule for 60 to get us to 79
+        + "    72/20: soixante->%%alt-ones>;\n"
+               // at 80, we state a new rule with the phrase for 80.  Since
+               // it changes form when there's a ones digit, we need a second
+               // rule at 81.  This rule also includes "/20," allowing it to
+               // be used correctly for all values up to 99
+        + "    80: quatre-vingts; 81/20: quatre-vingt->>;\n"
+               // "cent" becomes plural when preceded by a multiplier, and
+               // the multiplier is omitted from the singular form
+        + "    100: cent[ >>];\n"
+        + "    200: << cents[ >>];\n"
+        + "    1000: mille[ >>];\n"
+               // values from 1,100 to 1,199 are rendered as "onze cents..."
+               // instead of "mille cent..."  The > after "1000" decreases
+               // the rule's exponent, causing its multiplier to be 100 instead
+               // of 1,000.  This prevents us from getting "onze cents cent
+               // vingt-deux" ("eleven hundred one hundred twenty-two").
+        + "    1100>: onze cents[ >>];\n"
+               // at 1,200, we go back to formatting in thousands, so we
+               // repeat the rule for 1,000
+        + "    1200: mille >>;\n"
+               // at 2,000, the multiplier is added
+        + "    2000: << mille[ >>];\n"
+        + "    1,000,000: << million[ >>];\n"
+               // the French word for 10^9 is "milliard" (no trailing "e")
+        + "    1,000,000,000: << milliard[ >>];\n"
+        + "    1,000,000,000,000: << billion[ >>];\n"
+               // overflow rule: fall back to numerals
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        // %%alt-ones is used to insert "et" when the ones digit is 1
+        + "%%alt-ones:\n"
+        + "    ; et-un; =%main=;";
+
+    /**
+     * Spellout rules for Swiss French.  Swiss French differs from French French
+     * in that it does have words for 70, 80, and 90.  This rule set shows them,
+     * and is simpler as a result.
+     */
+    public static final String swissFrench =
+        "%main:\n"
+               // negative-number and fraction rules
+        + "    -x: moins >>;\n"
+        + "    x.x: << virgule >>;\n"
+               // words for numbers from 0 to 19
+        + "    z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
+        + "    dix; onze; douze; treize; quatorze; quinze; seize;\n"
+        + "        dix-sept; dix-huit; dix-neuf;\n"
+               // multiples of 10; %%alt-ones supplies the ones digit
+        + "    20: vingt[->%%alt-ones>];\n"
+        + "    30: trente[->%%alt-ones>];\n"
+        + "    40: quarante[->%%alt-ones>];\n"
+        + "    50: cinquante[->%%alt-ones>];\n"
+        + "    60: soixante[->%%alt-ones>];\n"
+               // notice new words for 70, 80, and 90
+        + "    70: septante[->%%alt-ones>];\n"
+        + "    80: octante[->%%alt-ones>];\n"
+        + "    90: nonante[->%%alt-ones>];\n"
+        + "    100: cent[ >>];\n"
+        + "    200: << cents[ >>];\n"
+        + "    1000: mille[ >>];\n"
+               // 1,100 to 1,199 are rendered in hundreds ("onze cents...")
+        + "    1100>: onze cents[ >>];\n"
+        + "    1200: mille >>;\n"
+        + "    2000: << mille[ >>];\n"
+        + "    1,000,000: << million[ >>];\n"
+               // the French word for 10^9 is "milliard" (no trailing "e")
+        + "    1,000,000,000: << milliard[ >>];\n"
+        + "    1,000,000,000,000: << billion[ >>];\n"
+               // overflow rule: fall back to numerals
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        // %%alt-ones is used to insert "et" when the ones digit is 1
+        + "%%alt-ones:\n"
+        + "    ; et-un; =%main=;";
+    // I'm not 100% sure about Swiss French.  Is
+    // this correct?  Is "onze cents" commonly used for 1,100 in both France
+    // and Switzerland?  Can someone fill me in on the rules for the other
+    // French-speaking countries?  I've heard conflicting opinions on which
+    // version is used in Canada, and I understand there's an alternate set
+    // of words for 70, 80, and 90 that is used somewhere, but I don't know
+    // what those words are or where they're used.
+
+    /**
+     * Spellout rules for German.  German also adds some interesting
+     * characteristics.  For values below 1,000,000, numbers are customarily
+     * written out as a single word.  And the ones digit PRECEDES the tens
+     * digit (e.g., 23 is "dreiundzwanzig," not "zwanzigunddrei").
+     */
+    public static final String german =
+        // 1 is "eins" when by itself, but turns into "ein" in most
+        // combinations
+        "%alt-ones:\n"
+        + "    null; eins; =%%main=;\n"
+        + "%%main:\n"
+               // words for numbers from 0 to 12.  Notice that the values
+               // from 13 to 19 can be derived algorithmically, unlike in most
+               // other languages
+        + "    null; ein; zwei; drei; vier; f\u00fcnf; sechs; sieben; acht; neun;\n"
+        + "    zehn; elf; zw\u00f6lf; >>zehn;\n"
+               // rules for the multiples of 10.  Notice that the ones digit
+               // goes on the front
+        + "    20: [>>und]zwanzig;\n"
+        + "    30: [>>und]drei\u00dfig;\n"
+        + "    40: [>>und]vierzig;\n"
+        + "    50: [>>und]f\u00fcnfzig;\n"
+        + "    60: [>>und]sechzig;\n"
+        + "    70: [>>und]siebzig;\n"
+        + "    80: [>>und]achtzig;\n"
+        + "    90: [>>und]neunzig;\n"
+               // hundreds and thousands.  The multiplier is omitted for 100
+               // and 1,000 and written in front (with no space) from 200 and
+               // 2,000 on.  The remainder goes through %alt-ones so that a
+               // trailing 1 comes out as "eins".
+               // NOTE(review): the 200 and 2000 rules had lost their
+               // "<hundert[" / "<tausend[" text (leaving an unbalanced "]");
+               // they are restored here by analogy with the 100 and 1000
+               // rules -- confirm against the upstream ICU source.
+        + "    100: hundert[>%alt-ones>];\n"
+        + "    200: <<hundert[>%alt-ones>];\n"
+        + "    1000: tausend[>%alt-ones>];\n"
+        + "    2000: <<tausend[>%alt-ones>];\n"
+               // from a million up, the words are separate and have distinct
+               // singular ("eine ...") and plural forms
+        + "    1,000,000: eine Million[ >%alt-ones>];\n"
+        + "    2,000,000: << Millionen[ >%alt-ones>];\n"
+        + "    1,000,000,000: eine Milliarde[ >%alt-ones>];\n"
+        + "    2,000,000,000: << Milliarden[ >%alt-ones>];\n"
+        + "    1,000,000,000,000: eine Billion[ >%alt-ones>];\n"
+        + "    2,000,000,000,000: << Billionen[ >%alt-ones>];\n"
+               // overflow rule: fall back to numerals
+        + "    1,000,000,000,000,000: =#,##0=;";
+    // again, I'm not 100% sure of these rules.  I think both "hundert" and
+    // "einhundert" are correct for 100, but I'm not sure which is preferable
+    // in situations where this framework is likely to be used.  Also, is it
+    // really true that numbers are run together into compound words all the
+    // time?  And again, I'm missing information on negative numbers and
+    // decimals.
+
+    /**
+     * Spellout rules for Italian.  Like German, most Italian numbers are
+     * written as single words.  What makes these rules complicated is the rule
+     * that says that when a word ending in a vowel and a word beginning with
+     * a vowel are combined into a compound, the vowel is dropped from the
+     * end of the first word: 180 is "centottanta," not "centoottanta."
+     * The complexity of this rule set is to produce this behavior.
+     */
+    public static final String italian =
+        // main rule set.  Follows the patterns of the preceding rule sets,
+        // except that the final vowel is omitted from words ending in
+        // vowels when they are followed by another word; instead, we have
+        // separate rule sets that are identical to this one, except that
+        // all the words that don't begin with a vowel have a vowel tacked
+        // onto them at the front.  A word ending in a vowel calls a
+        // substitution that will supply that vowel, unless that vowel is to
+        // be elided.
+        //
+        // NOTE(review): in every rule set below, the 200 and 2000 rules (and
+        // the 100,000 rule of %main) had lost the text between "<" and "[>"
+        // leaving unbalanced brackets and fused rule-set names such as
+        // "%%with-a%%with-o".  They are reconstructed here by analogy with
+        // the neighbouring 100 and 1000 rules; the exact original wording
+        // should be confirmed against the upstream ICU source.
+        "%main:\n"
+        + "    -x: meno >>;\n"
+        + "    x.x: << virgola >>;\n"
+        + "    zero; uno; due; tre; quattro; cinque; sei; sette; otto;\n"
+        + "        nove;\n"
+        + "    dieci; undici; dodici; tredici; quattordici; quindici; sedici;\n"
+        + "        diciassette; diciotto; diciannove;\n"
+        + "    20: venti; vent>%%with-i>;\n"
+        + "    30: trenta; trent>%%with-i>;\n"
+        + "    40: quaranta; quarant>%%with-a>;\n"
+        + "    50: cinquanta; cinquant>%%with-a>;\n"
+        + "    60: sessanta; sessant>%%with-a>;\n"
+        + "    70: settanta; settant>%%with-a>;\n"
+        + "    80: ottanta; ottant>%%with-a>;\n"
+        + "    90: novanta; novant>%%with-a>;\n"
+        + "    100: cento; cent[>%%with-o>];\n"
+        + "    200: <<cento; <<cent[>%%with-o>];\n"
+        + "    1000: mille; mill[>%%with-i>];\n"
+        + "    2000: <<mila; <<mil[>%%with-a>];\n"
+        + "    100,000>>: <<mila[ >>];\n"
+        + "    1,000,000: =#,##0= (incomplete data);\n"
+        // same as %main, but words that begin with a consonant get a
+        // leading "a"
+        + "%%with-a:\n"
+        + "    azero; uno; adue; atre; aquattro; acinque; asei; asette; otto;\n"
+        + "        anove;\n"
+        + "    adieci; undici; adodici; atredici; aquattordici; aquindici; asedici;\n"
+        + "        adiciassette; adiciotto; adiciannove;\n"
+        + "    20: aventi; avent>%%with-i>;\n"
+        + "    30: atrenta; atrent>%%with-i>;\n"
+        + "    40: aquaranta; aquarant>%%with-a>;\n"
+        + "    50: acinquanta; acinquant>%%with-a>;\n"
+        + "    60: asessanta; asessant>%%with-a>;\n"
+        + "    70: asettanta; asettant>%%with-a>;\n"
+        + "    80: ottanta; ottant>%%with-a>;\n"
+        + "    90: anovanta; anovant>%%with-a>;\n"
+        + "    100: acento; acent[>%%with-o>];\n"
+        + "    200: <%%with-a<cento; <%%with-a<cent[>%%with-o>];\n"
+        + "    1000: amille; amill[>%%with-i>];\n"
+        + "    2000: <%%with-a<mila; <%%with-a<mil[>%%with-a>];\n"
+        + "    100,000: =%main=;\n"
+        // same as %main, but words that begin with a consonant get a
+        // leading "i"
+        + "%%with-i:\n"
+        + "    izero; uno; idue; itre; iquattro; icinque; isei; isette; otto;\n"
+        + "        inove;\n"
+        + "    idieci; undici; idodici; itredici; iquattordici; iquindici; isedici;\n"
+        + "        idiciassette; idiciotto; idiciannove;\n"
+        + "    20: iventi; ivent>%%with-i>;\n"
+        + "    30: itrenta; itrent>%%with-i>;\n"
+        + "    40: iquaranta; iquarant>%%with-a>;\n"
+        + "    50: icinquanta; icinquant>%%with-a>;\n"
+        + "    60: isessanta; isessant>%%with-a>;\n"
+        + "    70: isettanta; isettant>%%with-a>;\n"
+        + "    80: ottanta; ottant>%%with-a>;\n"
+        + "    90: inovanta; inovant>%%with-a>;\n"
+        + "    100: icento; icent[>%%with-o>];\n"
+        + "    200: <%%with-i<cento; <%%with-i<cent[>%%with-o>];\n"
+        + "    1000: imille; imill[>%%with-i>];\n"
+        + "    2000: <%%with-i<mila; <%%with-i<mil[>%%with-a>];\n"
+        + "    100,000: =%main=;\n"
+        // same as %main, but words that begin with a consonant get a
+        // leading "o"
+        + "%%with-o:\n"
+        + "    ozero; uno; odue; otre; oquattro; ocinque; osei; osette; otto;\n"
+        + "        onove;\n"
+        + "    odieci; undici; ododici; otredici; oquattordici; oquindici; osedici;\n"
+        + "        odiciassette; odiciotto; odiciannove;\n"
+        + "    20: oventi; ovent>%%with-i>;\n"
+        + "    30: otrenta; otrent>%%with-i>;\n"
+        + "    40: oquaranta; oquarant>%%with-a>;\n"
+        + "    50: ocinquanta; ocinquant>%%with-a>;\n"
+        + "    60: osessanta; osessant>%%with-a>;\n"
+        + "    70: osettanta; osettant>%%with-a>;\n"
+        + "    80: ottanta; ottant>%%with-a>;\n"
+        + "    90: onovanta; onovant>%%with-a>;\n"
+        + "    100: ocento; ocent[>%%with-o>];\n"
+        + "    200: <%%with-o<cento; <%%with-o<cent[>%%with-o>];\n"
+        + "    1000: omille; omill[>%%with-i>];\n"
+        + "    2000: <%%with-o<mila; <%%with-o<mil[>%%with-a>];\n"
+        + "    100,000: =%main=;\n";
+    // Can someone confirm that I did the vowel-eliding thing right?  I'm
+    // not 100% sure I'm doing it in all the right places, or completely
+    // correctly.  Also, I don't have information for negatives and decimals,
+    // and I lack words for values from 1,000,000 on up.
+
+    /**
+     * Spellout rules for Swedish.  Values below 1,000 are written as a
+     * single word; from 1,000 up the parts are separated by spaces.  There
+     * are no rules for negative numbers or fractions.
+     */
+    public static final String swedish =
+        // words for values from 0 to 19
+        "noll; ett; tv\u00e5; tre; fyra; fem; sex; sju; \u00e5tta; nio;\n"
+        + "tio; elva; tolv; tretton; fjorton; femton; sexton; sjutton; arton; nitton;\n"
+        // multiples of 10, with the ones digit appended directly
+        + "20: tjugo[>>];\n"
+        + "30: trettio[>>];\n"
+        + "40: fyrtio[>>];\n"
+        + "50: femtio[>>];\n"
+        + "60: sextio[>>];\n"
+        + "70: sjuttio[>>];\n"
+        + "80: \u00e5ttio[>>];\n"
+        + "90: nittio[>>];\n"
+        // the multiplier is omitted for 100 and written in front from 200 on.
+        // NOTE(review): the 200 rule had lost its "<hundra[" text (it read
+        // "<>];"); restored by analogy with the 100 rule -- confirm against
+        // the upstream ICU source.
+        + "100: hundra[>>];\n"
+        + "200: <<hundra[>>];\n"
+        + "1000: tusen[ >>];\n"
+        + "2000: << tusen[ >>];\n"
+        + "1,000,000: en miljon[ >>];\n"
+        + "2,000,000: << miljon[ >>];\n"
+        + "1,000,000,000: en miljard[ >>];\n"
+        + "2,000,000,000: << miljard[ >>];\n"
+        + "1,000,000,000,000: en biljon[ >>];\n"
+        + "2,000,000,000,000: << biljon[ >>];\n"
+        // overflow rule: fall back to numerals
+        + "1,000,000,000,000,000: =#,##0=";
+    // can someone supply me with information on negatives and decimals?
+
+    /**
+     * Spellout rules for Dutch.  Notice that in Dutch, as in German,
+     * the ones digit precedes the tens digit.
+     */
+    public static final String dutch =
+        // negative-number and fraction rules
+        " -x: min >>;\n"
+        + "x.x: << komma >>;\n"
+        // words for values from 0 to 19 (zero is "nul")
+        + "nul; een; twee; drie; vier; vijf; zes; zeven; acht; negen;\n"
+        + "tien; elf; twaalf; dertien; veertien; vijftien; zestien;\n"
+        + "zeventien; achttien; negentien;\n"
+        // multiples of 10: the ones digit and "en" go in front of the tens
+        + "20: [>> en ]twintig;\n"
+        + "30: [>> en ]dertig;\n"
+        + "40: [>> en ]veertig;\n"
+        + "50: [>> en ]vijftig;\n"
+        + "60: [>> en ]zestig;\n"
+        + "70: [>> en ]zeventig;\n"
+        + "80: [>> en ]tachtig;\n"
+        + "90: [>> en ]negentig;\n"
+        // larger units; the multiplier is always written
+        + "100: << honderd[ >>];\n"
+        + "1000: << duizend[ >>];\n"
+        + "1,000,000: << miljoen[ >>];\n"
+        // 10^9 is "miljard" in Dutch ("biljoen" is 10^12)
+        + "1,000,000,000: << miljard[ >>];\n"
+        // overflow rule: fall back to numerals
+        + "1,000,000,000,000: =#,##0=";
+
+    /**
+     * Spellout rules for Japanese.  In Japanese, there really isn't any
+     * distinction between a number written out in digits and a number
+     * written out in words: the ideographic characters are both digits
+     * and words.  This rule set provides two variants:  %traditional
+     * uses the traditional CJK numerals (which are also used in China
+     * and Korea).  %financial uses alternate ideographs for many numbers
+     * that are harder to alter than the traditional numerals (one could
+     * fairly easily change a one to
+     * a three just by adding two strokes, for example).  This is also done in
+     * the other countries using Chinese ideographs, but different ideographs
+     * are used in those places.
+     */
+    public static final String japanese =
+        // the variant using the harder-to-alter ideographs
+        "%financial:\n"
+               // characters for 0 through 9
+        + "    \u96f6; \u58f1; \u5f10; \u53c2; \u56db; \u4f0d; \u516d; \u4e03; \u516b; \u4e5d;\n"
+               // 10 through 19: the character for ten with no multiplier
+        + "    \u62fe[>>];\n"
+               // from 20 up, the multiplier is written in front of the unit
+        + "    20: <<\u62fe[>>];\n"
+        + "    100: <<\u767e[>>];\n"
+        + "    1000: <<\u5343[>>];\n"
+               // above 1,000 the units step by factors of 10,000
+        + "    10,000: <<\u4e07[>>];\n"
+        + "    100,000,000: <<\u5104[>>];\n"
+        + "    1,000,000,000,000: <<\u5146[>>];\n"
+               // overflow rule: fall back to numerals
+        + "    10,000,000,000,000,000: =#,##0=;\n"
+        // the variant using the traditional numerals; same structure as
+        // %financial, differing only in some of the characters used
+        + "%traditional:\n"
+        + "    \u96f6; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
+        + "    \u5341[>>];\n"
+        + "    20: <<\u5341[>>];\n"
+        + "    100: <<\u767e[>>];\n"
+        + "    1000: <<\u5343[>>];\n"
+        + "    10,000: <<\u4e07[>>];\n"
+        + "    100,000,000: <<\u5104[>>];\n"
+        + "    1,000,000,000,000: <<\u5146[>>];\n"
+        + "    10,000,000,000,000,000: =#,##0=;";
+    // Can someone supply me with the right fraud-proof ideographs for
+    // Simplified and Traditional Chinese, and for Korean?  Can someone
+    // supply me with information on negatives and decimals?
+
+    /**
+     * Spellout rules for Greek.  Again in Greek we have to supply the words
+     * for the multiples of 100 because they can't be derived algorithmically.
+     * Also, the tens digit changes form when followed by a ones digit: an
+     * accent mark disappears from the tens digit and moves to the ones digit.
+     * Therefore, instead of using the [] notation, we actually have to use
+     * two separate rules for each multiple of 10 to show the two forms of
+     * the word.
+     */
+    public static final String greek =
+        // placeholder text for zero, then the words for 1 through 9.
+        // NOTE(review): the word for 3 starts with delta (U+03B4) while the
+        // word for 30 below starts with tau (U+03C4) -- possibly a typo;
+        // confirm with a Greek speaker
+        "zero (incomplete data); \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03b4\u03c1\u03af\u03b1; "
+        + "\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
+        + "\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
+        + "\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
+        // 10, 11, and 12 have their own words; from 13 on, the unaccented
+        // form of the word for 10 is prefixed to the ones digit
+        + "10: \u03b4\u03ad\u03ba\u03b1; "
+        + "\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
+        + "\u03b4\u03b5\u03ba\u03b1>>;\n"
+        // multiples of 10: two rules each -- the accented form for the
+        // round value, then the unaccented form followed by the ones digit
+        + "20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
+        + "30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
+        + "40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
+        + "50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
+        + "60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
+        + "70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
+        + "\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
+        + "80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
+        + "90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
+        + "\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
+        // multiples of 100, each stated explicitly.  For 100 the bracketed
+        // text also supplies an extra final letter before the remainder
+        + "100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
+        + "200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        + "900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+        // thousands: no multiplier for 1,000; multiplier in front from 2,000
+        + "1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
+        + "2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
+        // millions and billions, always with a multiplier.
+        // NOTE(review): the spelling of these two words has not been
+        // verified -- confirm with a Greek speaker
+        + "1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
+        + "1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
+        // overflow rule: fall back to numerals
+        + "1,000,000,000,000: =#,##0=";
+    // Can someone supply me with information on negatives and decimals?
+    // I'm also missing the word for zero.  Can someone clue me in?
+
+    /**
+     * Spellout rules for Russian.  The rule set covers non-negative
+     * integers below 1,000,000,000; larger values fall back to numerals.
+     */
+    public static final String russian =
+        // words for 0 through 9.
+        // NOTE(review): the words for 5, 6, 9 (and 10 below) end without a
+        // soft sign (U+044C), unlike the words for 7 and 8 -- possibly
+        // typos; confirm with a Russian speaker
+        "\u043d\u043e\u043b\u044c; \u043e\u0434\u0438\u043d; \u0434\u0432\u0430; \u0442\u0440\u0438; "
+        + "\u0447\u0435\u0442\u044b\u0440\u0435; \u043f\u044f\u0442; \u0448\u0435\u0441\u0442; "
+        + "\u0441\u0435\u043c\u044c; \u0432\u043e\u0441\u0435\u043c\u044c; \u0434\u0435\u0432\u044f\u0442;\n"
+        // words for 10 through 14
+        + "10: \u0434\u0435\u0441\u044f\u0442; "
+        + "\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
+        + "\u0434\u0432\u0435\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+        + "\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+        + "\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
+        // words for 15 through 19
+        + "15: \u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+        + "\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+        + "\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+        + "\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+        + "\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
+        // multiples of 10, followed by a space and the ones digit.
+        // NOTE(review): the word for 30 contains U+043B where the word for
+        // 3 above has U+0438 -- possibly a typo; confirm
+        + "20: \u0434\u0432\u0430\u0434\u0446\u0430\u0442\u044c[ >>];\n"
+        + "30: \u0442\u0440\u043b\u0434\u0446\u0430\u0442\u044c[ >>];\n"
+        + "40: \u0441\u043e\u0440\u043e\u043a[ >>];\n"
+        + "50: \u043f\u044f\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+        + "60: \u0448\u0435\u0441\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+        + "70: \u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+        + "80: \u0432\u043e\u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+        + "90: \u0434\u0435\u0432\u044f\u043d\u043e\u0441\u0442\u043e[ >>];\n"
+        // hundreds, thousands, and millions: the multiplier is omitted for
+        // the unit itself and written in front from twice the unit on; the
+        // same word form is used in both rules
+        + "100: \u0441\u0442\u043e[ >>];\n"
+        + "200: << \u0441\u0442\u043e[ >>];\n"
+        + "1000: \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
+        + "2000: << \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
+        + "1,000,000: \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
+        + "2,000,000: << \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
+        // overflow rule: fall back to numerals
+        + "1,000,000,000: =#,##0=;";
+    // Can someone supply me with information on negatives and decimals?
+    // How about words for billions and trillions?
+
+    /**
+     * Spellout rules for Hebrew.  Hebrew actually has inflected forms for
+     * most of the lower-order numbers.  The masculine forms are shown
+     * here.
+     */
+    public static final String hebrew =
+        // placeholder text for zero, then the words for 1 through 3.
+        // NOTE(review): the spelling of the Hebrew words in this rule set
+        // has not been verified -- confirm with a Hebrew speaker
+        "zero (incomplete data); \u05d0\u05d4\u05d3; \u05e9\u05d2\u05d9\u05d9\u05dd; \u05e9\u05dc\u05d5\u05e9\u05d4;\n"
+        // words for 4 through 6
+        + "4: \u05d0\u05d3\u05d1\u05e6\u05d4; \u05d7\u05d2\u05d5\u05d9\u05e9\u05d4; \u05e9\u05e9\u05d4;\n"
+        // words for 7 through 9
+        + "7: \u05e9\u05d1\u05e6\u05d4; \u05e9\u05de\u05d5\u05d2\u05d4; \u05ea\u05e9\u05e6\u05d4;\n"
+        // multiples of 10, followed by a space and the ones digit
+        + "10: \u05e6\u05e9\u05d3\u05d4[ >>];\n"
+        + "20: \u05e6\u05e9\u05d3\u05d9\u05dd[ >>];\n"
+        + "30: \u05e9\u05dc\u05d5\u05e9\u05d9\u05dd[ >>];\n"
+        + "40: \u05d0\u05d3\u05d1\u05e6\u05d9\u05dd[ >>];\n"
+        + "50: \u05d7\u05de\u05d9\u05e9\u05d9\u05dd[ >>];\n"
+        + "60: \u05e9\u05e9\u05d9\u05dd[ >>];\n"
+        + "70: \u05e9\u05d1\u05e6\u05d9\u05dd[ >>];\n"
+        + "80: \u05e9\u05de\u05d5\u05d2\u05d9\u05dd[ >>];\n"
+        + "90: \u05ea\u05e9\u05e6\u05d9\u05dd[ >>];\n"
+        // hundreds and thousands: the multiplier is omitted for the unit
+        // itself and written in front from twice the unit on
+        + "100: \u05de\u05d0\u05d4[ >>];\n"
+        + "200: << \u05de\u05d0\u05d4[ >>];\n"
+        + "1000: \u05d0\u05dc\u05e3[ >>];\n"
+        + "2000: << \u05d0\u05dc\u05e3[ >>];\n"
+        // overflow rule: from 1,000,000 up, fall back to numerals and flag
+        // the output as incomplete
+        + "1,000,000: =#,##0= (incomplete data);";
+    // This data is woefully incomplete.  Can someone fill me in on the
+    // various inflected forms of the numbers, which seem to be necessary
+    // to do Hebrew correctly?  Can someone supply me with data for values
+    // from 1,000,000 on up?  What about the word for zero?  What about
+    // information on negatives and decimals?
+
+    //========================================================================
+    // Simple examples
+    //========================================================================
+
+    /**
+     * This rule set adds an English ordinal abbreviation to the end of a
+     * number.  For example, 2 is formatted as "2nd".  Parsing doesn't work with
+     * this rule set.  To parse, use DecimalFormat on the numeral.
+     */
+    public static final String ordinal =
+        // this rule set formats the numeral and calls %%abbrev to
+        // supply the abbreviation
+        "%main:\n"
+        + "    =#,##0==%%abbrev=;\n"
+        // this rule set supplies the abbreviation
+        + "%%abbrev:\n"
+               // the abbreviations for 0, 1, 2, 3, and 4.  The last rule
+               // stays in force up to 19: everything from 4 to 19 ends in "th"
+        + "    th; st; nd; rd; th;\n"
+               // at 20, we begin repeating the cycle every 10 (13 is "13th",
+               // but 23 and 33 are "23rd" and "33rd")  We do this by
+               // ignoring all but the ones digit in selecting the abbreviation
+        + "    20: >>;\n"
+               // at 100, we repeat the whole cycle by considering only the
+               // tens and ones digits in picking an abbreviation
+        + "    100: >>;\n";
+
+    /**
+     * This is a simple message-formatting example.  Normally one would
+     * use ChoiceFormat and MessageFormat to do something this simple,
+     * but this shows it could be done with RuleBasedNumberFormat too.
+     * A message-formatting example that might work better with
+     * RuleBasedNumberFormat appears later.
+     */
+    public static final String message1 =
+        // this rule surrounds whatever the other rules produce with the
+        // rest of the sentence
+        "x.0: The search found <<.;\n"
+        // use words for values below 10 (and change to "file" for 1)
+        + "no files; one file; two files; three files; four files; five files;\n"
+        + "    six files; seven files; eight files; nine files;\n"
+        // use numerals for values of 10 and higher (this is the eleventh
+        // rule in the list, so it takes over at 10)
+        + "=#,##0= files;";
+
+    //========================================================================
+    // Fraction handling
+    //
+    // The next few examples show how RuleBasedNumberFormat can be used for
+    // more flexible handling of fractions
+    //========================================================================
+
+    /**
+     * This example formats a number in one of the two styles often used
+     * on checks.  %dollars-and-hundredths formats cents as hundredths of
+     * a dollar (23.40 comes out as "twenty-three and 40/100 dollars").
+     * %dollars-and-cents formats in dollars and cents (23.40 comes out as
+     * "twenty-three dollars and forty cents")
+     */
+    public static final String dollarsAndCents =
+        // this rule set formats numbers as dollars and cents
+        "%dollars-and-cents:\n"
+               // if the value is 1 or more, put "xx dollars and yy cents".
+               // the "and y cents" part is suppressed if the value is an
+               // even number of dollars
+        + "    x.0: << [and >%%cents>];\n"
+               // if the value is between 0 and 1, put "xx cents"
+        + "    0.x: >%%cents>;\n"
+               // these three rules take care of the singular and plural
+               // forms of "dollar" and use %%main to format the number
+        + "    0: zero dollars; one dollar; =%%main= dollars;\n"
+        // these are the regular U.S. English number spellout rules
+        + "%%main:\n"
+        + "    zero; one; two; three; four; five; six; seven; eight; nine;\n"
+        + "    ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+        + "        seventeen; eighteen; nineteen;\n"
+        + "    20: twenty[->>];\n"
+        + "    30: thirty[->>];\n"
+        + "    40: forty[->>];\n"
+        + "    50: fifty[->>];\n"
+        + "    60: sixty[->>];\n"
+        + "    70: seventy[->>];\n"
+        + "    80: eighty[->>];\n"
+        + "    90: ninety[->>];\n"
+        + "    100: << hundred[ >>];\n"
+        + "    1000: << thousand[ >>];\n"
+        + "    1,000,000: << million[ >>];\n"
+        + "    1,000,000,000: << billion[ >>];\n"
+        + "    1,000,000,000,000: << trillion[ >>];\n"
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        // this rule takes care of the fractional part of the value.  It
+        // multiplies the fractional part of the number being formatted by
+        // 100, formats it with %%main, and then adds the word "cent" or
+        // "cents" to the end.  (The text in brackets is omitted if the
+        // numerator of the fraction is 1.)
+        + "%%cents:\n"
+        + "    100: <%%main< cent[s];\n"
+
+        // this rule set formats numbers as dollars and hundredths of dollars
+        + "%dollars-and-hundredths:\n"
+               // this rule takes care of the general shell of the output
+               // string.  We always show the cents, even when there aren't
+               // any.  Because of this, the word is always "dollars"--
+               // we don't have to worry about the singular form.  We use
+               // %%main to format the number of dollars and %%hundredths to
+               // format the number of cents
+        + "    x.0: <%%main< and >%%hundredths>/100 dollars;\n"
+        // this rule set formats the cents for %dollars-and-hundredths.
+        // It multiplies the fractional part of the number by 100 and formats
+        // the result using a DecimalFormat ("00" tells the DecimalFormat to
+        // always use two digits, even for numbers under 10)
+        + "%%hundredths:\n"
+        + "    100: <00<;\n";
+
+    /**
+     * This rule set shows the fractional part of the number as a fraction
+     * with a power of 10 as the denominator.  Some languages don't spell
+     * out the fractional part of a number as "point one two three," but
+     * always render it as a fraction.  If we still want to treat the fractional
+     * part of the number as a decimal, then the fraction's denominator
+     * is always a power of 10.  This example does that: 23.125 is formatted
+     * as "twenty-three and one hundred twenty-five thousandths" (as opposed
+     * to "twenty-three point one two five" or "twenty-three and one eighth").
+     */
+    public static final String decimalAsFraction =
+        // the regular U.S. English spellout rules, with one difference
+        "%main:\n"
+        + "    -x: minus >>;\n"
+               // the difference.  This rule uses %%frac to show the fractional
+               // part of the number.  Text in brackets is omitted when the
+               // value is between 0 and 1 (causing 0.3 to come out as "three
+               // tenths" instead of "zero and three tenths").
+        + "    x.x: [<< and ]>%%frac>;\n"
+        + "    zero; one; two; three; four; five; six; seven; eight; nine;\n"
+        + "    ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+        + "        seventeen; eighteen; nineteen;\n"
+        + "    twenty[->>];\n"
+        + "    30: thirty[->>];\n"
+        + "    40: forty[->>];\n"
+        + "    50: fifty[->>];\n"
+        + "    60: sixty[->>];\n"
+        + "    70: seventy[->>];\n"
+        + "    80: eighty[->>];\n"
+        + "    90: ninety[->>];\n"
+        + "    100: << hundred[ >>];\n"
+        + "    1000: << thousand[ >>];\n"
+        + "    1,000,000: << million[ >>];\n"
+        + "    1,000,000,000: << billion[ >>];\n"
+        + "    1,000,000,000,000: << trillion[ >>];\n"
+        + "    1,000,000,000,000,000: =#,##0=;\n"
+        // the rule set that formats the fractional part of the number.
+        // The rule that is used is the one that, when its base value is
+        // multiplied by the fractional part of the number being formatted,
+        // produces the result closest to zero.  Thus, the base values are
+        // prospective denominators of the fraction.  The << marks the place
+        // where the numerator of the fraction (the result of multiplying the
+        // fractional part of the number by the rule's base value) is
+        // placed.  Text in brackets is omitted when the numerator is 1, giving
+        // us the singular and plural forms of the words.
+        // [In languages where the singular and plural are completely different
+        // words, the rule can just be stated twice: the second time with
+        // the plural form.]
+        + "%%frac:\n"
+        + "    10: << tenth[s];\n"
+        + "    100: << hundredth[s];\n"
+        + "    1000: << thousandth[s];\n"
+        + "    10,000: << ten-thousandth[s];\n"
+        + "    100,000: << hundred-thousandth[s];\n"
+        + "    1,000,000: << millionth[s];";
+
+    /**
+     * Number with closest fraction.  This example formats a value using
+     * numerals, but shows the fractional part as a ratio (fraction) rather
+     * than a decimal.  The fraction always has a denominator between 2 and 10.
+     */
+    public static final String closestFraction =
+        "%main:\n"
+               // this rule formats the number if it's 1 or more.  It formats
+               // the integral part using a DecimalFormat ("#,##0" puts
+               // thousands separators in the right places) and the fractional
+               // part using %%frac.  If there is no fractional part, it
+               // just shows the integral part.
+        + "    x.0: <#,##0<[ >%%frac>];\n"
+               // this rule formats the number if it's between 0 and 1.  It
+               // shows only the fractional part (0.5 shows up as "1/2," not
+               // "0 1/2")
+        + "    0.x: >%%frac>;\n"
+        // the fraction rule set.  This works the same way as the one in the
+        // preceding example: We multiply the fractional part of the number
+        // being formatted by each rule's base value and use the rule that
+        // produces the result closest to 0 (or the first rule that produces 0).
+        // Since we only provide rules for the numbers from 2 to 10, we know
+        // we'll get a fraction with a denominator between 2 and 10.
+        // "<0<" causes the numerator of the fraction to be formatted
+        // using numerals
+        + "%%frac:\n"
+        + "    2: 1/2;\n"
+               // NOTE(review): the rule below has a "]" with no matching "[",
+               // and the rules that follow refer to %%frac2 and %%quarters,
+               // which are not defined anywhere in this string.  From here on
+               // the text reads like stock-price and metric-unit samples rather
+               // than "closest fraction" rules (there are no rules for the
+               // denominators 4 through 10 that the comments above promise).
+               // This string looks truncated/merged -- verify it against the
+               // upstream source before relying on it.
+        + "    3: <0%%frac1>];\n"
+               // this rule is used for values between 0 and 1 and omits the
+               // integral part
+        + "    0.x: >%%frac2>;\n"
+        // this rule set is used to format the fractional part of the number when
+        // there's an integral part before it (again, we try all denominators
+        // and use the "best" one)
+        + "%%frac1:\n"
+               // for even multiples of 1/4, format the fraction using the
+               // typographer's fractions
+        + "    4: <%%quarters<;\n"
+               // format the value as a number of eighths, sixteenths, or
+               // thirty-seconds, whichever produces the most accurate value.
+               // The apostrophe at the front of these rules is ignored, but
+               // it makes the space that follows it significant.  This puts a
+               // space between the value's integral and fractional parts so
+               // you can read it
+        + "    8: ' <0%%small>;\n"
+               // otherwise, show between 3 and 6 significant digits of the value
+               // along with the most appropriate unit
+        + "    0: =##0.###= m;\n"
+        + "    1,000: <##0.###< km;\n"
+        + "    1,000,000: <##0.###< Mm;\n"
+        + "    1,000,000,000: <##0.###< Gm;\n"
+        + "    1,000,000,000,000: <#,##0.###< Tm;\n"
+        // %%small formats the number when it's less than 1.  It multiplies the
+        // value by one trillion (the rule's base value, 10^12), and then uses
+        // %%small2 to actually do the formatting.
+        + "%%small:\n"
+        + "    1,000,000,000,000: <%%small2<;\n"
+        // this rule set actually formats small values.  %%small passes this
+        // rule set a number of picometers, and it takes care of scaling up as
+        // appropriate in exactly the same way %main does (we can't normally
+        // handle fractional values this way: here, we're concerned about
+        // magnitude; most of the time, we're concerned about precision)
+        + "%%small2:\n"
+        + "    0: =##0= pm;\n"
+        + "    1,000: <##0.###< nm;\n"
+        + "    1,000,000: <##0.###< \u00b5m;\n"
+        + "    1,000,000,000: <##0.###< mm;\n";
+
+    /**
+     * A more complicated message-formatting example.  Here, in addition to
+     * handling the singular and plural versions of the word, the value is
+     * denominated in bytes, kilobytes, or megabytes depending on its magnitude.
+     * Also notice that it correctly treats a kilobyte as 1,024 bytes (not 1,000),
+     * and a megabyte as 1,024 kilobytes (not 1,000).
+     */
+    public static final String message2 =
+        // this rule supplies the shell of the sentence
+        "x.0: There << free space on the disk.;\n"
+        // handle singular and plural forms of "byte" (and format 0 as
+        // "There is no free space...")
+        + "0: is no;\n"
+        + "is one byte of;\n"
+        + "are =0= bytes of;\n"
+        // for values above 1,024, format the number in K (since "K" is usually
+        // pronounced "K" regardless of whether it's singular or plural, we
+        // don't worry about the plural form).  The "/1024" here causes us to
+        // treat a K as 1,024 bytes rather than 1,000 bytes.
+        // NOTE(review): the rule below has a "]" with no matching "[" and never
+        // emits "K"; there is no megabyte rule, and the remaining rules deal
+        // with gross and thousands rather than bytes.  This string looks
+        // truncated/merged with another sample -- verify against upstream.
+        + "1024/1024: is <0>];\n"
+        // format values over 144 in gross
+        + "144/12: << gross[, >>];\n"
+        // format values over 1,000 in thousands
+        + "1000: << thousand[, >>];\n"
+        // overflow rule.  Format values over 10,000 in numerals
+        + "10,000: =#,##0=;\n";
+
+    //========================================================================
+    // Major and minor units
+    //
+    // These examples show how a single value can be divided up into major
+    // and minor units that don't relate to each other by a factor of 10.
+    //========================================================================
+
+    /**
+     * This example formats a number of seconds in sexagesimal notation
+     * (i.e., hours, minutes, and seconds).  %with-words formats it with
+     * words (3740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
+     * formats it entirely in numerals (3740 is "1:02:20").
+     */
+    public static final String durationInSeconds =
+        // main rule set for formatting with words
+        "%with-words:\n"
+               // take care of singular and plural forms of "second"
+        + "    0 seconds; 1 second; =0= seconds;\n"
+               // use %%min to format values greater than 60 seconds
+        + "    60/60: <%%min<[, >>];\n"
+               // use %%hr to format values greater than 3,600 seconds
+               // (the ">>>" below causes us to see the number of minutes
+               // even when there are zero minutes)
+        + "    3600/60: <%%hr<[, >>>];\n"
+        // this rule set takes care of the singular and plural forms
+        // of "minute"
+        + "%%min:\n"
+        + "    0 minutes; 1 minute; =0= minutes;\n"
+        // this rule set takes care of the singular and plural forms
+        // of "hour"
+        + "%%hr:\n"
+        + "    0 hours; 1 hour; =0= hours;\n"
+
+        // main rule set for formatting in numerals
+        + "%in-numerals:\n"
+               // values below 60 seconds are shown with "sec."
+        + "    =0= sec.;\n"
+               // higher values are shown with colons: %%min-sec is used for
+               // values below 3,600 seconds...
+        + "    60: =%%min-sec=;\n"
+               // ...and %%hr-min-sec is used for values of 3,600 seconds
+               // and above
+        + "    3600: =%%hr-min-sec=;\n"
+        // this rule causes values of less than 10 minutes to show without
+        // a leading zero
+        + "%%min-sec:\n"
+        + "    0: :=00=;\n"
+        + "    60/60: <0<>>;\n"
+        // this rule set is used for values of 3,600 or more.  Minutes are always
+        // shown, and always shown with two digits
+        + "%%hr-min-sec:\n"
+        + "    0: :=00=;\n"
+        + "    60/60: <00<>>;\n"
+        + "    3600/60: <#,##0<:>>>;\n"
+        // the lenient-parse rules allow several different characters to be used
+        // as delimiters between hours, minutes, and seconds
+        + "%%lenient-parse:\n"
+        + "    & : = . = ' ' = -;\n";
+
+    /**
+     * This example formats a number of hours in sexagesimal notation (i.e.,
+     * hours, minutes, and seconds); the fractional part of the value supplies
+     * the minutes and seconds.  %with-words formats the value using words for
+     * the units, and %in-numerals formats the value using only numerals.
+     */
+    public static final String durationInHours =
+        // main entry point for formatting with words
+        "%with-words:\n"
+               // this rule omits minutes and seconds when the value is
+               // an even number of hours
+        + "    x.0: <<[, >%%min-sec>];\n"
+               // these rules take care of the singular and plural forms
+               // of hours
+        + "    0 hours; 1 hour; =#,##0= hours;\n"
+        // this rule set takes the fractional part of the number and multiplies
+        // it by 3,600 (turning it into a number of seconds).  Then it delegates
+        // to %%min-sec-implementation to format the resulting value
+        + "%%min-sec:\n"
+        + "    3600: =%%min-sec-implementation=;\n"
+        // this rule set formats the seconds as either seconds or minutes and
+        // seconds, and takes care of the singular and plural forms of
+        // "minute" and "second"
+        + "%%min-sec-implementation:\n"
+        + "    0 seconds; 1 second; =0= seconds;\n"
+        + "    60/60: 1 minute[, >>];\n"
+        + "    120/60: <0< minutes[, >>];\n"
+
+        // main entry point for formatting in numerals
+        + "%in-numerals:\n"
+               // show minutes even for even numbers of hours
+        + "    x.0: <#,##0<:00;\n"
+               // delegate to %%min-sec2 to format minutes and seconds
+        + "    x.x: <#,##0<:>%%min-sec2>;\n"
+        // this rule set formats minutes when there is an even number of
+        // minutes, and delegates to %%min-sec2-implementation when there
+        // are seconds
+        + "%%min-sec2:\n"
+        + "    60: <00<;\n"
+        + "    3600: <%%min-sec2-implementation<;\n"
+        // these two rule sets are used to format the minutes and seconds
+        + "%%min-sec2-implementation:\n"
+               // if there are fewer than 60 seconds, show the minutes anyway
+        + "    0: 00:=00=;\n"
+               // if there are minutes, format them too, and always use 2 digits
+               // for both minutes and seconds
+        + "    60: =%%min-sec3=;\n"
+        + "%%min-sec3:\n"
+        + "    0: :=00=;\n"
+        + "    60/60: <00<>>;\n"
+        // the lenient-parse rules allow the user to use any of several
+        // characters as delimiters between hours, minutes, and seconds
+        + "%%lenient-parse:\n"
+        + "    & : = . = ' ' = -;\n";
+
+    /**
+     * This rule set formats a number of pounds as pounds, shillings, and
+     * pence in the old English system of currency.  The integral part of the
+     * value is the number of pounds; the fractional part is converted to
+     * pence (240 to the pound) and shown as shillings and pence (12 pence to
+     * the shilling).
+     */
+    public static final String poundsShillingsAndPence =
+        // for values of 1 or more, format the integral part with a pound
+        // sign in front, and show shillings and pence if necessary
+        "%main:\n"
+        + "    x.0: \u00a3<#,##0<[ >%%shillings-and-pence>];\n"
+        // for values between 0 and 1, omit the number of pounds
+        + "    0.x: >%%pence-alone>;\n"
+        // this rule set is used to show shillings and pence.  It multiplies
+        // the fractional part of the number by 240 (the number of pence in a
+        // pound) and uses %%shillings-and-pence-implementation to format
+        // the result
+        + "%%shillings-and-pence:\n"
+        + "    240: <%%shillings-and-pence-implementation<;\n"
+        // this rule set is used to show shillings and pence when there are
+        // no pounds.  It also multiplies the value by 240, and then it uses
+        // %%pence-alone-implementation to format the result.
+        + "%%pence-alone:\n"
+        + "    240: <%%pence-alone-implementation<;\n"
+        // this rule set formats a number of pence when we know we also
+        // have pounds.  We always show shillings (with a 0 if necessary),
+        // but only show pence if the value isn't an even number of shillings
+        + "%%shillings-and-pence-implementation:\n"
+        + "    0/; 0/=0=;\n"
+               // shillings, then the shilling bar, then (in brackets, so it is
+               // omitted for an even number of shillings) the leftover pence.
+               // The previous text of this rule, "<00>]", had an unmatched "]"
+               // and no shilling bar.
+        + "    12/12: <0</[>0>];\n"
+        // this rule set formats a number of pence when we know there are
+        // no pounds.  Values less than a shilling are shown with "d." (the
+        // abbreviation for pence), and values greater than a shilling are
+        // shown with a shilling bar (and without pence when the value is
+        // an even number of shillings)
+        + "%%pence-alone-implementation:\n"
+        + "    =0= d.;\n"
+               // same shape as the shillings rule above: shillings, bar,
+               // optional pence
+        + "    12/12: <0</[>0>];\n";
+
+    //========================================================================
+    // Alternate numeration systems
+    //
+    // These examples show how RuleBasedNumberFormat can be used to format
+    // numbers using non-positional numeration systems.
+    //========================================================================
+
+    /**
+     * Arabic digits.  This example formats numbers in Arabic numerals.
+     * Normally, you'd do this with DecimalFormat, but this shows that
+     * RuleBasedNumberFormat can handle it too.
+     */
+    public static final String arabicNumerals =
+        // one rule per digit, for the values 0 through 9
+        "0; 1; 2; 3; 4; 5; 6; 7; 8; 9;\n"
+        // two-digit values: the tens digit followed by the ones digit
+        + "10: <<>>;\n"
+        // from 100 upward the low-order part is written with ">>>" rather
+        // than ">>" (NOTE(review): presumably so that zeros in the low-order
+        // digits are still written out -- confirm against the
+        // RuleBasedNumberFormat rule syntax)
+        + "100: <<>>>;\n"
+        // from 1,000 upward a comma is written between the groups
+        + "1000: <<,>>>;\n"
+        + "1,000,000: <<,>>>;\n"
+        + "1,000,000,000: <<,>>>;\n"
+        + "1,000,000,000,000: <<,>>>;\n"
+        // overflow rule: very large values are handed to a DecimalFormat pattern
+        + "1,000,000,000,000,000: =#,##0=;\n"
+        // negative numbers get a leading "-"
+        + "-x: ->>;\n"
+        // the integral and fractional parts are joined with "."
+        + "x.x: <<.>>;";
+
+    /**
+     * Words for digits.  Follows the same pattern as the Arabic-numerals
+     * example above, but uses words for the various digits (e.g., 123 comes
+     * out as "one two three").
+     */
+    public static final String wordsForDigits =
+        // negative numbers are introduced with the word "minus"
+        "-x: minus >>;\n"
+        // the integral and fractional parts are joined with the word "point"
+        + "x.x: << point >>;\n"
+        // one word per digit, for the values 0 through 9
+        + "zero; one; two; three; four; five; six;\n"
+        + "    seven; eight; nine;\n"
+        // digits within a group are separated by a space...
+        + "10: << >>;\n"
+        + "100: << >>>;\n"
+        // ...and groups are separated by a comma and a space
+        + "1000: <<, >>>;\n"
+        + "1,000,000: <<, >>>;\n"
+        + "1,000,000,000: <<, >>>;\n"
+        + "1,000,000,000,000: <<, >>>;\n"
+        // overflow rule: very large values are shown in numerals
+        + "1,000,000,000,000,000: =#,##0=;\n";
+
+    /**
+     * This example formats numbers using Chinese characters in the Arabic
+     * place-value method.  This was used historically in China for a while.
+     */
+    public static final String chinesePlaceValue =
+        // one character per digit, for the values 0 through 9
+        "\u3007; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
+        // higher values simply concatenate the digits; unlike the
+        // Arabic-numerals example, no separator is written between groups
+        + "10: <<>>;\n"
+        + "100: <<>>>;\n"
+        + "1000: <<>>>;\n"
+        + "1,000,000: <<>>>;\n"
+        + "1,000,000,000: <<>>>;\n"
+        + "1,000,000,000,000: <<>>>;\n"
+        // overflow rule: very large values are shown in Western numerals
+        + "1,000,000,000,000,000: =#,##0=;\n";
+
+    /**
+     * Roman numerals.  This example has two variants: %modern shows how large
+     * numbers are usually handled today; %historical uses the older symbols for
+     * thousands.  Values beyond the last symbol rule of each variant (40,000
+     * for %historical, 900,000 for %modern) fall back to ordinary numerals.
+     */
+    public static final String romanNumerals =
+        "%historical:\n"
+               // values below 1,000 are written the same way in both variants
+        + "    =%modern=;\n"
+               // in early Roman numerals, 1,000 was shown with a circle
+               // bisected by a vertical line.  Additional thousands were
+               // shown by adding more concentric circles, and fives were
+               // shown by cutting the symbol for next-higher power of 10
+               // in half (the letter D for 500 evolved from this).
+               // We could go beyond 40,000, but Unicode doesn't encode
+               // the symbols for higher numbers.
+        + "    1000: \u2180[>>]; 2000: \u2180\u2180[>>]; 3000: \u2180\u2180\u2180[>>]; 4000: \u2180\u2181[>>];\n"
+        + "    5000: \u2181[>>]; 6000: \u2181\u2180[>>]; 7000: \u2181\u2180\u2180[>>];\n"
+        + "    8000: \u2181\u2180\u2180\u2180[>>]; 9000: \u2180\u2182[>>];\n"
+        + "    10,000: \u2182[>>]; 20,000: \u2182\u2182[>>]; 30,000: \u2182\u2182\u2182[>>];\n"
+        + "    40,000: =#,##0=;\n"
+        + "%modern:\n"
+        + "    ; I; II; III; IV; V; VI; VII; VIII; IX;\n"
+        + "    10: X[>>]; 20: XX[>>]; 30: XXX[>>]; 40: XL[>>]; 50: L[>>];\n"
+        + "    60: LX[>>]; 70: LXX[>>]; 80: LXXX[>>]; 90: XC[>>];\n"
+        + "    100: C[>>]; 200: CC[>>]; 300: CCC[>>]; 400: CD[>>]; 500: D[>>];\n"
+        + "    600: DC[>>]; 700: DCC[>>]; 800: DCCC[>>]; 900: CM[>>];\n"
+               // in modern Roman numerals, high numbers are generally shown
+               // by placing a bar over the letters for the lower numbers:
+               // the bar multiplied a letter's value by 1,000
+        + "    1000: M[>>]; 2000: MM[>>]; 3000: MMM[>>]; 4000: MV\u0306[>>];\n"
+        + "    5000: V\u0306[>>]; 6000: V\u0306M[>>]; 7000: V\u0306MM[>>];\n"
+        + "    8000: V\u0306MMM[>>]; 9000: MX\u0306[>>];\n"
+        + "    10,000: X\u0306[>>]; 20,000: X\u0306X\u0306[>>]; 30,000: X\u0306X\u0306X\u0306[>>];\n"
+        + "    40,000: X\u0306L\u0306[>>]; 50,000: L\u0306[>>]; 60,000: L\u0306X\u0306[>>];\n"
+        + "    70,000: L\u0306X\u0306X\u0306[>>]; 80,000: L\u0306X\u0306X\u0306X\u0306[>>];\n"
+        + "    90,000: X\u0306C\u0306[>>];\n"
+               // 300,000 needs three marked C's (it previously had only two,
+               // which made it indistinguishable from 200,000)
+        + "    100,000: C\u0306[>>]; 200,000: C\u0306C\u0306[>>]; 300,000: C\u0306C\u0306C\u0306[>>];\n"
+        + "    400,000: C\u0306D\u0306[>>]; 500,000: D\u0306[>>]; 600,000: D\u0306C\u0306[>>];\n"
+        + "    700,000: D\u0306C\u0306C\u0306[>>]; 800,000: D\u0306C\u0306C\u0306C\u0306[>>];\n"
+               // overflow rule: show values of 900,000 and above in numerals
+        + "    900,000: =#,##0=;\n";
+
+    /**
+     * Hebrew alphabetic numerals.  Before adoption of Arabic numerals, Hebrew speakers
+     * used the letter of their alphabet as numerals.  The first nine letters of
+     * the alphabet represented the values from 1 to 9, the second nine letters the
+     * multiples of 10, and the remaining letters the multiples of 100.  Since they
+     * ran out of letters at 400, the remaining multiples of 100 were represented
+     * using combinations of the existing letters for the hundreds.  Numbers were
+     * distinguished from words in a number of different ways: the way shown here
+     * uses a single mark after a number consisting of one letter, and a double
+     * mark between the last two letters of a number consisting of two or more
+     * letters.  Two dots over a letter multiplied its value by 1,000.  Also, since
+     * the letter for 10 is the first letter of God's name and the letters for 5 and 6
+     * are letters in God's name, which wasn't supposed to be written or spoken, 15 and
+     * 16 were usually written as 9 + 6 and 9 + 7 instead of 10 + 5 and 10 + 6.
+     */
+    public static final String hebrewAlphabetic =
+        // letters for the ones
+        "%%ones:\n"
+        + "    (no zero); \u05d0; \u05d1; \u05d2; \u05d3; \u05d4; \u05d5; \u05d6; \u05d7; \u05d8;\n"
+        // letters for the tens
+        + "%%tens:\n"
+        + "    ; \u05d9; \u05db; \u05dc; \u05de; \u05e0; \u05e1; \u05e2; \u05e4; \u05e6;\n"
+        // letters for the first four hundreds
+        + "%%hundreds:\n"
+        + "    ; \u05e7; \u05e8; \u05e9; \u05ea;\n"
+        // this rule set is used to write the combination of the tens and ones digits
+        // when we know that no other digits precede them: they put the numeral marks
+        // in the right place and properly handle 15 and 16 (I'm using the mathematical
+        // prime characters for the numeral marks because my Unicode font doesn't
+        // include the real Hebrew characters, which look just like the prime marks)
+        + "%%tens-and-ones:\n"
+               // for values less than 10, just use %%ones and put the numeral mark
+               // afterward
+        + "    =%%ones=\u2032;\n"
+               // put the numeral mark at the end for 10, but in the middle for
+               // 11 through 14
+        + "    10: <%%tens<\u2032; <%%tens<\u2033>%%ones>;\n"
+               // special-case 15 and 16
+        + "    15: \u05d8\u2033\u05d5; 16: \u05d8\u2033\u05d6;\n"
+               // go back to the normal method at 17
+        + "    17: <%%tens<\u2033>%%ones>;\n"
+               // repeat the rules for 10 and 11 to cover the values from 20 to 99
+        + "    20: <%%tens<\u2032; <%%tens<\u2033>%%ones>;\n"
+        // this rule set is used to format numbers below 1,000.  It relies on
+        // %%tens-and-ones to format the tens and ones places, and adds logic
+        // to handle the high hundreds and the numeral marks when there is no
+        // tens digit.  Notice how the rules are paired: all of these pairs of
+        // rules take advantage of the rollback rule: if the value (between 100
+        // and 499) is an even multiple of 100, the rule for 100 is used; otherwise,
+        // the rule for 101 (the following rule) is used.  The first rule in each
+        // pair (the one for the even multiple) places the numeral mark in a different
+        // spot than the second rule in each pair (which knows there are more digits
+        // and relies on the rule supplying them to also supply the numeral mark).
+        // The call to %%null in the second rule on the "10:" line is there simply
+        // to invoke the rollback rule.
+        + "%%low-order:\n"
+               // this rule is only called when there are other characters before.
+               // It places the numeral mark before the last digit
+        + "    \u2033=%%ones=;\n"
+               // the rule for 10 places the numeral mark before the 10 character
+               // (because we know it's the last character); the rule for 11 relies
+               // on %%tens-and-ones to place the numeral mark
+        + "    10: \u2033<%%tens<; =%%tens-and-ones=>%%null>;\n"
+               // the rule for 100 places the numeral mark before the 100 character
+               // (we know it's the last character); the rule for 101 recurses to
+               // fill in the remaining digits and the numeral mark
+        + "    100: <%%hundreds<\u2032; <%%hundreds<>>;\n"
+               // special-case the hundreds from 500 to 900 because they consist of
+               // more than one character
+        + "    500: \u05ea\u2033\u05e7; \u05ea\u05e7>>;\n"
+        + "    600: \u05ea\u2033\u05e8; \u05ea\u05e8>>;\n"
+        + "    700: \u05ea\u2033\u05e9; \u05ea\u05e9>>;\n"
+        + "    800: \u05ea\u2033\u05ea; \u05ea\u05ea>>;\n"
+        + "    900: \u05ea\u05ea\u2033\u05e7; \u05ea\u05ea\u05e7>>;\n"
+        // this rule set is used to format values of 1,000 or more.  Here, we don't
+        // worry about the numeral mark, and we add two dots (the Unicode combining
+        // diaeresis character) to every letter
+        + "%%high-order:\n"
+               // put the ones digit, followed by the diaeresis
+        + "    =%%ones=\u0308;\n"
+               // the tens can be handled with recursion
+        + "    10: <%%tens<\u0308[>>];\n"
+               // still have to special-case 15 and 16.  Each letter is followed
+               // by the combining diaeresis (U+0308); the rule for 16 used to
+               // contain a mistyped escape that produced the text "078" instead
+        + "    15: \u05d8\u0308\u05d5\u0308; 16: \u05d8\u0308\u05d6\u0308;\n"
+               // back to the regular rules at 17
+        + "    17: <%%tens<\u0308[>>];\n"
+               // the hundreds with the dots added (and without worrying about
+               // placing the numeral mark)
+        + "    100: <%%hundreds<\u0308[>>];\n"
+        + "    500: \u05ea\u0308\u05e7\u0308[>>];\n"
+        + "    600: \u05ea\u0308\u05e8\u0308[>>];\n"
+        + "    700: \u05ea\u0308\u05e9\u0308[>>];\n"
+        + "    800: \u05ea\u0308\u05ea\u0308[>>];\n"
+        + "    900: \u05ea\u0308\u05ea\u0308\u05e7\u0308[>>];\n"
+        // this rule set doesn't do anything; it's used by some other rules to
+        // invoke the rollback rule
+        + " %%null:\n"
+        + "    ;\n"
+        // the main rule set.
+        + "%main:\n"
+               // for values below 10, just output the letter and the numeral mark
+        + "    =%%ones=\u2032;\n"
+               // for values from 10 to 99, use %%tens-and-ones to do the formatting
+        + "    10: =%%tens-and-ones=;\n"
+               // for values from 100 to 999, use %%low-order to do the formatting
+        + "    100: =%%low-order=;\n"
+               // for values of 1,000 and over, use %%high-order to do the formatting
+        + "    1000: <%%high-order<[>%%low-order>];\n";
+
+    /**
+     * Greek alphabetic numerals.  The Greeks, before adopting the Arabic numerals,
+     * also used the letters of their alphabet as numerals.  There are three now-
+     * obsolete Greek letters that are used as numerals; many fonts don't have them.
+     * Large numbers were handled many different ways; the way shown here divides
+     * large numbers into groups of four letters (factors of 10,000), and separates
+     * the groups with the capital letter mu (for myriad).  Capital letters are used
+     * for values below 10,000; small letters for higher numbers (to make the capital
+     * mu stand out).
+     */
+    public static final String greekAlphabetic =
+        // this rule set is used for formatting numbers below 10,000.  It uses
+        // capital letters.
+        "%%low-order:\n"
+        + "    (no zero); \u0391; \u0392; \u0393; \u0394; \u0395; \u03dc; \u0396; \u0397; \u0398;\n"
+        + "    10: \u0399[>>]; 20: \u039a[>>]; 30: \u039b[>>]; 40: \u039c[>>]; 50: \u039d[>>];\n"
+        + "    60: \u039e[>>]; 70: \u039f[>>]; 80: \u03a0[>>]; 90: \u03de[>>];\n"
+        + "    100: \u03a1[>>]; 200: \u03a3[>>]; 300: \u03a4[>>]; 400: \u03a5[>>];\n"
+        + "    500: \u03a6[>>]; 600: \u03a7[>>]; 700: \u03a8[>>]; 800: \u03a9[>>];\n"
+        + "    900: \u03e0[>>];\n"
+               // the thousands are represented by the same numbers as the ones, but
+               // with a comma-like mark added to their left shoulder
+        + "    1000: \u0391\u0313[>>]; 2000: \u0392\u0313[>>]; 3000: \u0393\u0313[>>];\n"
+        + "    4000: \u0394\u0313[>>]; 5000: \u0395\u0313[>>]; 6000: \u03dc\u0313[>>];\n"
+        + "    7000: \u0396\u0313[>>]; 8000: \u0397\u0313[>>]; 9000: \u0398\u0313[>>];\n"
+        // this rule set is the same as above, but uses lowercase letters.  It is used
+        // for formatting the groups in numbers above 10,000.
+        + "%%high-order:\n"
+        + "    (no zero); \u03b1; \u03b2; \u03b3; \u03b4; \u03b5; \u03dc; \u03b6; \u03b7; \u03b8;\n"
+        + "    10: \u03b9[>>]; 20: \u03ba[>>]; 30: \u03bb[>>]; 40: \u03bc[>>]; 50: \u03bd[>>];\n"
+        + "    60: \u03be[>>]; 70: \u03bf[>>]; 80: \u03c0[>>]; 90: \u03de[>>];\n"
+        + "    100: \u03c1[>>]; 200: \u03c3[>>]; 300: \u03c4[>>]; 400: \u03c5[>>];\n"
+        + "    500: \u03c6[>>]; 600: \u03c7[>>]; 700: \u03c8[>>]; 800: \u03c9[>>];\n"
+               // 900 uses the same obsolete letter (sampi, U+03E0) as
+               // %%low-order, just as 6 and 90 reuse the %%low-order letters;
+               // it previously used pi (U+03C0), which is already the letter for 80
+        + "    900: \u03e0[>>];\n"
+        + "    1000: \u03b1\u0313[>>]; 2000: \u03b2\u0313[>>]; 3000: \u03b3\u0313[>>];\n"
+        + "    4000: \u03b4\u0313[>>]; 5000: \u03b5\u0313[>>]; 6000: \u03dc\u0313[>>];\n"
+        + "    7000: \u03b6\u0313[>>]; 8000: \u03b7\u0313[>>]; 9000: \u03b8\u0313[>>];\n"
+        // the main rule set
+        + "%main:\n"
+               // for values below 10,000, just use %%low-order
+        + "    =%%low-order=;\n"
+               // for values above 10,000, split into two groups of four digits
+               // and format each with %%high-order (putting an M in between)
+        + "    10,000: <%%high-order<\u039c>%%high-order>;\n"
+               // for values above 100,000,000, add another group onto the front
+               // and another M.  The rule is terminated with ";" like every
+               // other rule, so the trailing newline is not part of its text
+        + "    100,000,000: <%%high-order<\u039c>>;\n";
+
+    /**
+     * A list of all the sample rule sets, used by the demo program.  The
+     * entries are the rule-description strings declared in this class; the
+     * order here is the order that sampleRuleSetNames follows.
+     */
+    public static final String[] sampleRuleSets =
+        { usEnglish,
+          ukEnglish,
+          spanish,
+          french,
+          swissFrench,
+          german,
+          italian,
+          swedish,
+          dutch,
+          japanese,
+          greek,
+          russian,
+          hebrew,
+          ordinal,
+          message1,
+          dollarsAndCents,
+          decimalAsFraction,
+          closestFraction,
+          stock,
+          abbEnglish,
+          units,
+          message2,
+          dozens,
+          durationInSeconds,
+          durationInHours,
+          poundsShillingsAndPence,
+          arabicNumerals,
+          wordsForDigits,
+          chinesePlaceValue,
+          romanNumerals,
+          hebrewAlphabetic,
+          greekAlphabetic };
+
+    /**
+     * The displayable names for all the sample rule sets, in the same order as
+     * the preceding array (entry i names sampleRuleSets[i]).
+     */
+    public static final String[] sampleRuleSetNames =
+        { "English (US)",
+          "English (UK)",
+          "Spanish",
+          "French (France)",
+          "French (Switzerland)",
+          "German",
+          "Italian",
+          "Swedish",
+          "Dutch",
+          "Japanese",
+          "Greek",
+          "Russian",
+          "Hebrew",
+          "English ordinal abbreviations",
+          "Simple message formatting",
+          "Dollars and cents",
+          "Decimals as fractions",
+          "Closest fraction",
+          "Stock prices",
+          "Abbreviated US English",
+          "Changing dimensions",
+          "Complex message formatting",
+          "Dozens",
+          "Duration (value in seconds)",
+          "Duration (value in hours)",
+          "Pounds, shillings, and pence",
+          "Arabic numerals",
+          "Words for digits",
+          "Chinese place-value notation",
+          "Roman numerals",
+          "Hebrew alphabetic numerals",
+          "Greek alphabetic numerals" };
+
+    /**
+     * The base locale for each of the sample rule sets.  The locale is used to
+     * determine DecimalFormat behavior, lenient-parse behavior, and text-display
+     * selection (we have a hack in here to allow display of non-Latin scripts).
+     * Null means the locale setting is irrelevant and the default can be used.
+     */
+    public static final Locale[] sampleRuleSetLocales =
+        { Locale.US,                    // usEnglish
+          Locale.UK,                    // ukEnglish
+          new Locale("es", "", ""),     // spanish
+          Locale.FRANCE,                // french
+          new Locale("fr", "CH", ""),   // swissFrench
+          Locale.GERMAN,                // german
+          Locale.ITALIAN,               // italian
+          new Locale("sv", "", ""),     // swedish
+          new Locale("nl", "", ""),     // dutch
+          Locale.JAPANESE,              // japanese
+          new Locale("el", "", ""),     // greek
+          new Locale("ru", "", ""),     // russian
+          new Locale("iw", "", ""),     // hebrew
+          Locale.ENGLISH,               // ordinal
+          Locale.ENGLISH,               // message1
+          Locale.US,                    // dollarsAndCents
+          Locale.ENGLISH,               // decimalAsFraction
+          null,                         // closestFraction
+          null,                         // stock
+          Locale.ENGLISH,               // abbEnglish
+          null,                         // units
+          Locale.ENGLISH,               // message2
+          Locale.ENGLISH,               // dozens
+          null,                         // durationInSeconds
+          null,                         // durationInHours
+          Locale.UK,                    // poundsShillingsAndPence
+          null,                         // arabicNumerals
+          Locale.ENGLISH,               // wordsForDigits
+          new Locale("zh", "", ""),     // chinesePlaceValue
+          null,                         // romanNumerals
+          new Locale("iw", "", ""),     // hebrewAlphabetic
+          new Locale("el", "", ""),     // greekAlphabetic
+          null };                       // 33rd entry, one more than sampleRuleSets; presumably for a custom rule set - TODO confirm against the demo
+
+        public static final String[] sampleRuleSetCommentary = { // one description per sample rule set, in sampleRuleSets order, followed by a last entry describing a custom rule set
+            "This demonstration version of the "
+            + "U.S. English spellout rules has four variants: 1) %simplified is a "
+            + "set of rules showing the simple method of spelling out numbers in "
+            + "English: 289 is formatted as \"two hundred eighty-nine\".  2) %alt-teens "
+            + "is the same as %simplified, except that values between 1,000 and 9,999 "
+            + "whose hundreds place isn't zero are formatted in hundreds.  For example, "
+            + "1,983 is formatted as \"nineteen hundred eighty-three,\" and 2,183 is "
+            + "formatted as \"twenty-one hundred eighty-three,\" but 2,083 is still "
+            + "formatted as \"two thousand eighty-three.\"  3) %ordinal formats the "
+            + "values as ordinal numbers in English (e.g., 289 is \"two hundred eighty-"
+            + "ninth\").  4) %default uses a more complicated algorithm to format "
+            + "numbers in a more natural way: 289 is formatted as \"two hundred AND "
+            + "eighty-nine\" and commas are inserted between the thousands groups for "
+            + "values above 100,000.",
+
+            "U.K. English has one significant "
+            + "difference from U.S. English: the names for values of 1,000,000,000 "
+            + "and higher.  In American English, each successive \"-illion\" is 1,000 "
+            + "times greater than the preceding one: 1,000,000,000 is \"one billion\" "
+            + "and 1,000,000,000,000 is \"one trillion.\"  In British English, each "
+            + "successive \"-illion\" is one million times greater than the one before: "
+            + "\"one billion\" is 1,000,000,000,000 (or what Americans would call a "
+            + "\"trillion\"), and \"one trillion\" is 1,000,000,000,000,000,000.  "
+            + "1,000,000,000 in British English is \"one thousand million.\"  (This "
+            + "value is sometimes called a \"milliard,\" but this word seems to have "
+            + "fallen into disuse.)",
+
+            "The Spanish rules are quite similar to "
+            + "the English rules, but there are some important differences: "
+            + "First, we have to provide separate rules for most of the twenties "
+            + "because the ones digit frequently picks up an accent mark that it "
+            + "doesn't have when standing alone.  Second, each multiple of 100 has "
+            + "to be specified separately because the multiplier on 100 very often "
+            + "changes form in the contraction: 500 is \"quinientos,\" not "
+            + "\"cincocientos.\"  In addition, the word for 100 is \"cien\" when "
+            + "standing alone, but changes to \"ciento\" when followed by more digits.  "
+            + "There are also some other differences.",
+
+            "French adds some interesting quirks of its "
+            + "own: 1) The word \"et\" is interposed between the tens and ones digits, "
+            + "but only if the ones digit is 1: 20 is \"vingt,\" and 22 is \"vingt-deux,\" "
+            + "but 21 is \"vingt-et-un.\"  2)  There are no words for 70, 80, or 90.  "
+            + "\"quatre-vingts\" (\"four twenties\") is used for 80, and values proceed "
+            + "by score from 60 to 99 (e.g., 73 is \"soixante-treize\" [\"sixty-thirteen\"]).  "
+            + "Numbers from 1,100 to 1,199 are rendered as hundreds rather than "
+            + "thousands: 1,100 is \"onze cents\" (\"eleven hundred\"), rather than "
+            + "\"mille cent\" (\"one thousand one hundred\")",
+
+            "Swiss French differs from French French "
+            + "in that it does have words for 70, 80, and 90.  This rule set shows them, "
+            + "and is simpler as a result.",
+
+            "German also adds some interesting "
+            + "characteristics.  For values below 1,000,000, numbers are customarily "
+            + "written out as a single word.  And the ones digit PRECEDES the tens "
+            + "digit (e.g., 23 is \"dreiundzwanzig,\" not \"zwanzigunddrei\").",
+
+            "Like German, most Italian numbers are "
+            + "written as single words.  What makes these rules complicated is the rule "
+            + "that says that when a word ending in a vowel and a word beginning with "
+            + "a vowel are combined into a compound, the vowel is dropped from the "
+            + "end of the first word: 180 is \"centottanta,\" not \"centoottanta.\"  "
+            + "The complexity of this rule set is to produce this behavior.",
+
+            "Spellout rules for Swedish.",
+
+            "Spellout rules for Dutch.  Notice that in Dutch, as in German, "
+            + "the ones digit precedes the tens digit.",
+
+            "In Japanese, there really isn't any "
+            + "distinction between a number written out in digits and a number "
+            + "written out in words: the ideographic characters are both digits "
+            + "and words.  This rule set provides two variants:  %traditional "
+            + "uses the traditional CJK numerals (which are also used in China "
+            + "and Korea).  %financial uses alternate ideographs for many numbers "
+            + "that are harder to alter than the traditional numerals (one could "
+            + "fairly easily change a one to "
+            + "a three just by adding two strokes, for example).  This is also done in "
+            + "the other countries using Chinese ideographs, but different ideographs "
+            + "are used in those places.",
+
+            "Again in Greek we have to supply the words "
+            + "for the multiples of 100 because they can't be derived algorithmically.  "
+            + "Also, the tens digit changes form when followed by a ones digit: an "
+            + "accent mark disappears from the tens digit and moves to the ones digit.  "
+            + "Therefore, instead of using the [] notation, we actually have to use "
+            + "two separate rules for each multiple of 10 to show the two forms of "
+            + "the word.",
+
+            "Spellout rules for Russian.",
+
+            "Spellout rules for Hebrew.  Hebrew actually has inflected forms for "
+            + "most of the lower-order numbers.  The masculine forms are shown "
+            + "here.",
+
+            "This rule set adds an English ordinal abbreviation to the end of a "
+            + "number.  For example, 2 is formatted as \"2nd\".  Parsing doesn't work with "
+            + "this rule set.  To parse, use DecimalFormat on the numeral.",
+
+            "This is a simple message-formatting example.  Normally one would "
+            + "use ChoiceFormat and MessageFormat to do something this simple, "
+            + "but this shows it could be done with RuleBasedNumberFormat too.  "
+            + "A message-formatting example that might work better with "
+            + "RuleBasedNumberFormat appears later.",
+
+            "The next few examples demonstrate fraction handling.  "
+            + "This example formats a number in one of the two styles often used "
+            + "on checks.  %dollars-and-hundredths formats cents as hundredths of "
+            + "a dollar (23.40 comes out as \"twenty-three and 40/100 dollars\").  "
+            + "%dollars-and-cents formats in dollars and cents (23.40 comes out as "
+            + "\"twenty-three dollars and forty cents\")",
+
+            "This rule set shows the fractional part of the number as a fraction "
+            + "with a power of 10 as the denominator.  Some languages don't spell "
+            + "out the fractional part of a number as \"point one two three,\" but "
+            + "always render it as a fraction.  If we still want to treat the fractional "
+            + "part of the number as a decimal, then the fraction's denominator "
+            + "is always a power of 10.  This example does that: 23.125 is formatted "
+            + "as \"twenty-three and one hundred twenty-five thousandths\" (as opposed "
+            + "to \"twenty-three point one two five\" or \"twenty-three and one eighth\").",
+
+            "Number with closest fraction.  This example formats a value using "
+            + "numerals, but shows the fractional part as a ratio (fraction) rather "
+            + "than a decimal.  The fraction always has a denominator between 2 and 10.",
+
+            "American stock-price formatting.  Non-integral stock prices are still "
+            + "generally shown in eighths or sixteenths of dollars instead of dollars "
+            + "and cents.  This example formats stock prices in this way if possible, "
+            + "and in dollars and cents if not.",
+
+            "The next few examples demonstrate using a RuleBasedNumberFormat to "
+            + "change the units a value is denominated in depending on its magnitude.  "
+            + "The example shows large numbers the way they often appear in newspapers: "
+            + "1,200,000 is formatted as \"1.2 million\".",
+
+            "This example takes a number of meters and formats it in whatever unit "
+            + "will produce a number with from one to three digits before the decimal "
+            + "point.  For example, 230,000 is formatted as \"230 km\".",
+
+            "A more complicated message-formatting example.  Here, in addition to "
+            + "handling the singular and plural versions of the word, the value is "
+            + "denominated in bytes, kilobytes, or megabytes depending on its magnitude.  "
+            + "Also notice that it correctly treats a kilobyte as 1,024 bytes (not 1,000), "
+            + "and a megabyte as 1,024 kilobytes (not 1,000).",
+
+            "This example formats a number in dozens and gross.  This is intended to "
+            + "demonstrate how this rule set can be used to format numbers in systems "
+            + "other than base 10.  The \"/12\" after the rules' base values controls this.  "
+            + "Also notice that the base doesn't have to be consistent throughout the "
+            + "whole rule set: we go back to base 10 for values over 1,000.",
+
+            "The next few examples show how a single value can be divided up into major "
+            + "and minor units that don't relate to each other by a factor of 10.  "
+            + "This example formats a number of seconds in sexagesimal notation "
+            + "(i.e., hours, minutes, and seconds).  %with-words formats it with "
+            + "words (3740 is \"1 hour, 2 minutes, 20 seconds\") and %in-numerals "
+            + "formats it entirely in numerals (3740 is \"1:02:20\").",
+
+            "This example formats a number of hours in sexagesimal notation (i.e., "
+            + "hours, minutes, and seconds).  %with-words formats the value using "
+            + "words for the units, and %in-numerals formats the value using only "
+            + "numerals.",
+
+            "This rule set formats a number of pounds as pounds, shillings, and "
+            + "pence in the old English system of currency.",
+
+            "These examples show how RuleBasedNumberFormat can be used to format "
+            + "numbers using non-positional numeration systems.  "
+            + "This example formats numbers in Arabic numerals.  "
+            + "Normally, you'd do this with DecimalFormat, but this shows that "
+            + "RuleBasedNumberFormat can handle it too.",
+
+            "This example follows the same pattern as the Arabic-numerals "
+            + "example, but uses words for the various digits (e.g., 123 comes "
+            + "out as \"one two three\").",
+
+            "This example formats numbers using Chinese characters in the Arabic "
+            + "place-value method.  This was used historically in China for a while.",
+
+            "Roman numerals.  This example has two variants: %modern shows how large "
+            + "numbers are usually handled today; %historical uses the older symbols for "
+            + "thousands.  Not all of the characters are displayable with most fonts.",
+
+            "Hebrew alphabetic numerals.  Before adoption of Arabic numerals, Hebrew speakers "
+            + "used the letters of their alphabet as numerals.  The first nine letters of "
+            + "the alphabet represented the values from 1 to 9, the second nine letters the "
+            + "multiples of 10, and the remaining letters the multiples of 100.  Since they "
+            + "ran out of letters at 400, the remaining multiples of 100 were represented "
+            + "using combinations of the existing letters for the hundreds.  Numbers were "
+            + "distinguished from words in a number of different ways: the way shown here "
+            + "uses a single mark after a number consisting of one letter, and a double "
+            + "mark between the last two letters of a number consisting of two or more "
+            + "letters.  Two dots over a letter multiplied its value by 1,000.  Also, since "
+            + "the letter for 10 is the first letter of God's name and the letters for 5 and 6 "
+            + "are letters in God's name, which wasn't supposed to be written or spoken, 15 and "
+            + "16 were usually written as 9 + 6 and 9 + 7 instead of 10 + 5 and 10 + 6.",
+
+            "Greek alphabetic numerals.  The Greeks, before adopting the Arabic numerals, "
+            + "also used the letters of their alphabet as numerals.  There are three now-"
+            + "obsolete Greek letters that are used as numerals; many fonts don't have them.  "
+            + "Large numbers were handled many different ways; the way shown here divides "
+            + "large numbers into groups of four letters (factors of 10,000), and separates "
+            + "the groups with the capital letter mu (for myriad).  Capital letters are used "
+            + "for values below 10,000; small letters for higher numbers (to make the capital "
+            + "mu stand out).",
+
+            "This is a custom (user-defined) rule set."
+        };
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/rbnf/package.html b/demos/src/com/ibm/icu/dev/demo/rbnf/package.html
new file mode 100644
index 00000000000..8a0507f1ff3
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/rbnf/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+RuleBasedNumberFormat demo application.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java b/demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java
new file mode 100644
index 00000000000..72d83048a99
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java
@@ -0,0 +1,78 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2008, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ */
+
+package com.ibm.icu.dev.demo.timescale;
+
+import java.util.Locale;
+
+import com.ibm.icu.text.MessageFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.SimpleTimeZone;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.UniversalTimeScale;
+
+/**
+ * This class demonstrates how to use UniversalTimeScale to
+ * convert from one local time scale to another.
+ * 
+ * @see UniversalTimeScale
+ */
+public class PivotDemo {
+
+    /**
+     * The default constructor.
+     */
+    public PivotDemo()
+    {
+    }
+
+    /**
+     * The main() method uses UniversalTimeScale to
+     * convert from the Java and Unix time scales to the ICU time scale. It uses
+     * a Calendar object to display the ICU time values.
+     * 
+     * @param args the command line arguments.
+     */
+    public static void main(String[] args)
+    {
+        TimeZone utc = new SimpleTimeZone(0, "UTC");
+        Calendar cal = Calendar.getInstance(utc, Locale.ENGLISH);
+        MessageFormat fmt = new MessageFormat("{1} = {0, date, full} {0, time, full}");
+        Object arguments[] = {cal, null};
+        
+        arguments[0] = cal;
+        
+        System.out.println("\nJava test:");
+        cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0, UniversalTimeScale.JAVA_TIME), UniversalTimeScale.ICU4C_TIME));
+        arguments[1] = " 000000000000000";
+        System.out.println(fmt.format(arguments));
+        
+        cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(-62164684800000L, UniversalTimeScale.JAVA_TIME), UniversalTimeScale.ICU4C_TIME));
+        arguments[1] = "-62164684800000L";
+        System.out.println(fmt.format(arguments));
+        
+        cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(-62135769600000L, UniversalTimeScale.JAVA_TIME), UniversalTimeScale.ICU4C_TIME));
+        arguments[1] = "-62135769600000L";
+        System.out.println(fmt.format(arguments));
+        
+        System.out.println("\nUnix test:");
+        
+        cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0x80000000, UniversalTimeScale.UNIX_TIME), UniversalTimeScale.ICU4C_TIME));
+        arguments[1] = "0x80000000";
+        System.out.println(fmt.format(arguments));
+        
+        cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0, UniversalTimeScale.UNIX_TIME), UniversalTimeScale.ICU4C_TIME));
+        arguments[1] = "0x00000000";
+        System.out.println(fmt.format(arguments));
+        
+        cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0x7FFFFFFF, UniversalTimeScale.UNIX_TIME), UniversalTimeScale.ICU4C_TIME));
+        arguments[1] = "0x7FFFFFFF";
+        System.out.println(fmt.format(arguments));
+        
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java b/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java
new file mode 100644
index 00000000000..3f458d8199a
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java
@@ -0,0 +1,308 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Replaceable;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeFilter;
+
+public class AnyTransliterator extends Transliterator {
+    
+    static final boolean DEBUG = false;
+    private String targetName;
+    private RunIterator it;
+    private Position run;
+    
+    
+    public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){
+        super("Any-" + targetName, filter);
+        this.targetName = targetName;
+        this.it = it;
+        run = new Position();
+    }
+    
+    public AnyTransliterator(String targetName, UnicodeFilter filter){
+        this(targetName, filter, new ScriptRunIterator());
+    }
+    
+    static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
+    
+    protected void handleTransliterate(Replaceable text,
+                                       Position offsets, boolean isIncremental) {
+        if (DEBUG) {
+            System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
+                + ", " + toString(offsets));
+        }
+        it.reset(text, offsets);
+        
+        while (it.next(run)) {
+            if (targetName.equalsIgnoreCase(it.getName())) {
+                if (DEBUG) System.out.println("Skipping identical: " + targetName);
+                run.start = run.limit; // show we processed
+                continue; // skip if same
+            }
+            
+            Transliterator t;
+            String id = it.getName() + '-' + targetName;
+            try {
+                t = Transliterator.getInstance(id);
+            } catch (IllegalArgumentException ex) {
+                if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
+                id = it.getName() + "-Latin; Latin-" + targetName;
+                try {
+                    t = Transliterator.getInstance(id);
+                } catch (IllegalArgumentException ex2) {
+                    if (DEBUG) System.out.println("Couldn't find: " + id);
+                    continue;
+                }
+            }
+            // TODO catch error later!!
+                
+            if (DEBUG) {
+                System.out.println(t.getID());
+                System.out.println("input: " + hex.transliterate(text.toString())
+                 + ", " + toString(run));
+            }
+            
+            if (isIncremental && it.atEnd()) {
+                t.transliterate(text, run);
+            } else {
+                t.finishTransliteration(text, run);
+            }
+            // adjust the offsets in line with the changes
+            it.adjust(run.limit);
+            
+            if (DEBUG) {
+                System.out.println("output: " + hex.transliterate(text.toString())
+                 + ", " + toString(run));
+            }
+        }
+
+        // show how far we got!
+        it.getExpanse(offsets);
+        if (run.start == run.limit) offsets.start = offsets.limit;
+        else offsets.start = run.start;
+        if (DEBUG) {
+            System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
+            System.out.println();
+        }
+    }
+    
+    // should be method on Position
+    public static String toString(Position offsets) {
+        return "[cs: " + offsets.contextStart
+                + ", s: " + offsets.start
+                + ", l: " + offsets.limit
+                + ", cl: " + offsets.contextLimit
+                + "]";
+    }
+    
+    public interface RunIterator {
+        public void reset(Replaceable text, Position expanse);
+        public void getExpanse(Position run);
+        public void reset();
+        public boolean next(Position run);
+        public void getCurrent(Position run);
+        public String getName();
+        public void adjust(int newCurrentLimit);
+        public boolean atEnd();
+    }
+    
+    /**
+     * Returns a series of ranges corresponding to scripts. They will be of the form:
+     * ccccSScSSccccTTcTcccc    - where c is common, S is the first script and T is the second
+     *|            |            - first run
+     *         |            |    - second run
+     * That is, the runs will overlap. The reason for this is so that a transliterator can
+     * consider common characters both before and after the scripts.
+     * The only time that contextStart != start is for the first run 
+     *    (the context is the start context of the entire expanse)
+     * The only time that contextLimit != limit is for the last run 
+     *    (the context is the end context of the entire expanse)
+     */
+    public static class ScriptRunIterator implements RunIterator {
+        private Replaceable text;
+        private Position expanse = new Position();
+        private Position current = new Position();
+        private int script;
+        private boolean done = true;
+        
+
+        public void reset(Replaceable repText, Position expansePos) {
+            set(this.expanse, expansePos);
+            this.text = repText;
+            reset();
+        }
+            
+        public void reset() {
+            done = false;
+            //this.expanse = expanse;
+            script = UScript.INVALID_CODE;
+            // set up first range to be empty, at beginning
+            current.contextStart = expanse.contextStart;
+            current.start = current.limit = current.contextLimit = expanse.start;            
+        }
+            
+        public boolean next(Position run) {
+            if (done) return false;
+            if (DEBUG) {
+                System.out.println("+cs: " + current.contextStart
+                    + ", s: " + current.start
+                    + ", l: " + current.limit
+                    + ", cl: " + current.contextLimit);
+            }
+            // reset start context run to the last end
+            current.start = current.limit;
+            
+            // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
+            int i, cp;
+            int limit = expanse.start;
+            for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
+                cp = text.char32At(i);
+                int scrpt = UScript.getScript(cp);
+                if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
+            }
+            current.start = i;
+            current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
+            
+            // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
+            int lastScript = UScript.COMMON;
+            //int veryLastScript = UScript.COMMON;
+            limit = expanse.limit; 
+            for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
+                cp = text.char32At(i);
+                int scrpt = UScript.getScript(cp);
+                if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
+                if (scrpt != UScript.COMMON) {
+                    // if we find a real script:
+                    //   if we already had a script, bail
+                    //   otherwise set our script
+                    if (lastScript == UScript.COMMON) lastScript = scrpt;
+                    else if (lastScript != scrpt) break;
+                }
+            }
+            current.limit = i;
+            current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
+            done = (i == limit);
+            script = lastScript;
+            
+            if (DEBUG) {
+                System.out.println("-cs: " + current.contextStart
+                    + ", s: " + current.start
+                    + ", l: " + current.limit
+                    + ", cl: " + current.contextLimit);
+            }
+            
+            set(run, current);
+            return true;
+        }
+        
+        // SHOULD BE METHOD ON POSITION
+        public static void set(Position run, Position current) {
+            run.contextStart = current.contextStart;
+            run.start = current.start;
+            run.limit = current.limit;
+            run.contextLimit = current.contextLimit;
+        }
+        
+        public boolean atEnd() {
+            return current.limit == expanse.limit;
+        }
+        
+        public void getCurrent(Position run) {
+            set(run, current);
+        }
+        
+        public void getExpanse(Position run) {
+            set(run, expanse);
+        }
+        
+        public String getName() {
+            return UScript.getName(script);
+        }
+        
+        public void adjust(int newCurrentLimit) {
+            if (expanse == null) {
+                throw new IllegalArgumentException("Must reset() before calling");
+            }
+            int delta = newCurrentLimit - current.limit;
+            current.limit += delta;
+            current.contextLimit += delta;
+            expanse.limit += delta;
+            expanse.contextLimit += delta;
+        }
+        
+        // register Any-Script for every script.
+        
+        private static Set scriptList = new HashSet();
+        
+        public static void registerAnyToScript() {
+            synchronized (scriptList) {
+                Enumeration sources = Transliterator.getAvailableSources();
+                while(sources.hasMoreElements()) {
+                    String source = (String) sources.nextElement();
+                    if (source.equals("Any")) continue; // to keep from looping
+                    
+                    Enumeration targets = Transliterator.getAvailableTargets(source);
+                    while(targets.hasMoreElements()) {
+                        String target = (String) targets.nextElement();
+                        if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
+                        if (scriptList.contains(target)) continue; // already encountered
+                        scriptList.add(target); // otherwise add for later testing
+                        
+                        Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
+                        if (variantSet.size() < 2) {
+                            AnyTransliterator at = new AnyTransliterator(target, null);
+                            DummyFactory.add(at.getID(), at);
+                        } else {
+                            Iterator variants = variantSet.iterator();
+                            while(variants.hasNext()) {
+                                String variant = (String) variants.next();
+                                AnyTransliterator at = new AnyTransliterator(
+                                    (variant.length() > 0) ? target + "/" + variant : target, null);
+                                DummyFactory.add(at.getID(), at);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        
+        static class DummyFactory implements Transliterator.Factory {
+            static DummyFactory singleton = new DummyFactory();
+            static HashMap m = new HashMap();
+            // Transliterators are immutable, so they are stored and returned without cloning.
+            static void add(String ID, Transliterator t) {
+                m.put(ID, t);
+                String rules = t.toRules(true);
+                System.out.println("Registering: " + ID + ", " + rules);
+                Transliterator.registerFactory(ID, singleton);
+            }
+            public Transliterator getInstance(String ID) {
+                return (Transliterator) m.get(ID);
+            }
+        }
+        
+        // Nice little Utility for converting Enumeration to collection
+        static Set add(Set s, Enumeration enumeration) {
+            for (; enumeration.hasMoreElements(); ) {
+                s.add(enumeration.nextElement()); // drain the enumeration into s
+            }
+            return s;
+        }
+        
+        
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java b/demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java
new file mode 100644
index 00000000000..b2b477ab42e
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java
@@ -0,0 +1,560 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+
+package com.ibm.icu.dev.demo.translit;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Incrementally returns the set of all strings that case-fold to the same value.
+ */
+public class CaseIterator {
+    
+    // Transliterators used only for diagnostic and generated output (DUMP, GENERATE, main)
+    static Transliterator toName = Transliterator.getInstance("[:^ascii:] Any-Name");
+    static Transliterator toHex = Transliterator.getInstance("[:^ascii:] Any-Hex");
+    static Transliterator toHex2 = Transliterator.getInstance("[[^\u0021-\u007F]-[,]] Any-Hex");
+    
+    // global tables, filled in by the static initializer (could be precompiled)
+    private static Map fromCaseFold = new HashMap(); // case-fold result -> the strings that fold to it
+    private static Map toCaseFold = new HashMap(); // string -> its case-fold result
+    private static int maxLength = 0; // longest case-fold result seen, in UTF-16 code units
+    
+    // The exception list below is generated on the console by turning on the GENERATE flag,
+    // which MUST be false for normal operation.
+    // Once the list is generated, it is pasted in here.
+    // A bit of a kludge, but this bootstrapping is the easiest way
+    // to get around certain complications in the data.
+    
+    private static final boolean GENERATE = false;
+
+    private static final boolean DUMP = false; // true: static init prints maxLength and both tables
+    
+    private static String[][] exceptionList = {
+        // a\N{MODIFIER LETTER RIGHT HALF RING}
+        {"a\u02BE","A\u02BE","a\u02BE",},
+        // ff
+        {"ff","FF","Ff","fF","ff",},
+        // ffi
+        {"ffi","FFI","FFi","FfI","Ffi","F\uFB01","fFI","fFi","ffI","ffi","f\uFB01","\uFB00I","\uFB00i",},
+        // ffl
+        {"ffl","FFL","FFl","FfL","Ffl","F\uFB02","fFL","fFl","ffL","ffl","f\uFB02","\uFB00L","\uFB00l",},
+        // fi
+        {"fi","FI","Fi","fI","fi",},
+        // fl
+        {"fl","FL","Fl","fL","fl",},
+        // h\N{COMBINING MACRON BELOW}
+        {"h\u0331","H\u0331","h\u0331",},
+        // i\N{COMBINING DOT ABOVE}
+        {"i\u0307","I\u0307","i\u0307",},
+        // j\N{COMBINING CARON}
+        {"j\u030C","J\u030C","j\u030C",},
+        // ss
+        {"ss","SS","Ss","S\u017F","sS","ss","s\u017F","\u017FS","\u017Fs","\u017F\u017F",},
+        // st
+        {"st","ST","St","sT","st","\u017FT","\u017Ft",},
+        // t\N{COMBINING DIAERESIS}
+        {"t\u0308","T\u0308","t\u0308",},
+        // w\N{COMBINING RING ABOVE}
+        {"w\u030A","W\u030A","w\u030A",},
+        // y\N{COMBINING RING ABOVE}
+        {"y\u030A","Y\u030A","y\u030A",},
+        // \N{MODIFIER LETTER APOSTROPHE}n
+        {"\u02BCn","\u02BCN","\u02BCn",},
+        // \N{GREEK SMALL LETTER ALPHA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
+        {"\u03AC\u03B9","\u0386\u0345","\u0386\u0399","\u0386\u03B9","\u0386\u1FBE","\u03AC\u0345","\u03AC\u0399","\u03AC\u03B9","\u03AC\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
+        {"\u03AE\u03B9","\u0389\u0345","\u0389\u0399","\u0389\u03B9","\u0389\u1FBE","\u03AE\u0345","\u03AE\u0399","\u03AE\u03B9","\u03AE\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03B1\u0342","\u0391\u0342","\u03B1\u0342",},
+        // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u03B1\u0342\u03B9","\u0391\u0342\u0345","\u0391\u0342\u0399","\u0391\u0342\u03B9","\u0391\u0342\u1FBE",
+            "\u03B1\u0342\u0345","\u03B1\u0342\u0399","\u03B1\u0342\u03B9","\u03B1\u0342\u1FBE","\u1FB6\u0345",
+            "\u1FB6\u0399","\u1FB6\u03B9","\u1FB6\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER IOTA}
+        {"\u03B1\u03B9","\u0391\u0345","\u0391\u0399","\u0391\u03B9","\u0391\u1FBE","\u03B1\u0345","\u03B1\u0399","\u03B1\u03B9","\u03B1\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03B7\u0342","\u0397\u0342","\u03B7\u0342",},
+        // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u03B7\u0342\u03B9","\u0397\u0342\u0345","\u0397\u0342\u0399","\u0397\u0342\u03B9","\u0397\u0342\u1FBE",
+            "\u03B7\u0342\u0345","\u03B7\u0342\u0399","\u03B7\u0342\u03B9","\u03B7\u0342\u1FBE","\u1FC6\u0345","\u1FC6\u0399",
+            "\u1FC6\u03B9","\u1FC6\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA}\N{GREEK SMALL LETTER IOTA}
+        {"\u03B7\u03B9","\u0397\u0345","\u0397\u0399","\u0397\u03B9","\u0397\u1FBE","\u03B7\u0345","\u03B7\u0399","\u03B7\u03B9","\u03B7\u1FBE",},
+        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
+        {"\u03B9\u0308\u0300","\u0345\u0308\u0300","\u0399\u0308\u0300","\u03B9\u0308\u0300","\u1FBE\u0308\u0300",},
+        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
+        {"\u03B9\u0308\u0301","\u0345\u0308\u0301","\u0399\u0308\u0301","\u03B9\u0308\u0301","\u1FBE\u0308\u0301",},
+        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03B9\u0308\u0342","\u0345\u0308\u0342","\u0399\u0308\u0342","\u03B9\u0308\u0342","\u1FBE\u0308\u0342",},
+        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03B9\u0342","\u0345\u0342","\u0399\u0342","\u03B9\u0342","\u1FBE\u0342",},
+        // \N{GREEK SMALL LETTER RHO}\N{COMBINING COMMA ABOVE}
+        {"\u03C1\u0313","\u03A1\u0313","\u03C1\u0313","\u03F1\u0313",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
+        {"\u03C5\u0308\u0300","\u03A5\u0308\u0300","\u03C5\u0308\u0300",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
+        {"\u03C5\u0308\u0301","\u03A5\u0308\u0301","\u03C5\u0308\u0301",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03C5\u0308\u0342","\u03A5\u0308\u0342","\u03C5\u0308\u0342",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}
+        {"\u03C5\u0313","\u03A5\u0313","\u03C5\u0313",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GRAVE ACCENT}
+        {"\u03C5\u0313\u0300","\u03A5\u0313\u0300","\u03C5\u0313\u0300","\u1F50\u0300",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING ACUTE ACCENT}
+        {"\u03C5\u0313\u0301","\u03A5\u0313\u0301","\u03C5\u0313\u0301","\u1F50\u0301",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03C5\u0313\u0342","\u03A5\u0313\u0342","\u03C5\u0313\u0342","\u1F50\u0342",},
+        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03C5\u0342","\u03A5\u0342","\u03C5\u0342",},
+        // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}
+        {"\u03C9\u0342","\u03A9\u0342","\u03C9\u0342","\u2126\u0342",},
+        // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u03C9\u0342\u03B9","\u03A9\u0342\u0345","\u03A9\u0342\u0399","\u03A9\u0342\u03B9","\u03A9\u0342\u1FBE","\u03C9\u0342\u0345","\u03C9\u0342\u0399","\u03C9\u0342\u03B9","\u03C9\u0342\u1FBE","\u1FF6\u0345",
+            "\u1FF6\u0399","\u1FF6\u03B9","\u1FF6\u1FBE","\u2126\u0342\u0345","\u2126\u0342\u0399","\u2126\u0342\u03B9","\u2126\u0342\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA}\N{GREEK SMALL LETTER IOTA}
+        {"\u03C9\u03B9","\u03A9\u0345","\u03A9\u0399","\u03A9\u03B9","\u03A9\u1FBE","\u03C9\u0345","\u03C9\u0399","\u03C9\u03B9","\u03C9\u1FBE","\u2126\u0345","\u2126\u0399","\u2126\u03B9","\u2126\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
+        {"\u03CE\u03B9","\u038F\u0345","\u038F\u0399","\u038F\u03B9","\u038F\u1FBE","\u03CE\u0345","\u03CE\u0399","\u03CE\u03B9","\u03CE\u1FBE",},
+        // \N{ARMENIAN SMALL LETTER ECH}\N{ARMENIAN SMALL LETTER YIWN}
+        {"\u0565\u0582","\u0535\u0552","\u0535\u0582","\u0565\u0552","\u0565\u0582",},
+        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER ECH}
+        {"\u0574\u0565","\u0544\u0535","\u0544\u0565","\u0574\u0535","\u0574\u0565",},
+        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER INI}
+        {"\u0574\u056B","\u0544\u053B","\u0544\u056B","\u0574\u053B","\u0574\u056B",},
+        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER XEH}
+        {"\u0574\u056D","\u0544\u053D","\u0544\u056D","\u0574\u053D","\u0574\u056D",},
+        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER NOW}
+        {"\u0574\u0576","\u0544\u0546","\u0544\u0576","\u0574\u0546","\u0574\u0576",},
+        // \N{ARMENIAN SMALL LETTER VEW}\N{ARMENIAN SMALL LETTER NOW}
+        {"\u057E\u0576","\u054E\u0546","\u054E\u0576","\u057E\u0546","\u057E\u0576",},
+        // \N{GREEK SMALL LETTER ALPHA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F00\u03B9","\u1F00\u0345","\u1F00\u0399","\u1F00\u03B9","\u1F00\u1FBE","\u1F08\u0345","\u1F08\u0399","\u1F08\u03B9","\u1F08\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F01\u03B9","\u1F01\u0345","\u1F01\u0399","\u1F01\u03B9","\u1F01\u1FBE","\u1F09\u0345","\u1F09\u0399","\u1F09\u03B9","\u1F09\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F02\u03B9","\u1F02\u0345","\u1F02\u0399","\u1F02\u03B9","\u1F02\u1FBE","\u1F0A\u0345","\u1F0A\u0399","\u1F0A\u03B9","\u1F0A\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F03\u03B9","\u1F03\u0345","\u1F03\u0399","\u1F03\u03B9","\u1F03\u1FBE","\u1F0B\u0345","\u1F0B\u0399","\u1F0B\u03B9","\u1F0B\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F04\u03B9","\u1F04\u0345","\u1F04\u0399","\u1F04\u03B9","\u1F04\u1FBE","\u1F0C\u0345","\u1F0C\u0399","\u1F0C\u03B9","\u1F0C\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F05\u03B9","\u1F05\u0345","\u1F05\u0399","\u1F05\u03B9","\u1F05\u1FBE","\u1F0D\u0345","\u1F0D\u0399","\u1F0D\u03B9","\u1F0D\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F06\u03B9","\u1F06\u0345","\u1F06\u0399","\u1F06\u03B9","\u1F06\u1FBE","\u1F0E\u0345","\u1F0E\u0399","\u1F0E\u03B9","\u1F0E\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F07\u03B9","\u1F07\u0345","\u1F07\u0399","\u1F07\u03B9","\u1F07\u1FBE","\u1F0F\u0345","\u1F0F\u0399","\u1F0F\u03B9","\u1F0F\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F20\u03B9","\u1F20\u0345","\u1F20\u0399","\u1F20\u03B9","\u1F20\u1FBE","\u1F28\u0345","\u1F28\u0399","\u1F28\u03B9","\u1F28\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F21\u03B9","\u1F21\u0345","\u1F21\u0399","\u1F21\u03B9","\u1F21\u1FBE","\u1F29\u0345","\u1F29\u0399","\u1F29\u03B9","\u1F29\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F22\u03B9","\u1F22\u0345","\u1F22\u0399","\u1F22\u03B9","\u1F22\u1FBE","\u1F2A\u0345","\u1F2A\u0399","\u1F2A\u03B9","\u1F2A\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F23\u03B9","\u1F23\u0345","\u1F23\u0399","\u1F23\u03B9","\u1F23\u1FBE","\u1F2B\u0345","\u1F2B\u0399","\u1F2B\u03B9","\u1F2B\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F24\u03B9","\u1F24\u0345","\u1F24\u0399","\u1F24\u03B9","\u1F24\u1FBE","\u1F2C\u0345","\u1F2C\u0399","\u1F2C\u03B9","\u1F2C\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F25\u03B9","\u1F25\u0345","\u1F25\u0399","\u1F25\u03B9","\u1F25\u1FBE","\u1F2D\u0345","\u1F2D\u0399","\u1F2D\u03B9","\u1F2D\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F26\u03B9","\u1F26\u0345","\u1F26\u0399","\u1F26\u03B9","\u1F26\u1FBE","\u1F2E\u0345","\u1F2E\u0399","\u1F2E\u03B9","\u1F2E\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F27\u03B9","\u1F27\u0345","\u1F27\u0399","\u1F27\u03B9","\u1F27\u1FBE","\u1F2F\u0345","\u1F2F\u0399","\u1F2F\u03B9","\u1F2F\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F60\u03B9","\u1F60\u0345","\u1F60\u0399","\u1F60\u03B9","\u1F60\u1FBE","\u1F68\u0345","\u1F68\u0399","\u1F68\u03B9","\u1F68\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F61\u03B9","\u1F61\u0345","\u1F61\u0399","\u1F61\u03B9","\u1F61\u1FBE","\u1F69\u0345","\u1F69\u0399","\u1F69\u03B9","\u1F69\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F62\u03B9","\u1F62\u0345","\u1F62\u0399","\u1F62\u03B9","\u1F62\u1FBE","\u1F6A\u0345","\u1F6A\u0399","\u1F6A\u03B9","\u1F6A\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F63\u03B9","\u1F63\u0345","\u1F63\u0399","\u1F63\u03B9","\u1F63\u1FBE","\u1F6B\u0345","\u1F6B\u0399","\u1F6B\u03B9","\u1F6B\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F64\u03B9","\u1F64\u0345","\u1F64\u0399","\u1F64\u03B9","\u1F64\u1FBE","\u1F6C\u0345","\u1F6C\u0399","\u1F6C\u03B9","\u1F6C\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F65\u03B9","\u1F65\u0345","\u1F65\u0399","\u1F65\u03B9","\u1F65\u1FBE","\u1F6D\u0345","\u1F6D\u0399","\u1F6D\u03B9","\u1F6D\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F66\u03B9","\u1F66\u0345","\u1F66\u0399","\u1F66\u03B9","\u1F66\u1FBE","\u1F6E\u0345","\u1F6E\u0399","\u1F6E\u03B9","\u1F6E\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F67\u03B9","\u1F67\u0345","\u1F67\u0399","\u1F67\u03B9","\u1F67\u1FBE","\u1F6F\u0345","\u1F6F\u0399","\u1F6F\u03B9","\u1F6F\u1FBE",},
+        // \N{GREEK SMALL LETTER ALPHA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F70\u03B9","\u1F70\u0345","\u1F70\u0399","\u1F70\u03B9","\u1F70\u1FBE","\u1FBA\u0345","\u1FBA\u0399","\u1FBA\u03B9","\u1FBA\u1FBE",},
+        // \N{GREEK SMALL LETTER ETA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F74\u03B9","\u1F74\u0345","\u1F74\u0399","\u1F74\u03B9","\u1F74\u1FBE","\u1FCA\u0345","\u1FCA\u0399","\u1FCA\u03B9","\u1FCA\u1FBE",},
+        // \N{GREEK SMALL LETTER OMEGA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
+        {"\u1F7C\u03B9","\u1F7C\u0345","\u1F7C\u0399","\u1F7C\u03B9","\u1F7C\u1FBE","\u1FFA\u0345","\u1FFA\u0399","\u1FFA\u03B9","\u1FFA\u1FBE",},
+    };
+    
+    // this initializes the data used to generate the case-equivalents
+
+    static {
+        
+        // Gather up the exceptions in a form we can use: one Set per row, keyed by the row's first element
+        
+        if (!GENERATE) {
+            for (int i = 0; i < exceptionList.length; ++i) {
+                String[] exception = exceptionList[i];
+                Set s = new HashSet();
+                // copy the row into the set element by element (s.addAll(Arrays.asList(exception)) would do the same)
+                for (int j = 0; j < exception.length; ++j) {
+                    s.add(exception[j]);
+                }
+                fromCaseFold.put(exception[0], s);
+            }
+        }
+        
+        // walk through all the characters, and at every case fold result,
+        // put a set of all the characters that map to that result
+
+        boolean defaultmapping = true; // false for turkish
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+            int cat = UCharacter.getType(i);
+            if (cat == Character.UNASSIGNED || cat == Character.PRIVATE_USE) continue; // skip unassigned and private-use
+            
+            String cp = UTF16.valueOf(i);
+            String mapped = UCharacter.foldCase(cp, defaultmapping);
+            if (mapped.equals(cp)) continue; // folds to itself: nothing to record
+            
+            if (maxLength < mapped.length()) maxLength = mapped.length(); // track the longest fold result
+            
+            // at this point, have different case folding
+            
+            Set s = (Set) fromCaseFold.get(mapped);
+            if (s == null) {
+                s = new HashSet();
+                s.add(mapped); // add the case fold result itself
+                fromCaseFold.put(mapped, s);
+            }
+            s.add(cp);
+            toCaseFold.put(cp, mapped);
+            toCaseFold.put(mapped, mapped); // add mapping to self
+        }
+        
+        // Emit the final data (only when the DUMP flag is on)
+
+        if (DUMP) {
+            System.out.println("maxLength = " + maxLength);
+
+            System.out.println("\nfromCaseFold:");
+            Iterator it = fromCaseFold.keySet().iterator();
+            while (it.hasNext()) {
+                Object key = it.next();
+                System.out.print(" " + toHex2.transliterate((String)key) + ": ");
+                Set s = (Set) fromCaseFold.get(key);
+                Iterator it2 = s.iterator();
+                boolean first = true; // controls the ", " separator between items
+                while (it2.hasNext()) {
+                    if (first) {
+                        first = false;
+                    } else {
+                        System.out.print(", ");
+                    }
+                    System.out.print(toHex2.transliterate((String)it2.next()));
+                }
+                System.out.println("");
+            }
+
+            System.out.println("\ntoCaseFold:");
+            it = toCaseFold.keySet().iterator();
+            while (it.hasNext()) {
+                String key = (String) it.next();
+                String value = (String) toCaseFold.get(key);
+                System.out.println(" " + toHex2.transliterate(key) + ": " + toHex2.transliterate(value));
+            }            
+        }
+        
+        // Now convert all those sets into linear arrays
+        // The value type changes from Set to String[], so build a second map and swap it in
+        
+        // Note: This could be transformed into a single array, with offsets into it.
+        // Might be best choice in C.
+        
+        
+        Map fromCaseFold2 = new HashMap();
+        Iterator it = fromCaseFold.keySet().iterator();
+        while (it.hasNext()) {
+            Object key = it.next();
+            Set s = (Set) fromCaseFold.get(key);
+            String[] temp = new String[s.size()];
+            s.toArray(temp);
+            fromCaseFold2.put(key, temp);
+        }
+        fromCaseFold = fromCaseFold2; // from here on the map values are String[] rather than Set
+
+        // We have processed everything, so the iterator will now work
+        // The following is normally OFF. 
+        // It is here to generate (under the GENERATE flag) the static exception list.
+        // It must be at the very end of initialization, so that the iterator is functional.
+        // (easiest to do it that way)
+            
+        if (GENERATE) {
+
+            // first get small set of items that have multiple characters
+            
+            Set multichars = new TreeSet();
+            it = fromCaseFold.keySet().iterator();
+            while (it.hasNext()) {
+                String key = (String) it.next();
+                if (UTF16.countCodePoint(key) < 2) continue; // keep only keys of two or more code points
+                multichars.add(key);
+            }            
+            
+            // now we will go through each of them.
+            
+            CaseIterator ci = new CaseIterator();
+            it = multichars.iterator();
+            
+            while (it.hasNext()) {
+                String key = (String) it.next();
+                
+                // here is a nasty complication. Take 'ffi' ligature. We
+                // can't just close it, since we would miss the combination
+                // that includes the 'fi' => "fi" ligature
+                // so first do a pass through, and add substring combinations
+                // we call this a 'partial closure'
+                
+                Set partialClosure = new TreeSet();
+                partialClosure.add(key);
+                
+                if (UTF16.countCodePoint(key) > 2) {
+                    Iterator multiIt2 = multichars.iterator();
+                    while (multiIt2.hasNext()) {
+                        String otherKey = (String) multiIt2.next();
+                        if (otherKey.length() >= key.length()) continue; // only a shorter key can be a proper substring
+                        int pos = -1;
+                        while (true) {
+                            // The following is not completely general
+                            // but works for the actual cased stuff,
+                            // and should work for future characters, since we won't have
+                            // more ligatures & other oddities.
+                            pos = key.indexOf(otherKey, pos+1);
+                            if (pos < 0) break;
+                            int endPos = pos + otherKey.length();
+                            // we know we have a proper substring,
+                            // so get the combinations
+                            String[] choices = (String[]) fromCaseFold.get(otherKey);
+                            for (int ii = 0; ii < choices.length; ++ii) {
+                                String patchwork = key.substring(0, pos)
+                                    + choices[ii]
+                                    + key.substring(endPos);
+                                partialClosure.add(patchwork);
+                            }
+                        }
+                    }
+                }
+                
+                // now, for each thing in the partial closure, get its
+                // case closure and add it to the final result.
+                
+                Set closure = new TreeSet(); // this will be the real closure
+                Iterator partialIt = partialClosure.iterator();
+                while (partialIt.hasNext()) {
+                    String key2 = (String) partialIt.next();
+                    ci.reset(key2);
+                    for (String temp = ci.next(); temp != null; temp = ci.next()) {
+                        closure.add(temp);
+                    }
+                    // form closure
+                    /*String[] choices = (String[]) fromCaseFold.get(key2);
+                    for (int i = 0; i < choices.length; ++i) {
+                        ci.reset(choices[i]);
+                        String temp;
+                        while (null != (temp = ci.next())) {
+                            closure.add(temp);
+                        }
+                    }
+                    */
+                }
+                
+                // print it out, so that it can be cut and pasted back into this document.
+                
+                Iterator it2 = closure.iterator();
+                System.out.println("\t// " + toName.transliterate(key));
+                System.out.print("\t{\"" + toHex.transliterate(key) + "\",");
+                while (it2.hasNext()) {
+                    String item = (String)it2.next();
+                    System.out.print("\"" + toHex.transliterate(item) + "\",");
+                }
+                System.out.println("},");
+            }
+        }
+    }
+    
+    // ============ PRIVATE CLASS DATA ============ 
+    
+    // pieces that we will put together
+    // is not changed during iteration
+    private int count = 0;
+    private String[][] variants;
+    
+    // state information, changes during iteration
+    private boolean done = false;
+    private int[] counts;
+    
+    // internal buffer for efficiency
+    private StringBuffer nextBuffer = new StringBuffer();
+    
+    // ========================  
+
+    /**
+     * Reset to different source. Once reset, the iteration starts from the beginning.
+     * @param source The string to get case variants for
+     */
+    public void reset(String source) {
+        final int sourceLength = source.length();
+
+        // One slot per UTF-16 code unit is an upper bound on the number of pieces;
+        // the arrays may be longer than needed, and only the first 'count' slots are used.
+        counts = new int[sourceLength];
+        variants = new String[sourceLength][];
+
+        // Cut the source into consecutive pieces; each piece contributes the array
+        // of strings that share its case folding.
+        // TODO: could optimize this later to coalesce all single string pieces
+        String piece = null;
+        count = 0;
+        int start = 0;
+        while (start < sourceLength) {
+            String caseFold = null;
+
+            if (GENERATE) {
+                // bootstrap mode: consider exactly one code point
+                piece = UTF16.valueOf(source, start);
+                caseFold = (String) toCaseFold.get(piece);
+            } else {
+                // normal mode: try the *longest* candidate first, then shrink it
+                int limit = Math.min(start + maxLength, sourceLength);
+                for (int end = limit; end > start && caseFold == null; --end) {
+                    piece = source.substring(start, end);
+                    caseFold = (String) toCaseFold.get(piece);
+                }
+            }
+
+            if (caseFold != null) {
+                variants[count++] = (String[]) fromCaseFold.get(caseFold);
+            } else {
+                // no match: fall back to one code point that is its own only variant
+                piece = UTF16.valueOf(source, start);
+                variants[count++] = new String[] {piece};
+            }
+
+            // step past the piece just consumed
+            start += piece.length();
+        }
+        reset();
+    }
+    
+    /**
+     * Restart the iteration from the beginning, but with same source
+     */
+    public void reset() {
+        for (int slot = count - 1; slot >= 0; --slot) {
+            counts[slot] = 0; // every piece goes back to its first variant
+        }
+        done = false;
+    }
+    
+    /**
+     * Iterates through the case variants.
+     * @return next case variant. Each variant will case-fold to the same value as the source will.
+     * When the iteration is done, null is returned.
+     */
+    public String next() {
+        if (done) {
+            return null;
+        }
+
+        // TODO Optimize so we keep the piece before and after the current position
+        // so we don't have so much concatenation
+
+        // Assemble the current variant: the selected alternative of each piece, in order.
+        nextBuffer.setLength(0);
+        for (int piece = 0; piece < count; ++piece) {
+            nextBuffer.append(variants[piece][counts[piece]]);
+        }
+        final String result = nextBuffer.toString();
+
+        // Advance like an odometer from the rightmost piece: bump its index, and
+        // whenever an index runs past its alternatives, wrap it to 0 and carry left.
+        int position = count - 1;
+        while (position >= 0) {
+            if (++counts[position] < variants[position].length) {
+                break;
+            }
+            counts[position] = 0;
+            --position;
+        }
+        // Carrying off the left end means every combination has been produced.
+        if (position < 0) {
+            done = true;
+        }
+        return result;
+    }
+        
+        
+    /**
+     * Temporary test, just to see how the stuff works.
+     */
+    static public void main(String[] args) {
+        CaseIterator ci = new CaseIterator();
+        String[] testCases = {"fiss", "h\u03a3"};
+
+        // Part 1: print every case variant of each sample, followed by the variant count.
+        for (int t = 0; t < testCases.length; ++t) {
+            String sample = testCases[t];
+            System.out.println();
+            System.out.println("Testing: " + toName.transliterate(sample));
+            System.out.println();
+            ci.reset(sample);
+            int total = 0;
+            String variant;
+            while ((variant = ci.next()) != null) {
+                System.out.println(toName.transliterate(variant));
+                ++total;
+            }
+            System.out.println("Total: " + total);
+        }
+
+        // Part 2: generate a list of all caseless characters -- characters whose
+        // case closure is themselves.
+        UnicodeSet caseless = new UnicodeSet();
+        for (int codePoint = 0; codePoint <= 0x10FFFF; ++codePoint) {
+            String cp = UTF16.valueOf(codePoint);
+            ci.reset(cp);
+
+            // Ask for at most two variants: a second one already shows the
+            // closure is bigger than the code point itself.
+            String first = ci.next();
+            boolean onlyOne = first != null && ci.next() == null;
+            if (onlyOne && first.equals(cp)) {
+                caseless.add(codePoint);
+            }
+        }
+
+        System.out.println("caseless = " + caseless.toPattern(true));
+
+        // Part 3: show where "caseless" and [:^lc:] disagree, in both directions.
+        UnicodeSet notLc = new UnicodeSet("[:^lc:]");
+        UnicodeSet difference = new UnicodeSet();
+
+        difference.set(notLc);
+        difference.removeAll(caseless);
+        System.out.println("[:^lc:] - caseless = " + difference.toPattern(true));
+
+        difference.set(caseless);
+        difference.removeAll(notLc);
+        System.out.println("caseless - [:^lc:] = " + difference.toPattern(true));
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Demo.java b/demos/src/com/ibm/icu/dev/demo/translit/Demo.java
new file mode 100644
index 00000000000..87882f9027b
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Demo.java
@@ -0,0 +1,1417 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+
+import java.awt.Button;
+import java.awt.CheckboxMenuItem;
+import java.awt.FileDialog;
+import java.awt.Font;
+import java.awt.Frame;
+import java.awt.GraphicsEnvironment;
+import java.awt.Label;
+import java.awt.Menu;
+import java.awt.MenuBar;
+import java.awt.MenuItem;
+import java.awt.MenuShortcut;
+import java.awt.TextField;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.KeyEvent;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.text.CharacterIterator;
+import java.util.Comparator;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.Differ;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.CanonicalIterator;
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+
+/**
+ * A frame that allows the user to experiment with keyboard
+ * transliteration.  This class has a main() method so it can be run
+ * as an application.  The frame contains an editable text component
+ * and uses keyboard transliteration to process keyboard events.
+ *
+ * 

Copyright (c) IBM Corporation 1999. All rights reserved. + * + * @author Alan Liu + */ +public class Demo extends Frame { + + /** + * For serialization + */ + private static final long serialVersionUID = 1L; + static final boolean DEBUG = false; + static final String START_TEXT = "(cut,\u03BA\u03C5\u03C4,\u05D0,\u30AF\u30C8,\u4E80,\u091A\u0941\u0924\u094D)"; + + Transliterator translit = null; + String fontName = "Arial Unicode MS"; + int fontSize = 18; + + + + /* + boolean compound = false; + Transliterator[] compoundTranslit = new Transliterator[MAX_COMPOUND]; + static final int MAX_COMPOUND = 128; + int compoundCount = 0; + */ + + TransliteratingTextComponent text = null; + + Menu translitMenu; + CheckboxMenuItem translitItem; + CheckboxMenuItem noTranslitItem; + + static final String NO_TRANSLITERATOR = "None"; + + //private static final String COPYRIGHT = + // "\u00A9 IBM Corporation 1999. All rights reserved."; + + public static void main(String[] args) { + Frame f = new Demo(600, 200); + f.addWindowListener(new WindowAdapter() { + public void windowClosing(WindowEvent e) { + com.ibm.icu.dev.demo.impl.DemoApplet.demoFrameClosed(); +// System.exit(0); + } + }); + f.setVisible(true); + com.ibm.icu.dev.demo.impl.DemoApplet.demoFrameOpened(); + } + + public Demo(int width, int height) { + super("Transliteration Demo"); + + initMenus(); + + addWindowListener(new WindowAdapter() { + public void windowClosing(WindowEvent e) { + handleClose(); + } + }); + + text = new TransliteratingTextComponent(); + Font font = new Font(fontName, Font.PLAIN, fontSize); + text.setFont(font); + text.setSize(width, height); + text.setVisible(true); + text.setText(START_TEXT); + add(text); + + setSize(width, height); + setTransliterator("Latin-Greek", null); + } + + private void initMenus() { + MenuBar mbar; + Menu menu; + MenuItem mitem; + //CheckboxMenuItem citem; + + setMenuBar(mbar = new MenuBar()); + mbar.add(menu = new Menu("File")); + menu.add(mitem = new MenuItem("Quit")); + 
mitem.addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + handleClose(); + } + }); +/* + final ItemListener setTransliteratorListener = new ItemListener() { + public void itemStateChanged(ItemEvent e) { + CheckboxMenuItem item = (CheckboxMenuItem) e.getSource(); + if (e.getStateChange() == ItemEvent.DESELECTED) { + // Don't let the current transliterator be deselected. + // Just reselect it. + item.setState(true); + } else if (compound) { + // Adding an item to a compound transliterator + handleAddToCompound(item.getLabel()); + } else if (item != translitItem) { + // Deselect previous choice. Don't need to call + // setState(true) on new choice. + translitItem.setState(false); + translitItem = item; + handleSetTransliterator(item.getLabel()); + } + } + }; +*/ + /* + translitMenu.add(translitItem = noTranslitItem = + new CheckboxMenuItem(NO_TRANSLITERATOR, true)); + noTranslitItem.addItemListener(new ItemListener() { + public void itemStateChanged(ItemEvent e) { + // Can't uncheck None -- any action here sets None to true + setNoTransliterator(); + } + }); + + translitMenu.addSeparator(); + */ + +/* + translitMenu.add(citem = new CheckboxMenuItem("Compound")); + citem.addItemListener(new ItemListener() { + public void itemStateChanged(ItemEvent e) { + CheckboxMenuItem item = (CheckboxMenuItem) e.getSource(); + if (e.getStateChange() == ItemEvent.DESELECTED) { + // If compound gets deselected, then select NONE + setNoTransliterator(); + } else if (!compound) { + // Switching from non-compound to compound + translitItem.setState(false); + translitItem = item; + translit = null; + compound = true; + compoundCount = 0; + for (int i=0; i &Hex($1) &Name($1);\r\n" + + "&Hex-Any($1) < ('\\' [uU] [a-fA-F0-9]*);\r\n" + + "&Name-Any($1) < ('{' [^\\}]* '}');" + ); + button = new Button("Set"); + button.addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + String compound = ""; + try { + compound = 
rulesDialog.getArea().getText(); + String id = ruleId.getText(); + setTransliterator(compound, id); + } catch (RuntimeException ex) { + rulesDialog.getArea().setText(compound + "\n#" + ex.getMessage()); + } + } + }); + rulesDialog.getBottom().add(button); + ruleId = new TextField("test1", 20); + Label temp = new Label(" Name:"); + rulesDialog.getBottom().add(temp); + rulesDialog.getBottom().add(ruleId); + + + translitMenu.add(mitem = new MenuItem("From Rules...", + new MenuShortcut(KeyEvent.VK_R))); + mitem.addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + rulesDialog.show(); + } + }); + + + translitMenu.add(mitem = new MenuItem("From File...", + new MenuShortcut(KeyEvent.VK_F))); + mitem.addActionListener(new FileListener(this, RULE_FILE)); + + translitMenu.add(mitem = new MenuItem("Test File...")); + mitem.addActionListener(new FileListener(this, TEST_FILE)); + + // Flesh out the menu with the installed transliterators + + translitMenu.addSeparator(); + + Iterator sources = add(new TreeSet(), Transliterator.getAvailableSources()).iterator(); + while(sources.hasNext()) { + String source = (String) sources.next(); + Iterator targets = add(new TreeSet(), Transliterator.getAvailableTargets(source)).iterator(); + Menu targetMenu = new Menu(source); + while(targets.hasNext()) { + String target = (String) targets.next(); + Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target)); + if (variantSet.size() < 2) { + mitem = new MenuItem(target); + mitem.addActionListener(new TransliterationListener(source + "-" + target)); + targetMenu.add(mitem); + } else { + Iterator variants = variantSet.iterator(); + Menu variantMenu = new Menu(target); + while(variants.hasNext()) { + String variant = (String) variants.next(); + String menuName = variant.length() == 0 ? 
"" : variant; + //System.out.println("<" + source + "-" + target + "/" + variant + ">, <" + menuName + ">"); + mitem = new MenuItem(menuName); + mitem.addActionListener(new TransliterationListener(source + "-" + target + "/" + variant)); + variantMenu.add(mitem); + } + targetMenu.add(variantMenu); + } + } + translitMenu.add(targetMenu); + } + + + } + + static final int RULE_FILE = 0, TEST_FILE = 1; + // + static class FileListener implements ActionListener { + Demo frame; + int choice; + + FileListener(Demo frame, int choice) { + this.frame = frame; + this.choice = choice; + } + + public void actionPerformed(ActionEvent e) { + String id = frame.translit.getID(); + int slashPos = id.indexOf('/'); + String variant = ""; + if (slashPos >= 0) { + variant = "_" + id.substring(slashPos+1); + id = id.substring(0, slashPos); + } + + FileDialog fileDialog = new FileDialog(frame, "Input File"); + fileDialog.setFile("Test_" + id + ".txt"); + fileDialog.show(); + String fileName = fileDialog.getFile(); + String fileDirectory = fileDialog.getDirectory(); + if (fileName != null) { + try { + File f = new File(fileDirectory, fileName); + if (choice == RULE_FILE) { + + // read stuff into buffer + + StringBuffer buffer = new StringBuffer(); + FileInputStream fis = new FileInputStream(f); + InputStreamReader isr = new InputStreamReader(fis, "UTF8"); + BufferedReader br = new BufferedReader(isr, 32*1024); + while (true) { + String line = br.readLine(); + if (line == null) break; + if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); // strip BOM + buffer.append('\n'); + buffer.append(line); + } + br.close(); + + // Transform file name into id + if (fileName.startsWith("Transliterator_")) { + fileName = fileName.substring("Transliterator_".length()); + } + int pos = fileName.indexOf('_'); + if (pos < 0) { + id = fileName; + } else { + id = fileName.substring(0, pos) + "-"; + int pos2 = fileName.indexOf('_', pos+1); + if (pos2 < 0) { + id += 
fileName.substring(pos+1); + } else { + id += fileName.substring(pos+1, pos2) + "/" + fileName.substring(pos2 + 1); + } + } + pos = id.lastIndexOf('.'); + if (pos >= 0) id = id.substring(0, pos); + + // Now set + + frame.setTransliterator(buffer.toString(), id); + } else if (choice == TEST_FILE) { + genTestFile(f, frame.translit, variant); + } + } catch (Exception e2) { + e2.printStackTrace(); + System.out.println("Problem opening/reading: " + fileDirectory + ", " + fileName); + } + } + fileDialog.dispose(); + } + } + + + boolean transliterateTyping = true; + Transliterator fromHex = Transliterator.getInstance("Hex-Any"); + InfoDialog helpDialog; + InfoDialog hexDialog; + InfoDialog compoundDialog; + InfoDialog rulesDialog; + TextField ruleId; + MenuItem convertSelectionItem = null; + MenuItem swapSelectionItem = null; + MenuItem convertTypingItem = null; + Menu historyMenu; + Map historyMap = new HashMap(); + Set historySet = new TreeSet(new Comparator() { + public int compare(Object a, Object b) { + MenuItem aa = (MenuItem)a; + MenuItem bb = (MenuItem)b; + return aa.getLabel().compareTo(bb.getLabel()); + } + }); + + // ADD Factory since otherwise getInverse blows out + static class DummyFactory implements Transliterator.Factory { + static DummyFactory singleton = new DummyFactory(); + static HashMap m = new HashMap(); + + // Since Transliterators are immutable, we don't have to clone on set & get + static void add(String ID, Transliterator t) { + m.put(ID, t); + System.out.println("Registering: " + ID + ", " + t.toRules(true)); + Transliterator.registerFactory(ID, singleton); + } + public Transliterator getInstance(String ID) { + return (Transliterator) m.get(ID); + } + } + + static void printBreaks(int num, String testSource, BreakIterator brkItr) { + String result = ""; + int lastPos = 0; + while (true) { + int pos = brkItr.next(); + if (pos == BreakIterator.DONE) break; + result += testSource.substring(lastPos, pos) + "&"; + lastPos = pos; + 
System.out.println(pos); + } + System.out.println("Test" + num + ": " + result); + } + + static void printIteration(int num, String testSource, CharacterIterator ci) { + String result = ""; + while (true) { + char ch = ci.next(); + if (ch == CharacterIterator.DONE) break; + result += ch + "(" + ci.getIndex() + ")"; + } + System.out.println("Test" + num + ": " + result); + } + + static void printSources() { + String[] list = {"Latin-ThaiLogical", "ThaiLogical-Latin", "Thai-ThaiLogical", "ThaiLogical-Thai"}; + UnicodeSet all = new UnicodeSet(); + for (int i = 0; i < list.length; ++i) { + Transliterator tr = Transliterator.getInstance(list[i]); + UnicodeSet src = tr.getSourceSet(); + System.out.println(list[i] + ": " + src.toPattern(true)); + all.addAll(src); + } + System.out.println("All: " + all.toPattern(true)); + UnicodeSet rem = new UnicodeSet("[[:latin:][:thai:]]"); + System.out.println("missing from [:latin:][:thai:]: " + all.removeAll(rem).toPattern(true)); + } + + // 200E;LEFT-TO-RIGHT MARK;Cf;0;L;;;;;N;;;;; + + static Transliterator title = Transliterator.getInstance("title"); + static String hexAndNameRules = " ([:c:]) > \\u200E &hex/unicode($1) ' ( ) ' &name($1) \\u200E ' ';" + + "([:mark:]) > \\u200E &hex/unicode($1) ' ( ' \\u200E \u25CC $1 \\u200E ' ) ' &name($1) \\u200E ' ';" + + "(.) 
> \\u200E &hex/unicode($1) ' ( ' \\u200E $1 \\u200E ' ) ' &name($1) ' ' \\u200E;"; + + static Transliterator hexAndName = Transliterator.createFromRules("any-hexAndName", + hexAndNameRules, Transliterator.FORWARD); + + + + //static Transliterator upper = Transliterator.getInstance("upper"); + + static final byte NONE = 0, TITLEWORD = 1, TITLELINE = 2; + + static void genTestFile(File sourceFile, Transliterator translit, String variant) { + try { + + System.out.println("Reading: " + sourceFile.getCanonicalPath()); + BufferedReader in = new BufferedReader( + new InputStreamReader( + new FileInputStream(sourceFile), "UTF-8")); + String targetFile = sourceFile.getCanonicalPath(); + int dotPos = targetFile.lastIndexOf('.'); + if (dotPos >= 0) targetFile = targetFile.substring(0,dotPos); + targetFile += variant; + + File outFile = new File(targetFile + ".html"); + System.out.println("Writing: " + outFile.getCanonicalPath()); + + PrintWriter out = new PrintWriter( + new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream(outFile), "UTF-8"))); + + String direction = ""; + String id = translit.getID(); + if (id.indexOf("Arabic") >= 0 || id.indexOf("Hebrew") >= 0) { + direction = " direction: rtl;"; + } + boolean testRoundTrip = true; + boolean generateSets = true; + if (id.startsWith("Han-") || id.startsWith("ja-")) { + testRoundTrip = false; + generateSets = false; + } + out.println(""); + out.println(""); + out.println("" + id + " Transliteration Check"); + out.println("

See Test_Instructions.html for details.

"); + out.println(""); + + //out.println(""); + + Transliterator tl = translit; + Transliterator lt = tl.getInverse(); + + Transliterator ltFilter = tl.getInverse(); + ltFilter.setFilter(new UnicodeSet("[:^Lu:]")); + Transliterator tlFilter = lt.getInverse(); + tlFilter.setFilter(new UnicodeSet("[:^Lu:]")); + + //Transliterator.getInstance("[:^Lu:]" + lt.getID()); + + BreakIterator sentenceBreak = BreakIterator.getSentenceInstance(); + + byte titleSetting = TITLELINE; + //boolean upperfilter = false; + boolean first = true; + while (true) { + String line = in.readLine(); + if (line == null) break; + line = line.trim(); + if (line.length() == 0) continue; + if (line.charAt(0) == '\uFEFF') line = line.substring(1); // remove BOM + + if (line.charAt(0) == '#') continue; // comments + + if (line.equals("@TITLECASE@")) { + titleSetting = TITLEWORD; + out.println(""); + continue; + } else if (line.equals("@UPPERFILTER@")) { + //upperfilter = true; + continue; + } else if (line.startsWith("@SET")) { + UnicodeSet s = new UnicodeSet(line.substring(4).trim()); + out.println(""); + UnicodeSetIterator it = new UnicodeSetIterator(s); + while (it.next()) { + addSentenceToTable(out, it.codepoint != UnicodeSetIterator.IS_STRING + ? UTF16.valueOf(it.codepoint) + : it.string, + NONE, true, testRoundTrip, first, tl, lt); + } + continue; + } + + sentenceBreak.setText(line); + int start = 0; + while (true) { + int end = sentenceBreak.next(); + if (end == BreakIterator.DONE) break; + String coreSentence = line.substring(start, end); + //System.out.println("Core: " + hex.transliterate(coreSentence)); + end = start; + + int oldPos = 0; + while (oldPos < coreSentence.length()) { + // hack, because sentence doesn't seem to be working right + int pos = coreSentence.indexOf(". 
", oldPos); + if (pos < 0) pos = coreSentence.length(); else pos = pos+2; + int pos2 = coreSentence.indexOf('\u3002', oldPos); + if (pos2 < 0) pos2 = coreSentence.length(); else pos2 = pos2 + 1; + if (pos > pos2) pos = pos2; + String sentence = coreSentence.substring(oldPos, pos).trim(); + //System.out.println("Sentence: " + hex.transliterate(coreSentence)); + oldPos = pos; + + addSentenceToTable(out, sentence, + titleSetting, false, testRoundTrip, first, tl, lt); + + first = false; + } + } + } + out.println("
ThaiLatinThai
Names
Characters
"); + out.close(); + + // Now write the source/target sets + if (generateSets) { + outFile = new File(targetFile + "_Sets.html"); + System.out.println("Writing: " + outFile.getCanonicalPath()); + + out = new PrintWriter( + new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream(outFile), "UTF-8"))); + out.println(""); + out.println(""); + out.println("" + id + " Transliteration Sets"); + out.println(""); + + int dashPos = id.indexOf('-'); + int slashPos = id.indexOf('/'); + if (slashPos < 0) slashPos = id.length(); + UnicodeSet sourceSuper = null; + try { + String temp = id.substring(0,dashPos); + if (temp.equals("ja")) sourceSuper = new UnicodeSet("[[:Han:][:hiragana:][:katakana:]]"); + else sourceSuper = new UnicodeSet("[[:" + temp + ":][:Mn:][:Me:]]"); + } catch (Exception e) {} + + UnicodeSet targetSuper = null; + try { + targetSuper = new UnicodeSet("[[:" + id.substring(dashPos+1, slashPos) + ":][:Mn:][:Me:]]"); + } catch (Exception e) {} + + int nfdStyle = CLOSE_CASE | CLOSE_FLATTEN | CLOSE_CANONICAL; + int nfkdStyle = nfdStyle | CLOSE_COMPATIBILITY; + out.println("
    "); + out.println("

    None

    "); + showSets(out, translit, lt, null, null, 0); + out.println("

    NFD

    "); + showSets(out, translit, lt, sourceSuper, targetSuper, nfdStyle); + out.println("

    NFKD

    "); + showSets(out, translit, lt, sourceSuper, targetSuper, nfkdStyle); + out.println("
"); + out.close(); + } + System.out.println("Done Writing"); + } catch (Exception e) { + e.printStackTrace(); + } + } + + static void addSentenceToTable(PrintWriter out, String sentence, + byte titleSetting, boolean addName, boolean testRoundTrip, boolean first, + Transliterator tl, Transliterator lt) { + if (sentence.length() == 0) return; // skip empty lines + + String originalShow = sentence; + String latin; + latin = tl.transliterate(saveAscii.transliterate(sentence)); + + String latinShow = latin; + if (titleSetting == TITLEWORD) { + latinShow = title.transliterate(latin); + } else if (titleSetting == TITLELINE) { + latinShow = titlecaseFirstWord(latinShow); + } + latinShow = restoreAscii.transliterate(latinShow); + + String reverse; + reverse = restoreAscii.transliterate(lt.transliterate(latin)); + + String NFKDSentence = Normalizer.normalize(sentence, Normalizer.NFKD); + String NFKDLatin = Normalizer.normalize(latin, Normalizer.NFKD); + String NFKDReverse = Normalizer.normalize(reverse, Normalizer.NFKD); + + if (latinShow.length() == 0) { + latinShow = "empty"; + } else if (NFKDSentence.equals(NFKDLatin)) { + latinShow = "" + latinShow + ""; + } + String reverseShow = reverse; + + if (testRoundTrip && !NFKDReverse.equals(NFKDSentence)) { + int minLen = reverse.length(); + if (minLen > sentence.length()) minLen = sentence.length(); + int i; + for (i = 0; i < minLen; ++i) { + if (reverse.charAt(i) != sentence.charAt(i)) break; + } + //originalShow = sentence.substring(0,i) + "" + sentence.substring(i) + ""; + reverseShow = reverseShow.length() == 0 + ? 
"empty" + //: reverse.substring(0,i) + "" + reverse.substring(i) + ""; + : showDifference(sentence, reverse); + out.println("" : ">") + originalShow + + "" + latinShow + + "" + reverseShow + + ""); + } else { + out.println("" : ">") + originalShow + + "" + latinShow + + ""); + } + if (addName) { + latinShow = hexAndName.transliterate(latin); + if (latinShow.length() == 0) latinShow = "empty"; + originalShow = hexAndName.transliterate(sentence); + if (originalShow.length() == 0) originalShow = "empty"; + + out.println("" + originalShow + + "" + latinShow + + ""); + } + out.println(""); + + } + + static String showDifference(String as, String bs) { + Differ differ = new Differ(300, 3); + StringBuffer out = new StringBuffer(); + int max = as.length(); + if (max < bs.length()) max = bs.length(); + for (int j = 0; j <= max; ++j) { + if (j < as.length()) differ.addA(as.substring(j, j+1)); + if (j < bs.length()) differ.addB(bs.substring(j, j+1)); + differ.checkMatch(j == max); + + if (differ.getACount() != 0 || differ.getBCount() != 0) { + out.append("..."); + if (differ.getACount() != 0) { + out.append(""); + for (int i = 0; i < differ.getACount(); ++i) { + out.append(differ.getA(i)); + } + out.append(""); + } + if (differ.getBCount() != 0) { + out.append(""); + for (int i = 0; i < differ.getBCount(); ++i) { + out.append(differ.getB(i)); + } + out.append(""); + } + out.append("..."); + } + } + return out.toString(); + } + + static void showSets(PrintWriter out, Transliterator translit, Transliterator inverse, + UnicodeSet sourceSuper, UnicodeSet targetSuper, int options) { + out.println("
  • Source Set:
    • " + toPattern(closeUnicodeSet(translit.getSourceSet(), options), sourceSuper) + "
  • "); + out.println("
  • Reverse Target Set:
    • " + toPattern(closeUnicodeSet(inverse.getTargetSet(), options), sourceSuper) + "
  • "); + out.println("
  • Target Set:
    • " + toPattern(closeUnicodeSet(translit.getTargetSet(), options), targetSuper) + "
  • "); + out.println("
  • Reverse Source Set:
    • " + toPattern(closeUnicodeSet(inverse.getSourceSet(), options), targetSuper) + "
  • "); + } + + static final int CLOSE_CASE = 1, CLOSE_FLATTEN = 2, CLOSE_CANONICAL = 4, CLOSE_COMPATIBILITY = 8; + + static UnicodeSet closeUnicodeSet(UnicodeSet source, int options) { + if (options == 0) return source; + + UnicodeSetIterator it = new UnicodeSetIterator(source); + UnicodeSet additions = new UnicodeSet(); // to avoid messing up iterator + UnicodeSet removals = new UnicodeSet(); // to avoid messing up iterator + String base; + int cp; + + // Add all case equivalents + if ((options & CLOSE_CASE) != 0) { + while (it.next()) { + cp = it.codepoint; + if (cp == UnicodeSetIterator.IS_STRING) continue; + int type = UCharacter.getType(cp); + if (type == Character.UPPERCASE_LETTER || type == Character.LOWERCASE_LETTER || type == Character.TITLECASE_LETTER) { + additions.add(UCharacter.toLowerCase(UTF16.valueOf(cp))); + additions.add(UCharacter.toUpperCase(UTF16.valueOf(cp))); + } + } + source.addAll(additions); + } + + // Add the canonical closure of all strings and characters in source + if ((options & CLOSE_CANONICAL) != 0) { + it.reset(); + additions.clear(); + CanonicalIterator ci = new CanonicalIterator("."); + while (it.next()) { + if (it.codepoint == UnicodeSetIterator.IS_STRING) base = it.string; + else base = UTF16.valueOf(it.codepoint); + ci.setSource(base); + while (true) { + String trial = ci.next(); + if (trial == null) break; + if (trial.equals(base)) continue; + additions.add(trial); + } + } + source.addAll(additions); + } + + // flatten strings + if ((options & CLOSE_FLATTEN) != 0) { + it.reset(); + additions.clear(); + while (it.next()) { + if (it.codepoint != UnicodeSetIterator.IS_STRING) continue; + additions.addAll(it.string); + removals.add(it.string); + //System.out.println("flattening '" + hex.transliterate(it.string) + "'"); + } + source.addAll(additions); + source.removeAll(removals); + } + + // Now add decompositions of characters in source + if ((options & CLOSE_COMPATIBILITY) != 0) { + it.reset(source); + additions.clear(); + 
while (it.next()) { + if (it.codepoint == UnicodeSetIterator.IS_STRING) base = it.string; + else base = UTF16.valueOf(it.codepoint); + if (Normalizer.isNormalized(base, Normalizer.NFKD,0)) continue; + String decomp = Normalizer.normalize(base, Normalizer.NFKD); + additions.add(decomp); + } + source.addAll(additions); + + // Now add any other character that decomposes to a character in source + for (cp = 0; cp < 0x10FFFF; ++cp) { + if (!UCharacter.isDefined(cp)) continue; + if (Normalizer.isNormalized(cp, Normalizer.NFKD,0)) continue; + if (source.contains(cp)) continue; + + String decomp = Normalizer.normalize(cp, Normalizer.NFKD); + if (source.containsAll(decomp)) { + // System.out.println("Adding: " + Integer.toString(cp,16) + " " + UCharacter.getName(cp)); + source.add(cp); + } + } + } + + return source; + } + + static String toPattern(UnicodeSet source, UnicodeSet superset) { + if (superset != null) { + source.removeAll(superset); + return "[" + superset.toPattern(true) + " " + source.toPattern(true) + "]"; + } + return source.toPattern(true); + } + + static BreakIterator bi = BreakIterator.getWordInstance(); + + static String titlecaseFirstWord(String line) { + // search for first word with letters. If the first letter is lower, then titlecase it. 
+ bi.setText(line); + int start = 0; + while (true) { + int end = bi.next(); + if (end == BreakIterator.DONE) break; + int firstLetterType = getFirstLetterType(line, start, end); + if (firstLetterType != Character.UNASSIGNED) { + if (firstLetterType != Character.LOWERCASE_LETTER) break; + line = line.substring(0, start) + + UCharacter.toTitleCase(line.substring(start, end), bi) + + line.substring(end); + break; + } + end = start; + } + return line; + } + + static final int LETTER_MASK = + (1< XXX # " + UCharacter.getName(it.codepoint)); + main.add(it.codepoint); + } + + if (others.size() != 0) { + out.println("Decomposed characters found above: "); + others.removeAll(main); + it.reset(others); + while (it.next()) { + out.println(" " + UTF16.valueOf(it.codepoint) + " <> XXX # " + UCharacter.getName(it.codepoint)); + } + } + + out.close(); + System.out.println("Done Writing"); + } catch (Exception e) { + e.printStackTrace(); + } + } + + static Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex"); + static final String saveRules = + "A <> \uEA41; B <> \uEA42; C <> \uEA43; D <> \uEA44; E <> \uEA45; F <> \uEA46; G <> \uEA47; H <> \uEA48; I <> \uEA49; " + + "J <> \uEA4A; K <> \uEA4B; L <> \uEA4C; M <> \uEA4D; N <> \uEA4E; O <> \uEA4F; P <> \uEA50; Q <> \uEA51; R <> \uEA52; " + + "S <> \uEA53; T <> \uEA54; U <> \uEA55; V <> \uEA56; W <> \uEA57; X <> \uEA58; Y <> \uEA59; Z <> \uEA5A; " + + "a <> \uEA61; b <> \uEA62; c <> \uEA63; d <> \uEA64; e <> \uEA65; f <> \uEA66; g <> \uEA67; h <> \uEA68; i <> \uEA69; " + + "j <> \uEA6A; k <> \uEA6B; l <> \uEA6C; m <> \uEA6D; n <> \uEA6E; o <> \uEA6F; p <> \uEA70; q <> \uEA71; r <> \uEA72; " + + "s <> \uEA73; t <> \uEA74; u <> \uEA75; v <> \uEA76; w <> \uEA77; x <> \uEA78; y <> \uEA79; z <> \uEA7A;"; + + static Transliterator saveAscii = Transliterator.createFromRules("ascii-saved", saveRules, Transliterator.FORWARD); + static Transliterator restoreAscii = Transliterator.createFromRules("ascii-saved", saveRules, 
Transliterator.REVERSE); + + static { + + if (false) { + + for (char i = 'A'; i <= 'z'; ++i) { + System.out.print(i + " <> " + hex.transliterate(String.valueOf((char)(0xEA00 + i))) + "; "); + } + + UnicodeSet x = new UnicodeSet("[[:^ccc=0:]&[:^ccc=230:]]"); + x = x.complement(); + x = x.complement(); + System.out.println("Test: " + x.toPattern(true)); + + Transliterator y = Transliterator.createFromRules("xxx", "$notAbove = [[:^ccc=0:]&[:^ccc=230:]]; u ($notAbove*) \u0308 > XXX | $1; ", Transliterator.FORWARD); + + String[] testList = {"u\u0308", "u\u0316\u0308", "u\u0308\u0316", "u\u0301\u0308", "u\u0308\u0301"}; + for (int i = 0; i < testList.length; ++i) { + String yy = y.transliterate(testList[i]); + System.out.println(hex.transliterate(testList[i]) + " => " + hex.transliterate(yy)); + } + + //printNames(new UnicodeSet("[\u0600-\u06FF]"), "Arabic-Latin.txt"); + + + /* + BreakTransliterator.register(); + + BreakTransliterator testTrans = new BreakTransliterator("Any-XXX", null, null, "$"); + String testSource = "The Quick: Brown fox--jumped."; + BreakIterator bi = testTrans.getBreakIterator(); + bi.setText(new StringCharacterIterator(testSource)); + printBreaks(0, testSource, bi); + //bi.setText(UCharacterIterator.getInstance(testSource)); + //printBreaks(1, testSource, bi); + + printIteration(2, testSource, new StringCharacterIterator(testSource)); + //printIteration(3, testSource, UCharacterIterator.getInstance(testSource)); + + + + String test = testTrans.transliterate(testSource); + System.out.println("Test3: " + test); + DummyFactory.add(testTrans.getID(), testTrans); + */ + + // AnyTransliterator.ScriptRunIterator.registerAnyToScript(); + + AnyTransliterator at = new AnyTransliterator("Greek", null); + at.transliterate("(cat,\u03b1,\u0915)"); + DummyFactory.add(at.getID(), at); + + at = new AnyTransliterator("Devanagari", null); + at.transliterate("(cat,\u03b1,\u0915)"); + DummyFactory.add(at.getID(), at); + + at = new AnyTransliterator("Latin", null); + 
at.transliterate("(cat,\u03b1,\u0915)");
+            DummyFactory.add(at.getID(), at);
+
+            DummyFactory.add("Any-gif", Transliterator.createFromRules("gif", "'\\'u(..)(..) > '';", Transliterator.FORWARD));
+            DummyFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
+
+            DummyFactory.add("Any-RemoveCurly", Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ;", Transliterator.FORWARD));
+            DummyFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
+
+            System.out.println("Trying &hex");
+            Transliterator t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
+            System.out.println("Registering");
+            DummyFactory.add("Any-hex2", t);
+
+            System.out.println("Trying &gif");
+            t = Transliterator.createFromRules("gif2", "(.) > &any-gif($1);", Transliterator.FORWARD);
+            System.out.println("Registering");
+            DummyFactory.add("Any-gif2", t);
+        }
+    }
+
+    /**
+     * Selects the transliterator used by the text component and refreshes the
+     * related menu state.
+     * <p>
+     * When <code>id</code> is null, <code>name</code> is handed to
+     * <code>Transliterator.getInstance</code>. Otherwise <code>name</code> is
+     * handed to <code>Transliterator.createFromRules</code> as the rule text
+     * and <code>id</code> as the ID: a forward and a reverse transliterator
+     * are built from the same rules and both are added to
+     * <code>DummyFactory</code>. The reverse ID swaps the two parts around
+     * the first '-' (a trailing "/..." part stays at the end); an ID without
+     * '-' becomes "Any-" + id, with id + "-Any" as its reverse.
+     * <p>
+     * The selected transliterator and, if <code>getInverse()</code> does not
+     * throw, its inverse are added to the history menu; the swap item is
+     * enabled only when an inverse was obtained.
+     *
+     * @param name a transliterator ID when <code>id</code> is null,
+     *             otherwise the rule text
+     * @param id   ID for the rule-built transliterator, or null
+     */
+    void setTransliterator(String name, String id) {
+        if (DEBUG) System.out.println("Got: " + name);
+        if (id == null) {
+            translit = Transliterator.getInstance(name);
+        } else {
+            String reverseId = "";
+            int pos = id.indexOf('-');
+            if (pos < 0) {
+                reverseId = id + "-Any";
+                id = "Any-" + id;
+            } else {
+                int pos2 = id.indexOf("/", pos);
+                if (pos2 < 0) {
+                    reverseId = id.substring(pos+1) + "-" + id.substring(0,pos);
+                } else {
+                    reverseId = id.substring(pos+1, pos2) + "-" + id.substring(0,pos) + id.substring(pos2);
+                }
+            }
+
+
+            translit = Transliterator.createFromRules(id, name, Transliterator.FORWARD);
+            if (DEBUG) {
+                System.out.println("***Forward Rules");
+                System.out.println(translit.toRules(true));
+                System.out.println("***Source Set");
+                System.out.println(translit.getSourceSet().toPattern(true));
+            }
+            System.out.println("***Target Set"); // printed even when DEBUG is off
+            UnicodeSet target = translit.getTargetSet();
+            System.out.println(target.toPattern(true));
+            UnicodeSet rest = new UnicodeSet("[a-z]").removeAll(target);
+            System.out.println("***ASCII - Target Set");
+            System.out.println(rest.toPattern(true));
+
+            DummyFactory.add(id, translit);
+
+            Transliterator translit2 = Transliterator.createFromRules(reverseId, name, Transliterator.REVERSE);
+            if (DEBUG) {
+                System.out.println("***Backward Rules");
+                System.out.println(translit2.toRules(true));
+            }
+            DummyFactory.add(reverseId, translit2);
+
+            Transliterator rev = translit.getInverse(); // only used for the debug dump below
+            if (DEBUG) System.out.println("***Inverse Rules");
+            if (DEBUG) System.out.println(rev.toRules(true));
+
+        }
+        text.flush();
+        text.setTransliterator(translit);
+        convertSelectionItem.setLabel(Transliterator.getDisplayName(translit.getID()));
+
+        addHistory(translit);
+
+        Transliterator inv;
+        try {
+            inv = translit.getInverse();
+        } catch (Exception ex) {
+            inv = null; // no inverse available: the swap item is disabled below
+        }
+        if (inv != null) {
+            addHistory(inv);
+            swapSelectionItem.setEnabled(true);
+        } else {
+            swapSelectionItem.setEnabled(false);
+        }
+        System.out.println("Set transliterator: " + translit.getID()
+            + (inv != null ? " and " + inv.getID() : ""));
+    }
+    /**
+     * Adds a history menu entry for the given transliterator unless
+     * <code>historyMap</code> already holds one for its ID. A new entry is
+     * labelled with the display name of the ID, gets a
+     * <code>TransliterationListener</code> for that ID, and the history menu
+     * is then emptied and refilled from <code>historySet</code>.
+     *
+     * @param trans transliterator whose ID is recorded
+     */
+    void addHistory(Transliterator trans) {
+        String name = trans.getID();
+        MenuItem cmi = (MenuItem) historyMap.get(name);
+        if (cmi == null) {
+            cmi = new MenuItem(Transliterator.getDisplayName(name));
+            cmi.addActionListener(new TransliterationListener(name));
+            historyMap.put(name, cmi);
+            historySet.add(cmi);
+            historyMenu.removeAll();
+            Iterator it = historySet.iterator();
+            while (it.hasNext()) {
+                historyMenu.add((MenuItem)it.next());
+            }
+        }
+    }
+    /**
+     * Menu listener bound to one transliterator name. An action event selects
+     * that name through <code>setTransliterator(name, null)</code>; an item
+     * event does the same when the item becomes selected and selects
+     * "Any-Null" otherwise.
+     */
+    class TransliterationListener implements ActionListener, ItemListener {
+        String name;
+        public TransliterationListener(String name) {
+            this.name = name;
+        }
+        public void actionPerformed(ActionEvent e) {
+            setTransliterator(name, null);
+        }
+        public void itemStateChanged(ItemEvent e) {
+            if (e.getStateChange() == ItemEvent.SELECTED) {
+                setTransliterator(name, null);
+            } else {
+                setTransliterator("Any-Null", null);
+            }
+        }
+    }
+    /**
+     * Action listener that stores its font name in <code>fontName</code> and
+     * applies a plain font of that name and the current
+     * <code>fontSize</code> to the text component.
+     */
+    class FontActionListener implements ActionListener {
+        String name;
+        public FontActionListener(String name) {
+            this.name = name;
+        }
+        public void actionPerformed(ActionEvent e) {
+            if (DEBUG) System.out.println("Font: " + name);
+            fontName = name;
+            text.setFont(new Font(fontName, Font.PLAIN, fontSize));
+        }
+    }
+    /**
+     * Action listener that stores its size in <code>fontSize</code> and
+     * applies a plain font of the current <code>fontName</code> and that
+     * size to the text component.
+     */
+    class SizeActionListener implements ActionListener {
+        int size;
+        public SizeActionListener(int size) {
+            this.size = size;
+        }
+        public void actionPerformed(ActionEvent e) {
+            if (DEBUG) System.out.println("Size: " + size);
+            fontSize = size;
+            text.setFont(new Font(fontName, Font.PLAIN, fontSize));
+        }
+    }
+    /**
+     * Adds every remaining element of the enumeration to the set.
+     *
+     * @param s           set that receives the elements
+     * @param enumeration source of the elements; it is consumed
+     * @return the same set <code>s</code>
+     */
+    Set add(Set s, Enumeration enumeration) {
+        while(enumeration.hasMoreElements()) {
+            s.add(enumeration.nextElement());
+        }
+        return s;
+    }
+
+    /**
+     * Get a sorted list of the system transliterators.
+     */
+    /* NOTE(review): in the commented-out code below, text between a '<' and
+     * the next '>' appears to be missing (for example the inner loop header);
+     * confirm against the repository copy before reviving any of it.
+    private static Vector getSystemTransliteratorNames() {
+        Vector v = new Vector();
+        for (Enumeration e=Transliterator.getAvailableIDs();
+             e.hasMoreElements(); ) {
+            v.addElement(e.nextElement());
+        }
+        // Insertion sort, O(n^2) acceptable for small n
+        for (int i=0; i<(v.size()-1); ++i) {
+            String a = (String) v.elementAt(i);
+            for (int j=i+1; j 0) {
+                    v.setElementAt(b, i);
+                    v.setElementAt(a, j);
+                    a = b;
+                }
+            }
+        }
+        return v;
+    }
+    */
+
+/*
+    private void setNoTransliterator() {
+        translitItem = noTranslitItem;
+        noTranslitItem.setState(true);
+        handleSetTransliterator(noTranslitItem.getLabel());
+        compound = false;
+        for (int i=0; i.
+     */
+    /*
+    private static Transliterator decodeTranslitItem(String name) {
+        return (name.equals(NO_TRANSLITERATOR))
+            ? 
null : Transliterator.getInstance(name);
+    }
+    */
+
+    /**
+     * Runs the given transliterator once over the currently selected text,
+     * replaces the selection with the result and selects the replacement.
+     * Does nothing when <code>trans</code> is null. With DEBUG on, the text
+     * before and after is printed to standard output.
+     *
+     * @param trans transliterator to apply, or null
+     */
+    private void handleBatchTransliterate(Transliterator trans) {
+        if (trans != null) {
+            int selStart = text.getSelectionStart();
+            int selEnd = text.getSelectionEnd();
+            String selected = text.getText().substring(selStart, selEnd);
+            ReplaceableString buffer = new ReplaceableString(selected);
+
+            // Debug trace: the "before" half is captured prior to the conversion.
+            StringBuffer trace = null;
+            if (DEBUG) {
+                trace = new StringBuffer();
+                trace.append('"').append(buffer.toString()).append("\" (start ").append(selStart)
+                     .append(", end ").append(selEnd).append(") -> \"");
+            }
+
+            trans.transliterate(buffer);
+            String converted = buffer.toString();
+
+            if (DEBUG) {
+                trace.append(converted).append("\"");
+                System.out.println("Batch " + trans.getID() + ": " + trace.toString());
+            }
+
+            text.replaceRange(converted, selStart, selEnd);
+            text.select(selStart, selStart + converted.length());
+        }
+    }
+
+    /**
+     * Disposes the help dialog and then this window.
+     */
+    private void handleClose() {
+        helpDialog.dispose();
+        dispose();
+    }
+
+    /*
+    class InfoDialog extends Dialog {
+        protected Button button;
+        protected TextArea area;
+        protected Dialog me;
+        protected Panel bottom;
+
+        public TextArea getArea() {
+            return area;
+        }
+
+        public Panel getBottom() {
+            return bottom;
+        }
+
+        InfoDialog(Frame parent, String title, String label, String message) {
+            super(parent, title, false);
+            me = this;
+            this.setLayout(new BorderLayout());
+            if (label.length() != 0) {
+                this.add("North", new Label(label));
+            }
+
+            area = new TextArea(message, 8, 80, TextArea.SCROLLBARS_VERTICAL_ONLY);
+            this.add("Center", area);
+
+            button = new Button("Hide");
+            button.addActionListener(new ActionListener() {
+                public void actionPerformed(ActionEvent e) {
+                    me.hide();
+                }
+            });
+            bottom = new Panel();
+            bottom.setLayout(new FlowLayout(FlowLayout.CENTER, 0, 0));
+            bottom.add(button);
+            this.add("South", bottom);
+            this.pack();
+            addWindowListener(new WindowAdapter() {
+                public void windowClosing(WindowEvent e) {
+                    me.hide();
+                }
+            });
+        }
+    }
+    */
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java b/demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java
new file mode 100644
index 
00000000000..99820e0611d --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java @@ -0,0 +1,73 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.demo.translit; +import java.applet.Applet; +import java.awt.Button; +import java.awt.Dimension; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; + +import com.ibm.icu.dev.demo.impl.AppletFrame; + +/** + * A simple Applet that shows a button. When pressed, the button + * shows the DemoAppletFrame. This Applet is meant to be embedded + * in a web page. + * + *

    Copyright (c) IBM Corporation 1999. All rights reserved. + * + * @author Alan Liu + */ +public class DemoApplet extends Applet { + + /** + * For serialization + */ + private static final long serialVersionUID = 8214879807740061678L; + Demo frame = null; + + public static void main(String args[]) { + final DemoApplet applet = new DemoApplet(); + new AppletFrame("Transliteration Demo", applet, 640, 480); + } + + public void init() { + + Button button = new Button("Transliteration Demo"); + button.addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + if (frame == null) { + frame = new Demo(600, 200); + frame.addWindowListener(new WindowAdapter() { + public void windowClosing(WindowEvent we) { + frame = null; + } + }); + } + frame.setVisible(true); + frame.toFront(); + } + }); + + add(button); + + Dimension size = button.getPreferredSize(); + size.width += 10; + size.height += 10; + + resize(size); + } + + public void stop() { + if (frame != null) { + frame.dispose(); + } + frame = null; + } +} diff --git a/demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java b/demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java new file mode 100644 index 00000000000..4ea16e3b9a4 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java @@ -0,0 +1,66 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.dev.demo.translit; +import java.awt.BorderLayout; +import java.awt.Button; +import java.awt.Dialog; +import java.awt.FlowLayout; +import java.awt.Frame; +import java.awt.Label; +import java.awt.Panel; +import java.awt.TextArea; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +public class InfoDialog extends Dialog { + /** + * For serialization + */ + private static final long serialVersionUID = -3086665546137919018L; + protected Button button; + protected TextArea area; + protected Dialog me; + protected Panel bottom; + + public TextArea getArea() { + return area; + } + + public Panel getBottom() { + return bottom; + } + + InfoDialog(Frame parent, String title, String label, String message) { + super(parent, title, false); + me = this; + this.setLayout(new BorderLayout()); + if (label.length() != 0) { + this.add("North", new Label(label)); + } + + area = new TextArea(message, 8, 80, TextArea.SCROLLBARS_VERTICAL_ONLY); + this.add("Center", area); + + button = new Button("Hide"); + button.addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + me.hide(); + } + }); + bottom = new Panel(); + bottom.setLayout(new FlowLayout(FlowLayout.CENTER, 0, 0)); + bottom.add(button); + this.add("South", bottom); + this.pack(); + addWindowListener(new WindowAdapter() { + public void windowClosing(WindowEvent e) { + me.hide(); + } + }); + } +} diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt new file mode 100644 index 00000000000..146c659ac3c --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt @@ -0,0 +1,24 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International 
Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +@UPPERFILTER@ +ما هي الشفرة الموحدة "يونِكود" ؟ + +أساسًا، تتعامل الحواسيب فقط مع الأرقام، وتقوم بتخزين الأحرف والمحارف الأخرى بعد أن تُعطي رقما معينا لكل واحد منها. وقبل اختراع "يونِكود"، كان هناك مئات الأنظمة للتشفير وتخصيص هذه الأرقام للمحارف، ولم يوجد نظام تشفير واحد يحتوي على جميع المحارف الضرورية. وعلى سبيل المثال، فإن الاتحاد الأوروبي لوحده، احتوى العديد من الشفرات المختلفة ليغطي جميع اللغات المستخدمة في الاتحاد. وحتى لو اعتبرنا لغة واحدة، كاللغة الإنجليزية، فإن جدول شفرة واحد لم يكف لاستيعاب جميع الأحرف وعلامات الترقيم والرموز الفنية والعلمية الشائعة الاستعمال. + + + +وتجدر الملاحظة أن أنظمة التشفير المختلفة تتعارض مع بعضها البعض. وبعبارة أخرى، يمكن أن يستخدِم جدولي شفرة نفس الرقم لتمثيل محرفين مختلفين، أو رقمين مختلفين لتمثيل نفس المحرف. ولو أخذنا أي جهاز حاسوب، وبخاصة جهاز النادل (server)، فيجب أن تكون لديه القدرة على التعامل مع عدد كبير من الشفرات المختلفة، ويتم تصميمه على هذا الأساس. ومع ذلك، فعندما تمر البيانات عبر أنظمة مختلفة، توجد هناك خطورة لضياع أو تحريف بعض هذه البيانات. + + + +"يونِكود" تغير هذا كليا ! + +تخصص الشفرة الموحدة "يونِكود" رقما وحيدا لكل محرف في جميع اللغات العالمية، وذلك بغض النظر عن نوع الحاسوب أو البرامج المستخدمة. وقد تم تبني مواصفة "يونِكود" من قبل قادة الصانعين لأنظمة الحواسيب في العالم، مثل شركات آي.بي.إم. (IBM)، أبل (APPLE)، هِيْولِت باكرد (Hewlett-Packard) ، مايكروسوفت (Microsoft)، أوراكِل (Oracle) ، صن (Sun) وغيرها. كما أن المواصفات والمقاييس الحديثة (مثل لغة البرمجة "جافا" "JAVA" ولغة "إكس إم إل" "XML" التي تستخدم لبرمجة الانترنيت) تتطلب استخدام "يونِكود". علاوة على ذلك ، فإن "يونِكود" هي الطريقة الرسمية لتطبيق المقياس العالمي إيزو ١٠٦٤٦ (ISO 10646) . + + + +إن بزوغ مواصفة "يونِكود" وتوفُّر الأنظمة التي تستخدمه وتدعمه، يعتبر من أهم الاختراعات الحديثة في عولمة البرمجيات لجميع اللغات في العالم. 
وإن استخدام "يونِكود" في عالم الانترنيت سيؤدي إلى توفير كبير مقارنة مع استخدام المجموعات التقليدية للمحارف المشفرة. كما أن استخدام "يونِكود" سيُمكِّن المبرمج من كتابة البرنامج مرة واحدة، واستخدامه على أي نوع من الأجهزة أو الأنظمة، ولأي لغة أو دولة في العالم أينما كانت، دون الحاجة لإعادة البرمجة أو إجراء أي تعديل. وأخيرا، فإن استخدام "يونِكود" سيمكن البيانات من الانتقال عبر الأنظمة والأجهزة المختلفة دون أي خطورة لتحريفها، مهما تعددت الشركات الصانعة للأنظمة واللغات، والدول التي تمر من خلالها هذه البيانات. + +@SET [[[:Arabic:] & [\u0600-\u06FF]] [‎\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9]] \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt new file mode 100644 index 00000000000..63800742530 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt @@ -0,0 +1,73 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +Τι είναι το Unicode? + +Η κωδικοσελίδα Unicode προτείνει έναν και μοναδικό αριθμό για κάθε χαρακτήρα, ανεξάρτητα από το λειτουργικό σύστημα, ανεξάρτητα από το λογισμικό, ανεξάρτητα από την γλώσσα. + +Οι ηλεκτρονικοί υπολογιστές, σε τελική ανάλυση, χειρίζονται απλώς αριθμούς. Αποθηκεύουν γράμματα και άλλους χαρακτήρες αντιστοιχώντας στο καθένα τους από έναν αριθμό (ονομάζουμε μία τέτοια αντιστοιχία κωδικοσελίδα). Πριν την εφεύρεση του Unicode, υπήρχαν εκατοντάδες διαφορετικές κωδικοσελίδες. Λόγω περιορισμών μεγέθους όμως, σε καμία κωδικοσελίδα δεν χωρούσαν αρκετοί χαρακτήρες: λόγου χάριν, η Ευρωπαϊκή Ένωση χρειαζόταν πλήθος διαφορετικών κωδικοσελίδων για να καλύψει όλες τις γλώσσες των χωρών-μελών της. Ακόμα και για μία και μόνη γλώσσα, όπως π.χ. 
τα Αγγλικά, μία κωδικοσελίδα δεν επαρκούσε για να καλύψει όλα τα γράμματα, σημεία στίξης και τεχνικά σύμβολα ευρείας χρήσης. + +Εκτός αυτού, οι κωδικοσελίδες αυτές διαφωνούσαν μεταξύ τους. Έτσι, δύο κωδικοσελίδες μπορούσαν κάλλιστα να χρησιμοποιούν τον ίδιο αριθμό για δύο διαφορετικούς χαρακτήρες, ή να χρησιμοποιούν διαφορετικούς αριθμούς για τον ίδιο χαρακτήρα. Κάθε υπολογιστής (και ιδίως εάν ήταν διακομιστής) έπρεπε να υποστηρίζει πλήθος διαφορετικών κωδικοσελίδων· ταυτόχρονα κάθε φορά που δεδομένα μεταφέρονταν μεταξύ διαφορετικών κωδικοσελίδων ή λειτουργικών συστημάτων, τα δεδομένα αυτά κινδύνευαν να αλλοιωθούν. + +Το Unicode αλλάζει αυτή την κατάσταση! +Το Unicode προτείνει έναν μοναδικό αριθμό για κάθε χαρακτήρα, ανεξάρτητα από το λειτουργικό σύστημα, ανεξάρτητα από το λογισμικό, ανεξάρτητα από την γλώσσα. Την κωδικοσελίδα Unicode έχουν ασπασθεί κορυφαίοι παράγοντες του χώρου των λογισμικών όπως οι: Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys και πολλοί άλλοι. Το Unicode απαιτούν πολλές σύγχρονες τυποποιήσεις όπως οι: XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML, κ.λπ., και είναι η επίσημη μέθοδος εφαρμογής της τυποποίησης ISO/IEC 10646. Υποστηρίζεται από πολλά λειτουργικά συστήματα, όλους τους σύχρονους περιηγητές Διαδικτύου, και πολλά άλλα προϊόντα. Η εμφάνιση της κωδικοσελίδας Unicode, και η διαθεσιμότητα εργαλείων που να την υποστηρίζουν είναι από τις σημαντικότερες εξελίξεις της πρόσφατης τεχνολογίας λογισμικών. + +Η ενσωμάτωση του Unicode σε εφαρμογές πελάτη-διακομιστή ή "multi-tiered" προσφέρει σημαντικές οικονομίες σε σχέση με τις ως τώρα υπάρχουσες κωδικοσελίδες. Χάρις στο Unicode ένα και μόνο προϊόν ή μία και μόνη τοποθεσία Διαδικτύου μπορεί να επικοινωνεί με διάφορα λειτουργικά συστήματα, σε διάφορες γλώσσες και χώρες, χωρίς την ανάγκη επαναπρογραμματισμού. Γίνεται έτσι δυνατή η μεταφορά δεδομένων ανάμεσα σε πλήθος διαφορετικών συστημάτων δίχως κίνδυνο αλλοίωσης. 
+ +Σχετικά με το Κονσόρτιουμ Unicode +Tο Κονσόρτιουμ Unicode είναι ένας κοινωφελής οργανισμός· ιδρύθηκε για να αναπτύξει, να επεκτείνει και να μεταδώσει την χρήση της κωδικοσελίδας Unicode που καθορίζει την αναπαράσταση κειμένου σε σύγχρονα λογισμικά προϊόντα και τυποποιήσεις. Μεγάλος αριθμός εταιρειών και οργανισμών της διεθνούς βιομηχανίας υπολογιστών και λογισμικών είναι μέλη του Κονσόρτιουμ Unicode. Το Κονσόρτιουμ χρηματοδοτείται μόνο από τις συνδρομές των μελών του. Μέλος του κονσόρτιουμ Unicode μπορεί να γίνει οιοσδήποτε (οργανισμός, εταιρεία ή ιδιώτης, οπουδήποτε στον κόσμο) που να υποστηρίζει την κωδικοσελίδα Unicode και να επιθυμεί να συνδράμει στην επέκταση και στην εφαρμογή της. + +Για περαιτέρω πληροφορίες, βλέπε τις εξής ιστοσελίδες: Γλωσσάρι, Δείγματα προϊόντων συμβατών με το Unicode, Τεχνική Εισαγωγή και Χρήσιμες πηγές πληροφοριών. + +(ANCIENT) + +ἄνδρα μοι ἔννεπε, μοῦσα, πολύτροπον, ὃς μάλα πολλὰ +πλάγχθη, ἐπεὶ Τροίης ἱερὸν πτολίεθρον ἔπερσεν: +πολλῶν δ’ ἀνθρώπων ἴδεν ἄστεα καὶ νόον ἔγνω, +πολλὰ δ’ ὅ γ’ ἐν πόντῳ πάθεν ἄλγεα ὃν κατὰ θυμόν, +ἀρνύμενος ἥν τε ψυχὴν καὶ νόστον ἑταίρων. +ἀλλ’ οὐδ’ ὣς ἑτάρους ἐρρύσατο, ἱέμενός περ: +αὐτῶν γὰρ σφετέρῃσιν ἀτασθαλίῃσιν ὄλοντο, +νήπιοι, οἳ κατὰ βοῦς Ὑπερίονος Ἠελίοιο +ἤσθιον: αὐτὰρ ὁ τοῖσιν ἀφείλετο νόστιμον ἦμαρ. +τῶν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμῖν. +* +ἔνθ’ ἄλλοι μὲν πάντες, ὅσοι φύγον αἰπὺν ὄλεθρον, +οἴκοι ἔσαν, πόλεμόν τε πεφευγότες ἠδὲ θάλασσαν: +τὸν δ’ οἶον νόστου κεχρημένον ἠδὲ γυναικὸς +νύμφη πότνι’ ἔρυκε Καλυψὼ δῖα θεάων +ἐν σπέσσι γλαφυροῖσι, λιλαιομένη πόσιν εἶναι. +ἀλλ’ ὅτε δὴ ἔτος ἦλθε περιπλομένων ἐνιαυτῶν, +τῷ οἱ ἐπεκλώσαντο θεοὶ οἶκόνδε νέεσθαι +εἰς Ἰθάκην, οὐδ’ ἔνθα πεφυγμένος ἦεν ἀέθλων +καὶ μετὰ οἷσι φίλοισι. θεοὶ δ’ ἐλέαιρον ἅπαντες +νόσφι Ποσειδάωνος: ὁ δ’ ἀσπερχὲς μενέαινεν +ἀντιθέῳ Ὀδυσῆι πάρος ἣν γαῖαν ἱκέσθαι. 
+* +ἀλλ’ ὁ μὲν Αἰθίοπας μετεκίαθε τηλόθ’ ἐόντας, +Αἰθίοπας τοὶ διχθὰ δεδαίαται, ἔσχατοι ἀνδρῶν, +οἱ μὲν δυσομένου Ὑπερίονος οἱ δ’ ἀνιόντος, +ἀντιόων ταύρων τε καὶ ἀρνειῶν ἑκατόμβης. +ἔνθ’ ὅ γ’ ἐτέρπετο δαιτὶ παρήμενος: οἱ δὲ δὴ ἄλλοι +Ζηνὸς ἐνὶ μεγάροισιν Ὀλυμπίου ἁθρόοι ἦσαν. +τοῖσι δὲ μύθων ἦρχε πατὴρ ἀνδρῶν τε θεῶν τε: +μνήσατο γὰρ κατὰ θυμὸν ἀμύμονος Αἰγίσθοιο, +τόν ῥ’ Ἀγαμεμνονίδης τηλεκλυτὸς ἔκταν’ Ὀρέστης: +τοῦ ὅ γ’ ἐπιμνησθεὶς ἔπε’ ἀθανάτοισι μετηύδα: +* +“ὢ πόποι, οἷον δή νυ θεοὺς βροτοὶ αἰτιόωνται: +ἐξ ἡμέων γάρ φασι κάκ’ ἔμμεναι, οἱ δὲ καὶ αὐτοὶ +σφῇσιν ἀτασθαλίῃσιν ὑπὲρ μόρον ἄλγε’ ἔχουσιν, +ὡς καὶ νῦν Αἴγισθος ὑπὲρ μόρον Ἀτρεί̈δαο +γῆμ’ ἄλοχον μνηστήν, τὸν δ’ ἔκτανε νοστήσαντα, +εἰδὼς αἰπὺν ὄλεθρον, ἐπεὶ πρό οἱ εἴπομεν ἡμεῖς, +Ἑρμείαν πέμψαντες, ἐύσκοπον ἀργεϊφόντην, +μήτ’ αὐτὸν κτείνειν μήτε μνάασθαι ἄκοιτιν: +ἐκ γὰρ Ὀρέσταο τίσις ἔσσεται Ἀτρεί̈δαο, +ὁππότ’ ἂν ἡβήσῃ τε καὶ ἧς ἱμείρεται αἴης. +ὣς ἔφαθ’ Ἑρμείας, ἀλλ’ οὐ φρένας Αἰγίσθοιο +πεῖθ’ ἀγαθὰ φρονέων: νῦν δ’ ἁθρόα πάντ’ ἀπέτισεν.” + +@SET [[[:Greek:]&[\u0370-\u03E1 \u03F0-\u03FF]] [\:-;?\u00B7\u037E\u0387]] \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt new file mode 100644 index 00000000000..8603663c655 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt @@ -0,0 +1,26 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +@UPPERFILTER@ +什么是Unicode(统一码)? 
+Unicode给每个字符提供了一个唯一的数字, +不论是什么平台, +不论是什么程序, +不论是什么语言。 + +基本上,计算机只是处理数字。它们指定一个数字,来储存字母或其他字符。在创造Unicode之前,有数百种指定这些数字的编码系统。没有一个编码可以包含足够的字符:例如,单单欧州共同体就需要好几种不同的编码来包括所有的语言。即使是单一种语言,例如英语,也没有哪一个编码可以适用于所有的字母,标点符号,和常用的技术符号。 + +这些编码系统也会互相冲突。也就是说,两种编码可能使用相同的数字代表两个不同的字符,或使用不同的数字代表相同的字符。任何一台特定的计算机(特别是服务器)都需要支持许多不同的编码,但是,不论什么时候数据通过不同的编码或平台之间,那些数据总会有损坏的危险。 + +Unicode正在改变所有这一切! +Unicode给每个字符提供了一个唯一的数字,不论是什么平台,不论是什么程序,不论什么语言。Unicode标准已经被这些工业界的领导们所采用,例如:Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys和其它许多公司。最新的标准都需要Unicode,例如XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML等等,并且,Unicode是实现ISO/IEC 10646的正规方式。许多操作系统,所有最新的浏览器和许多其他产品都支持它。Unicode标准的出现和支持它工具的存在,是近来全球软件技术最重要的发展趋势。 + +将Unicode与客户服务器或多层应用程序和网站结合,比使用传统字符集节省费用。Unicode使单一软件产品或单一网站能够贯穿多个平台,语言和国家,而不需要重建。它可将数据传输到许多不同的系统,而无损坏。 + +关于Unicode学术学会 +Unicode学术学会是一个非盈利的组织,是为发展,扩展和推广使用Unicode标准而建立的,Unicode学术学会设立了现代软件产品和标准文本的表示法。学术学会的会员代表了广泛领域的计算机和资讯工业的公司和组织。学术学会只由会员提供资金。Unicode学术学会的会员资格开放给世界上任何支持Unicode标准和希望协助其扩展和执行的组织及个人。 + +欲知更多信息,请参阅术语词汇表,Unicode产品样本,技术简介和参考资料。 + +Chinese translation by 黎國珍, Xerox \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt new file mode 100644 index 00000000000..9d26745a874 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt @@ -0,0 +1,26 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +@UPPERFILTER@ +מה זה יוניקוד (Unicode)? +יוניקוד מקצה מספר ייחודי לכל תו, +לא משנה על איזו פלטפורמה, +לא משנה באיזו תוכנית, +ולא משנה באיזו שפה. + +באופן בסיסי, מחשבים עוסקים רק במספרים. הם מאחסנים אותיות ותווים אחרים על-ידי הקצאת מספר לכל אחד מהם. בטרם הומצא היוניקוד, היו מאות מערכות קידוד שונות להקצאת המספרים הללו. 
אף לא אחת מהן יכלה להכיל כמות תווים מספקת. לדוגמא: רק לאיחוד האירופאי נדרשים כמה סוגי קידודים שונים על מנת לכסות את כל השפות המדוברות בו. יתירה מזאת אף לשפה בודדת, כמו אנגלית למשל, לא היה די במערכת קידוד אחת בעבור כל האותיות, סימני הפיסוק והסמלים הטכניים שבשימוש שוטף. + +מערכות קידוד אלו אף סותרות זו את זו. כלומר, שני קידודים יכולים להשתמש באותו מספר לשני תוים נבדלים, או להשתמש במספרים שונים לאותו תו. על כל מחשב (ובמיוחד שרתים) לתמוך במספר רב של מערכות קידוד שונות; אולם כל אימת שנתונים עוברים בין מערכות קידוד או פלטפורמות שונות קיים הסיכון שייפגמו. + +יוניקוד משנה את כל זה! +יוניקוד מקצה מספר ייחודי לכל תו, ללא תלות בפלטפורמה, בתוכנית, או בשפה. תקן היוניקוד אומץ על-ידי המובילים בתעשייה כמו Apple‏, HP‏, IBM‏, JustSystem‏, Microsoft‏, Oracle‏, SAP‏, Sun‏, Sybase‏, Unisys‏ ורבים אחרים. יוניקוד נדרש על-ידי תקנים מודרניים כמו XML‏, Java‏, ECMAScript (JavaScript)‎‏, LDAP‏, CORBA 3.0‎‏, WML‏ וכדומה, ומהווה למעשה את היישום הרשמי של תקן ISO/IEC 10646. הוא נתמך על ידי מערכות הפעלה רבות, כל הדפדפנים החדישים, ומוצרים רבים אחרים. הופעת תקן היוניקוד וזמינות הכלים התומכים בו נמנות עם המגמות הכלל-עולמיות החשובות ביותר, אשר מסתמנות לאחרונה בטכנולוגיית התוכנה. + +שילוב יוניקוד ביישומי שרת-לקוח או ביישומים רבי-שכבות ובאתרי אינטרנט מאפשר חיסכון ניכר בעלויות לעומת השימוש בסדרות התווים המסורתיות. הודות ליוניקוד, מוצר תוכנה אחד או אתר יחיד ברשת יכול להרחיב את יעדיו למגוון פלטפורמות, ארצות ושפות ללא צורך בשינויים מרחיקים. יוניקוד מאפשר מעבר נתונים דרך מערכות רבות ושונות מבלי שייפגמו. + +פרטים אודות הקונסורציום של יוניקוד (Unicode Consortium) +הקונסורציום של יוניקוד הוא ארגון ללא מטרת רווח שנוסד כדי לפתח, להרחיב ולקדם את השימוש בתקן יוניקוד, אשר מגדיר את ייצוג הטקסט במוצרי תוכנה ותקנים מודרניים. חברים בקונסורציום מגוון רחב של תאגידים וארגונים בתעשיית המחשבים ועיבוד המידע. הקונסורציום ממומן על-ידי דמי-חבר בלבד. החברות בקונסורציום יוניקוד פתוחה לארגונים ולאנשים פרטיים, בכל רחבי העולם, אשר תומכים בתקן יוניקוד ומעוניינים לסייע בהתפתחותו והטמעתו. 
+ +למידע נוסף, ראה מילון מונחים, רשימה חלקית של מוצרים מותאמים ליוניקוד, מבוא טכני ו- חומרי עזר [קישורים באנגלית]. + +@SET [[:Hebrew:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138]] \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html b/demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html new file mode 100644 index 00000000000..026394a11e4 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html @@ -0,0 +1,154 @@ + + + + + + +New Transliteration Test Files + + + + +

    New Transliteration Test Files

    +

    The Test_*.html files show the transliteration of characters for given +languages. The sample for each language consists of "What Is Unicode" +in Thai, followed by other available text. The text is broken apart into +sentences for ease of viewing (note: we know of some problems with the sentence +rules for Japanese and Chinese). The left column is the original, and the right +is the romanization. The program also converts back to the original script. If +there is a discrepancy between the source and the reverse transformation, that +is indicated by making the background red +from that point on.

    +
    +

    Note: If you have some more text that you would like added to the + sample, just let me know. I am particularly interested in name lists, since + they are the typical source.

    +
    +

    Standards

    +

    The goal is to follow a given standard, such as ISO* or UNGEGN wherever +possible. We also need to round-trip, so in some cases, that means adding some +additional accent marks to disambiguate characters. And often the source +standards are missing some characters, such as characters with combining Hamzas +in Arabic. Remember that the goal for these is transliteration (unambiguously +representing all the letters in the original), not transcription (representing +the best pronunciation).

    +
      +
    • Thai: ISO 11940 < http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf + > plus a few items: +
        +
      • Accents may be added to the Latin for disambiguation.
      • +
      • In the next release, we'd like to do the UNGEGN version < http://www.eki.ee/wgrs/rom1_th.pdf + > which is probably more useful (and readable), and follows more + closely the Thai standard.
      • +
      • Spaces are provided at word-breaks, using the Thai BreakIterator.
      • +
      • An inherent vowel (ọ) is added, as in UNGEGN. The dot is for + disambiguation. +
          +
        • Note: if the inherent vowel positions cannot be algorithmically + determined, let me know and I will remove them.
        • +
        +
      • +
      +
    • +
    • Arabic: Generally follows + UNGEGN < http://www.eki.ee/wgrs/rom1_ar.pdf + > +
        +
      • Accents may be added to the Latin for disambiguation.
      • +
      • Occasionally deviates in the direction of ISO 233 < http://homepage.mac.com/sirbinks/pdf/Arabic.pdf + > +
          +
        • with underdot instead of cedilla for letters like SAD, since those + are explicitly in Unicode for transliteration of Arabic
        • +
        • adding extra non-Arabic-language letters, like PEH. Note: not all + extended Arabic characters are handled yet.
        • +
        +
      • +
      • Does not do assimilation of "al", nor hyphenation of + it. +
          +
        • While it could be done, we need to determine whether a prefix + "al" could occur other than as the definite article (since + no space is used).
        • +
        +
      • +
      • This is transliteration. For transcription one would want an + engine that added points appropriately to the Arabic.
      • +
      +
    • +
    • Hebrew: Generally + follows UNGEGN < http://www.eki.ee/wgrs/rom1_he.pdf + >, with some exceptions: +
        +
      • Accents may be added to the Latin for disambiguation.
      • +
      • Combinations of dagesh, shin/sin dot that would produce different + letters are not yet called out.
      • +
      • Note that the final forms are not preserved. Thus, when going from + Latin to Hebrew, a character is given final form depending on its + position. +
          +
        • E.g. מםמם => mmmm => + מממם
        • +
        +
      • +
      • This is transliteration. For transcription one would want an + engine that added points appropriately to the Hebrew.
      • +
      • See also < http://homepage.mac.com/sirbinks/pdf/Hebrew.r1.pdf + > for the ISO version. The Chicago Manual of Style has a clear table + of mappings for the vowel marks.
      • +
      +
    • +
    • Han: Uses the CEDICT + data plus Unicode Unihan kMandarin values for pinyin. Doesn't + roundtrip! +
        +
      • Note: the Chinese pronunciation of Han characters varies by + context and grammar, though nowhere near as much as Japanese. 
          +
        • Ideally we'd have an underlying engine for this. In 2.4 we will + have a plug-in interface so that people could add one, such as the + IBM engine.
        • +
        • The data from CEDICT and Unihan don't list the most frequent + choice first, so we will be updating that.
        • +
        +
      • +
      +
    • +
    • Greek/UNGEGN: Uses a + modern Greek transliteration, based on the UNGEGN rules at < http://www.eki.ee/wgrs/rom1_el.pdf + >. This version will not roundtrip ancient Greek.
    • +
    • Greek: Uses a classic Greek + transliteration. This version will not roundtrip modern Greek.
    • +
    +

    Notes

    +
      +
    1. For readability, the files have a few other things besides just the + transliteration: +
        +
      • The first word of each sentence is titlecased, as are names (where we + have a name-list, such as in Thai).
      • +
      • The Latin in the original is mapped to the private-use zone before + conversion, and then again after conversion. This does have the downside + that any rules (such as in Han) that need to know the context (e.g. for + inserting spaces or capitalization) will gum up a little bit. This is + just an artifact of the test display.
      • +
      +
    2. +
    3. I don't think that ISO 11940 is a particularly good way to romanize, but + it is at least complete and a standard. So what I am interested in just for + now is whether the samples in the file follow it (with the above + exceptions).
    4. +
    5. Some of the files also have a set of characters at the end, one character + per row, with a following row listing the hex and name.
    6. +
    7. The source rules for all of these are at the following URL. So if you want + to know the details of how the characters are handled, that is the place to + look. + 
    8. +
    + + + + diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt new file mode 100644 index 00000000000..631f191529a --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt @@ -0,0 +1,69 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +@UPPERFILTER@ +Unicode คืออะไร? +Unicode กำหนดหมายเลขเฉพาะสำหรับทุกอักขระ +โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด +ไม่ขึ้นกับว่าจะเป็นโปรแกรมใด +และไม่ว่าจะเป็นภาษาใด + +โดยพื้นฐานแล้ว, คอมพิวเตอร์จะเกี่ยวข้องกับเรื่องของตัวเลข. คอมพิวเตอร์จัดเก็บตัวอักษรและอักขระอื่นๆ โดยการกำหนดหมายเลขให้สำหรับแต่ละตัว. ก่อนหน้าที่๊ Unicode จะถูกสร้างขึ้น, ได้มีระบบ encoding อยู่หลายร้อยระบบสำหรับการกำหนดหมายเลขเหล่านี้. ไม่มี encoding ใดที่มีจำนวนตัวอักขระมากเพียงพอ: ยกตัวอย่างเช่น, เฉพาะในกลุ่มสหภาพยุโรปเพียงแห่งเดียว ก็ต้องการหลาย encoding ในการครอบคลุมทุกภาษาในกลุ่ม. หรือแม้แต่ในภาษาเดี่ยว เช่น ภาษาอังกฤษ ก็ไม่มี encoding ใดที่เพียงพอสำหรับทุกตัวอักษร, เครื่องหมายวรรคตอน และสัญลักษณ์ทางเทคนิคที่ใช้กันอยู่ทั่วไป. + +ระบบ encoding เหล่านี้ยังขัดแย้งซึ่งกันและกัน. นั่นก็คือ, ในสอง encoding สามารถใช้หมายเลขเดียวกันสำหรับตัวอักขระสองตัวที่แตกต่างกัน,หรือใช้หมายเลขต่างกันสำหรับอักขระตัวเดียวกัน. ในระบบคอมพิวเตอร์ (โดยเฉพาะเซิร์ฟเวอร์) ต้องมีการสนับสนุนหลาย encoding; และเมื่อข้อมูลที่ผ่านไปมาระหว่างการเข้ารหัสหรือแพล็ตฟอร์มที่ต่างกัน, ข้อมูลนั้นจะเสี่ยงต่อการผิดพลาดเสียหาย. + +Unicode จะเปลี่ยนแปลงสิ่งเหล่านั้นทั้งหมด! +Unicode กำหนดหมายเลขเฉพาะสำหรับแต่ละอักขระ, โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด, ไม่ขึ้นกับว่าจะเป็นโปรแกรมใดและไม่ว่าจะเป็นภาษาใด. มาตรฐาน Unicode ได้ถูกนำไปใช้โดยผู้นำในอุตสาหกรรม เช่น Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys และอื่นๆ อีกมาก. 
Unicode เป็นสิ่งที่จำเป็นสำหรับมาตรฐานใหม่ๆ เช่น XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML ฯลฯ., และเป็นแนวทางอย่างเป็นทางการในการทำ ISO/IEC 10646. Unicode ได้รับการสนับสนุนในระบบปฏิบัติการจำนวนมาก, บราวเซอร์ใหม่ๆ ทกตัว, และผลิตภัณฑ์อื่นๆ อีกมาก. การเกิดขึ้นของ Unicode Standard และทูลส์ต่างๆ ที่มีในการสนับสนุน Unicode, เป็นหนึ่งในแนวโน้มทางเทคโนโลยีซอฟต์แวร์ระดับโลกที่มีความสำคัญที่สุด. + +การรวม Unicode เข้าไปในระบบไคลเอ็นต์-เซิร์ฟเวอร์ หรือแอ็พพลิเคชันแบบ multi-tiered และเว็บไซต์ จะทำให้เกิดการประหยัดค่าใช้จ่ายมากกว่าการใช้ชุดอักขระแบบเดิม. Unicode ทำให้ผลิตภัณฑ์ซอฟต์แวร์หนึ่งเดียว หรือเว็บไซต์แห่งเดียว รองรับได้หลายแพล็ตฟอร์ม, หลายภาษาและหลายประเทศโดยไม่ต้องทำการรื้อปรับระบบ. Unicode ยังทำให้ข้อมูลสามารถเคลื่อนย้ายไปมาในหลายๆ ระบบโดยไม่เกิดความผิดพลาดเสียหาย. + +เกี่ยวกับ Unicode Consortium +Unicode Consortium เป็นองค์กรไม่แสวงหากำไรที่ก่อตั้งขึ้นเพื่อพัฒนา, ขยายและส่งเสริมการใช้ Unicode Standard, ซึ่งกำหนดรูปแบบการแทนค่าของข้อความในผลิตภัณฑ์ซอฟต์แวร์และมาตรฐานใหม่ๆ. สมาชิกของสมาคมเป็นตัวแทนจากบริษัทและองค์กรในอุตสาหกรรมคอมพิวเตอร์และการประมวลผลสารสนเทศ. สมาคมได้รับการสนับสนุนทางการเงินผ่านทางค่าธรรมเนียมของการเป็นสมาชิกเท่านั้น. สมาชิกภาพของ Unicode Consortium เปิดกว้างสำหรับองค์กรหรือบุคคลใดๆ ในโลกที่ต้องการสนับสนุน Unicode Standard และช่วยเหลือการขยายตัวและการนำ Unicode ไปใช้งาน. + +สำหรับข้อมูลเพิ่มเติม, ให้ดูที่ Glossary, Sample Unicode-Enabled Products, Technical Introduction และ Useful Resources. 
+ +เป็นมนุษย์สุดประเสริฐเลิศคุณค่า +กว่าบรรดาฝูงสัตว์เดรัจฉาน +จงฝ่าฟันพัฒนาวิชาการ +อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร +ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า +หัดอภัยเหมือนกีฬาอัชฌาสัย +ปฏิบัติประพฤติกฎกำหนดใจ +พูดจาให้จ๊ะ ๆ จ๋า ๆ น่าฟังเอยฯ + +แหล่งที่มา : สมาคมคอมพิวเตอร์แห่งประเทศไทย + +ฅนฃวด kho khuat and kho khon +@TITLECASE@ +ก๊กเฮง แซ่แต้ +กชกร ศราทธทัต +กติกา อังคสุภณ +กนก ธรรมประทีป +กนก วงศ์ทองศรี +กนกกร ช้างเย็นฉ่ำ +กนกฉัตร์ ถาวรนันท์ +กนกนวล โปษยะนันทน์ +กนกพร คมคาย +กนกพร ตีรเลิศพานิช +กนกพร พันทร +กนกพร ศรีบัณฑิต +กนกพร อติวรรณาพัฒน์ +กนกพรรณ ศรีวนาภิรมย์ +กนกรัตน์ เกียรติยิ่งอังศุลี +กนกรัตน์ สุธรรมพิทักษ์ +กนกวรรณ คงคาประเสริฐ +กนกวรรณ แซ่เตียว +กนกวรรณ บุญประเสริฐ +กนกวรรณ รักทรัพย์ +กนกวรรณ สัจจพงษ์ +กนกวรรณ อุ้ยวงศ์ไพศาล +กนกศักดิ์ ยิ่งยง +กนกแก้ว กรสมิต +กนิษฐา ทนุถนอมราษฎร์ +กนิษฐา หวังวิบูลย์กิจ +กมล กาญจนโรจน์ +กมล คัมภีร์ +กมล เจตน์มงคลรัตน์ +กมล ชูตระกูลธรรม +@SET [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java b/demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java new file mode 100644 index 00000000000..597721ca3ec --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java @@ -0,0 +1,257 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.demo.translit; + +import java.awt.event.KeyEvent; + +import com.ibm.icu.dev.demo.impl.DumbTextComponent; +import com.ibm.icu.text.ReplaceableString; +import com.ibm.icu.text.Transliterator; + +/** + * A subclass of {@link DumbTextComponent} that passes key events through + * a {@link com.ibm.icu.text.Transliterator}. 
+ * + * @author Alan Liu + */ +public class TransliteratingTextComponent extends DumbTextComponent { + + /** + * For serialization + */ + private static final long serialVersionUID = -8672128213174154047L; + + private static boolean DEBUG = false; + + private Transliterator translit = null; + + // NOTE: DISABLE THE START AND CURSOR UNTIL WE CAN GET IT TO WORK AT ALL + + // Index into getText() where the start of transliteration is. + // As we commit text during transliteration, we advance + // this. + //private int start = 0; + + // Index into getText() where the cursor is; cursor >= start + //private int cursor = 0; + +// private static final String COPYRIGHT = +// "\u00A9 IBM Corporation 1999. All rights reserved."; + + /** + * Constructor. + */ + public TransliteratingTextComponent() { + super(); + /* + addActionListener(new ActionListener() { + public void actionPerformed(ActionEvent e) { + // We get an ActionEvent only when the selection changes + resetTransliterationStart(); + } + }); + */ + } + + /** + * {@link DumbTextComponent} API. Framework method that is called + * when a KeyEvent is received. This implementation + * runs the new character through the current + * Transliterator, if one is set, and inserts the + * transliterated text into the buffer. + */ + protected void handleKeyTyped(KeyEvent e) { + char ch = e.getKeyChar(); + + if (translit == null) { + setKeyStart(-1); + super.handleKeyTyped(e); + return; + } + + transliterate(ch, false); + } + + public void flush() { + if (translit != null) transliterate('\uFFFF', true); + } + + + protected void transliterate(char ch, boolean flush) { + + // ------------------------------------------------------------ + // The following case motivates the two lines that recompute + // start and cursor below. 
+ + // " " + // a b c q r|s t u m m + // 0 1 2 3 4 5 6 7 8 9 + // 0 1 2 + + // start 3, cursor 5, sel 6 -> { 0, 3, 2 } + // : new int[] { 0, sel - start, cursor - start }; + + // sz>99|9 + + // " { " + // a b c q r 9 9|9 t u m m + // 0 1 2 3 4 5 6 7 8 9 a b + // 0 1 2 3 4 + + // { 3, 5, 4 } -> start 6, cursor 7, sel 8 + // : start += index[0]; + // : cursor = start + index[2] - index[0]; + // ------------------------------------------------------------ + + // Need to save start because calls to replaceRange will update + // start and cursor. + //int saveStart = start; + + int end = flush ? getSelectionEnd() : getSelectionStart(); + String sourceText = getText().substring(0,end); + ReplaceableString buf = new ReplaceableString(sourceText); + /*buf.replace(0, 1, getText().substring(start, + getSelectionStart()));*/ + + Transliterator.Position index = new Transliterator.Position(); + index.contextLimit = buf.length(); + index.contextStart = 0; + index.start = getKeyStart(); + if (index.start == -1) index.start = getSelectionStart(); + index.limit = buf.length(); + + // StringBuffer log = null; + if (DEBUG) { + System.out.println("Transliterator: " + translit.getID()); + System.out.println("From:\t" + '"' + buf.toString() + '"' + + "; {cs: " + index.contextStart + + ", s: " + index.start + + ", l: " + index.limit + + ", cl: " + index.contextLimit + + "}" + "; '" + ch + "'" + + " " + getKeyStart() + ); + } + + if (flush) { + translit.finishTransliteration(buf, index); + } else { + translit.transliterate(buf, index, ch); + } + + if (DEBUG) { + System.out.println("To:\t" + '"' + buf.toString() + '"' + + "; {cs: " + index.contextStart + + ", s: " + index.start + + ", l: " + index.limit + + ", cl: " + index.contextLimit + + "}" + ); + System.out.println(); + } + /* + buf.replace(buf.length(), buf.length(), String.valueOf(ch)); + translit.transliterate(buf); + */ + + String result = buf.toString(); + //if (result.equals(sourceText + ch)) return; + + replaceRange(result, 0, 
getSelectionEnd()); + setKeyStart(index.start); + + // At this point start has been changed by the callback to + // resetTransliteratorStart() via replaceRange() -- so use our + // local copy, saveStart. + + // The START index is zero-based. On entry to transliterate(), + // it was zero. We can therefore just add it to our original + // getText()-based index value of start (in saveStart) to get + // the new getText()-based start. +// start = saveStart + index.contextStart; + + // Make the cursor getText()-based. The CURSOR index is zero-based. +// cursor = start + index.start - index.contextStart; + +/* + if (DEBUG) { + String out = buf.toString(); + log.append(out.substring(0, index.contextStart)). + append('{'). + append(out.substring(index.contextStart, index.start)). + append('|'). + append(out.substring(index.start)). + append('"'); + log.append(", {" + index.contextStart + ", " + index.contextLimit + ", " + index.start + "}, "); +// log.append("start " + start + ", cursor " + cursor); + log.append(", sel " + getSelectionStart()); + System.out.println(escape(log.toString())); + } + */ + } + + /** + * Set the {@link com.ibm.icu.text.Transliterator} and direction to + * use to process incoming KeyEvents. + * @param t the {@link com.ibm.icu.text.Transliterator} to use + */ + public void setTransliterator(Transliterator t) { + /* + if (translit != t) { // [sic] pointer compare ok; singletons + resetTransliterationStart(); + } + */ + translit = t; + } + + public Transliterator getTransliterator() { + return translit; + } + + /** + * Reset the start point at which transliteration begins. This + * needs to be done when the user moves the cursor or when the + * current {@link com.ibm.icu.text.Transliterator} is changed. + */ + /* + private void resetTransliterationStart() { + start = getSelectionStart(); + cursor = start; + } + */ + + /** + * Escape non-ASCII characters as Unicode. + * JUST FOR DEBUGGING OUTPUT. 
+ */ + public static final String escape(String s) { + StringBuffer buf = new StringBuffer(); + for (int i=0; i= ' ' && c <= 0x007F) { + if (c == '\\') { + buf.append("\\\\"); // That is, "\\" + } else { + buf.append(c); + } + } else { + buf.append("\\u"); + if (c < 0x1000) { + buf.append('0'); + if (c < 0x100) { + buf.append('0'); + if (c < 0x10) { + buf.append('0'); + } + } + } + buf.append(Integer.toHexString(c)); + } + } + return buf.toString(); + } +} diff --git a/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java b/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java new file mode 100644 index 00000000000..d0865c3bb96 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java @@ -0,0 +1,294 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.demo.translit; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; + +import com.ibm.icu.impl.Utility; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UScript; +import com.ibm.icu.text.Normalizer; +import com.ibm.icu.text.Transliterator; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; + +public class TransliterationChart { + public static void main(String[] args) throws IOException { + System.out.println("Start"); + UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]"); + int[] indicScripts = { + UScript.LATIN, + 
UScript.DEVANAGARI, + UScript.BENGALI, + UScript.GURMUKHI, + UScript.GUJARATI, + UScript.ORIYA, + UScript.TAMIL, + UScript.TELUGU, + UScript.KANNADA, + UScript.MALAYALAM, + }; + String[] names = new String[indicScripts.length]; + UnicodeSet[] sets = new UnicodeSet[indicScripts.length]; + Transliterator[] fallbacks = new Transliterator[indicScripts.length]; + for (int i = 0; i < indicScripts.length; ++i) { + names[i] = UScript.getName(indicScripts[i]); + sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]"); + fallbacks[i] = Transliterator.getInstance("any-" + names[i]); + } + EquivClass eq = new EquivClass(new ReverseComparator()); + PrintWriter pw = openPrintWriter("transChart.html"); + pw.println(""); + pw.println("Indic Transliteration Chart"); + + Transliterator anyToLatin = Transliterator.getInstance("any-latin"); + + String testString = "\u0946\u093E"; + + UnicodeSet failNorm = new UnicodeSet(); + Set latinFail = new TreeSet(); + + for (int i = 0; i < indicScripts.length; ++i) { + if (indicScripts[i] == UScript.LATIN) continue; + String source = names[i]; + System.out.println(source); + UnicodeSet sourceChars = sets[i]; + + for (int j = 0; j < indicScripts.length; ++j) { + if (i == j) continue; + String target = names[j]; + Transliterator forward = Transliterator.getInstance(source + '-' + target); + Transliterator backward = forward.getInverse(); + UnicodeSetIterator it = new UnicodeSetIterator(sourceChars); + while (it.next()) { + if (lengthMarks.contains(it.codepoint)) continue; + String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0); + //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue; + if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) { + failNorm.add(it.codepoint); + } + String t = fix(forward.transliterate(s)); + if (t.equals(testString)) { + System.out.println("debug"); + } + + String r = fix(backward.transliterate(t)); + if (Normalizer.compare(s,r,0) == 0) { + if (indicScripts[j] != UScript.LATIN) 
eq.add(s,t); + } else { + if (indicScripts[j] == UScript.LATIN) { + latinFail.add(s + " - " + t + " - " + r); + } + } + } + } + } + // collect equivalents + pw.println(""); + for (int i = 0; i < indicScripts.length; ++i) { + pw.print(""); + } + pw.println(""); + + Iterator rit = eq.getSetIterator(new MyComparator()); + while(rit.hasNext()) { + Set equivs = (Set)rit.next(); + pw.print(""); + Iterator sit = equivs.iterator(); + String source = (String)sit.next(); + String item = anyToLatin.transliterate(source); + if (item.equals("") || source.equals(item)) item = " "; + pw.print(""); + for (int i = 1; i < indicScripts.length; ++i) { + sit = equivs.iterator(); + item = ""; + while (sit.hasNext()) { + String trial = (String)sit.next(); + if (!sets[i].containsAll(trial)) continue; + item = trial; + break; + } + String classString = ""; + if (item.equals("")) { + classString = " class='miss'"; + String temp = fallbacks[i].transliterate(source); + if (!temp.equals("") && !temp.equals(source)) item = temp; + } + String backup = item.equals("") ? " " : item; + pw.print("" + + backup + "
    " + Utility.hex(item) + ""); + } + /* + Iterator sit = equivs.iterator(); + while (sit.hasNext()) { + String item = (String)sit.next(); + pw.print("
    "); + } + */ + pw.println(""); + } + pw.println("
    " + names[i].substring(0,3) + "
    " + item + "" + item + "
    "); + if (true) { + pw.println("

    Failed Normalization

    "); + + UnicodeSetIterator it = new UnicodeSetIterator(failNorm); + UnicodeSet pieces = new UnicodeSet(); + while (it.next()) { + String s = UTF16.valueOf(it.codepoint); + String d = Normalizer.normalize(s,Normalizer.NFD,0); + pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint) + + "; " + d + ", " + Utility.hex(d) + ", "); + pw.println(UCharacter.getName(d.charAt(1)) + "
    "); + if (UCharacter.getName(d.charAt(1)).indexOf("LENGTH") >= 0) pieces.add(d.charAt(1)); + } + pw.println(pieces); + + pw.println("

    Failed Round-Trip

    "); + Iterator cit = latinFail.iterator(); + while (cit.hasNext()) { + pw.println(cit.next() + "
    "); + } + } + + pw.println(""); + pw.close(); + System.out.println("Done"); + } + + public static String fix(String s) { + if (s.equals("\u0946\u093E")) return "\u094A"; + if (s.equals("\u0C46\u0C3E")) return "\u0C4A"; + if (s.equals("\u0CC6\u0CBE")) return "\u0CCA"; + + if (s.equals("\u0947\u093E")) return "\u094B"; + if (s.equals("\u0A47\u0A3E")) return "\u0A4B"; + if (s.equals("\u0AC7\u0ABE")) return "\u0ACB"; + if (s.equals("\u0C47\u0C3E")) return "\u0C4B"; + if (s.equals("\u0CC7\u0CBE")) return "\u0CCB"; + + //return Normalizer.normalize(s,Normalizer.NFD,0); + return s; + } + + public static PrintWriter openPrintWriter(String fileName) throws IOException { + File lf = new File(fileName); + System.out.println("Creating file: " + lf.getAbsoluteFile()); + + return new PrintWriter( + new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream(fileName), "UTF8"), 4*1024)); + } + + + public static String getName(String s, String separator) { + int cp; + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(s,i); + if (i != 0) sb.append(separator); + sb.append(UCharacter.getName(cp)); + } + return sb.toString(); + } + + static class MyComparator implements Comparator { + public int compare(Object o1, Object o2) { + Iterator i1 = ((TreeSet) o1).iterator(); + Iterator i2 = ((TreeSet) o2).iterator(); + while (i1.hasNext() && i2.hasNext()) { + String a = (String)i1.next(); + String b = (String)i2.next(); + int result = a.compareTo(b); + if (result != 0) return result; + } + if (i1.hasNext()) return 1; + if (i2.hasNext()) return -1; + return 0; + } + + } + static class ReverseComparator implements Comparator { + public int compare(Object o1, Object o2) { + String a = o1.toString(); + char a1 = a.charAt(0); + String b = o2.toString(); + char b1 = b.charAt(0); + if (a1 < 0x900 && b1 > 0x900) return -1; + if (a1 > 0x900 && b1 < 0x900) return +1; + return a.compareTo(b); + } + } + + static class 
EquivClass { + EquivClass(Comparator c) { + comparator = c; + } + private HashMap itemToSet = new HashMap(); + private Comparator comparator; + + void add(Object a, Object b) { + Set sa = (Set)itemToSet.get(a); + Set sb = (Set)itemToSet.get(b); + if (sa == null && sb == null) { // new set! + Set s = new TreeSet(comparator); + s.add(a); + s.add(b); + itemToSet.put(a, s); + itemToSet.put(b, s); + } else if (sa == null) { + sb.add(a); + } else if (sb == null) { + sa.add(b); + } else { // merge sets, dumping sb + sa.addAll(sb); + Iterator it = sb.iterator(); + while (it.hasNext()) { + itemToSet.put(it.next(), sa); + } + } + } + + private class MyIterator implements Iterator { + private Iterator it; + MyIterator (Comparator comp) { + TreeSet values = new TreeSet(comp); + values.addAll(itemToSet.values()); + it = values.iterator(); + } + + public boolean hasNext() { + return it.hasNext(); + } + public Object next() { + return it.next(); + } + public void remove() { + throw new IllegalArgumentException("can't remove"); + } + } + + public Iterator getSetIterator (Comparator comp) { + return new MyIterator(comp); + } + + } +} \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/demo.bat b/demos/src/com/ibm/icu/dev/demo/translit/demo.bat new file mode 100755 index 00000000000..dd9c205fbaa --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/demo.bat @@ -0,0 +1,13 @@ +REM /* +REM ******************************************************************************* +REM * Copyright (C) 1996-2004, International Business Machines Corporation and * +REM * others. All Rights Reserved. * +REM ******************************************************************************* +REM */ +REM For best results, run the demo as an applet inside of Netscape +REM with Bitstream Cyberbit installed. 
+ +REM setup your JDK 1.1.x path and classpath here: +call JDK11 +set CLASSPATH=../translit.jar;%CLASSPATH% +javaw Demo diff --git a/demos/src/com/ibm/icu/dev/demo/translit/demo.html b/demos/src/com/ibm/icu/dev/demo/translit/demo.html new file mode 100644 index 00000000000..2a7ee5bceec --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/demo.html @@ -0,0 +1,34 @@ + + + +Transliteration Demo + + + + +
    + +If you don't see a button above, then your browser is failing to +locate the necessary Java class files. + +

    + +One way to make this work is to copy this HTML file to +icu4j/src, and make sure the Java files in the directories +under icu4j/src/com are built. Then open this HTML file +using a browser or appletviewer. + +

    + +For best results, run this demo as an applet within Netscape with +Bitstream Cyberbit installed. + + + diff --git a/demos/src/com/ibm/icu/dev/demo/translit/package.html b/demos/src/com/ibm/icu/dev/demo/translit/package.html new file mode 100644 index 00000000000..8355d1f03f7 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/package.html @@ -0,0 +1,12 @@ + + + + + + +Transliterator demo appliation. + + \ No newline at end of file diff --git a/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt new file mode 100644 index 00000000000..8f7c21d3a62 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt @@ -0,0 +1,20365 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2001, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +# Generated: Fri Jan 26 15:14:48 2001 +# Tool: ..\..\tools\translit\hanpinyin.pl +#-------------------------------------------------------------------- + +# Han-Pinyin + +# Mechanically derived from \desk\Unihan.txt (ftp.unicode.org), version: +# +# Name: Unihan database +# Unicode version: 3.0b1 +# Table version: 1.1 +# Date: 2 July 1999 +# +# Copyright (c) 1996-1999 Unicode, Inc. All Rights reserved. 
+ +丁 > dīng; #4E01 +丂 > kăo; #4E02 +七 > qī; #4E03 +丄 > shàng; #4E04 +丅 > xià; #4E05 +万 > mò; #4E07 +丈 > zhàng; #4E08 +三 > sān; #4E09 +上 > shàng; #4E0A +下 > xià; #4E0B +丌 > jī; #4E0C +不 > bù; #4E0D +与 > yŭ; #4E0E +丏 > miăn; #4E0F +丐 > gài; #4E10 +丑 > chŏu; #4E11 +丒 > chŏu; #4E12 +专 > zhuān; #4E13 +且 > qiĕ; #4E14 +丕 > pī; #4E15 +世 > shì; #4E16 +丗 > shì; #4E17 +丘 > qīu; #4E18 +丙 > bĭng; #4E19 +业 > yè; #4E1A +丛 > cóng; #4E1B +东 > dōng; #4E1C +丝 > sī; #4E1D +丞 > chéng; #4E1E +丟 > dīu; #4E1F +丠 > qīu; #4E20 +両 > liăng; #4E21 +丢 > dīu; #4E22 +丣 > yŏu; #4E23 +两 > liăng; #4E24 +严 > yán; #4E25 +並 > bìng; #4E26 +丧 > sāng; #4E27 +丨 > gŭn; #4E28 +丩 > jīu; #4E29 +个 > gè; #4E2A +丫 > yā; #4E2B +丬 > qiáng; #4E2C +中 > zhōng; #4E2D +丮 > jĭ; #4E2E +丯 > jiè; #4E2F +丰 > fēng; #4E30 +丱 > guàn; #4E31 +串 > chuàn; #4E32 +丳 > chăn; #4E33 +临 > lín; #4E34 +丵 > zhŭo; #4E35 +丶 > zhŭ; #4E36 +丸 > wán; #4E38 +丹 > dān; #4E39 +为 > wèi; #4E3A +主 > zhŭ; #4E3B +丼 > jĭng; #4E3C +丽 > lì; #4E3D +举 > jŭ; #4E3E +丿 > piĕ; #4E3F +乀 > fú; #4E40 +乁 > yí; #4E41 +乂 > yì; #4E42 +乃 > năi; #4E43 +久 > jĭu; #4E45 +乆 > jĭu; #4E46 +乇 > zhé; #4E47 +么 > yāo; #4E48 +义 > yì; #4E49 +之 > zhī; #4E4B +乌 > wū; #4E4C +乍 > zhà; #4E4D +乎 > hū; #4E4E +乏 > fá; #4E4F +乐 > lè; #4E50 +乑 > zhòng; #4E51 +乒 > pīng; #4E52 +乓 > pang; #4E53 +乔 > qiáo; #4E54 +乕 > hŭ; #4E55 +乖 > guāi; #4E56 +乗 > chéng; #4E57 +乘 > chéng; #4E58 +乙 > yĭ; #4E59 +乚 > yĭn; #4E5A +乜 > miē; #4E5C +九 > jĭu; #4E5D +乞 > qĭ; #4E5E +也 > yĕ; #4E5F +习 > xí; #4E60 +乡 > xiāng; #4E61 +乢 > gài; #4E62 +乣 > dīu; #4E63 +书 > shū; #4E66 +乨 > shĭ; #4E68 +乩 > jī; #4E69 +乪 > nāng; #4E6A +乫 > jiā; #4E6B +乭 > shí; #4E6D +买 > măi; #4E70 +乱 > luàn; #4E71 +乳 > rŭ; #4E73 +乴 > xué; #4E74 +乵 > yăn; #4E75 +乶 > fŭ; #4E76 +乷 > shā; #4E77 +乸 > nă; #4E78 +乹 > gān; #4E79 +乾 > gān; #4E7E +乿 > chì; #4E7F +亀 > gūi; #4E80 +亁 > gān; #4E81 +亂 > luàn; #4E82 +亃 > lín; #4E83 +亄 > yì; #4E84 +亅 > jué; #4E85 +了 > liăo; #4E86 +予 > yú; #4E88 +争 > zhēng; #4E89 +亊 > shì; #4E8A +事 > shì; #4E8B +二 > èr; #4E8C +亍 > chù; 
#4E8D +于 > yú; #4E8E +亏 > yú; #4E8F +亐 > yú; #4E90 +云 > yún; #4E91 +互 > hù; #4E92 +亓 > qí; #4E93 +五 > wŭ; #4E94 +井 > jĭng; #4E95 +亖 > sì; #4E96 +亗 > sùi; #4E97 +亘 > gèn; #4E98 +亙 > gèn; #4E99 +亚 > yà; #4E9A +些 > xiē; #4E9B +亜 > yà; #4E9C +亝 > qí; #4E9D +亞 > yà; #4E9E +亟 > jí; #4E9F +亠 > tóu; #4EA0 +亡 > wáng; #4EA1 +亢 > kàng; #4EA2 +亣 > tà; #4EA3 +交 > jiāo; #4EA4 +亥 > hài; #4EA5 +亦 > yì; #4EA6 +产 > chăn; #4EA7 +亨 > hēng; #4EA8 +亩 > mŭ; #4EA9 +享 > xiăng; #4EAB +京 > jīng; #4EAC +亭 > tíng; #4EAD +亮 > liàng; #4EAE +亯 > xiăng; #4EAF +亰 > jīng; #4EB0 +亱 > yè; #4EB1 +亲 > qīn; #4EB2 +亳 > bó; #4EB3 +亴 > yòu; #4EB4 +亵 > xiè; #4EB5 +亶 > dăn; #4EB6 +亷 > lián; #4EB7 +亸 > dŭo; #4EB8 +亹 > wĕi; #4EB9 +人 > rén; #4EBA +亻 > rén; #4EBB +亼 > jí; #4EBC +亾 > wáng; #4EBE +亿 > yì; #4EBF +什 > shí; #4EC0 +仁 > rén; #4EC1 +仂 > lè; #4EC2 +仃 > dīng; #4EC3 +仄 > zè; #4EC4 +仅 > jĭn; #4EC5 +仆 > pū; #4EC6 +仇 > chóu; #4EC7 +仈 > bā; #4EC8 +仉 > zhăng; #4EC9 +今 > jīn; #4ECA +介 > jiè; #4ECB +仌 > bīng; #4ECC +仍 > réng; #4ECD +从 > cóng; #4ECE +仏 > fó; #4ECF +仐 > săn; #4ED0 +仑 > lún; #4ED1 +仓 > cāng; #4ED3 +仔 > zĭ; #4ED4 +仕 > shì; #4ED5 +他 > tā; #4ED6 +仗 > zhàng; #4ED7 +付 > fù; #4ED8 +仙 > xiān; #4ED9 +仚 > xiān; #4EDA +仛 > tūo; #4EDB +仜 > hóng; #4EDC +仝 > tóng; #4EDD +仞 > rèn; #4EDE +仟 > qiān; #4EDF +仠 > gán; #4EE0 +仡 > yì; #4EE1 +仢 > dí; #4EE2 +代 > dài; #4EE3 +令 > lìng; #4EE4 +以 > yĭ; #4EE5 +仦 > chào; #4EE6 +仧 > cháng; #4EE7 +仨 > sā; #4EE8 +仪 > yí; #4EEA +仫 > mù; #4EEB +们 > men; #4EEC +仭 > rèn; #4EED +仮 > jiă; #4EEE +仯 > chào; #4EEF +仰 > yăng; #4EF0 +仱 > qián; #4EF1 +仲 > zhòng; #4EF2 +仳 > pĭ; #4EF3 +仴 > wàn; #4EF4 +仵 > wŭ; #4EF5 +件 > jiàn; #4EF6 +价 > jiè; #4EF7 +仸 > yăo; #4EF8 +仹 > fēng; #4EF9 +仺 > cāng; #4EFA +任 > rèn; #4EFB +仼 > wáng; #4EFC +份 > fèn; #4EFD +仾 > dī; #4EFE +仿 > făng; #4EFF +伀 > zhōng; #4F00 +企 > qĭ; #4F01 +伂 > pèi; #4F02 +伃 > yú; #4F03 +伄 > diào; #4F04 +伅 > dùn; #4F05 +伆 > wèn; #4F06 +伇 > yì; #4F07 +伈 > xĭn; #4F08 +伉 > kàng; #4F09 +伊 > yī; #4F0A +伋 > jí; #4F0B +伌 > ài; #4F0C +伍 > wŭ; #4F0D +伎 
> jì; #4F0E +伏 > fú; #4F0F +伐 > fá; #4F10 +休 > xīu; #4F11 +伒 > jìn; #4F12 +伓 > bēi; #4F13 +伔 > dăn; #4F14 +伕 > fū; #4F15 +伖 > tăng; #4F16 +众 > zhòng; #4F17 +优 > yōu; #4F18 +伙 > hŭo; #4F19 +会 > hùi; #4F1A +伛 > yŭ; #4F1B +伜 > cùi; #4F1C +伝 > chuán; #4F1D +伞 > săn; #4F1E +伟 > wĕi; #4F1F +传 > chuán; #4F20 +伡 > chē; #4F21 +伢 > yá; #4F22 +伣 > xiàn; #4F23 +伤 > shāng; #4F24 +伥 > chāng; #4F25 +伦 > lún; #4F26 +伧 > cāng; #4F27 +伨 > xùn; #4F28 +伩 > xìn; #4F29 +伪 > wĕi; #4F2A +伫 > zhù; #4F2B +伭 > xuán; #4F2D +伮 > nú; #4F2E +伯 > bó; #4F2F +估 > gū; #4F30 +伱 > nĭ; #4F31 +伲 > nĭ; #4F32 +伳 > xiè; #4F33 +伴 > bàn; #4F34 +伵 > xù; #4F35 +伶 > líng; #4F36 +伷 > zhòu; #4F37 +伸 > shēn; #4F38 +伹 > qū; #4F39 +伺 > sì; #4F3A +伻 > bēng; #4F3B +似 > sì; #4F3C +伽 > jiā; #4F3D +伾 > pī; #4F3E +伿 > yì; #4F3F +佀 > sì; #4F40 +佁 > ăi; #4F41 +佂 > zhēng; #4F42 +佃 > diàn; #4F43 +佄 > hán; #4F44 +佅 > mài; #4F45 +但 > dàn; #4F46 +佇 > zhù; #4F47 +佈 > bù; #4F48 +佉 > qū; #4F49 +佊 > bĭ; #4F4A +佋 > shào; #4F4B +佌 > cĭ; #4F4C +位 > wèi; #4F4D +低 > dī; #4F4E +住 > zhù; #4F4F +佐 > zŭo; #4F50 +佑 > yòu; #4F51 +佒 > yāng; #4F52 +体 > tĭ; #4F53 +佔 > zhàn; #4F54 +何 > hé; #4F55 +佖 > bì; #4F56 +佗 > tūo; #4F57 +佘 > shé; #4F58 +余 > yú; #4F59 +佚 > yì; #4F5A +佛 > fó; #4F5B +作 > zùo; #4F5C +佝 > kòu; #4F5D +佞 > nìng; #4F5E +佟 > tóng; #4F5F +你 > nĭ; #4F60 +佡 > xuān; #4F61 +佢 > qú; #4F62 +佣 > yòng; #4F63 +佤 > wă; #4F64 +佥 > qiān; #4F65 +佧 > kă; #4F67 +佩 > pèi; #4F69 +佪 > huái; #4F6A +佫 > hè; #4F6B +佬 > lăo; #4F6C +佭 > xiáng; #4F6D +佮 > gé; #4F6E +佯 > yáng; #4F6F +佰 > băi; #4F70 +佱 > fă; #4F71 +佲 > míng; #4F72 +佳 > jia; #4F73 +佴 > èr; #4F74 +併 > bìng; #4F75 +佶 > jí; #4F76 +佷 > hĕn; #4F77 +佸 > húo; #4F78 +佹 > gŭi; #4F79 +佺 > quán; #4F7A +佻 > tiāo; #4F7B +佼 > jiăo; #4F7C +佽 > cì; #4F7D +佾 > yì; #4F7E +使 > shĭ; #4F7F +侀 > xíng; #4F80 +侁 > shēn; #4F81 +侂 > tūo; #4F82 +侃 > kăn; #4F83 +侄 > zhí; #4F84 +侅 > gāi; #4F85 +來 > lái; #4F86 +侇 > yí; #4F87 +侈 > chĭ; #4F88 +侉 > kuā; #4F89 +侊 > guāng; #4F8A +例 > lì; #4F8B +侌 > yīn; #4F8C +侍 > shì; #4F8D +侎 
> mĭ; #4F8E +侏 > zhū; #4F8F +侐 > xù; #4F90 +侑 > yòu; #4F91 +侒 > ān; #4F92 +侓 > lù; #4F93 +侔 > móu; #4F94 +侕 > ér; #4F95 +侖 > lún; #4F96 +侗 > tóng; #4F97 +侘 > chà; #4F98 +侙 > chì; #4F99 +侚 > xùn; #4F9A +供 > gōng; #4F9B +侜 > zhōu; #4F9C +依 > yī; #4F9D +侞 > rŭ; #4F9E +侟 > jiàn; #4F9F +侠 > xiá; #4FA0 +価 > jià; #4FA1 +侢 > zài; #4FA2 +侣 > lǚ; #4FA3 +侥 > jiăo; #4FA5 +侦 > zhēn; #4FA6 +侧 > cè; #4FA7 +侨 > qiáo; #4FA8 +侩 > kuài; #4FA9 +侪 > chái; #4FAA +侫 > nìng; #4FAB +侬 > nóng; #4FAC +侭 > jĭn; #4FAD +侮 > wŭ; #4FAE +侯 > hóu; #4FAF +侰 > jĭong; #4FB0 +侱 > chĕng; #4FB1 +侲 > zhèn; #4FB2 +侳 > zùo; #4FB3 +侴 > chŏu; #4FB4 +侵 > qīn; #4FB5 +侶 > lǚ; #4FB6 +侷 > jú; #4FB7 +侸 > shù; #4FB8 +侹 > tĭng; #4FB9 +侺 > shèn; #4FBA +侻 > tūo; #4FBB +侼 > bó; #4FBC +侽 > nán; #4FBD +侾 > hāo; #4FBE +便 > biàn; #4FBF +俀 > tŭi; #4FC0 +俁 > yŭ; #4FC1 +係 > xì; #4FC2 +促 > cù; #4FC3 +俄 > é; #4FC4 +俅 > qíu; #4FC5 +俆 > xú; #4FC6 +俇 > kuăng; #4FC7 +俈 > kù; #4FC8 +俉 > wù; #4FC9 +俊 > jùn; #4FCA +俋 > yì; #4FCB +俌 > fŭ; #4FCC +俍 > láng; #4FCD +俎 > zŭ; #4FCE +俏 > qiào; #4FCF +俐 > lì; #4FD0 +俑 > yŏng; #4FD1 +俒 > hùn; #4FD2 +俓 > jìng; #4FD3 +俔 > xiàn; #4FD4 +俕 > sàn; #4FD5 +俖 > păi; #4FD6 +俗 > sú; #4FD7 +俘 > fú; #4FD8 +俙 > xī; #4FD9 +俚 > lĭ; #4FDA +俛 > fŭ; #4FDB +俜 > pīng; #4FDC +保 > băo; #4FDD +俞 > yú; #4FDE +俟 > sì; #4FDF +俠 > xiá; #4FE0 +信 > xìn; #4FE1 +俢 > xīu; #4FE2 +俣 > yŭ; #4FE3 +俤 > tì; #4FE4 +俥 > chē; #4FE5 +俦 > chóu; #4FE6 +俨 > yăn; #4FE8 +俩 > liă; #4FE9 +俪 > lì; #4FEA +俫 > lái; #4FEB +俭 > jiăn; #4FED +修 > xīu; #4FEE +俯 > fŭ; #4FEF +俰 > hè; #4FF0 +俱 > jù; #4FF1 +俲 > xiào; #4FF2 +俳 > pái; #4FF3 +俴 > jiàn; #4FF4 +俵 > biào; #4FF5 +俶 > chù; #4FF6 +俷 > fèi; #4FF7 +俸 > fèng; #4FF8 +俹 > yà; #4FF9 +俺 > ăn; #4FFA +俻 > bèi; #4FFB +俼 > yù; #4FFC +俽 > xīn; #4FFD +俾 > bĭ; #4FFE +俿 > jiàn; #4FFF +倀 > chāng; #5000 +倁 > chí; #5001 +倂 > bìng; #5002 +倃 > zán; #5003 +倄 > yáo; #5004 +倅 > cùi; #5005 +倆 > liă; #5006 +倇 > wăn; #5007 +倈 > lái; #5008 +倉 > cāng; #5009 +倊 > zòng; #500A +個 > gè; #500B +倌 > guān; #500C +倍 > bèi; #500D +倎 > 
tiān; #500E +倏 > shū; #500F +倐 > shū; #5010 +們 > men; #5011 +倒 > dăo; #5012 +倓 > tán; #5013 +倔 > jué; #5014 +倕 > chúi; #5015 +倖 > xìng; #5016 +倗 > péng; #5017 +倘 > tăng; #5018 +候 > hòu; #5019 +倚 > yĭ; #501A +倛 > qī; #501B +倜 > tì; #501C +倝 > gàn; #501D +倞 > jìng; #501E +借 > jiè; #501F +倠 > sūi; #5020 +倡 > chàng; #5021 +倢 > jié; #5022 +倣 > făng; #5023 +値 > zhí; #5024 +倥 > kōng; #5025 +倦 > juàn; #5026 +倧 > zōng; #5027 +倨 > jù; #5028 +倩 > qiàn; #5029 +倪 > ní; #502A +倫 > lún; #502B +倬 > zhūo; #502C +倭 > wēi; #502D +倮 > lŭo; #502E +倯 > sōng; #502F +倰 > léng; #5030 +倱 > hùn; #5031 +倲 > dōng; #5032 +倳 > zì; #5033 +倴 > bèn; #5034 +倵 > wŭ; #5035 +倶 > jù; #5036 +倷 > nài; #5037 +倸 > căi; #5038 +倹 > jiăn; #5039 +债 > zhài; #503A +倻 > yē; #503B +值 > zhí; #503C +倽 > shà; #503D +倾 > qīng; #503E +偀 > yīng; #5040 +偁 > chēng; #5041 +偂 > jiān; #5042 +偃 > yăn; #5043 +偄 > nuàn; #5044 +偅 > zhòng; #5045 +偆 > chŭn; #5046 +假 > jiă; #5047 +偈 > jié; #5048 +偉 > wĕi; #5049 +偊 > yŭ; #504A +偋 > bĭng; #504B +偌 > rùo; #504C +偍 > tí; #504D +偎 > wēi; #504E +偏 > piān; #504F +偐 > yàn; #5050 +偑 > fēng; #5051 +偒 > tăng; #5052 +偓 > wò; #5053 +偔 > è; #5054 +偕 > xié; #5055 +偖 > chĕ; #5056 +偗 > shĕng; #5057 +偘 > kăn; #5058 +偙 > dì; #5059 +做 > zùo; #505A +偛 > chā; #505B +停 > tíng; #505C +偝 > bèi; #505D +偞 > yè; #505E +偟 > huáng; #505F +偠 > yăo; #5060 +偡 > zhàn; #5061 +偢 > chŏu; #5062 +偣 > yān; #5063 +偤 > yŏu; #5064 +健 > jiàn; #5065 +偦 > xū; #5066 +偧 > zhā; #5067 +偨 > cī; #5068 +偩 > fù; #5069 +偪 > bī; #506A +偫 > zhì; #506B +偬 > zŏng; #506C +偭 > miăn; #506D +偮 > jí; #506E +偯 > yĭ; #506F +偰 > xiè; #5070 +偱 > xún; #5071 +偲 > sī; #5072 +偳 > duān; #5073 +側 > cè; #5074 +偵 > zhēn; #5075 +偶 > ŏu; #5076 +偷 > tōu; #5077 +偸 > tōu; #5078 +偹 > bèi; #5079 +偺 > zá; #507A +偻 > lǚ; #507B +偼 > jié; #507C +偽 > wĕi; #507D +偾 > fèn; #507E +偿 > cháng; #507F +傀 > gūi; #5080 +傁 > sŏu; #5081 +傂 > zhì; #5082 +傃 > sù; #5083 +傄 > xiā; #5084 +傅 > fù; #5085 +傆 > yuàn; #5086 +傇 > rŏng; #5087 +傈 > lì; #5088 +傉 > rù; #5089 +傊 > yŭn; #508A +傋 
> gòu; #508B +傌 > mà; #508C +傍 > bàng; #508D +傎 > diān; #508E +傏 > táng; #508F +傐 > hào; #5090 +傑 > jié; #5091 +傒 > xī; #5092 +傓 > shàn; #5093 +傔 > qiàn; #5094 +傕 > jué; #5095 +傖 > cāng; #5096 +傗 > chù; #5097 +傘 > săn; #5098 +備 > bèi; #5099 +傚 > xiào; #509A +傛 > yŏng; #509B +傜 > yáo; #509C +傝 > tàn; #509D +傞 > sūo; #509E +傟 > yăng; #509F +傠 > fā; #50A0 +傡 > bìng; #50A1 +傢 > jiā; #50A2 +傣 > dăi; #50A3 +傤 > zài; #50A4 +傥 > tăng; #50A5 +傧 > bìn; #50A7 +储 > chŭ; #50A8 +傩 > núo; #50A9 +傪 > cān; #50AA +傫 > lĕi; #50AB +催 > cūi; #50AC +傭 > yōng; #50AD +傮 > zāo; #50AE +傯 > zŏng; #50AF +傰 > péng; #50B0 +傱 > sŏng; #50B1 +傲 > ào; #50B2 +傳 > chuán; #50B3 +傴 > yŭ; #50B4 +債 > zhài; #50B5 +傶 > còu; #50B6 +傷 > shāng; #50B7 +傸 > qiăng; #50B8 +傹 > jìng; #50B9 +傺 > chì; #50BA +傻 > shă; #50BB +傼 > hàn; #50BC +傽 > zhāng; #50BD +傾 > qīng; #50BE +傿 > yàn; #50BF +僀 > dì; #50C0 +僁 > xī; #50C1 +僂 > lǚ; #50C2 +僃 > bèi; #50C3 +僄 > piào; #50C4 +僅 > jĭn; #50C5 +僆 > lián; #50C6 +僇 > lù; #50C7 +僈 > màn; #50C8 +僉 > qiān; #50C9 +僊 > xiān; #50CA +僋 > tàn; #50CB +僌 > yíng; #50CC +働 > dòng; #50CD +僎 > zhuàn; #50CE +像 > xiàng; #50CF +僐 > shàn; #50D0 +僑 > qiáo; #50D1 +僒 > jĭong; #50D2 +僓 > tŭi; #50D3 +僔 > zŭn; #50D4 +僕 > pú; #50D5 +僖 > xī; #50D6 +僗 > láo; #50D7 +僘 > chăng; #50D8 +僙 > guāng; #50D9 +僚 > liáo; #50DA +僛 > qī; #50DB +僜 > dèng; #50DC +僝 > chán; #50DD +僞 > wĕi; #50DE +僟 > jī; #50DF +僠 > fān; #50E0 +僡 > hùi; #50E1 +僢 > chuăn; #50E2 +僣 > jiàn; #50E3 +僤 > dàn; #50E4 +僥 > jiăo; #50E5 +僦 > jìu; #50E6 +僧 > sēng; #50E7 +僨 > fèn; #50E8 +僩 > xiàn; #50E9 +僪 > jué; #50EA +僫 > è; #50EB +僬 > jiāo; #50EC +僭 > jiàn; #50ED +僮 > tóng; #50EE +僯 > lĭn; #50EF +僰 > bó; #50F0 +僱 > gù; #50F1 +僳 > sù; #50F3 +僴 > xiàn; #50F4 +僵 > jiāng; #50F5 +僶 > mĭn; #50F6 +僷 > yè; #50F7 +僸 > jìn; #50F8 +價 > jià; #50F9 +僺 > qiào; #50FA +僻 > pì; #50FB +僼 > fēng; #50FC +僽 > zhòu; #50FD +僾 > ài; #50FE +僿 > sài; #50FF +儀 > yí; #5100 +儁 > jùn; #5101 +儂 > nóng; #5102 +儃 > chán; #5103 +億 > yì; #5104 +儅 > dāng; #5105 +儆 > jĭng; #5106 +儇 > 
xuān; #5107 +儈 > kuài; #5108 +儉 > jiăn; #5109 +儊 > chù; #510A +儋 > dān; #510B +儌 > jiăo; #510C +儍 > shă; #510D +儎 > zài; #510E +儐 > bìn; #5110 +儑 > àn; #5111 +儒 > rú; #5112 +儓 > tái; #5113 +儔 > chóu; #5114 +儕 > chái; #5115 +儖 > lán; #5116 +儗 > nĭ; #5117 +儘 > jĭn; #5118 +儙 > qiàn; #5119 +儚 > méng; #511A +儛 > wŭ; #511B +儜 > níng; #511C +儝 > qíong; #511D +儞 > nĭ; #511E +償 > cháng; #511F +儠 > liè; #5120 +儡 > lĕi; #5121 +儢 > lǚ; #5122 +儣 > kuàng; #5123 +儤 > bào; #5124 +儥 > dú; #5125 +儦 > biāo; #5126 +儧 > zăn; #5127 +儨 > zhí; #5128 +儩 > sì; #5129 +優 > yōu; #512A +儫 > háo; #512B +儬 > chèn; #512C +儭 > chèn; #512D +儮 > lì; #512E +儯 > téng; #512F +儰 > wĕi; #5130 +儱 > lŏng; #5131 +儲 > chŭ; #5132 +儳 > chàn; #5133 +儴 > ráng; #5134 +儵 > shū; #5135 +儶 > hùi; #5136 +儷 > lì; #5137 +儸 > lúo; #5138 +儹 > zăn; #5139 +儺 > núo; #513A +儻 > tăng; #513B +儼 > yăn; #513C +儽 > lĕi; #513D +儾 > nàng; #513E +儿 > ér; #513F +兀 > wù; #5140 +允 > yŭn; #5141 +兂 > zān; #5142 +元 > yuán; #5143 +兄 > xīong; #5144 +充 > chōng; #5145 +兆 > zhào; #5146 +兇 > xīong; #5147 +先 > xiān; #5148 +光 > guāng; #5149 +兊 > dùi; #514A +克 > kè; #514B +兌 > dùi; #514C +免 > miăn; #514D +兎 > tù; #514E +兏 > cháng; #514F +児 > ér; #5150 +兑 > dùi; #5151 +兒 > ér; #5152 +兓 > xīn; #5153 +兔 > tù; #5154 +兕 > sì; #5155 +兖 > yăn; #5156 +兗 > yăn; #5157 +兘 > shĭ; #5158 +兙 > shí' 'kè; #5159 +党 > dăng; #515A +兛 > qiān; #515B +兜 > dōu; #515C +兝 > fēn; #515D +兞 > máo; #515E +兟 > shēn; #515F +兠 > dōu; #5160 +兡 > băi' 'kè; #5161 +兢 > jīng; #5162 +兣 > lĭ; #5163 +兤 > huáng; #5164 +入 > rù; #5165 +兦 > wáng; #5166 +內 > nèi; #5167 +全 > quán; #5168 +兩 > liăng; #5169 +兪 > yú; #516A +八 > bā; #516B +公 > gōng; #516C +六 > lìu; #516D +兮 > xī; #516E +兰 > lán; #5170 +共 > gòng; #5171 +兲 > tiān; #5172 +关 > guān; #5173 +兴 > xīng; #5174 +兵 > bīng; #5175 +其 > qí; #5176 +具 > jù; #5177 +典 > diăn; #5178 +兹 > zī; #5179 +养 > yăng; #517B +兼 > jiān; #517C +兽 > shòu; #517D +兾 > jì; #517E +兿 > yì; #517F +冀 > jì; #5180 +冁 > chăn; #5181 +冂 > jīong; #5182 +冃 > mao; #5183 +冄 > răn; 
#5184 +内 > nèi; #5185 +円 > yuan; #5186 +冇 > măo; #5187 +冈 > gāng; #5188 +冉 > răn; #5189 +冊 > cè; #518A +冋 > jīong; #518B +册 > cè; #518C +再 > zài; #518D +冎 > guă; #518E +冏 > jĭong; #518F +冐 > mào; #5190 +冑 > zhòu; #5191 +冒 > mòu; #5192 +冓 > gòu; #5193 +冔 > xŭ; #5194 +冕 > miăn; #5195 +冖 > mì; #5196 +冗 > rŏng; #5197 +冘 > yín; #5198 +写 > xiĕ; #5199 +冚 > kăn; #519A +军 > jūn; #519B +农 > nóng; #519C +冝 > yí; #519D +冞 > mí; #519E +冟 > shì; #519F +冠 > guān; #51A0 +冡 > méng; #51A1 +冢 > zhŏng; #51A2 +冣 > jù; #51A3 +冤 > yuān; #51A4 +冥 > míng; #51A5 +冦 > kòu; #51A6 +冨 > fù; #51A8 +冩 > xiĕ; #51A9 +冪 > mì; #51AA +冫 > bīng; #51AB +冬 > dōng; #51AC +冭 > tái; #51AD +冮 > gāng; #51AE +冯 > féng; #51AF +冰 > bīng; #51B0 +冱 > hù; #51B1 +冲 > chōng; #51B2 +决 > jué; #51B3 +冴 > hù; #51B4 +况 > kuàng; #51B5 +冶 > yĕ; #51B6 +冷 > lĕng; #51B7 +冸 > pàn; #51B8 +冹 > fú; #51B9 +冺 > mĭn; #51BA +冻 > dòng; #51BB +冼 > xiăn; #51BC +冽 > liè; #51BD +冾 > xiá; #51BE +冿 > jiān; #51BF +净 > jìng; #51C0 +凁 > shù; #51C1 +凂 > mĕi; #51C2 +凃 > tú; #51C3 +凄 > qī; #51C4 +凅 > gù; #51C5 +准 > zhŭn; #51C6 +凇 > sòng; #51C7 +凈 > jìng; #51C8 +凉 > liáng; #51C9 +凊 > qìng; #51CA +凋 > diāo; #51CB +凌 > líng; #51CC +凍 > dòng; #51CD +凎 > gàn; #51CE +减 > jiăn; #51CF +凐 > yīn; #51D0 +凑 > còu; #51D1 +凒 > yí; #51D2 +凓 > lì; #51D3 +凔 > cāng; #51D4 +凕 > mĭng; #51D5 +凖 > zhuĕn; #51D6 +凗 > cúi; #51D7 +凘 > sī; #51D8 +凙 > dúo; #51D9 +凚 > jìn; #51DA +凛 > lĭn; #51DB +凜 > lĭn; #51DC +凝 > níng; #51DD +凞 > xī; #51DE +凟 > dú; #51DF +几 > jī; #51E0 +凡 > fán; #51E1 +凢 > fán; #51E2 +凣 > fán; #51E3 +凤 > fèng; #51E4 +凥 > jū; #51E5 +処 > chŭ; #51E6 +凨 > fēng; #51E8 +凫 > fú; #51EB +凬 > fēng; #51EC +凭 > píng; #51ED +凮 > fēng; #51EE +凯 > kăi; #51EF +凰 > huáng; #51F0 +凱 > kăi; #51F1 +凲 > gān; #51F2 +凳 > dèng; #51F3 +凴 > píng; #51F4 +凵 > qū; #51F5 +凶 > xīong; #51F6 +凷 > kuài; #51F7 +凸 > tū; #51F8 +凹 > āo; #51F9 +出 > chū; #51FA +击 > jí; #51FB +凼 > dàng; #51FC +函 > hán; #51FD +凾 > hán; #51FE +凿 > záo; #51FF +刀 > dāo; #5200 +刁 > diāo; #5201 +刂 > dāo; #5202 +刃 > rèn; 
#5203 +刄 > rèn; #5204 +刅 > chuāng; #5205 +分 > fēn; #5206 +切 > qiē; #5207 +刈 > yì; #5208 +刉 > jī; #5209 +刊 > kān; #520A +刋 > qiàn; #520B +刌 > cŭn; #520C +刍 > chú; #520D +刎 > wĕn; #520E +刏 > jī; #520F +刐 > dăn; #5210 +刑 > xíng; #5211 +划 > huá; #5212 +刓 > wán; #5213 +刔 > jué; #5214 +刕 > lí; #5215 +刖 > yuè; #5216 +列 > liè; #5217 +刘 > líu; #5218 +则 > zé; #5219 +刚 > gāng; #521A +创 > chuàng; #521B +刜 > fú; #521C +初 > chū; #521D +刞 > qù; #521E +刟 > jū; #521F +删 > shān; #5220 +刡 > mĭn; #5221 +刢 > líng; #5222 +刣 > zhōng; #5223 +判 > pàn; #5224 +別 > bié; #5225 +刦 > jié; #5226 +刧 > jié; #5227 +刨 > bào; #5228 +利 > lì; #5229 +刪 > shān; #522A +别 > bié; #522B +刬 > chăn; #522C +刭 > jĭng; #522D +刮 > guā; #522E +刯 > gēn; #522F +到 > dào; #5230 +刱 > chuàng; #5231 +刲 > kūi; #5232 +刳 > kū; #5233 +刴 > dùo; #5234 +刵 > èr; #5235 +制 > zhì; #5236 +刷 > shuā; #5237 +券 > quàn; #5238 +刹 > chà; #5239 +刺 > cì; #523A +刻 > kè; #523B +刼 > jié; #523C +刽 > gùi; #523D +刾 > cì; #523E +刿 > gùi; #523F +剀 > kăi; #5240 +剁 > dùo; #5241 +剂 > jì; #5242 +剃 > tì; #5243 +剄 > jĭng; #5244 +剅 > lóu; #5245 +剆 > gēn; #5246 +則 > zé; #5247 +剈 > yuān; #5248 +剉 > cùo; #5249 +削 > xuē; #524A +剋 > kè; #524B +剌 > là; #524C +前 > qián; #524D +剎 > chà; #524E +剏 > chuàng; #524F +剐 > guă; #5250 +剑 > jiàn; #5251 +剒 > cùo; #5252 +剓 > lí; #5253 +剔 > tī; #5254 +剕 > fèi; #5255 +剖 > pōu; #5256 +剗 > chăn; #5257 +剘 > qí; #5258 +剙 > chuàng; #5259 +剚 > zì; #525A +剛 > gāng; #525B +剜 > wān; #525C +剝 > bō; #525D +剞 > jī; #525E +剟 > dūo; #525F +剠 > qíng; #5260 +剡 > yăn; #5261 +剢 > zhúo; #5262 +剣 > jiàn; #5263 +剤 > jì; #5264 +剥 > bō; #5265 +剦 > yān; #5266 +剧 > jù; #5267 +剨 > hùo; #5268 +剩 > shèng; #5269 +剪 > jiăn; #526A +剫 > dúo; #526B +剬 > duān; #526C +剭 > wū; #526D +剮 > guă; #526E +副 > fù; #526F +剰 > shèng; #5270 +剱 > jiàn; #5271 +割 > gē; #5272 +剳 > zhā; #5273 +剴 > kăi; #5274 +創 > chuàng; #5275 +剶 > juān; #5276 +剷 > chăn; #5277 +剸 > tuán; #5278 +剹 > lù; #5279 +剺 > lí; #527A +剻 > fóu; #527B +剼 > shān; #527C +剽 > piào; #527D +剾 > kōu; #527E +剿 > 
jiăo; #527F +劀 > guā; #5280 +劁 > qiāo; #5281 +劂 > jué; #5282 +劃 > huà; #5283 +劄 > zhá; #5284 +劅 > zhùo; #5285 +劆 > lián; #5286 +劇 > jù; #5287 +劈 > pī; #5288 +劉 > líu; #5289 +劊 > gùi; #528A +劋 > jiăo; #528B +劌 > gùi; #528C +劍 > jiàn; #528D +劎 > jiàn; #528E +劏 > tāng; #528F +劐 > hūo; #5290 +劑 > jì; #5291 +劒 > jiàn; #5292 +劓 > yì; #5293 +劔 > jiàn; #5294 +劕 > zhí; #5295 +劖 > chán; #5296 +劗 > cuán; #5297 +劘 > mó; #5298 +劙 > lí; #5299 +劚 > zhú; #529A +力 > lì; #529B +劜 > yā; #529C +劝 > quàn; #529D +办 > bàn; #529E +功 > gōng; #529F +加 > jiā; #52A0 +务 > wù; #52A1 +劢 > mài; #52A2 +劣 > liè; #52A3 +劤 > jìn; #52A4 +劥 > kēng; #52A5 +劦 > xié; #52A6 +劧 > zhĭ; #52A7 +动 > dòng; #52A8 +助 > zhù; #52A9 +努 > nŭ; #52AA +劫 > jié; #52AB +劬 > qú; #52AC +劭 > shào; #52AD +劮 > yì; #52AE +劯 > zhū; #52AF +劰 > miăo; #52B0 +励 > lì; #52B1 +劲 > jìng; #52B2 +劳 > láo; #52B3 +労 > láo; #52B4 +劵 > juàn; #52B5 +劶 > kŏu; #52B6 +劷 > yáng; #52B7 +劸 > wā; #52B8 +効 > xiào; #52B9 +劺 > móu; #52BA +劻 > kuāng; #52BB +劼 > jié; #52BC +劽 > liè; #52BD +劾 > hé; #52BE +势 > shì; #52BF +勀 > kè; #52C0 +勁 > jìng; #52C1 +勂 > háo; #52C2 +勃 > bó; #52C3 +勄 > mĭn; #52C4 +勅 > chì; #52C5 +勆 > láng; #52C6 +勇 > yŏng; #52C7 +勈 > yŏng; #52C8 +勉 > miăn; #52C9 +勊 > kè; #52CA +勋 > xūn; #52CB +勌 > juàn; #52CC +勍 > qíng; #52CD +勎 > lù; #52CE +勏 > pŏu; #52CF +勐 > mĕng; #52D0 +勑 > lài; #52D1 +勒 > lè; #52D2 +勓 > kài; #52D3 +勔 > miăn; #52D4 +動 > dòng; #52D5 +勖 > xù; #52D6 +勗 > xù; #52D7 +勘 > kān; #52D8 +務 > wù; #52D9 +勚 > yì; #52DA +勛 > xūn; #52DB +勜 > wĕng; #52DC +勝 > shèng; #52DD +勞 > láo; #52DE +募 > mù; #52DF +勠 > lù; #52E0 +勡 > piào; #52E1 +勢 > shì; #52E2 +勣 > jī; #52E3 +勤 > qín; #52E4 +勥 > qiăng; #52E5 +勦 > jiăo; #52E6 +勧 > quàn; #52E7 +勨 > yăng; #52E8 +勩 > yì; #52E9 +勪 > jué; #52EA +勫 > fán; #52EB +勬 > juàn; #52EC +勭 > tóng; #52ED +勮 > jù; #52EE +勯 > dān; #52EF +勰 > xié; #52F0 +勱 > mài; #52F1 +勲 > xūn; #52F2 +勳 > xūn; #52F3 +勴 > lǜ; #52F4 +勵 > lì; #52F5 +勶 > chè; #52F6 +勷 > ráng; #52F7 +勸 > quàn; #52F8 +勹 > bāo; #52F9 +勺 > sháo; #52FA +勻 
> yún; #52FB +勼 > jīu; #52FC +勽 > bào; #52FD +勾 > gōu; #52FE +勿 > wù; #52FF +匀 > yún; #5300 +匃 > gài; #5303 +匄 > gài; #5304 +包 > bāo; #5305 +匆 > cōng; #5306 +匈 > xīong; #5308 +匉 > pēng; #5309 +匊 > jú; #530A +匋 > táo; #530B +匌 > gé; #530C +匍 > pú; #530D +匎 > àn; #530E +匏 > páo; #530F +匐 > fú; #5310 +匑 > gōng; #5311 +匒 > dá; #5312 +匓 > jìu; #5313 +匔 > qīong; #5314 +匕 > bĭ; #5315 +化 > huà; #5316 +北 > bĕi; #5317 +匘 > năo; #5318 +匙 > chí; #5319 +匚 > fāng; #531A +匛 > jìu; #531B +匜 > yí; #531C +匝 > zā; #531D +匞 > jiàng; #531E +匟 > kàng; #531F +匠 > jiàng; #5320 +匡 > kuāng; #5321 +匢 > hū; #5322 +匣 > xiá; #5323 +匤 > qū; #5324 +匥 > biàn; #5325 +匦 > gŭi; #5326 +匧 > qiè; #5327 +匨 > zāng; #5328 +匩 > kuāng; #5329 +匪 > fĕi; #532A +匫 > hū; #532B +匬 > tóu; #532C +匭 > gŭi; #532D +匮 > gùi; #532E +匯 > hùi; #532F +匰 > dān; #5330 +匱 > gùi; #5331 +匲 > lián; #5332 +匳 > lián; #5333 +匴 > suăn; #5334 +匵 > dú; #5335 +匶 > jìu; #5336 +匷 > qú; #5337 +匸 > xĭ; #5338 +匹 > pĭ; #5339 +区 > qū; #533A +医 > yì; #533B +匼 > qià; #533C +匽 > yăn; #533D +匾 > biăn; #533E +匿 > nì; #533F +區 > qū; #5340 +十 > shí; #5341 +卂 > xìn; #5342 +千 > qiān; #5343 +卄 > niàn; #5344 +卅 > sà; #5345 +卆 > zú; #5346 +升 > shēng; #5347 +午 > wŭ; #5348 +卉 > hùi; #5349 +半 > bàn; #534A +卋 > shì; #534B +卌 > xì; #534C +卍 > wàn; #534D +华 > huá; #534E +协 > xié; #534F +卐 > wàn; #5350 +卑 > bēi; #5351 +卒 > zú; #5352 +卓 > zhūo; #5353 +協 > xié; #5354 +单 > dān; #5355 +卖 > mài; #5356 +南 > nán; #5357 +単 > dān; #5358 +卙 > jí; #5359 +博 > bó; #535A +卛 > shuài; #535B +卜 > bŭ; #535C +卝 > kuàng; #535D +卞 > biàn; #535E +卟 > bŭ; #535F +占 > zhān; #5360 +卡 > qiă; #5361 +卢 > lú; #5362 +卣 > yŏu; #5363 +卤 > lŭ; #5364 +卥 > xī; #5365 +卦 > guà; #5366 +卧 > wò; #5367 +卨 > xiè; #5368 +卩 > jié; #5369 +卪 > jié; #536A +卫 > wèi; #536B +卬 > áng; #536C +卭 > qíong; #536D +卮 > zhī; #536E +卯 > măo; #536F +印 > yìn; #5370 +危 > wēi; #5371 +卲 > shào; #5372 +即 > jí; #5373 +却 > què; #5374 +卵 > luăn; #5375 +卶 > shì; #5376 +卷 > juàn; #5377 +卸 > xiè; #5378 +卹 > xù; #5379 +卺 > jĭn; #537A 
+卻 > què; #537B +卼 > wù; #537C +卽 > jí; #537D +卾 > è; #537E +卿 > qīng; #537F +厀 > xī; #5380 +厂 > hàn; #5382 +厃 > zhān; #5383 +厄 > è; #5384 +厅 > tīng; #5385 +历 > lì; #5386 +厇 > zhé; #5387 +厈 > hăn; #5388 +厉 > lì; #5389 +厊 > yă; #538A +压 > yā; #538B +厌 > yàn; #538C +厍 > shè; #538D +厎 > zhĭ; #538E +厏 > zhă; #538F +厐 > páng; #5390 +厒 > hé; #5392 +厓 > yá; #5393 +厔 > zhì; #5394 +厕 > cè; #5395 +厖 > páng; #5396 +厗 > tí; #5397 +厘 > lí; #5398 +厙 > shè; #5399 +厚 > hòu; #539A +厛 > tīng; #539B +厜 > zūi; #539C +厝 > cùo; #539D +厞 > fèi; #539E +原 > yuán; #539F +厠 > cè; #53A0 +厡 > yuán; #53A1 +厢 > xiāng; #53A2 +厣 > yăn; #53A3 +厤 > lì; #53A4 +厥 > jué; #53A5 +厦 > shà; #53A6 +厧 > diān; #53A7 +厨 > chú; #53A8 +厩 > jìu; #53A9 +厪 > qín; #53AA +厫 > áo; #53AB +厬 > gŭi; #53AC +厭 > yàn; #53AD +厮 > sī; #53AE +厯 > lì; #53AF +厰 > chăng; #53B0 +厱 > lán; #53B1 +厲 > lì; #53B2 +厳 > yán; #53B3 +厴 > yăn; #53B4 +厵 > yuán; #53B5 +厶 > sī; #53B6 +厷 > gōng; #53B7 +厸 > lín; #53B8 +厹 > qíu; #53B9 +厺 > qù; #53BA +去 > qù; #53BB +厽 > lĕi; #53BD +厾 > dū; #53BE +县 > xiàn; #53BF +叀 > zhuān; #53C0 +叁 > sān; #53C1 +参 > cān; #53C2 +參 > cān; #53C3 +叄 > cān; #53C4 +叅 > cān; #53C5 +叆 > ài; #53C6 +叇 > dài; #53C7 +又 > yòu; #53C8 +叉 > cha; #53C9 +及 > jí; #53CA +友 > yŏu; #53CB +双 > shuāng; #53CC +反 > făn; #53CD +収 > shōu; #53CE +叏 > guài; #53CF +叐 > bá; #53D0 +发 > fā; #53D1 +叒 > rùo; #53D2 +叓 > shì; #53D3 +叔 > shū; #53D4 +叕 > zhúo; #53D5 +取 > qū; #53D6 +受 > shòu; #53D7 +变 > biàn; #53D8 +叙 > xù; #53D9 +叚 > jiă; #53DA +叛 > pàn; #53DB +叜 > sŏu; #53DC +叝 > gào; #53DD +叞 > wèi; #53DE +叟 > sŏu; #53DF +叠 > dié; #53E0 +叡 > rùi; #53E1 +叢 > cóng; #53E2 +口 > kŏu; #53E3 +古 > gŭ; #53E4 +句 > jù; #53E5 +另 > lìng; #53E6 +叧 > guă; #53E7 +叨 > tāo; #53E8 +叩 > kòu; #53E9 +只 > zhĭ; #53EA +叫 > jiào; #53EB +召 > zhào; #53EC +叭 > bā; #53ED +叮 > dīng; #53EE +可 > kĕ; #53EF +台 > tái; #53F0 +叱 > chì; #53F1 +史 > shĭ; #53F2 +右 > yòu; #53F3 +叴 > qíu; #53F4 +叵 > pŏ; #53F5 +叶 > xié; #53F6 +号 > hào; #53F7 +司 > sī; #53F8 +叹 > tàn; #53F9 +叺 > chĭ; #53FA +叻 > 
lè; #53FB +叼 > diāo; #53FC +叽 > jī; #53FD +叿 > hōng; #53FF +吀 > miē; #5400 +吁 > xū; #5401 +吂 > máng; #5402 +吃 > chī; #5403 +各 > gè; #5404 +吅 > xuān; #5405 +吆 > yāo; #5406 +吇 > zĭ; #5407 +合 > hé; #5408 +吉 > jí; #5409 +吊 > diào; #540A +吋 > cùn; #540B +同 > tóng; #540C +名 > míng; #540D +后 > hòu; #540E +吏 > lì; #540F +吐 > tŭ; #5410 +向 > xiàng; #5411 +吒 > zhà; #5412 +吓 > xià; #5413 +吔 > yĕ; #5414 +吕 > lǚ; #5415 +吖 > ā; #5416 +吗 > ma; #5417 +吘 > ŏu; #5418 +吙 > xuē; #5419 +吚 > yī; #541A +君 > jūn; #541B +吜 > chŏu; #541C +吝 > lìn; #541D +吞 > tūn; #541E +吟 > yín; #541F +吠 > fèi; #5420 +吡 > bĭ; #5421 +吢 > qìn; #5422 +吣 > qìn; #5423 +吤 > jiè; #5424 +吥 > bù; #5425 +否 > fŏu; #5426 +吧 > ba; #5427 +吨 > dūn; #5428 +吩 > fēn; #5429 +吪 > é; #542A +含 > hán; #542B +听 > tīng; #542C +吭 > háng; #542D +吮 > shŭn; #542E +启 > qĭ; #542F +吰 > hóng; #5430 +吱 > zhī; #5431 +吲 > shĕn; #5432 +吳 > wú; #5433 +吴 > wú; #5434 +吵 > chăo; #5435 +吶 > nè; #5436 +吷 > xuè; #5437 +吸 > xī; #5438 +吹 > chūi; #5439 +吺 > dōu; #543A +吻 > wĕn; #543B +吼 > hŏu; #543C +吽 > óu; #543D +吾 > wú; #543E +吿 > gào; #543F +呀 > yā; #5440 +呁 > jùn; #5441 +呂 > lǚ; #5442 +呃 > è; #5443 +呄 > gé; #5444 +呅 > méi; #5445 +呆 > ái; #5446 +呇 > qĭ; #5447 +呈 > chéng; #5448 +呉 > wú; #5449 +告 > gào; #544A +呋 > fū; #544B +呌 > jiào; #544C +呍 > hōng; #544D +呎 > chĭ; #544E +呏 > shēng; #544F +呐 > nè; #5450 +呑 > tūn; #5451 +呒 > fŭ; #5452 +呓 > yì; #5453 +呔 > dāi; #5454 +呕 > ōu; #5455 +呖 > lì; #5456 +呗 > bài; #5457 +员 > yuán; #5458 +呙 > kuāi; #5459 +呛 > qiāng; #545B +呜 > wū; #545C +呝 > è; #545D +呞 > shī; #545E +呟 > quăn; #545F +呠 > pēn; #5460 +呡 > wĕn; #5461 +呢 > ní; #5462 +呣 > ḿ; #5463 +呤 > lĭng; #5464 +呥 > răn; #5465 +呦 > yōu; #5466 +呧 > dĭ; #5467 +周 > zhōu; #5468 +呩 > shì; #5469 +呪 > zhòu; #546A +呫 > tiē; #546B +呬 > xì; #546C +呭 > yì; #546D +呮 > qì; #546E +呯 > píng; #546F +呰 > zĭ; #5470 +呱 > gū; #5471 +呲 > zī; #5472 +味 > wèi; #5473 +呴 > xū; #5474 +呵 > hē; #5475 +呶 > náo; #5476 +呷 > xiā; #5477 +呸 > pēi; #5478 +呹 > yì; #5479 +呺 > xiāo; #547A +呻 > shēn; 
#547B +呼 > hū; #547C +命 > mìng; #547D +呾 > dá; #547E +呿 > qū; #547F +咀 > jŭ; #5480 +咁 > gèm; #5481 +咂 > zā; #5482 +咃 > tūo; #5483 +咄 > dūo; #5484 +咅 > pòu; #5485 +咆 > páo; #5486 +咇 > bì; #5487 +咈 > fú; #5488 +咉 > yāng; #5489 +咊 > hé; #548A +咋 > zhà; #548B +和 > hé; #548C +咍 > hāi; #548D +咎 > jìu; #548E +咏 > yŏng; #548F +咐 > fù; #5490 +咑 > què; #5491 +咒 > zhòu; #5492 +咓 > wă; #5493 +咔 > kă; #5494 +咕 > gū; #5495 +咖 > kā; #5496 +咗 > zŭo; #5497 +咘 > bù; #5498 +咙 > lóng; #5499 +咚 > dōng; #549A +咛 > níng; #549B +咝 > sī; #549D +咞 > xiàn; #549E +咟 > hùo; #549F +咠 > qì; #54A0 +咡 > èr; #54A1 +咢 > è; #54A2 +咣 > guāng; #54A3 +咤 > zhà; #54A4 +咥 > xì; #54A5 +咦 > yí; #54A6 +咧 > liĕ; #54A7 +咨 > zī; #54A8 +咩 > miē; #54A9 +咪 > mī; #54AA +咫 > zhĭ; #54AB +咬 > yăo; #54AC +咭 > jī; #54AD +咮 > zhòu; #54AE +咯 > gē; #54AF +咰 > shuài; #54B0 +咱 > zán; #54B1 +咲 > xiào; #54B2 +咳 > ké; #54B3 +咴 > hūi; #54B4 +咵 > kuā; #54B5 +咶 > huài; #54B6 +咷 > táo; #54B7 +咸 > xián; #54B8 +咹 > è; #54B9 +咺 > xuān; #54BA +咻 > xīu; #54BB +咼 > wāi; #54BC +咽 > yān; #54BD +咾 > lăo; #54BE +咿 > yī; #54BF +哀 > āi; #54C0 +品 > pĭn; #54C1 +哂 > shĕn; #54C2 +哃 > tóng; #54C3 +哄 > hōng; #54C4 +哅 > xīong; #54C5 +哆 > chĭ; #54C6 +哇 > wā; #54C7 +哈 > hā; #54C8 +哉 > zāi; #54C9 +哊 > yù; #54CA +哋 > dì; #54CB +哌 > pài; #54CC +响 > xiăng; #54CD +哎 > āi; #54CE +哏 > hĕn; #54CF +哐 > kuāng; #54D0 +哑 > yă; #54D1 +哒 > dā; #54D2 +哓 > xiāo; #54D3 +哔 > bì; #54D4 +哕 > yuĕ; #54D5 +哗 > huā; #54D7 +哙 > kuài; #54D9 +哚 > dŭo; #54DA +哜 > jì; #54DC +哝 > nóng; #54DD +哞 > mōu; #54DE +哟 > yo; #54DF +哠 > hào; #54E0 +員 > yuán; #54E1 +哢 > lòng; #54E2 +哣 > pŏu; #54E3 +哤 > máng; #54E4 +哥 > gē; #54E5 +哦 > é; #54E6 +哧 > chī; #54E7 +哨 > shào; #54E8 +哩 > lī; #54E9 +哪 > nă; #54EA +哫 > zú; #54EB +哬 > hé; #54EC +哭 > kū; #54ED +哮 > xiāo; #54EE +哯 > xiàn; #54EF +哰 > láo; #54F0 +哱 > bō; #54F1 +哲 > zhé; #54F2 +哳 > zhā; #54F3 +哴 > liàng; #54F4 +哵 > bā; #54F5 +哶 > miē; #54F6 +哷 > lè; #54F7 +哸 > sūi; #54F8 +哹 > fóu; #54F9 +哺 > bŭ; #54FA +哻 > hàn; #54FB +哼 > hēng; #54FC +哽 > 
gĕng; #54FD +哾 > shūo; #54FE +哿 > gĕ; #54FF +唀 > yŏu; #5500 +唁 > yàn; #5501 +唂 > gŭ; #5502 +唃 > gŭ; #5503 +唄 > bài; #5504 +唅 > hān; #5505 +唆 > sūo; #5506 +唇 > chún; #5507 +唈 > yì; #5508 +唉 > āi; #5509 +唊 > jiá; #550A +唋 > tŭ; #550B +唌 > xián; #550C +唍 > huăn; #550D +唎 > lī; #550E +唏 > xī; #550F +唐 > táng; #5510 +唑 > zùo; #5511 +唒 > qíu; #5512 +唓 > chē; #5513 +唔 > wú; #5514 +唕 > zào; #5515 +唖 > yă; #5516 +唗 > dōu; #5517 +唘 > qĭ; #5518 +唙 > dí; #5519 +唚 > qìn; #551A +唛 > mà; #551B +唝 > hŏng; #551D +唞 > dŏu; #551E +唠 > láo; #5520 +唡 > liăng; #5521 +唢 > sŭo; #5522 +唣 > zào; #5523 +唤 > huàn; #5524 +唦 > shā; #5526 +唧 > jī; #5527 +唨 > zŭo; #5528 +唩 > wō; #5529 +唪 > fĕng; #552A +唫 > yín; #552B +唬 > hŭ; #552C +唭 > qī; #552D +售 > shòu; #552E +唯 > wéi; #552F +唰 > shuā; #5530 +唱 > chàng; #5531 +唲 > ér; #5532 +唳 > lì; #5533 +唴 > qiàng; #5534 +唵 > ăn; #5535 +唶 > jiè; #5536 +唷 > yō; #5537 +唸 > niàn; #5538 +唹 > yū; #5539 +唺 > tiăn; #553A +唻 > lăi; #553B +唼 > shà; #553C +唽 > xī; #553D +唾 > tùo; #553E +唿 > hū; #553F +啀 > ái; #5540 +啁 > zhōu; #5541 +啂 > nòu; #5542 +啃 > kĕn; #5543 +啄 > zhúo; #5544 +啅 > zhúo; #5545 +商 > shāng; #5546 +啇 > dí; #5547 +啈 > hèng; #5548 +啉 > lán; #5549 +啊 > a; #554A +啋 > xiāo; #554B +啌 > xiāng; #554C +啍 > tūn; #554D +啎 > wŭ; #554E +問 > wèn; #554F +啐 > cùi; #5550 +啑 > shà; #5551 +啒 > hū; #5552 +啓 > qĭ; #5553 +啔 > qĭ; #5554 +啕 > táo; #5555 +啖 > dàn; #5556 +啗 > dàn; #5557 +啘 > yè; #5558 +啙 > zĭ; #5559 +啚 > bĭ; #555A +啛 > cùi; #555B +啜 > chùo; #555C +啝 > hé; #555D +啞 > yă; #555E +啟 > qĭ; #555F +啠 > zhé; #5560 +啡 > pēi; #5561 +啢 > liăng; #5562 +啣 > xián; #5563 +啤 > pí; #5564 +啥 > shà; #5565 +啦 > la; #5566 +啧 > zé; #5567 +啨 > qīng; #5568 +啩 > guà; #5569 +啪 > pā; #556A +啫 > zhĕ; #556B +啬 > sè; #556C +啭 > zhuàn; #556D +啮 > niè; #556E +啯 > guo; #556F +啰 > lūo; #5570 +啱 > yān; #5571 +啲 > dì; #5572 +啳 > quán; #5573 +啴 > tān; #5574 +啵 > bo; #5575 +啶 > dìng; #5576 +啷 > lāng; #5577 +啸 > xiào; #5578 +啺 > táng; #557A +啻 > chì; #557B +啼 > tí; #557C +啽 > án; #557D +啾 > jīu; 
#557E +啿 > dàn; #557F +喀 > kè; #5580 +喁 > yóng; #5581 +喂 > wèi; #5582 +喃 > nán; #5583 +善 > shàn; #5584 +喅 > yù; #5585 +喆 > zhé; #5586 +喇 > lă; #5587 +喈 > jiē; #5588 +喉 > hóu; #5589 +喊 > hăn; #558A +喋 > dié; #558B +喌 > zhōu; #558C +喍 > chái; #558D +喎 > wāi; #558E +喏 > rĕ; #558F +喐 > yù; #5590 +喑 > yīn; #5591 +喒 > zán; #5592 +喓 > yāo; #5593 +喔 > wō; #5594 +喕 > miăn; #5595 +喖 > hú; #5596 +喗 > yŭn; #5597 +喘 > chuăn; #5598 +喙 > hùi; #5599 +喚 > huàn; #559A +喛 > huàn; #559B +喜 > xĭ; #559C +喝 > hē; #559D +喞 > jī; #559E +喟 > kùi; #559F +喠 > zhŏng; #55A0 +喡 > wĕi; #55A1 +喢 > shà; #55A2 +喣 > xŭ; #55A3 +喤 > huáng; #55A4 +喥 > dù; #55A5 +喦 > niè; #55A6 +喧 > 1xuān; #55A7 +喨 > liàng; #55A8 +喩 > yù; #55A9 +喪 > sāng; #55AA +喫 > chī; #55AB +喬 > qiáo; #55AC +喭 > yàn; #55AD +單 > dān; #55AE +喯 > pēn; #55AF +喰 > cān; #55B0 +喱 > lí; #55B1 +喲 > yo; #55B2 +喳 > zhā; #55B3 +喴 > wēi; #55B4 +喵 > miāo; #55B5 +営 > yíng; #55B6 +喷 > pēn; #55B7 +喹 > kúi; #55B9 +喺 > xì; #55BA +喻 > yù; #55BB +喼 > jié; #55BC +喽 > lou; #55BD +喾 > kù; #55BE +喿 > sào; #55BF +嗀 > hùo; #55C0 +嗁 > tí; #55C1 +嗂 > yáo; #55C2 +嗃 > hè; #55C3 +嗄 > á; #55C4 +嗅 > xìu; #55C5 +嗆 > qiāng; #55C6 +嗇 > sè; #55C7 +嗈 > yōng; #55C8 +嗉 > sù; #55C9 +嗊 > hŏng; #55CA +嗋 > xié; #55CB +嗌 > yì; #55CC +嗍 > sūo; #55CD +嗎 > ma; #55CE +嗏 > chā; #55CF +嗐 > hài; #55D0 +嗑 > kè; #55D1 +嗒 > tà; #55D2 +嗓 > săng; #55D3 +嗔 > tián; #55D4 +嗕 > rù; #55D5 +嗖 > sōu; #55D6 +嗗 > wā; #55D7 +嗘 > jī; #55D8 +嗙 > păng; #55D9 +嗚 > wū; #55DA +嗛 > xián; #55DB +嗜 > shì; #55DC +嗝 > gé; #55DD +嗞 > zī; #55DE +嗟 > jiē; #55DF +嗠 > lùo; #55E0 +嗡 > wēng; #55E1 +嗢 > wà; #55E2 +嗣 > sì; #55E3 +嗤 > chī; #55E4 +嗥 > háo; #55E5 +嗦 > sūo; #55E6 +嗧 > jiā' 'lún; #55E7 +嗨 > hăi; #55E8 +嗩 > sŭo; #55E9 +嗪 > qín; #55EA +嗫 > niè; #55EB +嗬 > hē; #55EC +嗮 > sài; #55EE +嗯 > ǹg; #55EF +嗰 > gè; #55F0 +嗱 > ná; #55F1 +嗲 > diă; #55F2 +嗳 > ài; #55F3 +嗵 > tōng; #55F5 +嗶 > bì; #55F6 +嗷 > áo; #55F7 +嗸 > áo; #55F8 +嗹 > lián; #55F9 +嗺 > cūi; #55FA +嗻 > zhē; #55FB +嗼 > mò; #55FC +嗽 > sòu; #55FD +嗾 > sŏu; #55FE 
+嗿 > tăn; #55FF +嘀 > dí; #5600 +嘁 > qī; #5601 +嘂 > jiào; #5602 +嘃 > chōng; #5603 +嘄 > jiāo; #5604 +嘅 > kăi; #5605 +嘆 > tàn; #5606 +嘇 > sān; #5607 +嘈 > cáo; #5608 +嘉 > jiā; #5609 +嘊 > ái; #560A +嘋 > xiāo; #560B +嘌 > piāo; #560C +嘍 > lou; #560D +嘎 > gā; #560E +嘏 > gŭ; #560F +嘐 > xiāo; #5610 +嘑 > hū; #5611 +嘒 > hùi; #5612 +嘓 > guo; #5613 +嘔 > ōu; #5614 +嘕 > xiān; #5615 +嘖 > zé; #5616 +嘗 > cháng; #5617 +嘘 > xū; #5618 +嘙 > pó; #5619 +嘚 > dé; #561A +嘛 > ma; #561B +嘜 > mà; #561C +嘝 > hú; #561D +嘞 > lei; #561E +嘟 > dū; #561F +嘠 > gā; #5620 +嘡 > tāng; #5621 +嘢 > yĕ; #5622 +嘣 > bēng; #5623 +嘤 > yīng; #5624 +嘦 > jiào; #5626 +嘧 > mī; #5627 +嘨 > xiào; #5628 +嘩 > huā; #5629 +嘪 > măi; #562A +嘫 > rán; #562B +嘬 > zūo; #562C +嘭 > pēng; #562D +嘮 > láo; #562E +嘯 > xiào; #562F +嘰 > jī; #5630 +嘱 > zhŭ; #5631 +嘲 > cháo; #5632 +嘳 > kùi; #5633 +嘴 > zŭi; #5634 +嘵 > xiāo; #5635 +嘶 > sī; #5636 +嘷 > háo; #5637 +嘸 > fŭ; #5638 +嘹 > liáo; #5639 +嘺 > qiáo; #563A +嘻 > xī; #563B +嘼 > xìu; #563C +嘽 > tān; #563D +嘾 > tán; #563E +嘿 > mò; #563F +噀 > xùn; #5640 +噁 > ĕ; #5641 +噂 > zŭn; #5642 +噃 > fān; #5643 +噄 > chī; #5644 +噅 > hūi; #5645 +噆 > zăn; #5646 +噇 > chuáng; #5647 +噈 > cù; #5648 +噉 > dàn; #5649 +噊 > yù; #564A +噋 > tūn; #564B +噌 > chēng; #564C +噍 > jiào; #564D +噎 > yē; #564E +噏 > xī; #564F +噐 > qì; #5650 +噑 > háo; #5651 +噒 > lián; #5652 +噓 > xū; #5653 +噔 > dēng; #5654 +噕 > hūi; #5655 +噖 > yín; #5656 +噗 > pū; #5657 +噘 > juē; #5658 +噙 > qín; #5659 +噚 > xún; #565A +噛 > niè; #565B +噜 > lū; #565C +噝 > sī; #565D +噞 > yăn; #565E +噟 > yìng; #565F +噠 > dā; #5660 +噡 > dān; #5661 +噢 > yŭ; #5662 +噣 > zhòu; #5663 +噤 > jìn; #5664 +噥 > nóng; #5665 +噦 > yuĕ; #5666 +噧 > hùi; #5667 +器 > qì; #5668 +噩 > è; #5669 +噪 > zào; #566A +噫 > yī; #566B +噬 > shì; #566C +噭 > jiào; #566D +噮 > yuān; #566E +噯 > ài; #566F +噰 > yōng; #5670 +噱 > jué; #5671 +噲 > kuài; #5672 +噳 > yŭ; #5673 +噴 > pēn; #5674 +噵 > dào; #5675 +噶 > gé; #5676 +噷 > xīn; #5677 +噸 > dūn; #5678 +噹 > dāng; #5679 +噻 > sai; #567B +噼 > pī; #567C +噽 > pĭ; #567D +噾 > 
yīn; #567E +噿 > zŭi; #567F +嚀 > níng; #5680 +嚁 > dí; #5681 +嚂 > làn; #5682 +嚃 > tā; #5683 +嚄 > hùo; #5684 +嚅 > rú; #5685 +嚆 > hāo; #5686 +嚇 > xià; #5687 +嚈 > yà; #5688 +嚉 > dūo; #5689 +嚊 > xì; #568A +嚋 > chóu; #568B +嚌 > jì; #568C +嚍 > jìn; #568D +嚎 > háo; #568E +嚏 > tì; #568F +嚐 > cháng; #5690 +嚓 > cā; #5693 +嚔 > tì; #5694 +嚕 > lū; #5695 +嚖 > hùi; #5696 +嚗 > bó; #5697 +嚘 > yōu; #5698 +嚙 > niè; #5699 +嚚 > yín; #569A +嚛 > hù; #569B +嚜 > mò; #569C +嚝 > huāng; #569D +嚞 > zhé; #569E +嚟 > lí; #569F +嚠 > líu; #56A0 +嚢 > náng; #56A2 +嚣 > xiāo; #56A3 +嚤 > mó; #56A4 +嚥 > yàn; #56A5 +嚦 > lì; #56A6 +嚧 > lú; #56A7 +嚨 > lóng; #56A8 +嚩 > fú; #56A9 +嚪 > dàn; #56AA +嚫 > chèn; #56AB +嚬 > pín; #56AC +嚭 > pĭ; #56AD +嚮 > xiàng; #56AE +嚯 > hùo; #56AF +嚰 > mó; #56B0 +嚱 > xì; #56B1 +嚲 > dŭo; #56B2 +嚳 > kù; #56B3 +嚴 > yán; #56B4 +嚵 > chán; #56B5 +嚶 > yīng; #56B6 +嚷 > răng; #56B7 +嚸 > diăn; #56B8 +嚹 > lā; #56B9 +嚺 > tà; #56BA +嚻 > xiāo; #56BB +嚼 > jiáo; #56BC +嚽 > chùo; #56BD +嚾 > huān; #56BE +嚿 > hùo; #56BF +囀 > zhuàn; #56C0 +囁 > niè; #56C1 +囂 > xiāo; #56C2 +囃 > cà; #56C3 +囄 > lí; #56C4 +囅 > chăn; #56C5 +囆 > chài; #56C6 +囇 > lì; #56C7 +囈 > yì; #56C8 +囉 > lūo; #56C9 +囊 > náng; #56CA +囋 > zàn; #56CB +囌 > sū; #56CC +囍 > xĭ; #56CD +囏 > jiān; #56CF +囐 > zá; #56D0 +囑 > zhŭ; #56D1 +囒 > lán; #56D2 +囓 > niè; #56D3 +囔 > nāng; #56D4 +囗 > wéi; #56D7 +囘 > húi; #56D8 +囙 > yīn; #56D9 +囚 > qíu; #56DA +四 > sì; #56DB +囜 > nín; #56DC +囝 > jiăn; #56DD +回 > húi; #56DE +囟 > xìn; #56DF +因 > yīn; #56E0 +囡 > nān; #56E1 +团 > tuán; #56E2 +団 > tuán; #56E3 +囤 > dùn; #56E4 +囥 > kàng; #56E5 +囦 > yuān; #56E6 +囧 > jĭong; #56E7 +囨 > piān; #56E8 +囩 > yùn; #56E9 +囪 > cōng; #56EA +囫 > hú; #56EB +囬 > húi; #56EC +园 > yuán; #56ED +囮 > yóu; #56EE +囯 > gúo; #56EF +困 > kùn; #56F0 +囱 > cōng; #56F1 +囲 > wéi; #56F2 +図 > tú; #56F3 +围 > wéi; #56F4 +囵 > lún; #56F5 +囶 > gúo; #56F6 +囷 > qūn; #56F7 +囸 > rì; #56F8 +囹 > líng; #56F9 +固 > gù; #56FA +囻 > gúo; #56FB +囼 > tāi; #56FC +国 > gúo; #56FD +图 > tú; #56FE +囿 > yòu; #56FF +圀 > gúo; #5700 
+圁 > yín; #5701 +圂 > hùn; #5702 +圃 > pŭ; #5703 +圄 > yŭ; #5704 +圅 > hán; #5705 +圆 > yuán; #5706 +圇 > lún; #5707 +圈 > quān; #5708 +圉 > yŭ; #5709 +圊 > qīng; #570A +國 > gúo; #570B +圌 > chuán; #570C +圍 > wéi; #570D +圎 > yuán; #570E +圏 > quān; #570F +圐 > kū; #5710 +圑 > fù; #5711 +園 > yuán; #5712 +圓 > yuán; #5713 +圔 > è; #5714 +圕 > tú' 'shū' 'guăn; #5715 +圖 > tú; #5716 +圗 > tú; #5717 +團 > tuán; #5718 +圙 > lǜe; #5719 +圚 > hùi; #571A +圛 > yì; #571B +圜 > yuán; #571C +圝 > luán; #571D +圞 > luán; #571E +土 > tŭ; #571F +圠 > yà; #5720 +圡 > tŭ; #5721 +圢 > tīng; #5722 +圣 > shèng; #5723 +圤 > pŭ; #5724 +圥 > lù; #5725 +圧 > yā; #5727 +在 > zài; #5728 +圩 > wéi; #5729 +圪 > gē; #572A +圫 > yù; #572B +圬 > wū; #572C +圭 > gūi; #572D +圮 > pĭ; #572E +圯 > yí; #572F +地 > dì; #5730 +圱 > qiān; #5731 +圲 > qiān; #5732 +圳 > zhèn; #5733 +圴 > zhúo; #5734 +圵 > dàng; #5735 +圶 > qià; #5736 +圹 > kuàng; #5739 +场 > cháng; #573A +圻 > qí; #573B +圼 > niè; #573C +圽 > mò; #573D +圾 > jí; #573E +圿 > jiá; #573F +址 > zhĭ; #5740 +坁 > zhĭ; #5741 +坂 > băn; #5742 +坃 > xūn; #5743 +坄 > tóu; #5744 +坅 > qĭn; #5745 +坆 > fén; #5746 +均 > jūn; #5747 +坈 > kēng; #5748 +坉 > tún; #5749 +坊 > fāng; #574A +坋 > fèn; #574B +坌 > bèn; #574C +坍 > tān; #574D +坎 > kăn; #574E +坏 > pī; #574F +坐 > zùo; #5750 +坑 > kēng; #5751 +坒 > bì; #5752 +坓 > xíng; #5753 +坔 > dì; #5754 +坕 > jīng; #5755 +坖 > jì; #5756 +块 > kuài; #5757 +坘 > dĭ; #5758 +坙 > jīng; #5759 +坚 > jiān; #575A +坛 > tán; #575B +坜 > lì; #575C +坝 > bà; #575D +坞 > wù; #575E +坟 > fén; #575F +坠 > zhùi; #5760 +坡 > pō; #5761 +坢 > păn; #5762 +坣 > tāng; #5763 +坤 > kūn; #5764 +坥 > qū; #5765 +坦 > tăn; #5766 +坧 > zhī; #5767 +坨 > túo; #5768 +坩 > gān; #5769 +坪 > píng; #576A +坫 > diàn; #576B +坬 > guà; #576C +坭 > ní; #576D +坮 > tái; #576E +坯 > pī; #576F +坰 > jīong; #5770 +坱 > yăng; #5771 +坲 > fó; #5772 +坳 > ào; #5773 +坴 > lìu; #5774 +坵 > qīu; #5775 +坶 > mù; #5776 +坷 > kĕ; #5777 +坸 > gòu; #5778 +坹 > xuè; #5779 +坺 > bá; #577A +坻 > chí; #577B +坼 > chè; #577C +坽 > líng; #577D +坾 > zhù; #577E +坿 > fù; #577F +垀 > 
hū; #5780 +垁 > zhì; #5781 +垂 > chúi; #5782 +垃 > lā; #5783 +垄 > lŏng; #5784 +垅 > lŏng; #5785 +垆 > lú; #5786 +垇 > ào; #5787 +垉 > páo; #5789 +型 > xíng; #578B +垌 > dòng; #578C +垍 > jì; #578D +垎 > kè; #578E +垏 > lù; #578F +垐 > cí; #5790 +垑 > chĭ; #5791 +垒 > lĕi; #5792 +垓 > gāi; #5793 +垔 > yīn; #5794 +垕 > hòu; #5795 +垖 > dūi; #5796 +垗 > zhào; #5797 +垘 > fú; #5798 +垙 > guāng; #5799 +垚 > yáo; #579A +垛 > dŭo; #579B +垜 > dŭo; #579C +垝 > gŭi; #579D +垞 > chá; #579E +垟 > yáng; #579F +垠 > yín; #57A0 +垡 > fá; #57A1 +垢 > gòu; #57A2 +垣 > yuán; #57A3 +垤 > dié; #57A4 +垥 > xié; #57A5 +垦 > kĕn; #57A6 +垧 > jīong; #57A7 +垨 > shŏu; #57A8 +垩 > è; #57A9 +垫 > diàn; #57AB +垬 > hóng; #57AC +垭 > wù; #57AD +垮 > kuă; #57AE +垱 > dàng; #57B1 +垲 > kăi; #57B2 +垴 > năo; #57B4 +垵 > ăn; #57B5 +垶 > xīng; #57B6 +垷 > xiàn; #57B7 +垸 > huàn; #57B8 +垹 > bāng; #57B9 +垺 > pēi; #57BA +垻 > bà; #57BB +垼 > yì; #57BC +垽 > yìn; #57BD +垾 > hàn; #57BE +垿 > xù; #57BF +埀 > chúi; #57C0 +埁 > cén; #57C1 +埂 > gĕng; #57C2 +埃 > āi; #57C3 +埄 > péng; #57C4 +埅 > fáng; #57C5 +埆 > què; #57C6 +埇 > yŏng; #57C7 +埈 > xùn; #57C8 +埉 > jiá; #57C9 +埊 > dì; #57CA +埋 > mái; #57CB +埌 > làng; #57CC +埍 > xuàn; #57CD +城 > chéng; #57CE +埏 > yán; #57CF +埐 > jīn; #57D0 +埑 > zhé; #57D1 +埒 > lèi; #57D2 +埓 > liè; #57D3 +埔 > bù; #57D4 +埕 > chéng; #57D5 +埗 > bù; #57D7 +埘 > shí; #57D8 +埙 > xūn; #57D9 +埚 > gūo; #57DA +埛 > jīong; #57DB +埜 > yĕ; #57DC +埝 > niàn; #57DD +埞 > dĭ; #57DE +域 > yù; #57DF +埠 > bù; #57E0 +埡 > yà; #57E1 +埢 > juăn; #57E2 +埣 > sùi; #57E3 +埤 > pí; #57E4 +埥 > chēng; #57E5 +埦 > wăn; #57E6 +埧 > jù; #57E7 +埨 > lŭn; #57E8 +埩 > zhēng; #57E9 +埪 > kōng; #57EA +埫 > chŏng; #57EB +埬 > dōng; #57EC +埭 > dài; #57ED +埮 > tàn; #57EE +埯 > ăn; #57EF +埰 > cài; #57F0 +埱 > shú; #57F1 +埲 > bĕng; #57F2 +埳 > kăn; #57F3 +埴 > zhí; #57F4 +埵 > dŭo; #57F5 +埶 > yì; #57F6 +執 > zhí; #57F7 +埸 > yì; #57F8 +培 > péi; #57F9 +基 > jī; #57FA +埻 > zhŭn; #57FB +埼 > qí; #57FC +埽 > sào; #57FD +埾 > jù; #57FE +埿 > ní; #57FF +堀 > kū; #5800 +堁 > kè; #5801 +堂 > táng; #5802 +堃 > kūn; 
#5803 +堄 > nì; #5804 +堅 > jiān; #5805 +堆 > dūi; #5806 +堇 > jĭn; #5807 +堈 > gāng; #5808 +堉 > yù; #5809 +堊 > è; #580A +堋 > péng; #580B +堌 > gù; #580C +堍 > tù; #580D +堎 > lèng; #580E +堐 > yá; #5810 +堑 > qiàn; #5811 +堓 > àn; #5813 +堕 > dùo; #5815 +堖 > năo; #5816 +堗 > tū; #5817 +堘 > chéng; #5818 +堙 > yīn; #5819 +堚 > hún; #581A +堛 > bì; #581B +堜 > liàn; #581C +堝 > gūo; #581D +堞 > dié; #581E +堟 > zhuàn; #581F +堠 > hòu; #5820 +堡 > băo; #5821 +堢 > băo; #5822 +堣 > yú; #5823 +堤 > dī; #5824 +堥 > máo; #5825 +堦 > jiē; #5826 +堧 > ruán; #5827 +堨 > è; #5828 +堩 > gèng; #5829 +堪 > kān; #582A +堫 > zōng; #582B +堬 > yú; #582C +堭 > huáng; #582D +堮 > è; #582E +堯 > yáo; #582F +堰 > yàn; #5830 +報 > bào; #5831 +堲 > jí; #5832 +堳 > méi; #5833 +場 > cháng; #5834 +堵 > dŭ; #5835 +堶 > túo; #5836 +堷 > yìn; #5837 +堸 > féng; #5838 +堹 > zhòng; #5839 +堺 > jiè; #583A +堻 > zhēn; #583B +堼 > fēng; #583C +堽 > gāng; #583D +堾 > chuăn; #583E +堿 > jiăn; #583F +塂 > xiàng; #5842 +塃 > huāng; #5843 +塄 > léng; #5844 +塅 > duàn; #5845 +塇 > xuān; #5847 +塈 > jì; #5848 +塉 > jí; #5849 +塊 > kuài; #584A +塋 > yíng; #584B +塌 > tā; #584C +塍 > chéng; #584D +塎 > yŏng; #584E +塏 > kăi; #584F +塐 > sù; #5850 +塑 > sù; #5851 +塒 > shí; #5852 +塓 > mì; #5853 +塔 > tă; #5854 +塕 > wĕng; #5855 +塖 > chéng; #5856 +塗 > tú; #5857 +塘 > táng; #5858 +塙 > què; #5859 +塚 > zhŏng; #585A +塛 > lì; #585B +塜 > péng; #585C +塝 > bàng; #585D +塞 > sāi; #585E +塟 > zàng; #585F +塠 > dūi; #5860 +塡 > tián; #5861 +塢 > wù; #5862 +塣 > chĕng; #5863 +塤 > xūn; #5864 +塥 > gé; #5865 +塦 > zhèn; #5866 +塧 > ài; #5867 +塨 > gōng; #5868 +塩 > yán; #5869 +塪 > kăn; #586A +填 > tián; #586B +塬 > yuán; #586C +塭 > wēn; #586D +塮 > xiè; #586E +塯 > lìu; #586F +塱 > lăng; #5871 +塲 > cháng; #5872 +塳 > péng; #5873 +塴 > bèng; #5874 +塵 > chén; #5875 +塶 > cù; #5876 +塷 > lŭ; #5877 +塸 > ŏu; #5878 +塹 > qiàn; #5879 +塺 > méi; #587A +塻 > mò; #587B +塼 > zhuān; #587C +塽 > shuăng; #587D +塾 > shú; #587E +塿 > lŏu; #587F +墀 > chí; #5880 +墁 > màn; #5881 +墂 > biāo; #5882 +境 > jìng; #5883 +墄 > qī; #5884 +墅 > shù; 
#5885 +墆 > dì; #5886 +墇 > zhāng; #5887 +墈 > kàn; #5888 +墉 > yōng; #5889 +墊 > diàn; #588A +墋 > chĕn; #588B +墌 > zhī; #588C +墍 > xì; #588D +墎 > gūo; #588E +墏 > qiăng; #588F +墐 > jìn; #5890 +墑 > dī; #5891 +墒 > shāng; #5892 +墓 > mù; #5893 +墔 > cūi; #5894 +墕 > yàn; #5895 +墖 > tă; #5896 +増 > zēng; #5897 +墘 > qí; #5898 +墙 > qiáng; #5899 +墚 > liáng; #589A +墜 > zhùi; #589C +墝 > qiāo; #589D +增 > zēng; #589E +墟 > xū; #589F +墠 > shàn; #58A0 +墡 > shàn; #58A1 +墢 > bá; #58A2 +墣 > pū; #58A3 +墤 > kuài; #58A4 +墥 > dŏng; #58A5 +墦 > fán; #58A6 +墧 > què; #58A7 +墨 > mò; #58A8 +墩 > dūn; #58A9 +墪 > dūn; #58AA +墫 > dūn; #58AB +墬 > dì; #58AC +墭 > shèng; #58AD +墮 > dùo; #58AE +墯 > dùo; #58AF +墰 > tán; #58B0 +墱 > dèng; #58B1 +墲 > wŭ; #58B2 +墳 > fén; #58B3 +墴 > huáng; #58B4 +墵 > tán; #58B5 +墶 > dā; #58B6 +墷 > yè; #58B7 +墺 > yù; #58BA +墻 > qiáng; #58BB +墼 > jī; #58BC +墽 > qiāo; #58BD +墾 > kĕn; #58BE +墿 > yì; #58BF +壀 > pí; #58C0 +壁 > bì; #58C1 +壂 > diàn; #58C2 +壃 > jiāng; #58C3 +壄 > yĕ; #58C4 +壅 > yōng; #58C5 +壆 > bó; #58C6 +壇 > tán; #58C7 +壈 > lăn; #58C8 +壉 > jù; #58C9 +壊 > huài; #58CA +壋 > dàng; #58CB +壌 > răng; #58CC +壍 > qiàn; #58CD +壎 > xūn; #58CE +壏 > làn; #58CF +壐 > xĭ; #58D0 +壑 > hè; #58D1 +壒 > ài; #58D2 +壓 > yā; #58D3 +壔 > dăo; #58D4 +壕 > háo; #58D5 +壖 > ruán; #58D6 +壘 > lĕi; #58D8 +壙 > kuàng; #58D9 +壚 > lú; #58DA +壛 > yán; #58DB +壜 > tán; #58DC +壝 > wéi; #58DD +壞 > huài; #58DE +壟 > lŏng; #58DF +壠 > lŏng; #58E0 +壡 > rùi; #58E1 +壢 > lì; #58E2 +壣 > lín; #58E3 +壤 > răng; #58E4 +壦 > xūn; #58E6 +壧 > yán; #58E7 +壨 > léi; #58E8 +壩 > bà; #58E9 +士 > shì; #58EB +壬 > rén; #58EC +壮 > zhuàng; #58EE +壯 > zhuàng; #58EF +声 > shēng; #58F0 +壱 > yī; #58F1 +売 > mài; #58F2 +壳 > ké; #58F3 +壴 > zhŭ; #58F4 +壵 > zhuàng; #58F5 +壶 > hú; #58F6 +壷 > hú; #58F7 +壸 > kŭn; #58F8 +壹 > yī; #58F9 +壺 > hú; #58FA +壻 > xù; #58FB +壼 > kŭn; #58FC +壽 > shòu; #58FD +壾 > măng; #58FE +壿 > zŭn; #58FF +夀 > shòu; #5900 +夁 > yī; #5901 +夂 > zhĭ; #5902 +夃 > gū; #5903 +处 > chù; #5904 +夅 > jiàng; #5905 +夆 > fēng; #5906 +备 > bèi; #5907 +変 
> biàn; #5909 +夊 > sūi; #590A +夋 > qūn; #590B +夌 > líng; #590C +复 > fù; #590D +夎 > zùo; #590E +夏 > xià; #590F +夐 > xìong; #5910 +夒 > náo; #5912 +夓 > xià; #5913 +夔 > kúi; #5914 +夕 > xī; #5915 +外 > wài; #5916 +夗 > yuàn; #5917 +夘 > măo; #5918 +夙 > sù; #5919 +多 > dūo; #591A +夛 > dūo; #591B +夜 > yè; #591C +夝 > qíng; #591D +够 > gòu; #591F +夠 > gòu; #5920 +夡 > qì; #5921 +夢 > mèng; #5922 +夣 > mèng; #5923 +夤 > yín; #5924 +夥 > hŭo; #5925 +夦 > chèn; #5926 +大 > dà; #5927 +夨 > zè; #5928 +天 > tiān; #5929 +太 > tài; #592A +夫 > fū; #592B +夬 > guài; #592C +夭 > yăo; #592D +央 > yāng; #592E +夯 > hāng; #592F +夰 > găo; #5930 +失 > shī; #5931 +夲 > bĕn; #5932 +夳 > tài; #5933 +头 > tóu; #5934 +夵 > yăn; #5935 +夶 > bĭ; #5936 +夷 > yí; #5937 +夸 > kuā; #5938 +夹 > jiā; #5939 +夺 > dúo; #593A +夼 > kuăng; #593C +夽 > yùn; #593D +夾 > jiā; #593E +夿 > pā; #593F +奀 > ēn; #5940 +奁 > lián; #5941 +奂 > huàn; #5942 +奃 > dì; #5943 +奄 > yăn; #5944 +奅 > pào; #5945 +奆 > quăn; #5946 +奇 > qí; #5947 +奈 > nài; #5948 +奉 > fèng; #5949 +奊 > xié; #594A +奋 > fèn; #594B +奌 > diăn; #594C +奎 > kúi; #594E +奏 > zòu; #594F +奐 > huàn; #5950 +契 > qì; #5951 +奒 > kāi; #5952 +奓 > zhà; #5953 +奔 > bēn; #5954 +奕 > yì; #5955 +奖 > jiăng; #5956 +套 > tào; #5957 +奘 > zàng; #5958 +奙 > bĕn; #5959 +奚 > xī; #595A +奛 > xiăng; #595B +奜 > fĕi; #595C +奝 > diāo; #595D +奞 > xùn; #595E +奟 > kēng; #595F +奠 > diàn; #5960 +奡 > ào; #5961 +奢 > shē; #5962 +奣 > wĕng; #5963 +奤 > păn; #5964 +奥 > ào; #5965 +奦 > wù; #5966 +奧 > ào; #5967 +奨 > jiăng; #5968 +奩 > lián; #5969 +奪 > dúo; #596A +奫 > yūn; #596B +奬 > jiăng; #596C +奭 > shì; #596D +奮 > fèn; #596E +奯 > hùo; #596F +奰 > bì; #5970 +奱 > lián; #5971 +奲 > dŭo; #5972 +女 > nǚ; #5973 +奴 > nú; #5974 +奵 > dīng; #5975 +奶 > năi; #5976 +奷 > qiān; #5977 +奸 > jiān; #5978 +她 > tā; #5979 +奺 > jĭu; #597A +奻 > nán; #597B +奼 > chà; #597C +好 > hăo; #597D +奾 > xiān; #597E +奿 > fàn; #597F +妀 > jĭ; #5980 +妁 > shùo; #5981 +如 > rú; #5982 +妃 > fēi; #5983 +妄 > wàng; #5984 +妅 > hóng; #5985 +妆 > zhuāng; #5986 +妇 > fù; #5987 +妈 > mā; #5988 
+妉 > dān; #5989 +妊 > rèn; #598A +妋 > fū; #598B +妌 > jìng; #598C +妍 > yán; #598D +妎 > xiè; #598E +妏 > wèn; #598F +妐 > zhōng; #5990 +妑 > pā; #5991 +妒 > dù; #5992 +妓 > jì; #5993 +妔 > kēng; #5994 +妕 > zhòng; #5995 +妖 > yāo; #5996 +妗 > jìn; #5997 +妘 > yún; #5998 +妙 > miào; #5999 +妚 > pēi; #599A +妜 > yuè; #599C +妝 > zhuāng; #599D +妞 > nīu; #599E +妟 > yàn; #599F +妠 > nà; #59A0 +妡 > xīn; #59A1 +妢 > fén; #59A2 +妣 > bĭ; #59A3 +妤 > yú; #59A4 +妥 > tŭo; #59A5 +妦 > fēng; #59A6 +妧 > yuán; #59A7 +妨 > fáng; #59A8 +妩 > wŭ; #59A9 +妪 > yù; #59AA +妫 > gūi; #59AB +妬 > dù; #59AC +妭 > bá; #59AD +妮 > nī; #59AE +妯 > zhóu; #59AF +妰 > zhúo; #59B0 +妱 > zhāo; #59B1 +妲 > dá; #59B2 +妳 > năi; #59B3 +妴 > yuăn; #59B4 +妵 > tŏu; #59B5 +妶 > xuán; #59B6 +妷 > zhí; #59B7 +妸 > ē; #59B8 +妹 > mèi; #59B9 +妺 > mò; #59BA +妻 > qī; #59BB +妼 > bì; #59BC +妽 > shēn; #59BD +妾 > qiè; #59BE +妿 > ē; #59BF +姀 > hé; #59C0 +姁 > xŭ; #59C1 +姂 > fá; #59C2 +姃 > zhēng; #59C3 +姄 > mín; #59C4 +姅 > bàn; #59C5 +姆 > mŭ; #59C6 +姇 > fū; #59C7 +姈 > líng; #59C8 +姉 > zĭ; #59C9 +姊 > zĭ; #59CA +始 > shĭ; #59CB +姌 > răn; #59CC +姍 > shān; #59CD +姎 > yāng; #59CE +姏 > mán; #59CF +姐 > jiĕ; #59D0 +姑 > gū; #59D1 +姒 > sì; #59D2 +姓 > xìng; #59D3 +委 > wĕi; #59D4 +姕 > zī; #59D5 +姖 > jù; #59D6 +姗 > shān; #59D7 +姘 > pīn; #59D8 +姙 > rèn; #59D9 +姚 > yáo; #59DA +姛 > tŏng; #59DB +姜 > jiāng; #59DC +姝 > shū; #59DD +姞 > jí; #59DE +姟 > gāi; #59DF +姠 > shàng; #59E0 +姡 > kùo; #59E1 +姢 > juān; #59E2 +姣 > jiāo; #59E3 +姤 > gòu; #59E4 +姥 > mŭ; #59E5 +姦 > jiān; #59E6 +姧 > jiān; #59E7 +姨 > yí; #59E8 +姩 > niàn; #59E9 +姪 > zhí; #59EA +姫 > jī; #59EB +姬 > jī; #59EC +姭 > xiàn; #59ED +姮 > héng; #59EE +姯 > guāng; #59EF +姰 > jūn; #59F0 +姱 > kuā; #59F1 +姲 > yàn; #59F2 +姳 > mĭng; #59F3 +姴 > liè; #59F4 +姵 > pèi; #59F5 +姶 > yăn; #59F6 +姷 > yòu; #59F7 +姸 > yán; #59F8 +姹 > chà; #59F9 +姺 > shēn; #59FA +姻 > yīn; #59FB +姼 > chĭ; #59FC +姽 > gŭi; #59FD +姾 > quān; #59FE +姿 > zī; #59FF +娀 > sōng; #5A00 +威 > wēi; #5A01 +娂 > hóng; #5A02 +娃 > wá; #5A03 +娄 > lóu; #5A04 +娅 > yà; #5A05 +娆 > răo; 
#5A06 +娇 > jiāo; #5A07 +娈 > luán; #5A08 +娉 > pīng; #5A09 +娊 > xiàn; #5A0A +娋 > shào; #5A0B +娌 > lĭ; #5A0C +娍 > chéng; #5A0D +娎 > xiào; #5A0E +娏 > máng; #5A0F +娐 > fu; #5A10 +娑 > sūo; #5A11 +娒 > wŭ; #5A12 +娓 > wĕi; #5A13 +娔 > kè; #5A14 +娕 > lài; #5A15 +娖 > chùo; #5A16 +娗 > dìng; #5A17 +娘 > niáng; #5A18 +娙 > xíng; #5A19 +娚 > nán; #5A1A +娛 > yú; #5A1B +娜 > núo; #5A1C +娝 > pēi; #5A1D +娞 > nĕi; #5A1E +娟 > juān; #5A1F +娠 > shēn; #5A20 +娡 > zhì; #5A21 +娢 > hán; #5A22 +娣 > dì; #5A23 +娤 > zhuāng; #5A24 +娥 > é; #5A25 +娦 > pín; #5A26 +娧 > tùi; #5A27 +娨 > hàn; #5A28 +娩 > miăn; #5A29 +娪 > wú; #5A2A +娫 > yán; #5A2B +娬 > wŭ; #5A2C +娭 > xī; #5A2D +娮 > yán; #5A2E +娯 > yú; #5A2F +娰 > sì; #5A30 +娱 > yú; #5A31 +娲 > wā; #5A32 +娴 > xián; #5A34 +娵 > jū; #5A35 +娶 > qŭ; #5A36 +娷 > shùi; #5A37 +娸 > qī; #5A38 +娹 > xián; #5A39 +娺 > zhūi; #5A3A +娻 > dōng; #5A3B +娼 > chāng; #5A3C +娽 > lù; #5A3D +娾 > ăi; #5A3E +娿 > ē; #5A3F +婀 > ē; #5A40 +婁 > lóu; #5A41 +婂 > mián; #5A42 +婃 > cóng; #5A43 +婄 > pŏu; #5A44 +婅 > jú; #5A45 +婆 > pó; #5A46 +婇 > căi; #5A47 +婈 > díng; #5A48 +婉 > wăn; #5A49 +婊 > biăo; #5A4A +婋 > xiāo; #5A4B +婌 > shŭ; #5A4C +婍 > qĭ; #5A4D +婎 > hūi; #5A4E +婏 > fù; #5A4F +婐 > ē; #5A50 +婑 > wŏ; #5A51 +婒 > tán; #5A52 +婓 > fēi; #5A53 +婔 > wei; #5A54 +婕 > jié; #5A55 +婖 > tiān; #5A56 +婗 > ní; #5A57 +婘 > quán; #5A58 +婙 > jìng; #5A59 +婚 > hūn; #5A5A +婛 > jīng; #5A5B +婜 > qiān; #5A5C +婝 > diàn; #5A5D +婞 > xìng; #5A5E +婟 > hù; #5A5F +婠 > wà; #5A60 +婡 > lái; #5A61 +婢 > bì; #5A62 +婣 > yīn; #5A63 +婤 > chōu; #5A64 +婥 > chùo; #5A65 +婦 > fù; #5A66 +婧 > jìng; #5A67 +婨 > lún; #5A68 +婩 > yàn; #5A69 +婪 > lán; #5A6A +婫 > kūn; #5A6B +婬 > yín; #5A6C +婭 > yà; #5A6D +婮 > ju; #5A6E +婯 > lì; #5A6F +婰 > diăn; #5A70 +婱 > xián; #5A71 +婳 > huà; #5A73 +婴 > yīng; #5A74 +婵 > chán; #5A75 +婶 > shĕn; #5A76 +婷 > tíng; #5A77 +婸 > dàng; #5A78 +婹 > yăo; #5A79 +婺 > wù; #5A7A +婻 > nàn; #5A7B +婼 > rùo; #5A7C +婽 > jiă; #5A7D +婾 > tōu; #5A7E +婿 > xù; #5A7F +媀 > yú; #5A80 +媁 > wēi; #5A81 +媂 > tí; #5A82 +媃 > róu; #5A83 +媄 > mĕi; #5A84 +媅 
> dān; #5A85 +媆 > ruăn; #5A86 +媇 > qīn; #5A87 +媈 > hui; #5A88 +媉 > wū; #5A89 +媊 > qián; #5A8A +媋 > chūn; #5A8B +媌 > máo; #5A8C +媍 > fù; #5A8D +媎 > jiĕ; #5A8E +媏 > duān; #5A8F +媐 > xī; #5A90 +媑 > zhòng; #5A91 +媒 > méi; #5A92 +媓 > huáng; #5A93 +媔 > mián; #5A94 +媕 > ān; #5A95 +媖 > yīng; #5A96 +媗 > xuān; #5A97 +媘 > jie; #5A98 +媙 > wēi; #5A99 +媚 > mèi; #5A9A +媛 > yuàn; #5A9B +媜 > zhēn; #5A9C +媝 > qīu; #5A9D +媞 > tí; #5A9E +媟 > xiè; #5A9F +媠 > tŭo; #5AA0 +媡 > liàn; #5AA1 +媢 > mào; #5AA2 +媣 > răn; #5AA3 +媤 > sī; #5AA4 +媥 > piān; #5AA5 +媦 > wèi; #5AA6 +媧 > wā; #5AA7 +媨 > jìu; #5AA8 +媩 > hú; #5AA9 +媪 > ăo; #5AAA +媭 > xū; #5AAD +媮 > tōu; #5AAE +媯 > gūi; #5AAF +媰 > zōu; #5AB0 +媱 > yáo; #5AB1 +媲 > pì; #5AB2 +媳 > xí; #5AB3 +媴 > yuán; #5AB4 +媵 > yìng; #5AB5 +媶 > róng; #5AB6 +媷 > rù; #5AB7 +媸 > chī; #5AB8 +媹 > líu; #5AB9 +媺 > mĕi; #5ABA +媻 > pán; #5ABB +媼 > ăo; #5ABC +媽 > mā; #5ABD +媾 > gòu; #5ABE +媿 > kùi; #5ABF +嫀 > qín; #5AC0 +嫁 > jià; #5AC1 +嫂 > săo; #5AC2 +嫃 > zhēn; #5AC3 +嫄 > yuán; #5AC4 +嫅 > chā; #5AC5 +嫆 > yóng; #5AC6 +嫇 > míng; #5AC7 +嫈 > yīng; #5AC8 +嫉 > jí; #5AC9 +嫊 > sù; #5ACA +嫋 > niăo; #5ACB +嫌 > xián; #5ACC +嫍 > tāo; #5ACD +嫎 > páng; #5ACE +嫏 > láng; #5ACF +嫐 > năo; #5AD0 +嫑 > báo; #5AD1 +嫒 > ài; #5AD2 +嫓 > pì; #5AD3 +嫔 > pín; #5AD4 +嫕 > yì; #5AD5 +嫖 > piào; #5AD6 +嫗 > yù; #5AD7 +嫘 > léi; #5AD8 +嫙 > xuán; #5AD9 +嫚 > màn; #5ADA +嫛 > yī; #5ADB +嫜 > zhāng; #5ADC +嫝 > kāng; #5ADD +嫞 > yóng; #5ADE +嫟 > nì; #5ADF +嫠 > lí; #5AE0 +嫡 > dí; #5AE1 +嫢 > gūi; #5AE2 +嫣 > yān; #5AE3 +嫤 > jìn; #5AE4 +嫥 > zhuān; #5AE5 +嫦 > cháng; #5AE6 +嫧 > cè; #5AE7 +嫨 > hān; #5AE8 +嫩 > nèn; #5AE9 +嫪 > lào; #5AEA +嫫 > mó; #5AEB +嫬 > zhē; #5AEC +嫭 > hù; #5AED +嫮 > hù; #5AEE +嫯 > ào; #5AEF +嫰 > nèn; #5AF0 +嫱 > qiáng; #5AF1 +嫳 > piè; #5AF3 +嫴 > gū; #5AF4 +嫵 > wŭ; #5AF5 +嫶 > jiáo; #5AF6 +嫷 > tŭo; #5AF7 +嫸 > zhăn; #5AF8 +嫹 > máo; #5AF9 +嫺 > xián; #5AFA +嫻 > xián; #5AFB +嫼 > mò; #5AFC +嫽 > liáo; #5AFD +嫾 > lián; #5AFE +嫿 > huà; #5AFF +嬀 > gūi; #5B00 +嬁 > dēng; #5B01 +嬂 > zhī; #5B02 +嬃 > xū; #5B03 +嬄 > 
yi; #5B04 +嬅 > huá; #5B05 +嬆 > xī; #5B06 +嬇 > hùi; #5B07 +嬈 > răo; #5B08 +嬉 > xī; #5B09 +嬊 > yàn; #5B0A +嬋 > chán; #5B0B +嬌 > jiāo; #5B0C +嬍 > mĕi; #5B0D +嬎 > fàn; #5B0E +嬏 > fān; #5B0F +嬐 > xiān; #5B10 +嬑 > yì; #5B11 +嬒 > wèi; #5B12 +嬓 > jiào; #5B13 +嬔 > fù; #5B14 +嬕 > shì; #5B15 +嬖 > bì; #5B16 +嬗 > shàn; #5B17 +嬘 > sùi; #5B18 +嬙 > qiáng; #5B19 +嬚 > liăn; #5B1A +嬛 > huán; #5B1B +嬜 > xin; #5B1C +嬝 > niăo; #5B1D +嬞 > dŏng; #5B1E +嬟 > yì; #5B1F +嬠 > cán; #5B20 +嬡 > ài; #5B21 +嬢 > niáng; #5B22 +嬣 > néng; #5B23 +嬤 > mā; #5B24 +嬥 > tiăo; #5B25 +嬦 > chóu; #5B26 +嬧 > jìn; #5B27 +嬨 > cí; #5B28 +嬩 > yú; #5B29 +嬪 > pín; #5B2A +嬫 > yong; #5B2B +嬬 > xū; #5B2C +嬭 > năi; #5B2D +嬮 > yān; #5B2E +嬯 > tái; #5B2F +嬰 > yīng; #5B30 +嬱 > cán; #5B31 +嬲 > niăo; #5B32 +嬳 > wo; #5B33 +嬴 > yíng; #5B34 +嬵 > mián; #5B35 +嬷 > mā; #5B37 +嬸 > shĕn; #5B38 +嬹 > xìng; #5B39 +嬺 > nì; #5B3A +嬻 > dú; #5B3B +嬼 > lĭu; #5B3C +嬽 > yuān; #5B3D +嬾 > lăn; #5B3E +嬿 > yàn; #5B3F +孀 > shuāng; #5B40 +孁 > líng; #5B41 +孂 > jiăo; #5B42 +孃 > niáng; #5B43 +孄 > lăn; #5B44 +孅 > xiān; #5B45 +孆 > yīng; #5B46 +孇 > shuāng; #5B47 +孈 > shuāi; #5B48 +孉 > quán; #5B49 +孊 > mĭ; #5B4A +孋 > lí; #5B4B +孌 > luán; #5B4C +孍 > yán; #5B4D +孎 > zhŭ; #5B4E +孏 > lăn; #5B4F +子 > zĭ; #5B50 +孑 > jié; #5B51 +孒 > jué; #5B52 +孓 > jué; #5B53 +孔 > kŏng; #5B54 +孕 > yùn; #5B55 +孖 > zī; #5B56 +字 > zì; #5B57 +存 > cún; #5B58 +孙 > sūn; #5B59 +孚 > fú; #5B5A +孛 > bèi; #5B5B +孜 > zī; #5B5C +孝 > xiào; #5B5D +孞 > xìn; #5B5E +孟 > mèng; #5B5F +孠 > sì; #5B60 +孡 > tāi; #5B61 +孢 > bāo; #5B62 +季 > jì; #5B63 +孤 > gū; #5B64 +孥 > nú; #5B65 +学 > xué; #5B66 +孨 > zhuăn; #5B68 +孩 > hái; #5B69 +孪 > luán; #5B6A +孫 > sūn; #5B6B +孬 > huài; #5B6C +孭 > miē; #5B6D +孮 > cóng; #5B6E +孯 > qiān; #5B6F +孰 > shú; #5B70 +孱 > chán; #5B71 +孲 > yā; #5B72 +孳 > zī; #5B73 +孴 > nĭ; #5B74 +孵 > fū; #5B75 +孶 > zī; #5B76 +孷 > lí; #5B77 +學 > xué; #5B78 +孹 > bò; #5B79 +孺 > rú; #5B7A +孻 > lái; #5B7B +孼 > niè; #5B7C +孽 > niè; #5B7D +孾 > yīng; #5B7E +孿 > luán; #5B7F +宀 > mián; #5B80 +宁 > zhù; #5B81 +宂 
> rŏng; #5B82 +它 > tā; #5B83 +宄 > gŭi; #5B84 +宅 > zhái; #5B85 +宆 > qíong; #5B86 +宇 > yŭ; #5B87 +守 > shŏu; #5B88 +安 > ān; #5B89 +宊 > tú; #5B8A +宋 > sòng; #5B8B +完 > wán; #5B8C +宍 > ròu; #5B8D +宎 > yăo; #5B8E +宏 > hóng; #5B8F +宐 > yí; #5B90 +宑 > jĭng; #5B91 +宒 > zhūn; #5B92 +宓 > mì; #5B93 +宔 > zhŭ; #5B94 +宕 > dàng; #5B95 +宖 > hóng; #5B96 +宗 > zōng; #5B97 +官 > guān; #5B98 +宙 > zhòu; #5B99 +定 > dìng; #5B9A +宛 > wăn; #5B9B +宜 > yí; #5B9C +宝 > băo; #5B9D +实 > shí; #5B9E +実 > shí; #5B9F +宠 > chŏng; #5BA0 +审 > shĕn; #5BA1 +客 > kè; #5BA2 +宣 > xuān; #5BA3 +室 > shì; #5BA4 +宥 > yòu; #5BA5 +宦 > huàn; #5BA6 +宧 > yí; #5BA7 +宨 > tiăo; #5BA8 +宩 > shĭ; #5BA9 +宪 > xiàn; #5BAA +宫 > gōng; #5BAB +宬 > chéng; #5BAC +宭 > qún; #5BAD +宮 > gōng; #5BAE +宯 > xiāo; #5BAF +宰 > zăi; #5BB0 +宱 > zhà; #5BB1 +宲 > băo; #5BB2 +害 > hài; #5BB3 +宴 > yàn; #5BB4 +宵 > xiāo; #5BB5 +家 > jiā; #5BB6 +宷 > shĕn; #5BB7 +宸 > chén; #5BB8 +容 > róng; #5BB9 +宺 > huăng; #5BBA +宻 > mì; #5BBB +宼 > kòu; #5BBC +宽 > kuān; #5BBD +宾 > bīn; #5BBE +宿 > sù; #5BBF +寀 > cài; #5BC0 +寁 > zăn; #5BC1 +寂 > jì; #5BC2 +寃 > yuān; #5BC3 +寄 > jì; #5BC4 +寅 > yín; #5BC5 +密 > mì; #5BC6 +寇 > kòu; #5BC7 +寈 > qīng; #5BC8 +寉 > què; #5BC9 +寊 > zhēn; #5BCA +寋 > jiăn; #5BCB +富 > fù; #5BCC +寍 > níng; #5BCD +寎 > bìng; #5BCE +寏 > huán; #5BCF +寐 > mèi; #5BD0 +寑 > qĭn; #5BD1 +寒 > hán; #5BD2 +寓 > yù; #5BD3 +寔 > shí; #5BD4 +寕 > níng; #5BD5 +寖 > qìn; #5BD6 +寗 > níng; #5BD7 +寘 > zhì; #5BD8 +寙 > yŭ; #5BD9 +寚 > băo; #5BDA +寛 > kuān; #5BDB +寜 > níng; #5BDC +寝 > qĭn; #5BDD +寞 > mò; #5BDE +察 > chá; #5BDF +寠 > jù; #5BE0 +寡 > guă; #5BE1 +寢 > qĭn; #5BE2 +寣 > hū; #5BE3 +寤 > wù; #5BE4 +寥 > liáo; #5BE5 +實 > shí; #5BE6 +寧 > zhù; #5BE7 +寨 > zhài; #5BE8 +審 > shĕn; #5BE9 +寪 > wĕi; #5BEA +寫 > xiĕ; #5BEB +寬 > kuān; #5BEC +寭 > hùi; #5BED +寮 > liáo; #5BEE +寯 > jùn; #5BEF +寰 > huán; #5BF0 +寱 > yì; #5BF1 +寲 > yí; #5BF2 +寳 > băo; #5BF3 +寴 > qìn; #5BF4 +寵 > chŏng; #5BF5 +寶 > băo; #5BF6 +寷 > fēng; #5BF7 +寸 > cùn; #5BF8 +对 > dùi; #5BF9 +寺 > sì; #5BFA +寻 > xún; #5BFB +导 > dăo; #5BFC +寽 > 
lǜ; #5BFD +対 > dùi; #5BFE +寿 > shòu; #5BFF +尀 > pŏ; #5C00 +封 > fēng; #5C01 +専 > zhuān; #5C02 +尃 > fū; #5C03 +射 > shè; #5C04 +尅 > kè; #5C05 +将 > jiāng; #5C06 +將 > jiāng; #5C07 +專 > zhuān; #5C08 +尉 > wèi; #5C09 +尊 > zūn; #5C0A +尋 > xún; #5C0B +尌 > shù; #5C0C +對 > dùi; #5C0D +導 > dăo; #5C0E +小 > xiăo; #5C0F +尐 > jī; #5C10 +少 > shăo; #5C11 +尒 > ĕr; #5C12 +尓 > ĕr; #5C13 +尔 > ĕr; #5C14 +尕 > gă; #5C15 +尖 > jiān; #5C16 +尗 > shú; #5C17 +尘 > chén; #5C18 +尙 > shàng; #5C19 +尚 > shàng; #5C1A +尛 > mo; #5C1B +尜 > gá; #5C1C +尝 > cháng; #5C1D +尞 > liào; #5C1E +尟 > xiăn; #5C1F +尠 > xiăn; #5C20 +尢 > wāng; #5C22 +尣 > wāng; #5C23 +尤 > yóu; #5C24 +尥 > liào; #5C25 +尦 > liào; #5C26 +尧 > yáo; #5C27 +尨 > máng; #5C28 +尩 > wāng; #5C29 +尪 > wāng; #5C2A +尫 > wāng; #5C2B +尬 > gà; #5C2C +尭 > yáo; #5C2D +尮 > dùo; #5C2E +尯 > kùi; #5C2F +尰 > zhŏng; #5C30 +就 > jìu; #5C31 +尲 > gān; #5C32 +尳 > gŭ; #5C33 +尴 > gān; #5C34 +尵 > túi; #5C35 +尶 > gān; #5C36 +尷 > gān; #5C37 +尸 > shī; #5C38 +尹 > yĭn; #5C39 +尺 > chĭ; #5C3A +尻 > kāo; #5C3B +尼 > ní; #5C3C +尽 > jĭn; #5C3D +尾 > wĕi; #5C3E +尿 > niào; #5C3F +局 > jú; #5C40 +屁 > pì; #5C41 +层 > céng; #5C42 +屃 > xì; #5C43 +屄 > bī; #5C44 +居 > jū; #5C45 +屆 > jiè; #5C46 +屇 > tián; #5C47 +屈 > qū; #5C48 +屉 > tì; #5C49 +届 > jiè; #5C4A +屋 > wū; #5C4B +屌 > diăo; #5C4C +屍 > shī; #5C4D +屎 > shĭ; #5C4E +屏 > píng; #5C4F +屐 > jī; #5C50 +屑 > xiè; #5C51 +屒 > chén; #5C52 +屓 > xì; #5C53 +屔 > ní; #5C54 +展 > zhăn; #5C55 +屖 > xī; #5C56 +屘 > măn; #5C58 +屙 > ē; #5C59 +屚 > lòu; #5C5A +屛 > píng; #5C5B +屜 > tì; #5C5C +屝 > fèi; #5C5D +属 > shŭ; #5C5E +屟 > xiè; #5C5F +屠 > tú; #5C60 +屡 > lǚ; #5C61 +屢 > lǚ; #5C62 +屣 > xĭ; #5C63 +層 > céng; #5C64 +履 > lǚ; #5C65 +屦 > jù; #5C66 +屧 > xiè; #5C67 +屨 > jù; #5C68 +屩 > juē; #5C69 +屪 > liáo; #5C6A +屫 > jué; #5C6B +屬 > shŭ; #5C6C +屭 > xì; #5C6D +屮 > chè; #5C6E +屯 > tún; #5C6F +屰 > nì; #5C70 +山 > shān; #5C71 +屳 > xiān; #5C73 +屴 > lì; #5C74 +屵 > xuē; #5C75 +屸 > lóng; #5C78 +屹 > yì; #5C79 +屺 > qĭ; #5C7A +屻 > rèn; #5C7B +屼 > wù; #5C7C +屽 > hàn; #5C7D +屾 > shēn; #5C7E 
+屿 > yŭ; #5C7F +岀 > chū; #5C80 +岁 > sùi; #5C81 +岂 > qĭ; #5C82 +岄 > yuè; #5C84 +岅 > băn; #5C85 +岆 > yăo; #5C86 +岇 > áng; #5C87 +岈 > yá; #5C88 +岉 > wù; #5C89 +岊 > jié; #5C8A +岋 > è; #5C8B +岌 > jí; #5C8C +岍 > qiān; #5C8D +岎 > fēn; #5C8E +岏 > yuán; #5C8F +岐 > qí; #5C90 +岑 > cén; #5C91 +岒 > qián; #5C92 +岓 > qí; #5C93 +岔 > chà; #5C94 +岕 > jiè; #5C95 +岖 > qū; #5C96 +岗 > găng; #5C97 +岘 > xiàn; #5C98 +岙 > ào; #5C99 +岚 > lán; #5C9A +岛 > dăo; #5C9B +岜 > bā; #5C9C +岝 > zùo; #5C9D +岞 > zùo; #5C9E +岟 > yăng; #5C9F +岠 > jù; #5CA0 +岡 > gāng; #5CA1 +岢 > kĕ; #5CA2 +岣 > gŏu; #5CA3 +岤 > xuè; #5CA4 +岥 > bēi; #5CA5 +岦 > lì; #5CA6 +岧 > tiáo; #5CA7 +岨 > jū; #5CA8 +岩 > yán; #5CA9 +岪 > fú; #5CAA +岫 > xìu; #5CAB +岬 > jiă; #5CAC +岭 > líng; #5CAD +岮 > túo; #5CAE +岯 > pēi; #5CAF +岰 > yŏu; #5CB0 +岱 > dài; #5CB1 +岲 > kuàng; #5CB2 +岳 > yuè; #5CB3 +岴 > qū; #5CB4 +岵 > hù; #5CB5 +岶 > pò; #5CB6 +岷 > mín; #5CB7 +岸 > àn; #5CB8 +岹 > tiáo; #5CB9 +岺 > líng; #5CBA +岻 > chí; #5CBB +岽 > dōng; #5CBD +岿 > kūi; #5CBF +峀 > xìu; #5CC0 +峁 > măo; #5CC1 +峂 > tóng; #5CC2 +峃 > xué; #5CC3 +峄 > yì; #5CC4 +峆 > hē; #5CC6 +峇 > kē; #5CC7 +峈 > lùo; #5CC8 +峉 > ē; #5CC9 +峊 > fù; #5CCA +峋 > xún; #5CCB +峌 > dié; #5CCC +峍 > lù; #5CCD +峎 > ān; #5CCE +峏 > ĕr; #5CCF +峐 > gāi; #5CD0 +峑 > quán; #5CD1 +峒 > tóng; #5CD2 +峓 > yí; #5CD3 +峔 > mŭ; #5CD4 +峕 > shí; #5CD5 +峖 > ān; #5CD6 +峗 > wéi; #5CD7 +峘 > hū; #5CD8 +峙 > zhì; #5CD9 +峚 > mì; #5CDA +峛 > lĭ; #5CDB +峜 > jī; #5CDC +峝 > tóng; #5CDD +峞 > wéi; #5CDE +峟 > yòu; #5CDF +峡 > xiá; #5CE1 +峢 > lĭ; #5CE2 +峣 > yáo; #5CE3 +峤 > jiào; #5CE4 +峥 > zhēng; #5CE5 +峦 > luán; #5CE6 +峧 > jiāo; #5CE7 +峨 > é; #5CE8 +峩 > é; #5CE9 +峪 > yù; #5CEA +峫 > yé; #5CEB +峬 > bū; #5CEC +峭 > qiào; #5CED +峮 > qūn; #5CEE +峯 > fēng; #5CEF +峰 > fēng; #5CF0 +峱 > náo; #5CF1 +峲 > lĭ; #5CF2 +峳 > yóu; #5CF3 +峴 > xiàn; #5CF4 +峵 > hóng; #5CF5 +島 > dăo; #5CF6 +峷 > shēn; #5CF7 +峸 > chéng; #5CF8 +峹 > tú; #5CF9 +峺 > gĕng; #5CFA +峻 > jùn; #5CFB +峼 > hào; #5CFC +峽 > xiá; #5CFD +峾 > yīn; #5CFE +峿 > yŭ; #5CFF +崀 > làng; #5D00 +崁 > kăn; 
#5D01 +崂 > láo; #5D02 +崃 > lái; #5D03 +崄 > xiăn; #5D04 +崅 > què; #5D05 +崆 > kōng; #5D06 +崇 > chóng; #5D07 +崈 > chóng; #5D08 +崉 > tà; #5D09 +崊 > lin; #5D0A +崋 > huá; #5D0B +崌 > jū; #5D0C +崍 > lái; #5D0D +崎 > qí; #5D0E +崏 > mín; #5D0F +崐 > kūn; #5D10 +崑 > kūn; #5D11 +崒 > zú; #5D12 +崓 > gù; #5D13 +崔 > cūi; #5D14 +崕 > yá; #5D15 +崖 > yá; #5D16 +崗 > găng; #5D17 +崘 > lún; #5D18 +崙 > lún; #5D19 +崚 > léng; #5D1A +崛 > jué; #5D1B +崜 > dūo; #5D1C +崝 > zhēng; #5D1D +崞 > gūo; #5D1E +崟 > yín; #5D1F +崠 > dōng; #5D20 +崡 > hán; #5D21 +崢 > zhēng; #5D22 +崣 > wĕi; #5D23 +崤 > yáo; #5D24 +崥 > pĭ; #5D25 +崦 > yān; #5D26 +崧 > sōng; #5D27 +崨 > jié; #5D28 +崩 > bēng; #5D29 +崪 > zú; #5D2A +崫 > jué; #5D2B +崬 > dōng; #5D2C +崭 > zhăn; #5D2D +崮 > gù; #5D2E +崯 > yín; #5D2F +崱 > zé; #5D31 +崲 > huáng; #5D32 +崳 > yú; #5D33 +崴 > wēi; #5D34 +崵 > yáng; #5D35 +崶 > fēng; #5D36 +崷 > qíu; #5D37 +崸 > dùn; #5D38 +崹 > tí; #5D39 +崺 > yĭ; #5D3A +崻 > zhì; #5D3B +崼 > shì; #5D3C +崽 > zăi; #5D3D +崾 > yăo; #5D3E +崿 > è; #5D3F +嵀 > zhù; #5D40 +嵁 > kān; #5D41 +嵂 > lǜ; #5D42 +嵃 > yăn; #5D43 +嵄 > mĕi; #5D44 +嵅 > gān; #5D45 +嵆 > jī; #5D46 +嵇 > jī; #5D47 +嵈 > huăn; #5D48 +嵉 > tíng; #5D49 +嵊 > shèng; #5D4A +嵋 > méi; #5D4B +嵌 > qiàn; #5D4C +嵍 > wù; #5D4D +嵎 > yú; #5D4E +嵏 > zōng; #5D4F +嵐 > lán; #5D50 +嵑 > jué; #5D51 +嵒 > yán; #5D52 +嵓 > yán; #5D53 +嵔 > wĕi; #5D54 +嵕 > zōng; #5D55 +嵖 > chá; #5D56 +嵗 > sùi; #5D57 +嵘 > róng; #5D58 +嵚 > qīn; #5D5A +嵛 > yú; #5D5B +嵝 > lŏu; #5D5D +嵞 > tú; #5D5E +嵟 > dūi; #5D5F +嵠 > xī; #5D60 +嵡 > wēng; #5D61 +嵢 > cāng; #5D62 +嵣 > dāng; #5D63 +嵤 > hóng; #5D64 +嵥 > jié; #5D65 +嵦 > ái; #5D66 +嵧 > líu; #5D67 +嵨 > wŭ; #5D68 +嵩 > sōng; #5D69 +嵪 > qiāo; #5D6A +嵫 > zī; #5D6B +嵬 > wéi; #5D6C +嵭 > bēng; #5D6D +嵮 > diān; #5D6E +嵯 > cúo; #5D6F +嵰 > qiăn; #5D70 +嵱 > yŏng; #5D71 +嵲 > niè; #5D72 +嵳 > cúo; #5D73 +嵴 > jí; #5D74 +嵷 > sŏng; #5D77 +嵸 > zōng; #5D78 +嵹 > jiàng; #5D79 +嵺 > liáo; #5D7A +嵻 > kang; #5D7B +嵼 > chăn; #5D7C +嵽 > dié; #5D7D +嵾 > cēn; #5D7E +嵿 > dĭng; #5D7F +嶀 > tū; #5D80 +嶁 > lŏu; #5D81 +嶂 > 
zhàng; #5D82 +嶃 > zhăn; #5D83 +嶄 > zhăn; #5D84 +嶅 > áo; #5D85 +嶆 > cáo; #5D86 +嶇 > qū; #5D87 +嶈 > qiāng; #5D88 +嶉 > zūi; #5D89 +嶊 > zŭi; #5D8A +嶋 > dăo; #5D8B +嶌 > dăo; #5D8C +嶍 > xí; #5D8D +嶎 > yù; #5D8E +嶏 > bó; #5D8F +嶐 > lóng; #5D90 +嶑 > xiăng; #5D91 +嶒 > céng; #5D92 +嶓 > bō; #5D93 +嶔 > qīn; #5D94 +嶕 > jiāo; #5D95 +嶖 > yăn; #5D96 +嶗 > láo; #5D97 +嶘 > zhàn; #5D98 +嶙 > lín; #5D99 +嶚 > liáo; #5D9A +嶛 > liáo; #5D9B +嶜 > jīn; #5D9C +嶝 > dèng; #5D9D +嶞 > dùo; #5D9E +嶟 > zūn; #5D9F +嶠 > jiào; #5DA0 +嶡 > gùi; #5DA1 +嶢 > yáo; #5DA2 +嶣 > qiáo; #5DA3 +嶤 > yáo; #5DA4 +嶥 > jué; #5DA5 +嶦 > zhān; #5DA6 +嶧 > yì; #5DA7 +嶨 > xué; #5DA8 +嶩 > náo; #5DA9 +嶪 > yè; #5DAA +嶫 > yè; #5DAB +嶬 > yí; #5DAC +嶭 > è; #5DAD +嶮 > xiăn; #5DAE +嶯 > jí; #5DAF +嶰 > xiè; #5DB0 +嶱 > kĕ; #5DB1 +嶲 > xī; #5DB2 +嶳 > dì; #5DB3 +嶴 > ào; #5DB4 +嶵 > zŭi; #5DB5 +嶷 > nì; #5DB7 +嶸 > róng; #5DB8 +嶹 > dăo; #5DB9 +嶺 > lĭng; #5DBA +嶻 > zá; #5DBB +嶼 > yŭ; #5DBC +嶽 > yuè; #5DBD +嶾 > yĭn; #5DBE +巀 > jiē; #5DC0 +巁 > lì; #5DC1 +巂 > sŭi; #5DC2 +巃 > lóng; #5DC3 +巄 > lóng; #5DC4 +巅 > diān; #5DC5 +巆 > yíng; #5DC6 +巇 > xī; #5DC7 +巈 > jú; #5DC8 +巉 > chán; #5DC9 +巊 > yĭng; #5DCA +巋 > kūi; #5DCB +巌 > yán; #5DCC +巍 > wēi; #5DCD +巎 > náo; #5DCE +巏 > quán; #5DCF +巐 > chăo; #5DD0 +巑 > cuán; #5DD1 +巒 > luán; #5DD2 +巓 > diān; #5DD3 +巔 > diān; #5DD4 +巖 > yán; #5DD6 +巗 > yán; #5DD7 +巘 > yăn; #5DD8 +巙 > náo; #5DD9 +巚 > yăn; #5DDA +巛 > chuān; #5DDB +巜 > gùi; #5DDC +川 > chuān; #5DDD +州 > zhōu; #5DDE +巟 > huāng; #5DDF +巠 > jīng; #5DE0 +巡 > xún; #5DE1 +巢 > cháo; #5DE2 +巣 > cháo; #5DE3 +巤 > liē; #5DE4 +工 > gōng; #5DE5 +左 > zŭo; #5DE6 +巧 > qiăo; #5DE7 +巨 > jù; #5DE8 +巩 > gŏng; #5DE9 +巫 > wū; #5DEB +差 > chāi; #5DEE +巯 > qíu; #5DEF +巰 > qíu; #5DF0 +己 > jĭ; #5DF1 +已 > yĭ; #5DF2 +巳 > sì; #5DF3 +巴 > bā; #5DF4 +巵 > zhī; #5DF5 +巶 > zhāo; #5DF6 +巷 > xiàng; #5DF7 +巸 > yí; #5DF8 +巹 > jĭn; #5DF9 +巺 > xùn; #5DFA +巻 > juàn; #5DFB +巽 > xùn; #5DFD +巾 > jīn; #5DFE +巿 > fú; #5DFF +帀 > zā; #5E00 +币 > bì; #5E01 +市 > shì; #5E02 +布 > bù; #5E03 +帄 > dīng; #5E04 +帅 
> shuài; #5E05 +帆 > fān; #5E06 +帇 > niè; #5E07 +师 > shī; #5E08 +帉 > fēn; #5E09 +帊 > pà; #5E0A +帋 > zhĭ; #5E0B +希 > xī; #5E0C +帍 > hù; #5E0D +帎 > dàn; #5E0E +帏 > wéi; #5E0F +帐 > zhàng; #5E10 +帑 > tăng; #5E11 +帒 > dài; #5E12 +帓 > mà; #5E13 +帔 > pèi; #5E14 +帕 > pà; #5E15 +帖 > tiē; #5E16 +帗 > fú; #5E17 +帘 > lián; #5E18 +帙 > zhì; #5E19 +帚 > zhŏu; #5E1A +帛 > bó; #5E1B +帜 > zhì; #5E1C +帝 > dì; #5E1D +帞 > mò; #5E1E +帟 > yì; #5E1F +帠 > yì; #5E20 +帡 > píng; #5E21 +帢 > qià; #5E22 +帣 > juàn; #5E23 +帤 > rú; #5E24 +帥 > shuài; #5E25 +带 > dài; #5E26 +帧 > zhèng; #5E27 +帨 > shùi; #5E28 +帩 > qiào; #5E29 +帪 > zhēn; #5E2A +師 > shī; #5E2B +帬 > qún; #5E2C +席 > xí; #5E2D +帮 > bāng; #5E2E +帯 > dài; #5E2F +帰 > gūi; #5E30 +帱 > chóu; #5E31 +帲 > píng; #5E32 +帳 > zhàng; #5E33 +帴 > shā; #5E34 +帵 > wān; #5E35 +帶 > dài; #5E36 +帷 > wéi; #5E37 +常 > cháng; #5E38 +帹 > shà; #5E39 +帺 > qí; #5E3A +帻 > zé; #5E3B +帼 > gúo; #5E3C +帽 > mào; #5E3D +帾 > dŭ; #5E3E +帿 > hóu; #5E3F +幀 > zhèng; #5E40 +幁 > xū; #5E41 +幂 > mì; #5E42 +幃 > wéi; #5E43 +幄 > wò; #5E44 +幅 > fú; #5E45 +幆 > yì; #5E46 +幇 > bāng; #5E47 +幈 > píng; #5E48 +幊 > gōng; #5E4A +幋 > pán; #5E4B +幌 > huăng; #5E4C +幍 > dāo; #5E4D +幎 > mì; #5E4E +幏 > jiā; #5E4F +幐 > téng; #5E50 +幑 > hūi; #5E51 +幒 > zhōng; #5E52 +幓 > shān; #5E53 +幔 > màn; #5E54 +幕 > mù; #5E55 +幖 > biāo; #5E56 +幗 > gúo; #5E57 +幘 > zé; #5E58 +幙 > mù; #5E59 +幚 > bāng; #5E5A +幛 > zhàng; #5E5B +幜 > jĭong; #5E5C +幝 > chăn; #5E5D +幞 > fú; #5E5E +幟 > zhì; #5E5F +幠 > hū; #5E60 +幡 > fān; #5E61 +幢 > chuáng; #5E62 +幣 > bì; #5E63 +幦 > mì; #5E66 +幧 > qiāo; #5E67 +幨 > chān; #5E68 +幩 > fén; #5E69 +幪 > méng; #5E6A +幫 > bāng; #5E6B +幬 > chóu; #5E6C +幭 > miè; #5E6D +幮 > chú; #5E6E +幯 > jié; #5E6F +幰 > xiăn; #5E70 +幱 > lán; #5E71 +干 > gān; #5E72 +平 > píng; #5E73 +年 > nián; #5E74 +幵 > qiān; #5E75 +并 > bìng; #5E76 +幷 > bìng; #5E77 +幸 > xìng; #5E78 +幹 > gàn; #5E79 +幺 > yāo; #5E7A +幻 > huàn; #5E7B +幼 > yòu; #5E7C +幽 > yōu; #5E7D +幾 > jĭ; #5E7E +广 > yăn; #5E7F +庀 > pĭ; #5E80 +庁 > tīng; #5E81 +庂 > zè; #5E82 +広 > 
guăng; #5E83 +庄 > zhuāng; #5E84 +庅 > mo; #5E85 +庆 > qìng; #5E86 +庇 > bì; #5E87 +庈 > qín; #5E88 +庉 > dùn; #5E89 +床 > chuáng; #5E8A +庋 > gŭi; #5E8B +庌 > yă; #5E8C +庍 > bài; #5E8D +庎 > jiè; #5E8E +序 > xù; #5E8F +庐 > lú; #5E90 +庑 > wŭ; #5E91 +库 > kù; #5E93 +应 > yìng; #5E94 +底 > dĭ; #5E95 +庖 > páo; #5E96 +店 > diàn; #5E97 +庘 > yā; #5E98 +庙 > miào; #5E99 +庚 > gēng; #5E9A +庛 > cī; #5E9B +府 > fŭ; #5E9C +庝 > tóng; #5E9D +庞 > páng; #5E9E +废 > fèi; #5E9F +庠 > xiáng; #5EA0 +庡 > yĭ; #5EA1 +庢 > zhì; #5EA2 +庣 > tiāo; #5EA3 +庤 > zhì; #5EA4 +庥 > xīu; #5EA5 +度 > dù; #5EA6 +座 > zùo; #5EA7 +庨 > xiāo; #5EA8 +庩 > tú; #5EA9 +庪 > gŭi; #5EAA +庫 > kù; #5EAB +庬 > páng; #5EAC +庭 > tíng; #5EAD +庮 > yŏu; #5EAE +庯 > bū; #5EAF +庰 > dīng; #5EB0 +庱 > chĕng; #5EB1 +庲 > lái; #5EB2 +庳 > bēi; #5EB3 +庴 > jí; #5EB4 +庵 > ān; #5EB5 +庶 > shù; #5EB6 +康 > kāng; #5EB7 +庸 > yōng; #5EB8 +庹 > tŭo; #5EB9 +庺 > sōng; #5EBA +庻 > shù; #5EBB +庼 > qĭng; #5EBC +庽 > yù; #5EBD +庾 > yŭ; #5EBE +庿 > miào; #5EBF +廀 > sōu; #5EC0 +廁 > cè; #5EC1 +廂 > xiāng; #5EC2 +廃 > fèi; #5EC3 +廄 > jìu; #5EC4 +廅 > hé; #5EC5 +廆 > hùi; #5EC6 +廇 > lìu; #5EC7 +廈 > shà; #5EC8 +廉 > lián; #5EC9 +廊 > láng; #5ECA +廋 > sōu; #5ECB +廌 > jiàn; #5ECC +廍 > pŏu; #5ECD +廎 > qĭng; #5ECE +廏 > jìu; #5ECF +廐 > jìu; #5ED0 +廑 > qín; #5ED1 +廒 > áo; #5ED2 +廓 > kùo; #5ED3 +廔 > lóu; #5ED4 +廕 > yīn; #5ED5 +廖 > liào; #5ED6 +廗 > dài; #5ED7 +廘 > lù; #5ED8 +廙 > yì; #5ED9 +廚 > chú; #5EDA +廛 > chán; #5EDB +廜 > tū; #5EDC +廝 > sī; #5EDD +廞 > xīn; #5EDE +廟 > miào; #5EDF +廠 > chăng; #5EE0 +廡 > wŭ; #5EE1 +廢 > fèi; #5EE2 +廣 > guăng; #5EE3 +廥 > kuài; #5EE5 +廦 > bì; #5EE6 +廧 > qiáng; #5EE7 +廨 > xiè; #5EE8 +廩 > lĭn; #5EE9 +廪 > lĭn; #5EEA +廫 > liáo; #5EEB +廬 > lú; #5EEC +廮 > yíng; #5EEE +廯 > xiān; #5EEF +廰 > tīng; #5EF0 +廱 > yōng; #5EF1 +廲 > lí; #5EF2 +廳 > tīng; #5EF3 +廴 > yĭn; #5EF4 +廵 > xún; #5EF5 +延 > yán; #5EF6 +廷 > tíng; #5EF7 +廸 > dí; #5EF8 +廹 > pò; #5EF9 +建 > jiàn; #5EFA +廻 > húi; #5EFB +廼 > năi; #5EFC +廽 > húi; #5EFD +廾 > gòng; #5EFE +廿 > niàn; #5EFF +开 > kāi; #5F00 +弁 > biàn; 
#5F01 +异 > yì; #5F02 +弃 > qì; #5F03 +弄 > nòng; #5F04 +弅 > fén; #5F05 +弆 > jŭ; #5F06 +弇 > yăn; #5F07 +弈 > yì; #5F08 +弉 > zàng; #5F09 +弊 > bì; #5F0A +弋 > yì; #5F0B +弌 > yī; #5F0C +弍 > èr; #5F0D +弎 > sān; #5F0E +式 > shì; #5F0F +弐 > èr; #5F10 +弑 > shì; #5F11 +弒 > shì; #5F12 +弓 > gōng; #5F13 +弔 > diào; #5F14 +引 > yĭn; #5F15 +弖 > hù; #5F16 +弗 > fú; #5F17 +弘 > hóng; #5F18 +弙 > wū; #5F19 +弚 > túi; #5F1A +弛 > chí; #5F1B +弜 > jiàng; #5F1C +弝 > bà; #5F1D +弞 > shĕn; #5F1E +弟 > dì; #5F1F +张 > zhāng; #5F20 +弡 > jué; #5F21 +弢 > tāo; #5F22 +弣 > fŭ; #5F23 +弤 > dĭ; #5F24 +弥 > mí; #5F25 +弦 > xián; #5F26 +弧 > hú; #5F27 +弨 > chāo; #5F28 +弩 > nŭ; #5F29 +弪 > jìng; #5F2A +弫 > zhĕn; #5F2B +弬 > yí; #5F2C +弭 > mĭ; #5F2D +弮 > quān; #5F2E +弯 > wān; #5F2F +弰 > shāo; #5F30 +弱 > rùo; #5F31 +弲 > xuān; #5F32 +弳 > jìng; #5F33 +弴 > dūn; #5F34 +張 > zhāng; #5F35 +弶 > jiàng; #5F36 +強 > qiáng; #5F37 +弸 > péng; #5F38 +弹 > dàn; #5F39 +强 > qiáng; #5F3A +弻 > bì; #5F3B +弼 > bì; #5F3C +弽 > shè; #5F3D +弾 > dàn; #5F3E +弿 > jiăn; #5F3F +彀 > gòu; #5F40 +彂 > fā; #5F42 +彃 > bì; #5F43 +彄 > kōu; #5F44 +彆 > biè; #5F46 +彇 > xiāo; #5F47 +彈 > dàn; #5F48 +彉 > kùo; #5F49 +彊 > qiáng; #5F4A +彋 > hóng; #5F4B +彌 > mí; #5F4C +彍 > kùo; #5F4D +彎 > wān; #5F4E +彏 > jué; #5F4F +彐 > jì; #5F50 +彑 > jì; #5F51 +归 > gūi; #5F52 +当 > dāng; #5F53 +彔 > lù; #5F54 +录 > lù; #5F55 +彖 > tuàn; #5F56 +彗 > hùi; #5F57 +彘 > zhì; #5F58 +彙 > hùi; #5F59 +彚 > hùi; #5F5A +彛 > yí; #5F5B +彜 > yí; #5F5C +彝 > yí; #5F5D +彞 > yí; #5F5E +彟 > hùo; #5F5F +彠 > hùo; #5F60 +彡 > shān; #5F61 +形 > xíng; #5F62 +彣 > wén; #5F63 +彤 > tóng; #5F64 +彥 > yàn; #5F65 +彦 > yàn; #5F66 +彧 > yù; #5F67 +彨 > chī; #5F68 +彩 > căi; #5F69 +彪 > biāo; #5F6A +彫 > diāo; #5F6B +彬 > bīn; #5F6C +彭 > péng; #5F6D +彮 > yŏng; #5F6E +彯 > piāo; #5F6F +彰 > zhāng; #5F70 +影 > yĭng; #5F71 +彲 > chī; #5F72 +彳 > chì; #5F73 +彴 > zhúo; #5F74 +彵 > tŭo; #5F75 +彶 > jí; #5F76 +彷 > páng; #5F77 +彸 > zhōng; #5F78 +役 > yì; #5F79 +彺 > wáng; #5F7A +彻 > chè; #5F7B +彼 > bĭ; #5F7C +彽 > chí; #5F7D +彾 > lĭng; #5F7E +彿 > fú; 
#5F7F +往 > wăng; #5F80 +征 > zhēng; #5F81 +徂 > cú; #5F82 +徃 > wăng; #5F83 +径 > jìng; #5F84 +待 > dài; #5F85 +徆 > xī; #5F86 +徇 > xùn; #5F87 +很 > hĕn; #5F88 +徉 > yáng; #5F89 +徊 > huái; #5F8A +律 > lǜ; #5F8B +後 > hòu; #5F8C +徍 > wā; #5F8D +徎 > chĕng; #5F8E +徏 > zhì; #5F8F +徐 > xú; #5F90 +徑 > jìng; #5F91 +徒 > tú; #5F92 +従 > cóng; #5F93 +徕 > lái; #5F95 +徖 > cóng; #5F96 +得 > dé; #5F97 +徘 > pái; #5F98 +徙 > xĭ; #5F99 +徛 > qì; #5F9B +徜 > cháng; #5F9C +徝 > zhì; #5F9D +從 > cóng; #5F9E +徟 > zhōu; #5F9F +徠 > lái; #5FA0 +御 > yù; #5FA1 +徢 > xiè; #5FA2 +徣 > jiè; #5FA3 +徤 > jiàn; #5FA4 +徥 > chí; #5FA5 +徦 > jiă; #5FA6 +徧 > biàn; #5FA7 +徨 > huáng; #5FA8 +復 > fù; #5FA9 +循 > xún; #5FAA +徫 > wĕi; #5FAB +徬 > páng; #5FAC +徭 > yáo; #5FAD +微 > wēi; #5FAE +徯 > xī; #5FAF +徰 > zhēng; #5FB0 +徱 > piào; #5FB1 +徲 > chí; #5FB2 +徳 > dé; #5FB3 +徴 > zhēng; #5FB4 +徵 > zhēng; #5FB5 +徶 > biè; #5FB6 +德 > dé; #5FB7 +徸 > chōng; #5FB8 +徹 > chè; #5FB9 +徺 > jiăo; #5FBA +徻 > wèi; #5FBB +徼 > jiào; #5FBC +徽 > hūi; #5FBD +徾 > méi; #5FBE +徿 > lòng; #5FBF +忀 > xiāng; #5FC0 +忁 > bào; #5FC1 +忂 > qú; #5FC2 +心 > xīn; #5FC3 +忄 > shù' 'xīn' 'páng; #5FC4 +必 > bì; #5FC5 +忆 > yì; #5FC6 +忇 > lè; #5FC7 +忈 > rén; #5FC8 +忉 > dāo; #5FC9 +忊 > dìng; #5FCA +忋 > găi; #5FCB +忌 > jì; #5FCC +忍 > rĕn; #5FCD +忎 > rén; #5FCE +忏 > chàn; #5FCF +忐 > tăn; #5FD0 +忑 > tè; #5FD1 +忒 > tè; #5FD2 +忓 > gān; #5FD3 +忔 > qì; #5FD4 +忕 > shì; #5FD5 +忖 > cŭn; #5FD6 +志 > zhì; #5FD7 +忘 > wàng; #5FD8 +忙 > máng; #5FD9 +忚 > xī; #5FDA +忛 > fán; #5FDB +応 > yīng; #5FDC +忝 > tiăn; #5FDD +忞 > mín; #5FDE +忟 > mín; #5FDF +忠 > zhōng; #5FE0 +忡 > chōng; #5FE1 +忢 > wù; #5FE2 +忣 > jí; #5FE3 +忤 > wŭ; #5FE4 +忥 > xì; #5FE5 +忦 > yè; #5FE6 +忧 > yōu; #5FE7 +忨 > wàn; #5FE8 +忩 > cōng; #5FE9 +忪 > zhōng; #5FEA +快 > kuài; #5FEB +忬 > yù; #5FEC +忭 > biàn; #5FED +忮 > zhì; #5FEE +忯 > qí; #5FEF +忰 > cùi; #5FF0 +忱 > chén; #5FF1 +忲 > tài; #5FF2 +忳 > tún; #5FF3 +忴 > qián; #5FF4 +念 > niàn; #5FF5 +忶 > hún; #5FF6 +忷 > xīong; #5FF7 +忸 > nĭu; #5FF8 +忹 > wăng; #5FF9 +忺 > xiān; #5FFA +忻 > xīn; #5FFB 
+忼 > kāng; #5FFC +忽 > hū; #5FFD +忾 > kài; #5FFE +忿 > fèn; #5FFF +怀 > huái; #6000 +态 > tài; #6001 +怂 > sŏng; #6002 +怃 > wŭ; #6003 +怄 > òu; #6004 +怅 > chàng; #6005 +怆 > chuàng; #6006 +怇 > jù; #6007 +怈 > yì; #6008 +怉 > băo; #6009 +怊 > chāo; #600A +怋 > mín; #600B +怌 > pēi; #600C +怍 > zùo; #600D +怎 > zĕn; #600E +怏 > yàng; #600F +怐 > kòu; #6010 +怑 > bàn; #6011 +怒 > nù; #6012 +怓 > náo; #6013 +怔 > zhēng; #6014 +怕 > pà; #6015 +怖 > bù; #6016 +怗 > tiē; #6017 +怘 > gù; #6018 +怙 > hù; #6019 +怚 > jù; #601A +怛 > dá; #601B +怜 > lián; #601C +思 > sī; #601D +怞 > chōu; #601E +怟 > dì; #601F +怠 > dài; #6020 +怡 > yí; #6021 +怢 > tú; #6022 +怣 > yóu; #6023 +怤 > fū; #6024 +急 > jí; #6025 +怦 > pēng; #6026 +性 > xìng; #6027 +怨 > yuàn; #6028 +怩 > ní; #6029 +怪 > guài; #602A +怫 > fú; #602B +怬 > xì; #602C +怭 > bì; #602D +怮 > yōu; #602E +怯 > qiè; #602F +怰 > xuàn; #6030 +怱 > cōng; #6031 +怲 > bĭng; #6032 +怳 > huăng; #6033 +怴 > xù; #6034 +怵 > chù; #6035 +怶 > pī; #6036 +怷 > xī; #6037 +怸 > xī; #6038 +怹 > tān; #6039 +总 > zŏng; #603B +怼 > dùi; #603C +怿 > yì; #603F +恀 > chĭ; #6040 +恁 > rèn; #6041 +恂 > xún; #6042 +恃 > shì; #6043 +恄 > xì; #6044 +恅 > lăo; #6045 +恆 > héng; #6046 +恇 > kuāng; #6047 +恈 > mú; #6048 +恉 > zhĭ; #6049 +恊 > xié; #604A +恋 > liàn; #604B +恌 > tiāo; #604C +恍 > huăng; #604D +恎 > dié; #604E +恏 > hăo; #604F +恐 > kŏng; #6050 +恑 > gŭi; #6051 +恒 > héng; #6052 +恓 > xī; #6053 +恔 > xiào; #6054 +恕 > shù; #6055 +恖 > sī; #6056 +恗 > kuă; #6057 +恘 > qīu; #6058 +恙 > yàng; #6059 +恚 > hùi; #605A +恛 > húi; #605B +恜 > chì; #605C +恝 > jiá; #605D +恞 > yí; #605E +恟 > xīong; #605F +恠 > guài; #6060 +恡 > lìn; #6061 +恢 > hūi; #6062 +恣 > zì; #6063 +恤 > xù; #6064 +恥 > chĭ; #6065 +恦 > xiàng; #6066 +恧 > nǜ; #6067 +恨 > hèn; #6068 +恩 > ēn; #6069 +恪 > kè; #606A +恫 > tōng; #606B +恬 > tián; #606C +恭 > gōng; #606D +恮 > quán; #606E +息 > xī; #606F +恰 > qià; #6070 +恱 > yuè; #6071 +恲 > pēng; #6072 +恳 > kĕn; #6073 +恴 > dé; #6074 +恵 > hùi; #6075 +恶 > è; #6076 +恸 > tòng; #6078 +恹 > yàn; #6079 +恺 > kăi; #607A +恻 > cè; #607B +恼 > năo; 
#607C +恽 > yùn; #607D +恾 > máng; #607E +恿 > yŏng; #607F +悀 > yŏng; #6080 +悁 > yuān; #6081 +悂 > pī; #6082 +悃 > kŭn; #6083 +悄 > qiăo; #6084 +悅 > yuè; #6085 +悆 > yù; #6086 +悇 > yù; #6087 +悈 > jiè; #6088 +悉 > xī; #6089 +悊 > zhé; #608A +悋 > lìn; #608B +悌 > tì; #608C +悍 > hàn; #608D +悎 > hào; #608E +悏 > qiè; #608F +悐 > tì; #6090 +悑 > bù; #6091 +悒 > yì; #6092 +悓 > qiàn; #6093 +悔 > hŭi; #6094 +悕 > xī; #6095 +悖 > bèi; #6096 +悗 > mán; #6097 +悘 > yī; #6098 +悙 > hēng; #6099 +悚 > sŏng; #609A +悛 > quān; #609B +悜 > chĕng; #609C +悝 > hūi; #609D +悞 > wù; #609E +悟 > wù; #609F +悠 > yōu; #60A0 +悡 > lí; #60A1 +悢 > liàng; #60A2 +患 > huàn; #60A3 +悤 > cōng; #60A4 +悥 > yì; #60A5 +悦 > yuè; #60A6 +悧 > lì; #60A7 +您 > nín; #60A8 +悩 > năo; #60A9 +悪 > è; #60AA +悫 > què; #60AB +悬 > xuán; #60AC +悭 > qiān; #60AD +悮 > wù; #60AE +悯 > mĭn; #60AF +悰 > cóng; #60B0 +悱 > fĕi; #60B1 +悲 > bēi; #60B2 +悳 > dúo; #60B3 +悴 > cùi; #60B4 +悵 > chàng; #60B5 +悶 > mèn; #60B6 +悷 > lì; #60B7 +悸 > jì; #60B8 +悹 > guàn; #60B9 +悺 > guàn; #60BA +悻 > xìng; #60BB +悼 > dào; #60BC +悽 > qī; #60BD +悾 > kōng; #60BE +悿 > tiăn; #60BF +惀 > lún; #60C0 +惁 > xī; #60C1 +惂 > kăn; #60C2 +惃 > kūn; #60C3 +惄 > nì; #60C4 +情 > qíng; #60C5 +惆 > chóu; #60C6 +惇 > dūn; #60C7 +惈 > gŭo; #60C8 +惉 > chān; #60C9 +惊 > liáng; #60CA +惋 > wăn; #60CB +惌 > yuān; #60CC +惍 > jīn; #60CD +惎 > jì; #60CE +惏 > lín; #60CF +惐 > yù; #60D0 +惑 > hùo; #60D1 +惒 > hé; #60D2 +惓 > quán; #60D3 +惔 > tán; #60D4 +惕 > tì; #60D5 +惖 > tì; #60D6 +惗 > niē; #60D7 +惘 > wăng; #60D8 +惙 > chùo; #60D9 +惚 > bū; #60DA +惛 > hūn; #60DB +惜 > xī; #60DC +惝 > tăng; #60DD +惞 > xīn; #60DE +惟 > wéi; #60DF +惠 > hùi; #60E0 +惡 > è; #60E1 +惢 > rŭi; #60E2 +惣 > zŏng; #60E3 +惤 > jiān; #60E4 +惥 > yŏng; #60E5 +惦 > diàn; #60E6 +惧 > jù; #60E7 +惨 > căn; #60E8 +惩 > chéng; #60E9 +惪 > dé; #60EA +惫 > bèi; #60EB +惬 > qiè; #60EC +惭 > cán; #60ED +惮 > dàn; #60EE +惯 > guàn; #60EF +惰 > dùo; #60F0 +惱 > năo; #60F1 +惲 > yùn; #60F2 +想 > xiăng; #60F3 +惴 > zhùi; #60F4 +惵 > diè; #60F5 +惶 > huáng; #60F6 +惷 > chŭn; #60F7 +惸 > qíong; 
#60F8 +惹 > rĕ; #60F9 +惺 > xīng; #60FA +惻 > cè; #60FB +惼 > biăn; #60FC +惽 > hūn; #60FD +惾 > zōng; #60FE +惿 > tí; #60FF +愀 > qiăo; #6100 +愁 > chóu; #6101 +愂 > bèi; #6102 +愃 > xuān; #6103 +愄 > wēi; #6104 +愅 > gé; #6105 +愆 > qiān; #6106 +愇 > wĕi; #6107 +愈 > yù; #6108 +愉 > yú; #6109 +愊 > bì; #610A +愋 > xuān; #610B +愌 > huàn; #610C +愍 > mĭn; #610D +愎 > bì; #610E +意 > yì; #610F +愐 > miăn; #6110 +愑 > yŏng; #6111 +愒 > kài; #6112 +愓 > dàng; #6113 +愔 > yīn; #6114 +愕 > è; #6115 +愖 > chén; #6116 +愗 > mòu; #6117 +愘 > kè; #6118 +愙 > kè; #6119 +愚 > yú; #611A +愛 > ài; #611B +愜 > qiè; #611C +愝 > yăn; #611D +愞 > nùo; #611E +感 > găn; #611F +愠 > yùn; #6120 +愡 > zŏng; #6121 +愢 > sāi; #6122 +愣 > léng; #6123 +愤 > fèn; #6124 +愦 > kùi; #6126 +愧 > kùi; #6127 +愨 > què; #6128 +愩 > gōng; #6129 +愪 > yún; #612A +愫 > sù; #612B +愬 > sù; #612C +愭 > qí; #612D +愮 > yáo; #612E +愯 > sŏng; #612F +愰 > huăng; #6130 +愱 > jí; #6131 +愲 > gŭ; #6132 +愳 > jù; #6133 +愴 > chuàng; #6134 +愵 > nì; #6135 +愶 > xié; #6136 +愷 > kăi; #6137 +愸 > zhĕng; #6138 +愹 > yŏng; #6139 +愺 > căo; #613A +愻 > sùn; #613B +愼 > shèn; #613C +愽 > bó; #613D +愾 > kài; #613E +愿 > yuàn; #613F +慀 > xié; #6140 +慁 > hùn; #6141 +慂 > yŏng; #6142 +慃 > yăng; #6143 +慄 > lì; #6144 +慅 > sāo; #6145 +慆 > tāo; #6146 +慇 > yīn; #6147 +慈 > cí; #6148 +慉 > xù; #6149 +慊 > qiàn; #614A +態 > tài; #614B +慌 > huāng; #614C +慍 > yùn; #614D +慎 > shèn; #614E +慏 > mĭng; #614F +慑 > shè; #6151 +慒 > cóng; #6152 +慓 > piào; #6153 +慔 > mò; #6154 +慕 > mù; #6155 +慖 > gúo; #6156 +慗 > chì; #6157 +慘 > căn; #6158 +慙 > cán; #6159 +慚 > cán; #615A +慛 > cúi; #615B +慜 > mĭn; #615C +慝 > tè; #615D +慞 > zhāng; #615E +慟 > tòng; #615F +慠 > ào; #6160 +慡 > shuăng; #6161 +慢 > màn; #6162 +慣 > guàn; #6163 +慤 > què; #6164 +慥 > zào; #6165 +慦 > jìu; #6166 +慧 > hùi; #6167 +慨 > kăi; #6168 +慩 > lián; #6169 +慪 > òu; #616A +慫 > sŏng; #616B +慬 > jĭn; #616C +慭 > yìn; #616D +慮 > lǜ; #616E +慯 > shāng; #616F +慰 > wèi; #6170 +慱 > tuán; #6171 +慲 > mán; #6172 +慳 > qiān; #6173 +慴 > shè; #6174 +慵 > yōng; #6175 +慶 > 
qìng; #6176 +慷 > kāng; #6177 +慸 > dì; #6178 +慹 > zhí; #6179 +慺 > lóu; #617A +慻 > juàn; #617B +慼 > qī; #617C +慽 > qī; #617D +慾 > yù; #617E +慿 > píng; #617F +憀 > liáo; #6180 +憁 > cōng; #6181 +憂 > yōu; #6182 +憃 > chōng; #6183 +憄 > zhì; #6184 +憅 > tòng; #6185 +憆 > chēng; #6186 +憇 > qì; #6187 +憈 > qū; #6188 +憉 > péng; #6189 +憊 > bèi; #618A +憋 > biē; #618B +憌 > chún; #618C +憍 > jiāo; #618D +憎 > zēng; #618E +憏 > chì; #618F +憐 > lián; #6190 +憑 > píng; #6191 +憒 > kùi; #6192 +憓 > hùi; #6193 +憔 > qiáo; #6194 +憕 > chéng; #6195 +憖 > yìn; #6196 +憗 > yìn; #6197 +憘 > xĭ; #6198 +憙 > xĭ; #6199 +憚 > dàn; #619A +憛 > tán; #619B +憜 > dŭo; #619C +憝 > dùi; #619D +憞 > dùi; #619E +憟 > sù; #619F +憠 > jué; #61A0 +憡 > cè; #61A1 +憢 > xiāo; #61A2 +憣 > fán; #61A3 +憤 > fèn; #61A4 +憥 > láo; #61A5 +憦 > lào; #61A6 +憧 > chōng; #61A7 +憨 > hān; #61A8 +憩 > qì; #61A9 +憪 > xián; #61AA +憫 > mĭn; #61AB +憬 > jĭng; #61AC +憭 > liăo; #61AD +憮 > wŭ; #61AE +憯 > căn; #61AF +憰 > jué; #61B0 +憱 > cù; #61B1 +憲 > xiàn; #61B2 +憳 > tăn; #61B3 +憴 > shéng; #61B4 +憵 > pī; #61B5 +憶 > yì; #61B6 +憷 > chŭ; #61B7 +憸 > xiān; #61B8 +憹 > náo; #61B9 +憺 > dàn; #61BA +憻 > tăn; #61BB +憼 > jĭng; #61BC +憽 > sōng; #61BD +憾 > hàn; #61BE +憿 > jiāo; #61BF +懀 > wài; #61C0 +懁 > huán; #61C1 +懂 > dŏng; #61C2 +懃 > qín; #61C3 +懄 > qín; #61C4 +懅 > qú; #61C5 +懆 > căo; #61C6 +懇 > kĕn; #61C7 +懈 > xiè; #61C8 +應 > yìng; #61C9 +懊 > ào; #61CA +懋 > mào; #61CB +懌 > yì; #61CC +懍 > lĭn; #61CD +懎 > sè; #61CE +懏 > jùn; #61CF +懐 > huái; #61D0 +懑 > mèn; #61D1 +懒 > lăn; #61D2 +懓 > ài; #61D3 +懔 > lĭn; #61D4 +懕 > yān; #61D5 +懖 > guā; #61D6 +懗 > xià; #61D7 +懘 > chì; #61D8 +懙 > yŭ; #61D9 +懚 > yìn; #61DA +懛 > dāi; #61DB +懜 > mèng; #61DC +懝 > ài; #61DD +懞 > méng; #61DE +懟 > dùi; #61DF +懠 > qí; #61E0 +懡 > mŏ; #61E1 +懢 > lán; #61E2 +懣 > mèn; #61E3 +懤 > chóu; #61E4 +懥 > zhì; #61E5 +懦 > nùo; #61E6 +懧 > nùo; #61E7 +懨 > yān; #61E8 +懩 > yăng; #61E9 +懪 > bó; #61EA +懫 > zhí; #61EB +懬 > kuàng; #61EC +懭 > kuàng; #61ED +懮 > yŏu; #61EE +懯 > fū; #61EF +懰 > líu; #61F0 +懱 > miè; #61F1 
+懲 > chéng; #61F2 +懴 > chàn; #61F4 +懵 > méng; #61F5 +懶 > lăn; #61F6 +懷 > huái; #61F7 +懸 > xuán; #61F8 +懹 > ràng; #61F9 +懺 > chàn; #61FA +懻 > jì; #61FB +懼 > jù; #61FC +懽 > huān; #61FD +懾 > shè; #61FE +懿 > yì; #61FF +戀 > liàn; #6200 +戁 > năn; #6201 +戂 > mí; #6202 +戃 > tăng; #6203 +戄 > jué; #6204 +戅 > gàng; #6205 +戆 > gàng; #6206 +戇 > gàng; #6207 +戈 > gē; #6208 +戉 > yuè; #6209 +戊 > wù; #620A +戋 > jiān; #620B +戌 > xū; #620C +戍 > shù; #620D +戎 > róng; #620E +戏 > xì; #620F +成 > chéng; #6210 +我 > wŏ; #6211 +戒 > jiè; #6212 +戓 > gē; #6213 +戔 > jiān; #6214 +戕 > qiāng; #6215 +或 > hùo; #6216 +戗 > qiāng; #6217 +战 > zhàn; #6218 +戙 > dòng; #6219 +戚 > qī; #621A +戛 > jiá; #621B +戜 > dié; #621C +戝 > zéi; #621D +戞 > jiá; #621E +戟 > jĭ; #621F +戠 > shì; #6220 +戡 > kān; #6221 +戢 > jí; #6222 +戣 > kúi; #6223 +戤 > gài; #6224 +戥 > dĕng; #6225 +戦 > zhàn; #6226 +戧 > chuāng; #6227 +戨 > gē; #6228 +戩 > jiăn; #6229 +截 > jié; #622A +戫 > yù; #622B +戬 > jiăn; #622C +戭 > yăn; #622D +戮 > lù; #622E +戯 > xì; #622F +戰 > zhàn; #6230 +戱 > xì; #6231 +戲 > xì; #6232 +戳 > chūo; #6233 +戴 > dài; #6234 +戵 > qú; #6235 +戶 > hù; #6236 +户 > hù; #6237 +戸 > hù; #6238 +戹 > è; #6239 +戺 > shì; #623A +戻 > lì; #623B +戼 > măo; #623C +戽 > hù; #623D +戾 > lì; #623E +房 > fáng; #623F +所 > sŭo; #6240 +扁 > biăn; #6241 +扂 > diàn; #6242 +扃 > jīong; #6243 +扄 > shăng; #6244 +扅 > yí; #6245 +扆 > yĭ; #6246 +扇 > shàn; #6247 +扈 > hù; #6248 +扉 > fēi; #6249 +扊 > yăn; #624A +手 > shŏu; #624B +扌 > t̄' 'shŏu' 'páng; #624C +才 > cái; #624D +扎 > zhā; #624E +扏 > qíu; #624F +扐 > lè; #6250 +扑 > bū; #6251 +扒 > bā; #6252 +打 > dă; #6253 +扔 > rēng; #6254 +払 > fú; #6255 +扗 > zài; #6257 +托 > tūo; #6258 +扙 > zhàng; #6259 +扚 > diāo; #625A +扛 > káng; #625B +扜 > yū; #625C +扝 > kū; #625D +扞 > hàn; #625E +扟 > shēn; #625F +扠 > chā; #6260 +扡 > yĭ; #6261 +扢 > gŭ; #6262 +扣 > kòu; #6263 +扤 > wù; #6264 +扥 > tūo; #6265 +扦 > qiān; #6266 +执 > zhí; #6267 +扨 > rèn; #6268 +扩 > kùo; #6269 +扪 > mén; #626A +扫 > săo; #626B +扬 > yáng; #626C +扭 > nĭu; #626D +扮 > bàn; #626E +扯 > 
chĕ; #626F +扰 > răo; #6270 +扱 > xī; #6271 +扲 > qián; #6272 +扳 > bān; #6273 +扴 > jiá; #6274 +扵 > yú; #6275 +扶 > fú; #6276 +扷 > ào; #6277 +扸 > xī; #6278 +批 > pī; #6279 +扺 > zhĭ; #627A +扻 > zì; #627B +扼 > è; #627C +扽 > dùn; #627D +找 > zhăo; #627E +承 > chéng; #627F +技 > jì; #6280 +抁 > yăn; #6281 +抂 > kuáng; #6282 +抃 > biàn; #6283 +抄 > chāo; #6284 +抅 > jū; #6285 +抆 > wèn; #6286 +抇 > hú; #6287 +抈 > yuè; #6288 +抉 > jué; #6289 +把 > bă; #628A +抋 > qìn; #628B +抌 > zhĕn; #628C +抍 > zhĕng; #628D +抎 > yŭn; #628E +抏 > wán; #628F +抐 > nù; #6290 +抑 > yì; #6291 +抒 > shū; #6292 +抓 > zhuā; #6293 +抔 > póu; #6294 +投 > tóu; #6295 +抖 > dŏu; #6296 +抗 > kàng; #6297 +折 > zhé; #6298 +抙 > póu; #6299 +抚 > fŭ; #629A +抛 > pāo; #629B +抜 > bá; #629C +抝 > ăo; #629D +択 > zé; #629E +抟 > tuán; #629F +抠 > kōu; #62A0 +抡 > lún; #62A1 +抢 > qiăng; #62A2 +护 > hù; #62A4 +报 > bào; #62A5 +抦 > bĭng; #62A6 +抧 > zhĭ; #62A7 +抨 > pēng; #62A8 +抩 > tān; #62A9 +抪 > pū; #62AA +披 > pī; #62AB +抬 > tái; #62AC +抭 > yăo; #62AD +抮 > zhĕn; #62AE +抯 > zhā; #62AF +抰 > yăng; #62B0 +抱 > bào; #62B1 +抲 > hē; #62B2 +抳 > nĭ; #62B3 +抴 > yì; #62B4 +抵 > dĭ; #62B5 +抶 > chì; #62B6 +抷 > pī; #62B7 +抸 > zā; #62B8 +抹 > mŏ; #62B9 +抺 > mŏ; #62BA +抻 > shèn; #62BB +押 > yā; #62BC +抽 > chōu; #62BD +抾 > qū; #62BE +抿 > mĭn; #62BF +拀 > chù; #62C0 +拁 > jiā; #62C1 +拂 > fú; #62C2 +拃 > zhăn; #62C3 +拄 > zhŭ; #62C4 +担 > dàn; #62C5 +拆 > chāi; #62C6 +拇 > mŭ; #62C7 +拈 > nián; #62C8 +拉 > lā; #62C9 +拊 > fŭ; #62CA +拋 > pāo; #62CB +拌 > bàn; #62CC +拍 > pāi; #62CD +拎 > līng; #62CE +拏 > ná; #62CF +拐 > guăi; #62D0 +拑 > qián; #62D1 +拒 > jù; #62D2 +拓 > tùo; #62D3 +拔 > bá; #62D4 +拕 > tūo; #62D5 +拖 > tūo; #62D6 +拗 > ăo; #62D7 +拘 > jū; #62D8 +拙 > zhúo; #62D9 +拚 > pàn; #62DA +招 > zhāo; #62DB +拜 > bài; #62DC +拝 > bài; #62DD +拞 > dĭ; #62DE +拟 > nĭ; #62DF +拠 > jù; #62E0 +拡 > kùo; #62E1 +拢 > lŏng; #62E2 +拣 > jiăn; #62E3 +拥 > yŏng; #62E5 +拦 > lán; #62E6 +拧 > níng; #62E7 +拨 > bō; #62E8 +择 > zé; #62E9 +拪 > qiān; #62EA +拫 > hén; #62EB +括 > guā; #62EC +拭 > shì; #62ED +拮 > jié; #62EE 
+拯 > zhĕng; #62EF +拰 > nĭn; #62F0 +拱 > gŏng; #62F1 +拲 > gŏng; #62F2 +拳 > quán; #62F3 +拴 > shuān; #62F4 +拵 > cún; #62F5 +拶 > zăn; #62F6 +拷 > kăo; #62F7 +拸 > chĭ; #62F8 +拹 > xié; #62F9 +拺 > cè; #62FA +拻 > hūi; #62FB +拼 > pīn; #62FC +拽 > zhuāi; #62FD +拾 > shí; #62FE +拿 > ná; #62FF +挀 > bò; #6300 +持 > chí; #6301 +挂 > guà; #6302 +挃 > zhì; #6303 +挄 > kùo; #6304 +挅 > dŭo; #6305 +挆 > dŭo; #6306 +指 > zhĭ; #6307 +挈 > qiè; #6308 +按 > àn; #6309 +挊 > nòng; #630A +挋 > zhèn; #630B +挌 > gé; #630C +挍 > jiào; #630D +挎 > kū; #630E +挏 > dòng; #630F +挐 > rú; #6310 +挑 > tiāo; #6311 +挒 > liè; #6312 +挓 > zhā; #6313 +挔 > lǚ; #6314 +挕 > dié; #6315 +挖 > wā; #6316 +挗 > jué; #6317 +挙 > jŭ; #6319 +挚 > zhì; #631A +挛 > luán; #631B +挜 > yà; #631C +挝 > zhuā; #631D +挞 > tà; #631E +挟 > xié; #631F +挠 > náo; #6320 +挡 > dăng; #6321 +挢 > jiăo; #6322 +挣 > zhēng; #6323 +挤 > jĭ; #6324 +挥 > hūi; #6325 +挦 > xún; #6326 +挨 > āi; #6328 +挩 > tūo; #6329 +挪 > núo; #632A +挫 > cùo; #632B +挬 > bó; #632C +挭 > gĕng; #632D +挮 > tĭ; #632E +振 > zhèn; #632F +挰 > chéng; #6330 +挱 > sūo; #6331 +挲 > sūo; #6332 +挳 > kēng; #6333 +挴 > mĕi; #6334 +挵 > lòng; #6335 +挶 > jú; #6336 +挷 > péng; #6337 +挸 > jiăn; #6338 +挹 > yì; #6339 +挺 > tĭng; #633A +挻 > shān; #633B +挼 > nùo; #633C +挽 > wăn; #633D +挾 > xié; #633E +挿 > chā; #633F +捀 > fēng; #6340 +捁 > jiăo; #6341 +捂 > wŭ; #6342 +捃 > jùn; #6343 +捄 > jìu; #6344 +捅 > tŏng; #6345 +捆 > kŭn; #6346 +捇 > hùo; #6347 +捈 > tú; #6348 +捉 > zhūo; #6349 +捊 > póu; #634A +捋 > lè; #634B +捌 > bā; #634C +捍 > hàn; #634D +捎 > shāo; #634E +捏 > niē; #634F +捐 > juān; #6350 +捑 > zé; #6351 +捒 > sŏng; #6352 +捓 > yé; #6353 +捔 > jué; #6354 +捕 > bŭ; #6355 +捖 > huán; #6356 +捗 > bù; #6357 +捘 > zùn; #6358 +捙 > yì; #6359 +捚 > zhāi; #635A +捛 > lǚ; #635B +捜 > sōu; #635C +捝 > tūo; #635D +捞 > lāo; #635E +损 > sŭn; #635F +捠 > bāng; #6360 +捡 > jiăn; #6361 +换 > huàn; #6362 +捣 > dăo; #6363 +捥 > wàn; #6365 +捦 > qín; #6366 +捧 > pĕng; #6367 +捨 > shĕ; #6368 +捩 > liè; #6369 +捪 > mín; #636A +捫 > mén; #636B +捬 > fŭ; #636C +捭 > băi; #636D 
+据 > jù; #636E +捯 > dăo; #636F +捰 > wŏ; #6370 +捱 > ái; #6371 +捲 > juăn; #6372 +捳 > yuè; #6373 +捴 > zŏng; #6374 +捵 > chĕn; #6375 +捶 > chúi; #6376 +捷 > jié; #6377 +捸 > tū; #6378 +捹 > bèn; #6379 +捺 > nà; #637A +捻 > niăn; #637B +捼 > núo; #637C +捽 > zú; #637D +捾 > wò; #637E +捿 > xī; #637F +掀 > xiān; #6380 +掁 > chéng; #6381 +掂 > diān; #6382 +掃 > săo; #6383 +掄 > lún; #6384 +掅 > qìng; #6385 +掆 > gāng; #6386 +掇 > dúo; #6387 +授 > shòu; #6388 +掉 > diào; #6389 +掊 > póu; #638A +掋 > dĭ; #638B +掌 > zhăng; #638C +掍 > gŭn; #638D +掎 > jĭ; #638E +掏 > tāo; #638F +掐 > qiā; #6390 +掑 > qí; #6391 +排 > pái; #6392 +掓 > shú; #6393 +掔 > qiān; #6394 +掕 > lìng; #6395 +掖 > yì; #6396 +掗 > yà; #6397 +掘 > jué; #6398 +掙 > zhēng; #6399 +掚 > liăng; #639A +掛 > guà; #639B +掜 > yĭ; #639C +掝 > hùo; #639D +掞 > shàn; #639E +掟 > zhĕng; #639F +掠 > lǜe; #63A0 +採 > căi; #63A1 +探 > tàn; #63A2 +掣 > chè; #63A3 +掤 > bīng; #63A4 +接 > jiē; #63A5 +掦 > tì; #63A6 +控 > kòng; #63A7 +推 > tūi; #63A8 +掩 > yăn; #63A9 +措 > cùo; #63AA +掫 > zōu; #63AB +掬 > jú; #63AC +掭 > tiàn; #63AD +掮 > qián; #63AE +掯 > kèn; #63AF +掰 > bāi; #63B0 +掱 > shŏu; #63B1 +掲 > jiē; #63B2 +掳 > lŭ; #63B3 +掴 > gúo; #63B4 +掷 > zhí; #63B7 +掸 > dăn; #63B8 +掺 > xiān; #63BA +掻 > sāo; #63BB +掼 > guàn; #63BC +掽 > pèng; #63BD +掾 > yuàn; #63BE +掿 > nùo; #63BF +揀 > jiăn; #63C0 +揁 > zhēn; #63C1 +揂 > jīu; #63C2 +揃 > jiān; #63C3 +揄 > yú; #63C4 +揅 > yán; #63C5 +揆 > kúi; #63C6 +揇 > năn; #63C7 +揈 > hōng; #63C8 +揉 > róu; #63C9 +揊 > pì; #63CA +揋 > wēi; #63CB +揌 > sāi; #63CC +揍 > zòu; #63CD +揎 > xuān; #63CE +描 > miáo; #63CF +提 > tí; #63D0 +揑 > niē; #63D1 +插 > chā; #63D2 +揓 > shì; #63D3 +揔 > zŏng; #63D4 +揕 > zhèn; #63D5 +揖 > yī; #63D6 +揗 > shŭn; #63D7 +揘 > héng; #63D8 +揙 > biàn; #63D9 +揚 > yáng; #63DA +換 > huàn; #63DB +揜 > yăn; #63DC +揝 > zuàn; #63DD +揞 > ăn; #63DE +揟 > xū; #63DF +揠 > yà; #63E0 +握 > wò; #63E1 +揢 > kè; #63E2 +揣 > chuăi; #63E3 +揤 > jí; #63E4 +揥 > tì; #63E5 +揦 > lá; #63E6 +揧 > là; #63E7 +揨 > chéng; #63E8 +揩 > kāi; #63E9 +揪 > jīu; #63EA +揫 > jīu; #63EB +揬 > tú; 
#63EC +揭 > jiē; #63ED +揮 > hūi; #63EE +揯 > gēng; #63EF +揰 > chòng; #63F0 +揱 > shùo; #63F1 +揲 > shé; #63F2 +揳 > xiè; #63F3 +援 > yuán; #63F4 +揵 > qián; #63F5 +揶 > yé; #63F6 +揷 > chā; #63F7 +揸 > zhā; #63F8 +揹 > bēi; #63F9 +揺 > yáo; #63FA +揽 > lăn; #63FD +揾 > wèn; #63FE +揿 > qìn; #63FF +搀 > chān; #6400 +搁 > gē; #6401 +搂 > lŏu; #6402 +搃 > zŏng; #6403 +搄 > gēng; #6404 +搅 > jiăo; #6405 +搆 > gòu; #6406 +搇 > qìn; #6407 +搈 > yŏng; #6408 +搉 > què; #6409 +搊 > chōu; #640A +搋 > chĭ; #640B +搌 > zhăn; #640C +損 > sŭn; #640D +搎 > sūn; #640E +搏 > bó; #640F +搐 > chù; #6410 +搑 > rŏng; #6411 +搒 > bèng; #6412 +搓 > cūo; #6413 +搔 > sāo; #6414 +搕 > kè; #6415 +搖 > yáo; #6416 +搗 > dăo; #6417 +搘 > zhī; #6418 +搙 > nù; #6419 +搚 > xié; #641A +搛 > jiān; #641B +搜 > sōu; #641C +搝 > qĭu; #641D +搞 > găo; #641E +搟 > xiăn; #641F +搠 > shùo; #6420 +搡 > săng; #6421 +搢 > jìn; #6422 +搣 > miè; #6423 +搤 > è; #6424 +搥 > chúi; #6425 +搦 > nùo; #6426 +搧 > shān; #6427 +搨 > tà; #6428 +搩 > jié; #6429 +搪 > táng; #642A +搫 > pán; #642B +搬 > bān; #642C +搭 > dā; #642D +搮 > lì; #642E +搯 > tāo; #642F +搰 > hú; #6430 +搱 > zhì; #6431 +搲 > wā; #6432 +搳 > xiá; #6433 +搴 > qiān; #6434 +搵 > wèn; #6435 +搶 > qiăng; #6436 +搷 > tián; #6437 +搸 > zhēn; #6438 +搹 > è; #6439 +携 > xī; #643A +搻 > nùo; #643B +搼 > quán; #643C +搽 > chá; #643D +搾 > zhà; #643E +搿 > gé; #643F +摀 > wŭ; #6440 +摁 > èn; #6441 +摂 > shè; #6442 +摃 > káng; #6443 +摄 > shè; #6444 +摅 > shū; #6445 +摆 > băi; #6446 +摇 > yáo; #6447 +摈 > bìn; #6448 +摉 > sōu; #6449 +摊 > tān; #644A +摋 > sà; #644B +摌 > chăn; #644C +摍 > sūo; #644D +摎 > liáo; #644E +摏 > chōng; #644F +摐 > chuāng; #6450 +摑 > gúo; #6451 +摒 > bìng; #6452 +摓 > féng; #6453 +摔 > shuāi; #6454 +摕 > dì; #6455 +摖 > qì; #6456 +摗 > sou; #6457 +摘 > zhāi; #6458 +摙 > liăn; #6459 +摚 > táng; #645A +摛 > chī; #645B +摜 > guàn; #645C +摝 > lù; #645D +摞 > lúo; #645E +摟 > lŏu; #645F +摠 > zŏng; #6460 +摡 > gài; #6461 +摢 > hù; #6462 +摣 > zhā; #6463 +摤 > chuăng; #6464 +摥 > tàng; #6465 +摦 > huà; #6466 +摧 > cūi; #6467 +摨 > nái; #6468 +摩 > mó; #6469 
+摪 > jiāng; #646A +摫 > gūi; #646B +摬 > yìng; #646C +摭 > zhí; #646D +摮 > áo; #646E +摯 > zhì; #646F +摰 > niè; #6470 +摱 > mán; #6471 +摲 > shàn; #6472 +摳 > kōu; #6473 +摴 > shū; #6474 +摵 > sŭo; #6475 +摶 > tuán; #6476 +摷 > jiăo; #6477 +摸 > mō; #6478 +摹 > mó; #6479 +摺 > zhé; #647A +摻 > xiān; #647B +摼 > kēng; #647C +摽 > piăo; #647D +摾 > jiàng; #647E +摿 > yīn; #647F +撀 > gòu; #6480 +撁 > qiān; #6481 +撂 > lǜe; #6482 +撃 > jí; #6483 +撄 > yīng; #6484 +撅 > juē; #6485 +撆 > piē; #6486 +撇 > piĕ; #6487 +撈 > lāo; #6488 +撉 > dūn; #6489 +撊 > xiàn; #648A +撋 > ruán; #648B +撌 > kùi; #648C +撍 > zăn; #648D +撎 > yì; #648E +撏 > xún; #648F +撐 > chēng; #6490 +撑 > chēng; #6491 +撒 > să; #6492 +撓 > náo; #6493 +撔 > hèng; #6494 +撕 > sī; #6495 +撖 > qiăn; #6496 +撗 > huáng; #6497 +撘 > dā; #6498 +撙 > zŭn; #6499 +撚 > niăn; #649A +撛 > lĭn; #649B +撜 > zhĕng; #649C +撝 > hūi; #649D +撞 > zhuàng; #649E +撟 > jiăo; #649F +撠 > jĭ; #64A0 +撡 > cāo; #64A1 +撢 > dăn; #64A2 +撣 > dăn; #64A3 +撤 > chè; #64A4 +撥 > bō; #64A5 +撦 > chĕ; #64A6 +撧 > jué; #64A7 +撨 > xiāo; #64A8 +撩 > liáo; #64A9 +撪 > bèn; #64AA +撫 > fŭ; #64AB +撬 > qiào; #64AC +播 > bò; #64AD +撮 > cūo; #64AE +撯 > zhúo; #64AF +撰 > zhuàn; #64B0 +撱 > tŭo; #64B1 +撲 > pū; #64B2 +撳 > qìn; #64B3 +撴 > dūn; #64B4 +撵 > niăn; #64B5 +撷 > xié; #64B7 +撸 > lŭ; #64B8 +撹 > jiăo; #64B9 +撺 > cuān; #64BA +撻 > tà; #64BB +撼 > hàn; #64BC +撽 > qiào; #64BD +撾 > zhuā; #64BE +撿 > jiăn; #64BF +擀 > găn; #64C0 +擁 > yŏng; #64C1 +擂 > léi; #64C2 +擃 > kŭo; #64C3 +擄 > lŭ; #64C4 +擅 > shàn; #64C5 +擆 > zhúo; #64C6 +擇 > zé; #64C7 +擈 > pū; #64C8 +擉 > chùo; #64C9 +擊 > jí; #64CA +擋 > dăng; #64CB +擌 > sŭo; #64CC +操 > cāo; #64CD +擎 > qíng; #64CE +擏 > jìng; #64CF +擐 > huàn; #64D0 +擑 > jiē; #64D1 +擒 > qín; #64D2 +擓 > kuăi; #64D3 +擔 > dān; #64D4 +擕 > xī; #64D5 +擖 > gĕ; #64D6 +擗 > pì; #64D7 +擘 > bò; #64D8 +擙 > ào; #64D9 +據 > jù; #64DA +擛 > yè; #64DB +擞 > sŏu; #64DE +擟 > mí; #64DF +擠 > jĭ; #64E0 +擡 > tái; #64E1 +擢 > zhúo; #64E2 +擣 > dăo; #64E3 +擤 > xĭng; #64E4 +擥 > lăn; #64E5 +擦 > cā; #64E6 +擧 > jŭ; #64E7 +擨 > yé; 
#64E8 +擩 > rŭ; #64E9 +擪 > yè; #64EA +擫 > yè; #64EB +擬 > nĭ; #64EC +擭 > hù; #64ED +擮 > jí; #64EE +擯 > bìn; #64EF +擰 > níng; #64F0 +擱 > gē; #64F1 +擲 > zhí; #64F2 +擳 > jié; #64F3 +擴 > kùo; #64F4 +擵 > mó; #64F5 +擶 > jiàn; #64F6 +擷 > xié; #64F7 +擸 > liè; #64F8 +擹 > tān; #64F9 +擺 > băi; #64FA +擻 > sŏu; #64FB +擼 > lŭ; #64FC +擽 > lǜe; #64FD +擾 > răo; #64FE +擿 > zhí; #64FF +攀 > pān; #6500 +攁 > yăng; #6501 +攂 > lèi; #6502 +攃 > sà; #6503 +攄 > shū; #6504 +攅 > zăn; #6505 +攆 > niăn; #6506 +攇 > xiăn; #6507 +攈 > jùn; #6508 +攉 > hùo; #6509 +攊 > lì; #650A +攋 > là; #650B +攌 > hàn; #650C +攍 > yíng; #650D +攎 > lú; #650E +攏 > lŏng; #650F +攐 > qiān; #6510 +攑 > qiān; #6511 +攒 > zăn; #6512 +攓 > qiān; #6513 +攔 > lán; #6514 +攕 > sān; #6515 +攖 > yīng; #6516 +攗 > méi; #6517 +攘 > ráng; #6518 +攙 > chān; #6519 +攛 > cuān; #651B +攜 > xī; #651C +攝 > shè; #651D +攞 > lŭo; #651E +攟 > jùn; #651F +攠 > mí; #6520 +攡 > lí; #6521 +攢 > zăn; #6522 +攣 > lǘan; #6523 +攤 > tān; #6524 +攥 > zuàn; #6525 +攦 > lì; #6526 +攧 > diān; #6527 +攨 > wā; #6528 +攩 > dăng; #6529 +攪 > jiăo; #652A +攫 > jué; #652B +攬 > lăn; #652C +攭 > lì; #652D +攮 > năng; #652E +支 > zhī; #652F +攰 > gùi; #6530 +攱 > gŭi; #6531 +攲 > qī; #6532 +攳 > xín; #6533 +攴 > pū; #6534 +攵 > sūi; #6535 +收 > shōu; #6536 +攷 > káo; #6537 +攸 > yōu; #6538 +改 > găi; #6539 +攺 > yĭ; #653A +攻 > gōng; #653B +攼 > gān; #653C +攽 > bān; #653D +放 > fàng; #653E +政 > zhèng; #653F +敀 > bó; #6540 +敁 > diān; #6541 +敂 > kòu; #6542 +敃 > mĭn; #6543 +敄 > wù; #6544 +故 > gù; #6545 +敆 > hé; #6546 +敇 > cè; #6547 +效 > xiào; #6548 +敉 > mĭ; #6549 +敊 > chù; #654A +敋 > gé; #654B +敌 > dí; #654C +敍 > xù; #654D +敎 > jiào; #654E +敏 > mĭn; #654F +敐 > chén; #6550 +救 > jìu; #6551 +敒 > zhèn; #6552 +敓 > dúo; #6553 +敔 > yŭ; #6554 +敕 > chì; #6555 +敖 > áo; #6556 +敗 > bài; #6557 +敘 > xù; #6558 +教 > jiào; #6559 +敚 > dúo; #655A +敛 > liàn; #655B +敜 > niè; #655C +敝 > bì; #655D +敞 > chăng; #655E +敟 > diăn; #655F +敠 > dúo; #6560 +敡 > yì; #6561 +敢 > găn; #6562 +散 > sàn; #6563 +敤 > kĕ; #6564 +敥 > yàn; #6565 +敦 > dūn; 
#6566 +敧 > qĭ; #6567 +敨 > dŏu; #6568 +敩 > xiào; #6569 +敪 > dúo; #656A +敫 > jiào; #656B +敬 > jìng; #656C +敭 > yáng; #656D +敮 > xiá; #656E +敯 > mín; #656F +数 > shù; #6570 +敱 > ái; #6571 +敲 > qiāo; #6572 +敳 > ái; #6573 +整 > zhĕng; #6574 +敵 > dí; #6575 +敶 > zhèn; #6576 +敷 > fū; #6577 +數 > shù; #6578 +敹 > liáo; #6579 +敺 > qū; #657A +敻 > xìong; #657B +敼 > xĭ; #657C +敽 > jiăo; #657D +敿 > jiăo; #657F +斀 > zhúo; #6580 +斁 > yì; #6581 +斂 > liàn; #6582 +斃 > bì; #6583 +斄 > lì; #6584 +斅 > xiào; #6585 +斆 > xiào; #6586 +文 > wén; #6587 +斈 > xué; #6588 +斉 > qí; #6589 +斊 > qí; #658A +斋 > zhāi; #658B +斌 > bīn; #658C +斍 > jué; #658D +斎 > zhāi; #658E +斐 > fĕi; #6590 +斑 > bān; #6591 +斒 > bān; #6592 +斓 > lán; #6593 +斔 > yŭ; #6594 +斕 > lán; #6595 +斖 > wĕi; #6596 +斗 > dŏu; #6597 +斘 > shēng; #6598 +料 > liào; #6599 +斚 > jiă; #659A +斛 > hú; #659B +斜 > xié; #659C +斝 > jiă; #659D +斞 > yŭ; #659E +斟 > zhēn; #659F +斠 > jiào; #65A0 +斡 > wò; #65A1 +斢 > tŏu; #65A2 +斣 > chù; #65A3 +斤 > jīn; #65A4 +斥 > chì; #65A5 +斦 > yín; #65A6 +斧 > fŭ; #65A7 +斨 > qiāng; #65A8 +斩 > zhăn; #65A9 +斪 > qú; #65AA +斫 > zhúo; #65AB +斬 > zhăn; #65AC +断 > duàn; #65AD +斮 > zhúo; #65AE +斯 > sī; #65AF +新 > xīn; #65B0 +斱 > zhúo; #65B1 +斲 > zhúo; #65B2 +斳 > qín; #65B3 +斴 > lín; #65B4 +斵 > zhúo; #65B5 +斶 > chù; #65B6 +斷 > duàn; #65B7 +斸 > zhŭ; #65B8 +方 > fāng; #65B9 +斺 > xiè; #65BA +斻 > háng; #65BB +於 > yú; #65BC +施 > shī; #65BD +斾 > pèi; #65BE +斿 > yóu; #65BF +旁 > páng; #65C1 +旂 > qí; #65C2 +旃 > zhān; #65C3 +旄 > máo; #65C4 +旅 > lǚ; #65C5 +旆 > pèi; #65C6 +旇 > pī; #65C7 +旈 > líu; #65C8 +旉 > fū; #65C9 +旊 > făng; #65CA +旋 > xuán; #65CB +旌 > jīng; #65CC +旍 > jīng; #65CD +旎 > nĭ; #65CE +族 > zú; #65CF +旐 > zhào; #65D0 +旑 > yĭ; #65D1 +旒 > líu; #65D2 +旓 > shāo; #65D3 +旔 > jiàn; #65D4 +旖 > yĭ; #65D6 +旗 > qí; #65D7 +旘 > zhì; #65D8 +旙 > fān; #65D9 +旚 > piāo; #65DA +旛 > fān; #65DB +旜 > zhān; #65DC +旝 > guài; #65DD +旞 > sùi; #65DE +旟 > yú; #65DF +无 > wú; #65E0 +旡 > jì; #65E1 +既 > jì; #65E2 +旣 > jì; #65E3 +旤 > hùo; #65E4 +日 > rì; #65E5 +旦 > dàn; 
#65E6 +旧 > jìu; #65E7 +旨 > zhĭ; #65E8 +早 > zăo; #65E9 +旪 > xié; #65EA +旫 > tiāo; #65EB +旬 > xún; #65EC +旭 > xù; #65ED +旮 > xù; #65EE +旯 > xù; #65EF +旰 > gàn; #65F0 +旱 > hàn; #65F1 +旲 > tái; #65F2 +旳 > dì; #65F3 +旴 > xū; #65F4 +旵 > chăn; #65F5 +时 > shí; #65F6 +旷 > kuàng; #65F7 +旸 > yáng; #65F8 +旹 > shí; #65F9 +旺 > wàng; #65FA +旻 > mín; #65FB +旼 > mín; #65FC +旽 > tūn; #65FD +旾 > chūn; #65FE +旿 > wŭ; #65FF +昀 > yún; #6600 +昁 > bèi; #6601 +昂 > áng; #6602 +昃 > zè; #6603 +昄 > băn; #6604 +昅 > jié; #6605 +昆 > kūn; #6606 +昇 > shēng; #6607 +昈 > hù; #6608 +昉 > făng; #6609 +昊 > hào; #660A +昋 > gùi; #660B +昌 > chāng; #660C +昍 > xuān; #660D +明 > míng; #660E +昏 > hūn; #660F +昐 > fēn; #6610 +昑 > qĭn; #6611 +昒 > hū; #6612 +易 > yì; #6613 +昔 > xí; #6614 +昕 > xīn; #6615 +昖 > yán; #6616 +昗 > zè; #6617 +昘 > făng; #6618 +昙 > tán; #6619 +昚 > shèn; #661A +昛 > jù; #661B +昜 > yáng; #661C +昝 > zăn; #661D +昞 > bĭng; #661E +星 > xīng; #661F +映 > yìng; #6620 +昡 > xuàn; #6621 +昢 > pĕi; #6622 +昣 > zhĕn; #6623 +昤 > līng; #6624 +春 > chūn; #6625 +昦 > hào; #6626 +昧 > mèi; #6627 +昨 > zúo; #6628 +昩 > mò; #6629 +昪 > biàn; #662A +昫 > xŭ; #662B +昬 > hūn; #662C +昭 > zhāo; #662D +昮 > zòng; #662E +是 > shì; #662F +昰 > shì; #6630 +昱 > yù; #6631 +昲 > fèi; #6632 +昳 > dié; #6633 +昴 > măo; #6634 +昵 > nì; #6635 +昶 > chăng; #6636 +昷 > wēn; #6637 +昸 > dōng; #6638 +昹 > ăi; #6639 +昺 > bĭng; #663A +昻 > áng; #663B +昼 > zhòu; #663C +昽 > lóng; #663D +显 > xiăn; #663E +昿 > kuàng; #663F +晀 > tiăo; #6640 +晁 > cháo; #6641 +時 > shí; #6642 +晃 > huăng; #6643 +晄 > huăng; #6644 +晅 > xuān; #6645 +晆 > kúi; #6646 +晇 > xū; #6647 +晈 > jiăo; #6648 +晉 > jìn; #6649 +晊 > zhĭ; #664A +晋 > jìn; #664B +晌 > shăng; #664C +晍 > tóng; #664D +晎 > hŏng; #664E +晏 > yàn; #664F +晐 > gāi; #6650 +晑 > xiăng; #6651 +晒 > shài; #6652 +晓 > xiăo; #6653 +晔 > yē; #6654 +晕 > yūn; #6655 +晖 > hūi; #6656 +晗 > hán; #6657 +晘 > hàn; #6658 +晙 > jùn; #6659 +晚 > wăn; #665A +晛 > xiàn; #665B +晜 > kūn; #665C +晝 > zhòu; #665D +晞 > xī; #665E +晟 > chéng; #665F +晠 > shéng; #6660 +晡 > 
bū; #6661 +晢 > zhē; #6662 +晣 > zhē; #6663 +晤 > wù; #6664 +晥 > hàn; #6665 +晦 > hùi; #6666 +晧 > hào; #6667 +晨 > chén; #6668 +晩 > wăn; #6669 +晪 > tiăn; #666A +晫 > zhúo; #666B +晬 > zùi; #666C +晭 > zhŏu; #666D +普 > pŭ; #666E +景 > jĭng; #666F +晰 > xī; #6670 +晱 > shăn; #6671 +晲 > yĭ; #6672 +晳 > xì; #6673 +晴 > qíng; #6674 +晵 > qĭ; #6675 +晶 > jīng; #6676 +晷 > gŭi; #6677 +晸 > zhĕn; #6678 +晹 > yì; #6679 +智 > zhì; #667A +晻 > ăn; #667B +晼 > wăn; #667C +晽 > lín; #667D +晾 > liàng; #667E +晿 > chāng; #667F +暀 > wăng; #6680 +暁 > xiăo; #6681 +暂 > zàn; #6682 +暄 > xuān; #6684 +暅 > xuăn; #6685 +暆 > yí; #6686 +暇 > xiá; #6687 +暈 > yūn; #6688 +暉 > hūi; #6689 +暊 > fŭ; #668A +暋 > mĭn; #668B +暌 > kúi; #668C +暍 > hè; #668D +暎 > yìng; #668E +暏 > dŭ; #668F +暐 > wĕi; #6690 +暑 > shŭ; #6691 +暒 > qíng; #6692 +暓 > mào; #6693 +暔 > nán; #6694 +暕 > jiăn; #6695 +暖 > nuăn; #6696 +暗 > àn; #6697 +暘 > yáng; #6698 +暙 > chūn; #6699 +暚 > yáo; #669A +暛 > sŭo; #669B +暜 > jìn; #669C +暝 > míng; #669D +暞 > jiăo; #669E +暟 > kăi; #669F +暠 > găo; #66A0 +暡 > wĕng; #66A1 +暢 > chàng; #66A2 +暣 > qì; #66A3 +暤 > hào; #66A4 +暥 > yàn; #66A5 +暦 > lì; #66A6 +暧 > ài; #66A7 +暨 > jì; #66A8 +暩 > gùi; #66A9 +暪 > mĕn; #66AA +暫 > zàn; #66AB +暬 > xiè; #66AC +暭 > hào; #66AD +暮 > mù; #66AE +暯 > mò; #66AF +暰 > cōng; #66B0 +暱 > nì; #66B1 +暲 > zhāng; #66B2 +暳 > hùi; #66B3 +暴 > bào; #66B4 +暵 > hàn; #66B5 +暶 > xuán; #66B6 +暷 > chuán; #66B7 +暸 > liáo; #66B8 +暹 > xiān; #66B9 +暺 > dàn; #66BA +暻 > jĭng; #66BB +暼 > piē; #66BC +暽 > lín; #66BD +暾 > tūn; #66BE +暿 > xĭ; #66BF +曀 > yì; #66C0 +曁 > jì; #66C1 +曂 > huàng; #66C2 +曃 > tài; #66C3 +曄 > yè; #66C4 +曅 > yè; #66C5 +曆 > lì; #66C6 +曇 > tán; #66C7 +曈 > tóng; #66C8 +曉 > xiăo; #66C9 +曊 > fèi; #66CA +曋 > qĭn; #66CB +曌 > zhào; #66CC +曍 > hào; #66CD +曎 > yì; #66CE +曏 > xiàng; #66CF +曐 > xīng; #66D0 +曑 > sēn; #66D1 +曒 > jiăo; #66D2 +曓 > bào; #66D3 +曔 > jìng; #66D4 +曕 > yiàn; #66D5 +曖 > ài; #66D6 +曗 > yè; #66D7 +曘 > rú; #66D8 +曙 > shù; #66D9 +曚 > méng; #66DA +曛 > xūn; #66DB +曜 > yào; #66DC +曝 > pù; #66DD +曞 
> lì; #66DE +曟 > chén; #66DF +曠 > kuàng; #66E0 +曡 > dié; #66E1 +曣 > yàn; #66E3 +曤 > hùo; #66E4 +曥 > lú; #66E5 +曦 > xī; #66E6 +曧 > róng; #66E7 +曨 > lóng; #66E8 +曩 > năng; #66E9 +曪 > lŭo; #66EA +曫 > luán; #66EB +曬 > shài; #66EC +曭 > tăng; #66ED +曮 > yăn; #66EE +曯 > chú; #66EF +曰 > yuē; #66F0 +曱 > yuē; #66F1 +曲 > qŭ; #66F2 +曳 > yì; #66F3 +更 > gèng; #66F4 +曵 > yè; #66F5 +曶 > hū; #66F6 +曷 > hé; #66F7 +書 > shū; #66F8 +曹 > cáo; #66F9 +曺 > cáo; #66FA +曼 > màn; #66FC +曽 > cēng; #66FD +曾 > céng; #66FE +替 > tì; #66FF +最 > zùi; #6700 +朁 > căn; #6701 +朂 > xù; #6702 +會 > hùi; #6703 +朄 > yìn; #6704 +朅 > qiè; #6705 +朆 > fēn; #6706 +朇 > pí; #6707 +月 > yuè; #6708 +有 > yŏu; #6709 +朊 > ruăn; #670A +朋 > péng; #670B +朌 > bān; #670C +服 > fú; #670D +朎 > líng; #670E +朏 > fĕi; #670F +朐 > qú; #6710 +朒 > nǜ; #6712 +朓 > tiào; #6713 +朔 > shùo; #6714 +朕 > zhèn; #6715 +朖 > lăng; #6716 +朗 > lăng; #6717 +朘 > juān; #6718 +朙 > míng; #6719 +朚 > huāng; #671A +望 > wàng; #671B +朜 > tūn; #671C +朝 > zhāo; #671D +朞 > jī; #671E +期 > qí; #671F +朠 > yīng; #6720 +朡 > zōng; #6721 +朢 > wàng; #6722 +朣 > tóng; #6723 +朤 > lăng; #6724 +朦 > méng; #6726 +朧 > lóng; #6727 +木 > mù; #6728 +朩 > dĕng; #6729 +未 > wèi; #672A +末 > mò; #672B +本 > bĕn; #672C +札 > zhá; #672D +朮 > zhú; #672E +术 > zhú; #672F +朱 > zhū; #6731 +朲 > rén; #6732 +朳 > bā; #6733 +朴 > pò; #6734 +朵 > dŭo; #6735 +朶 > dŭo; #6736 +朷 > dāo; #6737 +朸 > lì; #6738 +朹 > qíu; #6739 +机 > jī; #673A +朻 > jīu; #673B +朼 > bĭ; #673C +朽 > xĭu; #673D +朾 > tíng; #673E +朿 > cì; #673F +杀 > shā; #6740 +杂 > zá; #6742 +权 > quán; #6743 +杄 > qiān; #6744 +杅 > yú; #6745 +杆 > gān; #6746 +杇 > wū; #6747 +杈 > chā; #6748 +杉 > shān; #6749 +杊 > xún; #674A +杋 > fān; #674B +杌 > wù; #674C +杍 > zĭ; #674D +李 > lĭ; #674E +杏 > xìng; #674F +材 > cái; #6750 +村 > cūn; #6751 +杒 > rèn; #6752 +杓 > sháo; #6753 +杔 > tūo; #6754 +杕 > dì; #6755 +杖 > zhàng; #6756 +杗 > máng; #6757 +杘 > chì; #6758 +杙 > yì; #6759 +杚 > gŭ; #675A +杛 > gōng; #675B +杜 > dù; #675C +杝 > yí; #675D +杞 > qĭ; #675E +束 > shù; #675F +杠 > gāng; 
#6760 +条 > tiáo; #6761 +来 > lái; #6765 +杧 > máng; #6767 +杨 > yáng; #6768 +杩 > mà; #6769 +杪 > miăo; #676A +杫 > sì; #676B +杬 > yuán; #676C +杭 > háng; #676D +杮 > fèi; #676E +杯 > bēi; #676F +杰 > jié; #6770 +東 > dōng; #6771 +杲 > găo; #6772 +杳 > yăo; #6773 +杴 > xiān; #6774 +杵 > chŭ; #6775 +杶 > qūn; #6776 +杷 > pá; #6777 +杸 > shū; #6778 +杹 > huà; #6779 +杺 > xīn; #677A +杻 > chŏu; #677B +杼 > zhù; #677C +杽 > chŏu; #677D +松 > sōng; #677E +板 > băn; #677F +枀 > sōng; #6780 +极 > jí; #6781 +枂 > yuè; #6782 +枃 > jìn; #6783 +构 > gōu; #6784 +枅 > jī; #6785 +枆 > máo; #6786 +枇 > pí; #6787 +枈 > bì; #6788 +枉 > wăng; #6789 +枊 > àng; #678A +枋 > fāng; #678B +枌 > fén; #678C +枍 > yì; #678D +枎 > fú; #678E +枏 > nán; #678F +析 > xī; #6790 +枑 > hù; #6791 +枒 > yá; #6792 +枓 > dŏu; #6793 +枔 > xún; #6794 +枕 > zhĕn; #6795 +枖 > yāo; #6796 +林 > lín; #6797 +枘 > rùi; #6798 +枙 > é; #6799 +枚 > méi; #679A +枛 > zhào; #679B +果 > gŭo; #679C +枝 > zhī; #679D +枞 > cōng; #679E +枟 > yùn; #679F +枡 > dŏu; #67A1 +枢 > shū; #67A2 +枣 > zăo; #67A3 +枥 > lì; #67A5 +枧 > jiàn; #67A7 +枨 > chéng; #67A8 +枪 > qiāng; #67AA +枫 > fēng; #67AB +枬 > nán; #67AC +枭 > xiāo; #67AD +枮 > xiān; #67AE +枯 > kū; #67AF +枰 > píng; #67B0 +枱 > yí; #67B1 +枲 > xĭ; #67B2 +枳 > zhī; #67B3 +枴 > guăi; #67B4 +枵 > xiāo; #67B5 +架 > jià; #67B6 +枷 > jiā; #67B7 +枸 > gŏu; #67B8 +枹 > fū; #67B9 +枺 > mò; #67BA +枻 > yì; #67BB +枼 > yè; #67BC +枽 > yè; #67BD +枾 > shì; #67BE +枿 > niè; #67BF +柀 > bĭ; #67C0 +柁 > dùo; #67C1 +柂 > yí; #67C2 +柃 > líng; #67C3 +柄 > bĭng; #67C4 +柅 > nĭ; #67C5 +柆 > lā; #67C6 +柇 > hé; #67C7 +柈 > pán; #67C8 +柉 > fán; #67C9 +柊 > zhōng; #67CA +柋 > dài; #67CB +柌 > cí; #67CC +柍 > yāng; #67CD +柎 > fū; #67CE +柏 > bó; #67CF +某 > mŏu; #67D0 +柑 > gān; #67D1 +柒 > qī; #67D2 +染 > răn; #67D3 +柔 > róu; #67D4 +柕 > mào; #67D5 +柖 > zhāo; #67D6 +柗 > sōng; #67D7 +柘 > zhè; #67D8 +柙 > xiá; #67D9 +柚 > yòu; #67DA +柛 > shēn; #67DB +柜 > jŭ; #67DC +柝 > tùo; #67DD +柞 > zùo; #67DE +柟 > nán; #67DF +柠 > níng; #67E0 +柡 > yŏng; #67E1 +柢 > dĭ; #67E2 +柣 > zhí; #67E3 +柤 > zhā; #67E4 +查 > 
chá; #67E5 +柦 > dàn; #67E6 +柧 > gū; #67E7 +柨 > pu; #67E8 +柩 > jìu; #67E9 +柪 > āo; #67EA +柫 > fú; #67EB +柬 > jiăn; #67EC +柭 > bō; #67ED +柮 > dùo; #67EE +柯 > kē; #67EF +柰 > nài; #67F0 +柱 > zhù; #67F1 +柲 > bì; #67F2 +柳 > lĭu; #67F3 +柴 > chái; #67F4 +柵 > zhà; #67F5 +柶 > sì; #67F6 +柷 > zhù; #67F7 +柸 > pēi; #67F8 +柹 > shì; #67F9 +柺 > guăi; #67FA +査 > chá; #67FB +柼 > yăo; #67FC +柽 > jué; #67FD +柾 > jìu; #67FE +柿 > shì; #67FF +栀 > zhī; #6800 +栁 > lĭu; #6801 +栂 > méi; #6802 +栄 > róng; #6804 +栅 > zhà; #6805 +标 > biāo; #6807 +栈 > zhàn; #6808 +栉 > jié; #6809 +栊 > lóng; #680A +栋 > dòng; #680B +栌 > lú; #680C +栎 > lì; #680E +栏 > lán; #680F +栐 > yŏng; #6810 +树 > shù; #6811 +栒 > xún; #6812 +栓 > shuān; #6813 +栔 > qì; #6814 +栕 > zhēn; #6815 +栖 > qī; #6816 +栗 > lì; #6817 +栘 > yĭ; #6818 +栙 > xiáng; #6819 +栚 > zhèn; #681A +栛 > lì; #681B +栜 > sù; #681C +栝 > guā; #681D +栞 > kān; #681E +栟 > bīng; #681F +栠 > rĕn; #6820 +校 > xiào; #6821 +栢 > bó; #6822 +栣 > rĕn; #6823 +栤 > bìng; #6824 +栥 > zī; #6825 +栦 > chóu; #6826 +栧 > yì; #6827 +栨 > jié; #6828 +栩 > xŭ; #6829 +株 > zhū; #682A +栫 > jiàn; #682B +栬 > zùi; #682C +栭 > ér; #682D +栮 > ĕr; #682E +栯 > yŏu; #682F +栰 > fá; #6830 +栱 > gŏng; #6831 +栲 > kăo; #6832 +栳 > lăo; #6833 +栴 > zhān; #6834 +栵 > lì; #6835 +栶 > yin; #6836 +样 > yáng; #6837 +核 > hé; #6838 +根 > gēn; #6839 +栺 > zhĭ; #683A +栻 > chì; #683B +格 > gé; #683C +栽 > zāi; #683D +栾 > luán; #683E +栿 > fú; #683F +桀 > jié; #6840 +桁 > háng; #6841 +桂 > gùi; #6842 +桃 > táo; #6843 +桄 > guàng; #6844 +桅 > wéi; #6845 +框 > kuàng; #6846 +桇 > rú; #6847 +案 > àn; #6848 +桉 > àn; #6849 +桊 > juàn; #684A +桋 > yí; #684B +桌 > zhūo; #684C +桍 > kū; #684D +桎 > zhí; #684E +桏 > qíong; #684F +桐 > tóng; #6850 +桑 > sāng; #6851 +桒 > sāng; #6852 +桓 > huán; #6853 +桔 > jié; #6854 +桕 > jìu; #6855 +桖 > xuè; #6856 +桗 > dùo; #6857 +桘 > zhùi; #6858 +桙 > yú; #6859 +桚 > zăn; #685A +桜 > yīng; #685C +桟 > zhàn; #685F +桠 > yá; #6860 +桡 > náo; #6861 +桢 > zhēn; #6862 +档 > dăng; #6863 +桤 > qī; #6864 +桥 > qiáo; #6865 +桦 > huà; #6866 +桧 > kuài; 
#6867 +桨 > jiăng; #6868 +桩 > zhuāng; #6869 +桪 > xún; #686A +桫 > sūo; #686B +桬 > shā; #686C +桭 > zhēn; #686D +桮 > bēi; #686E +桯 > tīng; #686F +桰 > guā; #6870 +桱 > jìng; #6871 +桲 > bó; #6872 +桳 > bèn; #6873 +桴 > fú; #6874 +桵 > rŭi; #6875 +桶 > tŏng; #6876 +桷 > jué; #6877 +桸 > xī; #6878 +桹 > láng; #6879 +桺 > lĭu; #687A +桻 > fēng; #687B +桼 > qī; #687C +桽 > wĕn; #687D +桾 > jūn; #687E +桿 > găn; #687F +梀 > cù; #6880 +梁 > liáng; #6881 +梂 > qíu; #6882 +梃 > tĭng; #6883 +梄 > yŏu; #6884 +梅 > méi; #6885 +梆 > bāng; #6886 +梇 > lòng; #6887 +梈 > pēng; #6888 +梉 > zhuāng; #6889 +梊 > dì; #688A +梋 > xuān; #688B +梌 > tú; #688C +梍 > zào; #688D +梎 > āo; #688E +梏 > gù; #688F +梐 > bì; #6890 +梑 > dí; #6891 +梒 > hán; #6892 +梓 > zĭ; #6893 +梔 > zhī; #6894 +梕 > rèn; #6895 +梖 > bèi; #6896 +梗 > gĕng; #6897 +梘 > jiàn; #6898 +梙 > huàn; #6899 +梚 > wăn; #689A +梛 > núo; #689B +梜 > jiá; #689C +條 > tiáo; #689D +梞 > jì; #689E +梟 > xiāo; #689F +梠 > lǚ; #68A0 +梡 > huán; #68A1 +梢 > shāo; #68A2 +梣 > cén; #68A3 +梤 > fén; #68A4 +梥 > sōng; #68A5 +梦 > mèng; #68A6 +梧 > wú; #68A7 +梨 > lí; #68A8 +梩 > lí; #68A9 +梪 > dòu; #68AA +梫 > cēn; #68AB +梬 > yĭng; #68AC +梭 > sūo; #68AD +梮 > jú; #68AE +梯 > tī; #68AF +械 > jiè; #68B0 +梱 > kŭn; #68B1 +梲 > zhúo; #68B2 +梳 > shū; #68B3 +梴 > chān; #68B4 +梵 > fàn; #68B5 +梶 > wĕi; #68B6 +梷 > jìng; #68B7 +梸 > lí; #68B8 +梹 > bīng; #68B9 +梼 > táo; #68BC +梽 > zhì; #68BD +梾 > lái; #68BE +梿 > lián; #68BF +检 > jiăn; #68C0 +棁 > zhúo; #68C1 +棂 > líng; #68C2 +棃 > lí; #68C3 +棄 > qì; #68C4 +棅 > bìng; #68C5 +棆 > zhūn; #68C6 +棇 > cōng; #68C7 +棈 > qiàn; #68C8 +棉 > mián; #68C9 +棊 > qí; #68CA +棋 > qí; #68CB +棌 > căi; #68CC +棍 > gùn; #68CD +棎 > chán; #68CE +棏 > tè; #68CF +棐 > fĕi; #68D0 +棑 > pái; #68D1 +棒 > bàng; #68D2 +棓 > pŏu; #68D3 +棔 > hūn; #68D4 +棕 > zōng; #68D5 +棖 > chéng; #68D6 +棗 > zăo; #68D7 +棘 > jí; #68D8 +棙 > lì; #68D9 +棚 > péng; #68DA +棛 > yù; #68DB +棜 > yù; #68DC +棝 > gù; #68DD +棞 > hún; #68DE +棟 > dòng; #68DF +棠 > táng; #68E0 +棡 > gāng; #68E1 +棢 > wăng; #68E2 +棣 > dì; #68E3 +棤 > xí; #68E4 +棥 
> fán; #68E5 +棦 > chēng; #68E6 +棧 > zhàn; #68E7 +棨 > qĭ; #68E8 +棩 > yuān; #68E9 +棪 > yăn; #68EA +棫 > yù; #68EB +棬 > quān; #68EC +棭 > yì; #68ED +森 > sēn; #68EE +棯 > rĕn; #68EF +棰 > chúi; #68F0 +棱 > léng; #68F1 +棲 > qī; #68F2 +棳 > zhúo; #68F3 +棴 > fú; #68F4 +棵 > kē; #68F5 +棶 > lái; #68F6 +棷 > zōu; #68F7 +棸 > zōu; #68F8 +棹 > zhūo; #68F9 +棺 > guān; #68FA +棻 > fén; #68FB +棼 > fén; #68FC +棽 > chēn; #68FD +棾 > qíong; #68FE +棿 > niè; #68FF +椀 > wăn; #6900 +椁 > gŭo; #6901 +椂 > lù; #6902 +椃 > háo; #6903 +椄 > jiē; #6904 +椅 > yĭ; #6905 +椆 > chóu; #6906 +椇 > jŭ; #6907 +椈 > jú; #6908 +椉 > chéng; #6909 +椊 > zúo; #690A +椋 > liáng; #690B +椌 > qiāng; #690C +植 > zhí; #690D +椎 > zhūi; #690E +椏 > yā; #690F +椐 > jū; #6910 +椑 > bēi; #6911 +椒 > jiāo; #6912 +椓 > zhúo; #6913 +椔 > zī; #6914 +椕 > bīn; #6915 +椖 > péng; #6916 +椗 > dìng; #6917 +椘 > chŭ; #6918 +検 > jiăn; #691C +椝 > gūi; #691D +椞 > xì; #691E +椟 > dú; #691F +椠 > qiàn; #6920 +椤 > lúo; #6924 +椥 > zhī; #6925 +椪 > pèng; #692A +椫 > zhăn; #692B +椭 > tŭo; #692D +椮 > sēn; #692E +椯 > dúo; #692F +椰 > yé; #6930 +椱 > fòu; #6931 +椲 > wĕi; #6932 +椳 > wēi; #6933 +椴 > duàn; #6934 +椵 > jiă; #6935 +椶 > zōng; #6936 +椷 > jiān; #6937 +椸 > yí; #6938 +椹 > shèn; #6939 +椺 > xí; #693A +椻 > yàn; #693B +椼 > yăn; #693C +椽 > chuán; #693D +椾 > zhàn; #693E +椿 > chūn; #693F +楀 > yŭ; #6940 +楁 > hé; #6941 +楂 > zhā; #6942 +楃 > wò; #6943 +楄 > pián; #6944 +楅 > bì; #6945 +楆 > yāo; #6946 +楇 > hùo; #6947 +楈 > xū; #6948 +楉 > rùo; #6949 +楊 > yáng; #694A +楋 > là; #694B +楌 > yán; #694C +楍 > bĕn; #694D +楎 > hún; #694E +楏 > kúi; #694F +楐 > jiè; #6950 +楑 > kúi; #6951 +楒 > sī; #6952 +楓 > fēng; #6953 +楔 > xiè; #6954 +楕 > tŭo; #6955 +楖 > zhì; #6956 +楗 > jiàn; #6957 +楘 > mù; #6958 +楙 > mào; #6959 +楚 > chŭ; #695A +楛 > hù; #695B +楜 > hú; #695C +楝 > liàn; #695D +楞 > léng; #695E +楟 > tíng; #695F +楠 > nán; #6960 +楡 > yú; #6961 +楢 > yóu; #6962 +楣 > méi; #6963 +楤 > sŏng; #6964 +楥 > xuàn; #6965 +楦 > xuàn; #6966 +楧 > yīng; #6967 +楨 > zhēn; #6968 +楩 > pián; #6969 +楪 > yè; #696A +楫 > jí; #696B 
+楬 > jié; #696C +業 > yè; #696D +楮 > chŭ; #696E +楯 > shŭn; #696F +楰 > yú; #6970 +楱 > còu; #6971 +楲 > wēi; #6972 +楳 > méi; #6973 +楴 > dì; #6974 +極 > jí; #6975 +楶 > jié; #6976 +楷 > kăi; #6977 +楸 > qīu; #6978 +楹 > yíng; #6979 +楺 > róu; #697A +楻 > héng; #697B +楼 > lóu; #697C +楽 > lè; #697D +榀 > pĭn; #6980 +概 > gài; #6982 +榃 > tán; #6983 +榄 > lăn; #6984 +榅 > yún; #6985 +榆 > yú; #6986 +榇 > chèn; #6987 +榈 > lǘ; #6988 +榉 > jŭ; #6989 +榍 > xiè; #698D +榎 > jiă; #698E +榏 > yì; #698F +榐 > zhăn; #6990 +榑 > fù; #6991 +榒 > nài; #6992 +榓 > mì; #6993 +榔 > láng; #6994 +榕 > róng; #6995 +榖 > gŭ; #6996 +榗 > jiàn; #6997 +榘 > jŭ; #6998 +榙 > tă; #6999 +榚 > yăo; #699A +榛 > zhēn; #699B +榜 > băng; #699C +榝 > shā; #699D +榞 > yuán; #699E +榟 > zĭ; #699F +榠 > mīng; #69A0 +榡 > sù; #69A1 +榢 > jià; #69A2 +榣 > yáo; #69A3 +榤 > jié; #69A4 +榥 > huăng; #69A5 +榦 > gàn; #69A6 +榧 > fĕi; #69A7 +榨 > zhà; #69A8 +榩 > qián; #69A9 +榪 > mà; #69AA +榫 > sŭn; #69AB +榬 > yuán; #69AC +榭 > xiè; #69AD +榮 > róng; #69AE +榯 > shí; #69AF +榰 > zhī; #69B0 +榱 > cūi; #69B1 +榲 > yún; #69B2 +榳 > tíng; #69B3 +榴 > líu; #69B4 +榵 > róng; #69B5 +榶 > táng; #69B6 +榷 > què; #69B7 +榸 > zhāi; #69B8 +榹 > sī; #69B9 +榺 > shèng; #69BA +榻 > tà; #69BB +榼 > kè; #69BC +榽 > xī; #69BD +榾 > gù; #69BE +榿 > qī; #69BF +槀 > kăo; #69C0 +槁 > găo; #69C1 +槂 > sūn; #69C2 +槃 > pán; #69C3 +槄 > tāo; #69C4 +槅 > gé; #69C5 +槆 > xún; #69C6 +槇 > diān; #69C7 +槈 > nòu; #69C8 +槉 > jí; #69C9 +槊 > shùo; #69CA +構 > gòu; #69CB +槌 > chúi; #69CC +槍 > qiāng; #69CD +槎 > chā; #69CE +槏 > qiăn; #69CF +槐 > huái; #69D0 +槑 > méi; #69D1 +槒 > xù; #69D2 +槓 > gàng; #69D3 +槔 > gāo; #69D4 +槕 > zhúo; #69D5 +槖 > tùo; #69D6 +様 > yàng; #69D8 +槙 > diān; #69D9 +槚 > jiă; #69DA +槛 > jiàn; #69DB +槜 > zùi; #69DC +槟 > bīn; #69DF +槠 > zhū; #69E0 +槢 > xí; #69E2 +槣 > qĭ; #69E3 +槤 > lián; #69E4 +槥 > hùi; #69E5 +槦 > yóng; #69E6 +槧 > qiàn; #69E7 +槨 > gŭo; #69E8 +槩 > gài; #69E9 +槪 > gài; #69EA +槫 > tuán; #69EB +槬 > huà; #69EC +槭 > cù; #69ED +槮 > sēn; #69EE +槯 > cūi; #69EF +槰 > bèng; #69F0 +槱 > yŏu; #69F1 +槲 > 
hú; #69F2 +槳 > jiăng; #69F3 +槴 > hù; #69F4 +槵 > huàn; #69F5 +槶 > kùi; #69F6 +槷 > yì; #69F7 +槸 > niè; #69F8 +槹 > gāo; #69F9 +槺 > kāng; #69FA +槻 > gūi; #69FB +槼 > gūi; #69FC +槽 > cáo; #69FD +槾 > mán; #69FE +槿 > jĭn; #69FF +樀 > dì; #6A00 +樁 > zhuāng; #6A01 +樂 > lè; #6A02 +樃 > láng; #6A03 +樄 > chén; #6A04 +樅 > cōng; #6A05 +樆 > lí; #6A06 +樇 > xīu; #6A07 +樈 > qíng; #6A08 +樉 > shuăng; #6A09 +樊 > fán; #6A0A +樋 > tōng; #6A0B +樌 > guàn; #6A0C +樍 > jī; #6A0D +樎 > sūo; #6A0E +樏 > lĕi; #6A0F +樐 > lŭ; #6A10 +樑 > liáng; #6A11 +樒 > mì; #6A12 +樓 > lóu; #6A13 +樔 > cháo; #6A14 +樕 > sù; #6A15 +樖 > kē; #6A16 +樗 > shū; #6A17 +樘 > táng; #6A18 +標 > biāo; #6A19 +樚 > lù; #6A1A +樛 > jīu; #6A1B +樜 > shù; #6A1C +樝 > zhā; #6A1D +樞 > shū; #6A1E +樟 > zhāng; #6A1F +樠 > mén; #6A20 +模 > mó; #6A21 +樢 > niăo; #6A22 +樣 > yàng; #6A23 +樤 > tiáo; #6A24 +樥 > péng; #6A25 +樦 > zhù; #6A26 +樧 > shā; #6A27 +樨 > xī; #6A28 +権 > quán; #6A29 +横 > héng; #6A2A +樫 > jiān; #6A2B +樬 > cōng; #6A2C +樯 > qiáng; #6A2F +樱 > yīng; #6A31 +樲 > èr; #6A32 +樳 > xín; #6A33 +樴 > zhí; #6A34 +樵 > qiáo; #6A35 +樶 > zūi; #6A36 +樷 > cōng; #6A37 +樸 > pú; #6A38 +樹 > shù; #6A39 +樺 > huà; #6A3A +樻 > kùi; #6A3B +樼 > zhēn; #6A3C +樽 > zūn; #6A3D +樾 > yuè; #6A3E +樿 > zhăn; #6A3F +橀 > xī; #6A40 +橁 > xún; #6A41 +橂 > diàn; #6A42 +橃 > fā; #6A43 +橄 > găn; #6A44 +橅 > mó; #6A45 +橆 > wŭ; #6A46 +橇 > qiāo; #6A47 +橈 > náo; #6A48 +橉 > lìn; #6A49 +橊 > líu; #6A4A +橋 > qiáo; #6A4B +橌 > xiàn; #6A4C +橍 > rùn; #6A4D +橎 > fán; #6A4E +橏 > zhăn; #6A4F +橐 > tùo; #6A50 +橑 > lăo; #6A51 +橒 > yún; #6A52 +橓 > shùn; #6A53 +橔 > túi; #6A54 +橕 > chēng; #6A55 +橖 > táng; #6A56 +橗 > méng; #6A57 +橘 > jú; #6A58 +橙 > chéng; #6A59 +橚 > sù; #6A5A +橛 > jué; #6A5B +橜 > jué; #6A5C +橝 > tān; #6A5D +橞 > hùi; #6A5E +機 > jī; #6A5F +橠 > nŭo; #6A60 +橡 > xiàng; #6A61 +橢 > tŭo; #6A62 +橣 > nĭng; #6A63 +橤 > rŭi; #6A64 +橥 > zhū; #6A65 +橦 > chuáng; #6A66 +橧 > zēng; #6A67 +橨 > fén; #6A68 +橩 > qíong; #6A69 +橪 > răn; #6A6A +橫 > héng; #6A6B +橬 > cén; #6A6C +橭 > gū; #6A6D +橮 > lĭu; #6A6E +橯 > lào; #6A6F 
+橰 > gāo; #6A70 +橱 > chú; #6A71 +橶 > jí; #6A76 +橷 > dōu; #6A77 +橹 > lŭ; #6A79 +橼 > yuán; #6A7C +橽 > tà; #6A7D +橾 > shū; #6A7E +橿 > jiāng; #6A7F +檀 > tán; #6A80 +檁 > lĭn; #6A81 +檂 > nóng; #6A82 +檃 > yĭn; #6A83 +檄 > xí; #6A84 +檅 > sùi; #6A85 +檆 > shān; #6A86 +檇 > zùi; #6A87 +檈 > xuán; #6A88 +檉 > chēng; #6A89 +檊 > gàn; #6A8A +檋 > jū; #6A8B +檌 > zùi; #6A8C +檍 > yì; #6A8D +檎 > qín; #6A8E +檏 > pŭ; #6A8F +檐 > yán; #6A90 +檑 > léi; #6A91 +檒 > fēng; #6A92 +檓 > hŭi; #6A93 +檔 > dăng; #6A94 +檕 > jì; #6A95 +檖 > sùi; #6A96 +檗 > bò; #6A97 +檘 > bì; #6A98 +檙 > dĭng; #6A99 +檚 > chŭ; #6A9A +檛 > zhuā; #6A9B +檜 > kuài; #6A9C +檝 > jí; #6A9D +檞 > jiĕ; #6A9E +檟 > jiă; #6A9F +檠 > qíng; #6AA0 +檡 > zhè; #6AA1 +檢 > jiăn; #6AA2 +檣 > qiáng; #6AA3 +檤 > dào; #6AA4 +檥 > yĭ; #6AA5 +檦 > biăo; #6AA6 +檧 > sōng; #6AA7 +檨 > shē; #6AA8 +檩 > lĭn; #6AA9 +檫 > chá; #6AAB +檬 > méng; #6AAC +檭 > yín; #6AAD +檮 > táo; #6AAE +檯 > tái; #6AAF +檰 > mián; #6AB0 +檱 > qí; #6AB1 +檲 > tóan; #6AB2 +檳 > bīn; #6AB3 +檴 > hùo; #6AB4 +檵 > jì; #6AB5 +檶 > qiān; #6AB6 +檷 > mí; #6AB7 +檸 > níng; #6AB8 +檹 > yī; #6AB9 +檺 > găo; #6ABA +檻 > jiàn; #6ABB +檼 > yìn; #6ABC +檽 > ér; #6ABD +檾 > qĭng; #6ABE +檿 > yăn; #6ABF +櫀 > qí; #6AC0 +櫁 > mì; #6AC1 +櫂 > zhào; #6AC2 +櫃 > gùi; #6AC3 +櫄 > chūn; #6AC4 +櫅 > jī; #6AC5 +櫆 > kúi; #6AC6 +櫇 > pó; #6AC7 +櫈 > dèng; #6AC8 +櫉 > chú; #6AC9 +櫋 > mián; #6ACB +櫌 > yōu; #6ACC +櫍 > zhì; #6ACD +櫎 > guàng; #6ACE +櫏 > qiān; #6ACF +櫐 > lĕi; #6AD0 +櫑 > lĕi; #6AD1 +櫒 > sà; #6AD2 +櫓 > lŭ; #6AD3 +櫔 > lì; #6AD4 +櫕 > cuán; #6AD5 +櫖 > lǘ; #6AD6 +櫗 > miè; #6AD7 +櫘 > hùi; #6AD8 +櫙 > ōu; #6AD9 +櫚 > lǘ; #6ADA +櫛 > jié; #6ADB +櫜 > gāo; #6ADC +櫝 > dú; #6ADD +櫞 > yuán; #6ADE +櫟 > lì; #6ADF +櫠 > fèi; #6AE0 +櫡 > zhúo; #6AE1 +櫢 > sŏu; #6AE2 +櫣 > lián; #6AE3 +櫥 > chú; #6AE5 +櫧 > zhū; #6AE7 +櫨 > lú; #6AE8 +櫩 > yán; #6AE9 +櫪 > lì; #6AEA +櫫 > zhū; #6AEB +櫬 > chèn; #6AEC +櫭 > jié; #6AED +櫮 > è; #6AEE +櫯 > sū; #6AEF +櫰 > huái; #6AF0 +櫱 > niè; #6AF1 +櫲 > yù; #6AF2 +櫳 > lóng; #6AF3 +櫴 > lài; #6AF4 +櫶 > xiăn; #6AF6 +櫸 > jŭ; #6AF8 +櫹 > xiāo; 
#6AF9 +櫺 > líng; #6AFA +櫻 > yīng; #6AFB +櫼 > jiān; #6AFC +櫽 > yĭn; #6AFD +櫾 > yóu; #6AFE +櫿 > yíng; #6AFF +欀 > xiāng; #6B00 +欁 > nóng; #6B01 +欂 > bó; #6B02 +欃 > chán; #6B03 +欄 > lán; #6B04 +欅 > jŭ; #6B05 +欆 > shuāng; #6B06 +欇 > shè; #6B07 +欈 > wéi; #6B08 +欉 > còng; #6B09 +權 > quán; #6B0A +欋 > qú; #6B0B +欎 > yù; #6B0E +欏 > lúo; #6B0F +欐 > lĭ; #6B10 +欑 > zàn; #6B11 +欒 > luán; #6B12 +欓 > dăng; #6B13 +欔 > jué; #6B14 +欖 > lăn; #6B16 +欗 > lán; #6B17 +欘 > zhŭ; #6B18 +欙 > léi; #6B19 +欚 > lĭ; #6B1A +欛 > bà; #6B1B +欜 > náng; #6B1C +欝 > yù; #6B1D +欞 > líng; #6B1E +欠 > qiàn; #6B20 +次 > cì; #6B21 +欢 > huān; #6B22 +欣 > xīn; #6B23 +欤 > yú; #6B24 +欥 > yù; #6B25 +欦 > qiān; #6B26 +欧 > ōu; #6B27 +欨 > xū; #6B28 +欩 > chāo; #6B29 +欪 > chù; #6B2A +欫 > chī; #6B2B +欬 > kài; #6B2C +欭 > yì; #6B2D +欮 > jué; #6B2E +欯 > xí; #6B2F +欰 > xū; #6B30 +欱 > xià; #6B31 +欲 > yù; #6B32 +欳 > kuài; #6B33 +欴 > láng; #6B34 +欵 > kuăn; #6B35 +欶 > shùo; #6B36 +欷 > xī; #6B37 +欸 > ăi; #6B38 +欹 > yī; #6B39 +欺 > qī; #6B3A +欻 > hū; #6B3B +欼 > chĭ; #6B3C +欽 > qīn; #6B3D +款 > kuăn; #6B3E +欿 > kăn; #6B3F +歀 > kuăn; #6B40 +歁 > kăn; #6B41 +歂 > chuán; #6B42 +歃 > shà; #6B43 +歄 > gua; #6B44 +歅 > yīn; #6B45 +歆 > xīn; #6B46 +歇 > xiē; #6B47 +歈 > yú; #6B48 +歉 > qiàn; #6B49 +歊 > xiāo; #6B4A +歋 > yí; #6B4B +歌 > gē; #6B4C +歍 > wū; #6B4D +歎 > tàn; #6B4E +歏 > jìn; #6B4F +歐 > ōu; #6B50 +歑 > hū; #6B51 +歒 > tì; #6B52 +歓 > huān; #6B53 +歔 > xū; #6B54 +歕 > pèn; #6B55 +歖 > xī; #6B56 +歗 > xiào; #6B57 +歘 > xū; #6B58 +歙 > xì; #6B59 +歛 > liàn; #6B5B +歜 > chù; #6B5C +歝 > yì; #6B5D +歞 > kăn; #6B5E +歟 > yú; #6B5F +歠 > chùo; #6B60 +歡 > huān; #6B61 +止 > zhĭ; #6B62 +正 > zhèng; #6B63 +此 > cĭ; #6B64 +步 > bù; #6B65 +武 > wŭ; #6B66 +歧 > qí; #6B67 +歨 > bù; #6B68 +歩 > bù; #6B69 +歪 > wāi; #6B6A +歫 > jù; #6B6B +歬 > qián; #6B6C +歭 > chí; #6B6D +歮 > sè; #6B6E +歯 > chĭ; #6B6F +歰 > sè; #6B70 +歱 > zhŏng; #6B71 +歲 > sùi; #6B72 +歳 > sùi; #6B73 +歴 > lì; #6B74 +歵 > cùo; #6B75 +歶 > yú; #6B76 +歷 > lì; #6B77 +歸 > gūi; #6B78 +歹 > dăi; #6B79 +歺 > dăi; #6B7A +死 > sĭ; #6B7B 
+歼 > jiān; #6B7C +歽 > zhé; #6B7D +歾 > mò; #6B7E +歿 > mò; #6B7F +殀 > yăo; #6B80 +殁 > mò; #6B81 +殂 > cú; #6B82 +殃 > yāng; #6B83 +殄 > tiăn; #6B84 +殅 > shēng; #6B85 +殆 > dài; #6B86 +殇 > shāng; #6B87 +殈 > xù; #6B88 +殉 > xùn; #6B89 +殊 > shū; #6B8A +残 > cán; #6B8B +殌 > jué; #6B8C +殍 > piăo; #6B8D +殎 > qià; #6B8E +殏 > qìu; #6B8F +殐 > sù; #6B90 +殑 > qíng; #6B91 +殒 > yŭn; #6B92 +殓 > liàn; #6B93 +殔 > yì; #6B94 +殕 > fŏu; #6B95 +殖 > zhí; #6B96 +殗 > yè; #6B97 +殘 > cán; #6B98 +殙 > hūn; #6B99 +殚 > dān; #6B9A +殛 > jí; #6B9B +殜 > yè; #6B9C +殝 > zhen; #6B9D +殞 > yŭn; #6B9E +殟 > wēn; #6B9F +殠 > chòu; #6BA0 +殡 > bìn; #6BA1 +殢 > tì; #6BA2 +殣 > jĭn; #6BA3 +殤 > shāng; #6BA4 +殥 > yín; #6BA5 +殦 > diāo; #6BA6 +殧 > cù; #6BA7 +殨 > hùi; #6BA8 +殩 > cuàn; #6BA9 +殪 > yì; #6BAA +殫 > dān; #6BAB +殬 > dù; #6BAC +殭 > jiāng; #6BAD +殮 > liàn; #6BAE +殯 > bìn; #6BAF +殰 > dú; #6BB0 +殲 > jiān; #6BB2 +殳 > shū; #6BB3 +殴 > ōu; #6BB4 +段 > duàn; #6BB5 +殶 > zhù; #6BB6 +殷 > yīn; #6BB7 +殸 > qìng; #6BB8 +殹 > yì; #6BB9 +殺 > shā; #6BBA +殻 > què; #6BBB +殼 > ké; #6BBC +殽 > yáo; #6BBD +殾 > jùn; #6BBE +殿 > diàn; #6BBF +毀 > hŭi; #6BC0 +毁 > hŭi; #6BC1 +毂 > gŭ; #6BC2 +毃 > què; #6BC3 +毄 > jī; #6BC4 +毅 > yì; #6BC5 +毆 > ōu; #6BC6 +毇 > hŭi; #6BC7 +毈 > duàn; #6BC8 +毉 > yī; #6BC9 +毊 > xiāo; #6BCA +毋 > wú; #6BCB +毌 > guàn; #6BCC +母 > mŭ; #6BCD +毎 > mĕi; #6BCE +每 > mĕi; #6BCF +毐 > ăi; #6BD0 +毑 > zŭo; #6BD1 +毒 > dú; #6BD2 +毓 > yù; #6BD3 +比 > bĭ; #6BD4 +毕 > bì; #6BD5 +毖 > bì; #6BD6 +毗 > pí; #6BD7 +毘 > pí; #6BD8 +毙 > bì; #6BD9 +毚 > chán; #6BDA +毛 > máo; #6BDB +毞 > pú; #6BDE +毠 > jiā; #6BE0 +毡 > zhān; #6BE1 +毢 > sāi; #6BE2 +毣 > mù; #6BE3 +毤 > tùo; #6BE4 +毥 > xún; #6BE5 +毦 > èr; #6BE6 +毧 > róng; #6BE7 +毨 > xiăn; #6BE8 +毩 > jú; #6BE9 +毪 > mú; #6BEA +毫 > háo; #6BEB +毬 > qíu; #6BEC +毭 > dòu; #6BED +毯 > tăn; #6BEF +毰 > péi; #6BF0 +毱 > jú; #6BF1 +毲 > dúo; #6BF2 +毳 > cùi; #6BF3 +毴 > bī; #6BF4 +毵 > sān; #6BF5 +毷 > mào; #6BF7 +毸 > sūi; #6BF8 +毹 > yū; #6BF9 +毺 > yū; #6BFA +毻 > tùo; #6BFB +毼 > hé; #6BFC +毽 > jiàn; #6BFD +毾 > tà; #6BFE +毿 > sān; 
#6BFF +氀 > lǘ; #6C00 +氁 > mú; #6C01 +氂 > lí; #6C02 +氃 > tóng; #6C03 +氄 > rŏng; #6C04 +氅 > chăng; #6C05 +氆 > pŭ; #6C06 +氇 > lúo; #6C07 +氈 > zhān; #6C08 +氉 > sào; #6C09 +氊 > zhān; #6C0A +氋 > méng; #6C0B +氌 > lúo; #6C0C +氍 > qú; #6C0D +氎 > dié; #6C0E +氏 > shì; #6C0F +氐 > dĭ; #6C10 +民 > mín; #6C11 +氒 > jué; #6C12 +氓 > máng; #6C13 +气 > qì; #6C14 +氕 > piē; #6C15 +氖 > năi; #6C16 +気 > qì; #6C17 +氘 > dāo; #6C18 +氙 > xiān; #6C19 +氚 > chuān; #6C1A +氛 > fēn; #6C1B +氜 > rì; #6C1C +氝 > nèi; #6C1D +氟 > fú; #6C1F +氠 > shēn; #6C20 +氡 > dōng; #6C21 +氢 > qīng; #6C22 +氣 > qì; #6C23 +氤 > yīn; #6C24 +氥 > xī; #6C25 +氦 > hài; #6C26 +氧 > yăng; #6C27 +氨 > ān; #6C28 +氩 > yà; #6C29 +氪 > kè; #6C2A +氫 > qīng; #6C2B +氬 > yà; #6C2C +氭 > dōng; #6C2D +氮 > dàn; #6C2E +氯 > lǜ; #6C2F +氰 > qīng; #6C30 +氱 > yăng; #6C31 +氲 > yūn; #6C32 +氳 > yūn; #6C33 +水 > shŭi; #6C34 +氵 > sān' 'diăn' 'shŭi; #6C35 +氶 > zhĕng; #6C36 +氷 > bīng; #6C37 +永 > yŏng; #6C38 +氹 > dàng; #6C39 +氻 > lè; #6C3B +氼 > nì; #6C3C +氽 > tŭn; #6C3D +氾 > fàn; #6C3E +氿 > gŭi; #6C3F +汀 > tīng; #6C40 +汁 > zhī; #6C41 +求 > qíu; #6C42 +汃 > bīn; #6C43 +汄 > zè; #6C44 +汅 > miăn; #6C45 +汆 > cuān; #6C46 +汇 > hùi; #6C47 +汈 > diāo; #6C48 +汉 > yì; #6C49 +汊 > chà; #6C4A +汋 > zhúo; #6C4B +汌 > chuàn; #6C4C +汍 > wán; #6C4D +汎 > fàn; #6C4E +汏 > dài; #6C4F +汐 > xì; #6C50 +汑 > tūo; #6C51 +汒 > máng; #6C52 +汓 > qíu; #6C53 +汔 > qì; #6C54 +汕 > shàn; #6C55 +汖 > pài; #6C56 +汗 > hàn; #6C57 +汘 > qiān; #6C58 +汙 > wū; #6C59 +汚 > wū; #6C5A +汛 > xùn; #6C5B +汜 > sì; #6C5C +汝 > rŭ; #6C5D +汞 > gŏng; #6C5E +江 > jiāng; #6C5F +池 > chí; #6C60 +污 > wū; #6C61 +汤 > tāng; #6C64 +汥 > zhī; #6C65 +汦 > chí; #6C66 +汧 > qiān; #6C67 +汨 > mì; #6C68 +汩 > yù; #6C69 +汪 > wāng; #6C6A +汫 > qìng; #6C6B +汬 > jĭng; #6C6C +汭 > rùi; #6C6D +汮 > jūn; #6C6E +汯 > hóng; #6C6F +汰 > tài; #6C70 +汱 > quăn; #6C71 +汲 > jí; #6C72 +汳 > biàn; #6C73 +汴 > biàn; #6C74 +汵 > gàn; #6C75 +汶 > wèn; #6C76 +汷 > zhōng; #6C77 +汸 > fāng; #6C78 +汹 > xīong; #6C79 +決 > jué; #6C7A +汻 > hăng; #6C7B +汼 > niōu; #6C7C +汽 > qì; #6C7D +汾 > 
fén; #6C7E +汿 > xù; #6C7F +沀 > xù; #6C80 +沁 > qìn; #6C81 +沂 > yí; #6C82 +沃 > wò; #6C83 +沄 > yún; #6C84 +沅 > yuán; #6C85 +沆 > háng; #6C86 +沇 > yăn; #6C87 +沈 > chén; #6C88 +沉 > chén; #6C89 +沊 > dàn; #6C8A +沋 > yóu; #6C8B +沌 > dùn; #6C8C +沍 > hù; #6C8D +沎 > hùo; #6C8E +沏 > qiē; #6C8F +沐 > mù; #6C90 +沑 > róu; #6C91 +沒 > méi; #6C92 +沓 > tà; #6C93 +沔 > miăn; #6C94 +沕 > wù; #6C95 +沖 > chōng; #6C96 +沗 > tiān; #6C97 +沘 > bĭ; #6C98 +沙 > shā; #6C99 +沚 > zhĭ; #6C9A +沛 > pèi; #6C9B +沜 > pàn; #6C9C +沝 > zhŭi; #6C9D +沞 > zā; #6C9E +沟 > gōu; #6C9F +沠 > líu; #6CA0 +没 > méi; #6CA1 +沢 > zé; #6CA2 +沣 > fēng; #6CA3 +沤 > òu; #6CA4 +沥 > lì; #6CA5 +沦 > lún; #6CA6 +沧 > cāng; #6CA7 +沨 > féng; #6CA8 +沩 > wéi; #6CA9 +沪 > hù; #6CAA +沫 > mò; #6CAB +沬 > mèi; #6CAC +沭 > shù; #6CAD +沮 > jū; #6CAE +沯 > zăn; #6CAF +沰 > tūo; #6CB0 +沱 > túo; #6CB1 +沲 > túo; #6CB2 +河 > hé; #6CB3 +沴 > lì; #6CB4 +沵 > mĭ; #6CB5 +沶 > yí; #6CB6 +沷 > fā; #6CB7 +沸 > fèi; #6CB8 +油 > yóu; #6CB9 +沺 > tián; #6CBA +治 > zhì; #6CBB +沼 > zhăo; #6CBC +沽 > gū; #6CBD +沾 > zhān; #6CBE +沿 > yán; #6CBF +泀 > sī; #6CC0 +況 > kuàng; #6CC1 +泂 > jĭong; #6CC2 +泃 > jù; #6CC3 +泄 > xiè; #6CC4 +泅 > qíu; #6CC5 +泆 > yī; #6CC6 +泇 > jiā; #6CC7 +泈 > zhōng; #6CC8 +泉 > quán; #6CC9 +泊 > bó; #6CCA +泋 > hùi; #6CCB +泌 > mì; #6CCC +泍 > bēn; #6CCD +泎 > zhúo; #6CCE +泏 > chù; #6CCF +泐 > lè; #6CD0 +泑 > yŏu; #6CD1 +泒 > gū; #6CD2 +泓 > hóng; #6CD3 +泔 > gān; #6CD4 +法 > fă; #6CD5 +泖 > măo; #6CD6 +泗 > sì; #6CD7 +泘 > hū; #6CD8 +泙 > píng; #6CD9 +泚 > cĭ; #6CDA +泛 > fàn; #6CDB +泜 > chí; #6CDC +泝 > sù; #6CDD +泞 > nìng; #6CDE +泟 > chēng; #6CDF +泠 > líng; #6CE0 +泡 > pào; #6CE1 +波 > bō; #6CE2 +泣 > qì; #6CE3 +泤 > sì; #6CE4 +泥 > ní; #6CE5 +泦 > jú; #6CE6 +泧 > yuè; #6CE7 +注 > zhù; #6CE8 +泩 > shēng; #6CE9 +泪 > lèi; #6CEA +泫 > xuàn; #6CEB +泬 > xuè; #6CEC +泭 > fū; #6CED +泮 > pàn; #6CEE +泯 > mĭn; #6CEF +泰 > tài; #6CF0 +泱 > yāng; #6CF1 +泲 > jĭ; #6CF2 +泳 > yŏng; #6CF3 +泴 > guàn; #6CF4 +泵 > bèng; #6CF5 +泶 > xué; #6CF6 +泷 > lóng; #6CF7 +泸 > lú; #6CF8 +泺 > bó; #6CFA +泻 > xiè; #6CFB +泼 > pō; 
#6CFC +泽 > zé; #6CFD +泾 > jīng; #6CFE +泿 > yín; #6CFF +洀 > zhōu; #6D00 +洁 > jí; #6D01 +洂 > yì; #6D02 +洃 > hūi; #6D03 +洄 > húi; #6D04 +洅 > zŭi; #6D05 +洆 > chéng; #6D06 +洇 > yīn; #6D07 +洈 > wéi; #6D08 +洉 > hòu; #6D09 +洊 > jiàn; #6D0A +洋 > yáng; #6D0B +洌 > liè; #6D0C +洍 > sì; #6D0D +洎 > jì; #6D0E +洏 > ér; #6D0F +洐 > xíng; #6D10 +洑 > fú; #6D11 +洒 > să; #6D12 +洓 > sŭo; #6D13 +洔 > zhĭ; #6D14 +洕 > yīn; #6D15 +洖 > wú; #6D16 +洗 > xĭ; #6D17 +洘 > kăo; #6D18 +洙 > zhū; #6D19 +洚 > jiàng; #6D1A +洛 > lùo; #6D1B +洝 > àn; #6D1D +洞 > dòng; #6D1E +洟 > yí; #6D1F +洠 > móu; #6D20 +洡 > lĕi; #6D21 +洢 > yī; #6D22 +洣 > mĭ; #6D23 +洤 > quán; #6D24 +津 > jīn; #6D25 +洦 > mò; #6D26 +洧 > wĕi; #6D27 +洨 > xiáo; #6D28 +洩 > xiè; #6D29 +洪 > hóng; #6D2A +洫 > xù; #6D2B +洬 > shùo; #6D2C +洭 > kuāng; #6D2D +洮 > tāo; #6D2E +洯 > qiè; #6D2F +洰 > jù; #6D30 +洱 > ĕr; #6D31 +洲 > zhōu; #6D32 +洳 > rù; #6D33 +洴 > píng; #6D34 +洵 > xún; #6D35 +洶 > xīong; #6D36 +洷 > zhì; #6D37 +洸 > guāng; #6D38 +洹 > huán; #6D39 +洺 > míng; #6D3A +活 > húo; #6D3B +洼 > wā; #6D3C +洽 > qià; #6D3D +派 > pài; #6D3E +洿 > wū; #6D3F +浀 > qŭ; #6D40 +流 > líu; #6D41 +浂 > yì; #6D42 +浃 > jiá; #6D43 +浄 > jìng; #6D44 +浅 > qiăn; #6D45 +浆 > jiāng; #6D46 +浇 > jiāo; #6D47 +浈 > chéng; #6D48 +浉 > shī; #6D49 +浊 > zhúo; #6D4A +测 > cè; #6D4B +浍 > kuài; #6D4D +济 > jì; #6D4E +浏 > líu; #6D4F +浐 > chăn; #6D50 +浑 > hún; #6D51 +浒 > hŭ; #6D52 +浓 > nóng; #6D53 +浔 > xún; #6D54 +浕 > jìn; #6D55 +浖 > liè; #6D56 +浗 > qíu; #6D57 +浘 > wĕi; #6D58 +浙 > zhè; #6D59 +浚 > jùn; #6D5A +浛 > hàn; #6D5B +浜 > bāng; #6D5C +浝 > máng; #6D5D +浞 > zhúo; #6D5E +浟 > yóu; #6D5F +浠 > xī; #6D60 +浡 > bó; #6D61 +浢 > dòu; #6D62 +浣 > wăn; #6D63 +浤 > hóng; #6D64 +浥 > yì; #6D65 +浦 > pŭ; #6D66 +浧 > yĭng; #6D67 +浨 > lăn; #6D68 +浩 > hào; #6D69 +浪 > làng; #6D6A +浫 > hăn; #6D6B +浬 > lĭ; #6D6C +浭 > gēng; #6D6D +浮 > fú; #6D6E +浯 > wú; #6D6F +浰 > liàn; #6D70 +浱 > chún; #6D71 +浲 > féng; #6D72 +浳 > yì; #6D73 +浴 > yù; #6D74 +浵 > tóng; #6D75 +浶 > láo; #6D76 +海 > hăi; #6D77 +浸 > jìn; #6D78 +浹 > jiá; #6D79 +浺 > chōng; 
#6D7A +浻 > wĕng; #6D7B +浼 > mĕi; #6D7C +浽 > sūi; #6D7D +浾 > chēng; #6D7E +浿 > pèi; #6D7F +涀 > xiàn; #6D80 +涁 > shèn; #6D81 +涂 > tú; #6D82 +涃 > kùn; #6D83 +涄 > pīn; #6D84 +涅 > niè; #6D85 +涆 > hàn; #6D86 +涇 > jīng; #6D87 +消 > xiāo; #6D88 +涉 > shè; #6D89 +涊 > niàn; #6D8A +涋 > tū; #6D8B +涌 > yŏng; #6D8C +涍 > xiào; #6D8D +涎 > xián; #6D8E +涏 > tĭng; #6D8F +涐 > é; #6D90 +涑 > sù; #6D91 +涒 > tūn; #6D92 +涓 > juān; #6D93 +涔 > cén; #6D94 +涕 > tì; #6D95 +涖 > lì; #6D96 +涗 > shùi; #6D97 +涘 > sì; #6D98 +涙 > lèi; #6D99 +涚 > shùi; #6D9A +涛 > tāo; #6D9B +涜 > dú; #6D9C +涝 > lào; #6D9D +涞 > lái; #6D9E +涟 > lián; #6D9F +涠 > wéi; #6DA0 +涡 > wō; #6DA1 +涢 > yún; #6DA2 +涣 > huàn; #6DA3 +涤 > dí; #6DA4 +润 > rùn; #6DA6 +涧 > jiàn; #6DA7 +涨 > zhăng; #6DA8 +涩 > sè; #6DA9 +涪 > fú; #6DAA +涫 > guàn; #6DAB +涬 > xìng; #6DAC +涭 > shòu; #6DAD +涮 > shuàn; #6DAE +涯 > yá; #6DAF +涰 > chùo; #6DB0 +涱 > zhàng; #6DB1 +液 > yè; #6DB2 +涳 > kōng; #6DB3 +涴 > wò; #6DB4 +涵 > hán; #6DB5 +涶 > tūo; #6DB6 +涷 > dōng; #6DB7 +涸 > hé; #6DB8 +涹 > wō; #6DB9 +涺 > jū; #6DBA +涻 > gàn; #6DBB +涼 > liáng; #6DBC +涽 > hūn; #6DBD +涾 > tà; #6DBE +涿 > zhúo; #6DBF +淀 > diàn; #6DC0 +淁 > qiè; #6DC1 +淂 > dé; #6DC2 +淃 > juàn; #6DC3 +淄 > zī; #6DC4 +淅 > xī; #6DC5 +淆 > yáo; #6DC6 +淇 > qí; #6DC7 +淈 > gŭ; #6DC8 +淉 > gŭo; #6DC9 +淊 > hàn; #6DCA +淋 > lín; #6DCB +淌 > tăng; #6DCC +淍 > zhōu; #6DCD +淎 > pĕng; #6DCE +淏 > hào; #6DCF +淐 > chāng; #6DD0 +淑 > shú; #6DD1 +淒 > qī; #6DD2 +淓 > fāng; #6DD3 +淔 > chì; #6DD4 +淕 > lù; #6DD5 +淖 > nào; #6DD6 +淗 > jú; #6DD7 +淘 > táo; #6DD8 +淙 > cóng; #6DD9 +淚 > lèi; #6DDA +淛 > zhì; #6DDB +淜 > péng; #6DDC +淝 > féi; #6DDD +淞 > sōng; #6DDE +淟 > tiăn; #6DDF +淠 > pì; #6DE0 +淡 > dàn; #6DE1 +淢 > yù; #6DE2 +淣 > ní; #6DE3 +淤 > yū; #6DE4 +淥 > lù; #6DE5 +淦 > gàn; #6DE6 +淧 > mì; #6DE7 +淨 > jìng; #6DE8 +淩 > líng; #6DE9 +淪 > lún; #6DEA +淫 > yín; #6DEB +淬 > cùi; #6DEC +淭 > qú; #6DED +淮 > huái; #6DEE +淯 > yù; #6DEF +淰 > niàn; #6DF0 +深 > shēn; #6DF1 +淲 > piáo; #6DF2 +淳 > chún; #6DF3 +淴 > wà; #6DF4 +淵 > yuān; #6DF5 +淶 > lái; #6DF6 +混 > hŭn; 
#6DF7 +淸 > qīng; #6DF8 +淹 > yān; #6DF9 +淺 > qiăn; #6DFA +添 > tiān; #6DFB +淼 > miăo; #6DFC +淽 > zhĭ; #6DFD +淾 > yĭn; #6DFE +淿 > mì; #6DFF +渀 > bēn; #6E00 +渁 > yuān; #6E01 +渂 > wèn; #6E02 +渃 > rè; #6E03 +渄 > fēi; #6E04 +清 > qīng; #6E05 +渆 > yuān; #6E06 +渇 > kĕ; #6E07 +済 > jì; #6E08 +渉 > shè; #6E09 +渊 > yuān; #6E0A +渌 > lù; #6E0C +渍 > zì; #6E0D +渎 > dú; #6E0E +渐 > jiàn; #6E10 +渑 > mĭn; #6E11 +渒 > pì; #6E12 +渔 > yú; #6E14 +渕 > yuān; #6E15 +渖 > shĕn; #6E16 +渗 > shèn; #6E17 +渘 > róu; #6E18 +渙 > huàn; #6E19 +渚 > zhŭ; #6E1A +減 > jiăn; #6E1B +渜 > nuăn; #6E1C +渝 > yú; #6E1D +渞 > qíu; #6E1E +渟 > tíng; #6E1F +渠 > qú; #6E20 +渡 > dù; #6E21 +渢 > féng; #6E22 +渣 > zhā; #6E23 +渤 > bó; #6E24 +渥 > wò; #6E25 +渦 > wō; #6E26 +渧 > dì; #6E27 +渨 > wēi; #6E28 +温 > wēn; #6E29 +渪 > rú; #6E2A +渫 > xiè; #6E2B +測 > cè; #6E2C +渭 > wèi; #6E2D +渮 > gē; #6E2E +港 > găng; #6E2F +渰 > yăn; #6E30 +渱 > hóng; #6E31 +渲 > xuàn; #6E32 +渳 > mĭ; #6E33 +渴 > kĕ; #6E34 +渵 > máo; #6E35 +渶 > yīng; #6E36 +渷 > yăn; #6E37 +游 > yóu; #6E38 +渹 > hōng; #6E39 +渺 > miăo; #6E3A +渻 > xĭng; #6E3B +渼 > mĕi; #6E3C +渽 > zāi; #6E3D +渾 > hún; #6E3E +渿 > nài; #6E3F +湀 > kúi; #6E40 +湁 > shí; #6E41 +湂 > è; #6E42 +湃 > pài; #6E43 +湄 > méi; #6E44 +湅 > liàn; #6E45 +湆 > qì; #6E46 +湇 > qì; #6E47 +湈 > méi; #6E48 +湉 > tián; #6E49 +湊 > còu; #6E4A +湋 > wéi; #6E4B +湌 > cān; #6E4C +湍 > tuān; #6E4D +湎 > miăn; #6E4E +湏 > hùi; #6E4F +湐 > mò; #6E50 +湑 > xŭ; #6E51 +湒 > jí; #6E52 +湓 > pén; #6E53 +湔 > jiān; #6E54 +湕 > jiăn; #6E55 +湖 > hú; #6E56 +湗 > fèng; #6E57 +湘 > xiāng; #6E58 +湙 > yì; #6E59 +湚 > yìn; #6E5A +湛 > zhàn; #6E5B +湜 > shí; #6E5C +湝 > jiē; #6E5D +湞 > chéng; #6E5E +湟 > huáng; #6E5F +湠 > tàn; #6E60 +湡 > yú; #6E61 +湢 > bì; #6E62 +湣 > mĭn; #6E63 +湤 > shī; #6E64 +湥 > tú; #6E65 +湦 > shēng; #6E66 +湧 > yŏng; #6E67 +湨 > qù; #6E68 +湩 > zhòng; #6E69 +湪 > suèi; #6E6A +湫 > jīu; #6E6B +湬 > jiăo; #6E6C +湭 > qióu; #6E6D +湮 > yīn; #6E6E +湯 > tāng; #6E6F +湰 > lóng; #6E70 +湱 > hùo; #6E71 +湲 > yuán; #6E72 +湳 > năn; #6E73 +湴 > bàn; #6E74 +湵 > yŏu; #6E75 +湶 > quán; 
#6E76 +湷 > chúi; #6E77 +湸 > liàng; #6E78 +湹 > chán; #6E79 +湺 > yán; #6E7A +湻 > chún; #6E7B +湼 > niè; #6E7C +湽 > zī; #6E7D +湾 > wān; #6E7E +湿 > shī; #6E7F +満 > măn; #6E80 +溁 > yíng; #6E81 +溃 > kùi; #6E83 +溅 > jiàn; #6E85 +溆 > xù; #6E86 +溇 > lǚ; #6E87 +溈 > gūi; #6E88 +溉 > gài; #6E89 +溌 > pō; #6E8C +溍 > jìn; #6E8D +溎 > gùi; #6E8E +溏 > táng; #6E8F +源 > yuán; #6E90 +溑 > sŭo; #6E91 +溒 > yuán; #6E92 +溓 > lián; #6E93 +溔 > yăo; #6E94 +溕 > mèng; #6E95 +準 > zhŭn; #6E96 +溗 > shéng; #6E97 +溘 > kè; #6E98 +溙 > tài; #6E99 +溚 > dá; #6E9A +溛 > wā; #6E9B +溜 > līu; #6E9C +溝 > gōu; #6E9D +溞 > sāo; #6E9E +溟 > míng; #6E9F +溠 > zhà; #6EA0 +溡 > shí; #6EA1 +溢 > yì; #6EA2 +溣 > lún; #6EA3 +溤 > mă; #6EA4 +溥 > pŭ; #6EA5 +溦 > wéi; #6EA6 +溧 > lì; #6EA7 +溨 > cái; #6EA8 +溩 > wù; #6EA9 +溪 > xī; #6EAA +溫 > wēn; #6EAB +溬 > qiāng; #6EAC +溭 > zé; #6EAD +溮 > shī; #6EAE +溯 > sù; #6EAF +溰 > yī; #6EB0 +溱 > zhēn; #6EB1 +溲 > sōu; #6EB2 +溳 > yún; #6EB3 +溴 > xìu; #6EB4 +溵 > yīn; #6EB5 +溶 > róng; #6EB6 +溷 > hùn; #6EB7 +溸 > sù; #6EB8 +溹 > sù; #6EB9 +溺 > nì; #6EBA +溻 > tà; #6EBB +溼 > shī; #6EBC +溽 > rù; #6EBD +溾 > wēi; #6EBE +溿 > pàn; #6EBF +滀 > chù; #6EC0 +滁 > chú; #6EC1 +滂 > pāng; #6EC2 +滃 > wĕng; #6EC3 +滄 > cāng; #6EC4 +滅 > miè; #6EC5 +滆 > hé; #6EC6 +滇 > diān; #6EC7 +滈 > hào; #6EC8 +滉 > huăng; #6EC9 +滊 > xì; #6ECA +滋 > zī; #6ECB +滌 > dí; #6ECC +滍 > zhĭ; #6ECD +滎 > yíng; #6ECE +滏 > fŭ; #6ECF +滐 > jié; #6ED0 +滑 > huá; #6ED1 +滒 > gē; #6ED2 +滓 > zĭ; #6ED3 +滔 > tāo; #6ED4 +滕 > téng; #6ED5 +滖 > sūi; #6ED6 +滗 > bĭ; #6ED7 +滘 > jiào; #6ED8 +滙 > hùi; #6ED9 +滚 > gŭn; #6EDA +滛 > yín; #6EDB +滜 > gāo; #6EDC +滝 > lóng; #6EDD +滞 > zhì; #6EDE +滟 > yàn; #6EDF +滠 > shè; #6EE0 +满 > măn; #6EE1 +滢 > yìng; #6EE2 +滣 > chún; #6EE3 +滤 > lǜ; #6EE4 +滥 > làn; #6EE5 +滦 > luán; #6EE6 +滨 > bīn; #6EE8 +滩 > tān; #6EE9 +滪 > yù; #6EEA +滫 > sŏu; #6EEB +滬 > hù; #6EEC +滭 > bì; #6EED +滮 > biāo; #6EEE +滯 > zhì; #6EEF +滰 > jiăng; #6EF0 +滱 > kòu; #6EF1 +滲 > shèn; #6EF2 +滳 > shāng; #6EF3 +滴 > dī; #6EF4 +滵 > mì; #6EF5 +滶 > áo; #6EF6 +滷 > lŭ; #6EF7 +滸 > 
hŭ; #6EF8 +滹 > hū; #6EF9 +滺 > yóu; #6EFA +滻 > chăn; #6EFB +滼 > fàn; #6EFC +滽 > yóng; #6EFD +滾 > gŭn; #6EFE +滿 > măn; #6EFF +漀 > qìng; #6F00 +漁 > yú; #6F01 +漂 > piāo; #6F02 +漃 > jí; #6F03 +漄 > yá; #6F04 +漅 > jiăo; #6F05 +漆 > qī; #6F06 +漇 > xĭ; #6F07 +漈 > jì; #6F08 +漉 > lù; #6F09 +漊 > lǚ; #6F0A +漋 > lóng; #6F0B +漌 > jĭn; #6F0C +漍 > gúo; #6F0D +漎 > cóng; #6F0E +漏 > lòu; #6F0F +漐 > zhí; #6F10 +漑 > gài; #6F11 +漒 > qiáng; #6F12 +漓 > lí; #6F13 +演 > yăn; #6F14 +漕 > cáo; #6F15 +漖 > jiào; #6F16 +漗 > cōng; #6F17 +漘 > qún; #6F18 +漙 > tuán; #6F19 +漚 > òu; #6F1A +漛 > téng; #6F1B +漜 > yĕ; #6F1C +漝 > xí; #6F1D +漞 > mì; #6F1E +漟 > táng; #6F1F +漠 > mò; #6F20 +漡 > shāng; #6F21 +漢 > hàn; #6F22 +漣 > lián; #6F23 +漤 > lăn; #6F24 +漥 > wā; #6F25 +漦 > lí; #6F26 +漧 > qián; #6F27 +漨 > féng; #6F28 +漩 > xuán; #6F29 +漪 > yī; #6F2A +漫 > màn; #6F2B +漬 > zì; #6F2C +漭 > măng; #6F2D +漮 > kāng; #6F2E +漯 > lĕi; #6F2F +漰 > pēng; #6F30 +漱 > shù; #6F31 +漲 > zhăng; #6F32 +漳 > zhāng; #6F33 +漴 > chóng; #6F34 +漵 > xù; #6F35 +漶 > huàn; #6F36 +漷 > kùo; #6F37 +漸 > jiàn; #6F38 +漹 > yān; #6F39 +漺 > chuăng; #6F3A +漻 > liáo; #6F3B +漼 > cŭi; #6F3C +漽 > tí; #6F3D +漾 > yàng; #6F3E +漿 > jiāng; #6F3F +潀 > cóng; #6F40 +潁 > yĭng; #6F41 +潂 > hóng; #6F42 +潃 > xún; #6F43 +潄 > shù; #6F44 +潅 > guàn; #6F45 +潆 > yíng; #6F46 +潇 > xiāo; #6F47 +潊 > xù; #6F4A +潋 > liàn; #6F4B +潌 > zhì; #6F4C +潍 > wéi; #6F4D +潎 > pì; #6F4E +潏 > jué; #6F4F +潐 > jiào; #6F50 +潑 > pō; #6F51 +潒 > dàng; #6F52 +潓 > hùi; #6F53 +潔 > jié; #6F54 +潕 > wŭ; #6F55 +潖 > pá; #6F56 +潗 > jí; #6F57 +潘 > pān; #6F58 +潙 > gúi; #6F59 +潚 > xiāo; #6F5A +潛 > qián; #6F5B +潜 > qián; #6F5C +潝 > xī; #6F5D +潞 > lù; #6F5E +潟 > xì; #6F5F +潠 > xuàn; #6F60 +潡 > dùn; #6F61 +潢 > huáng; #6F62 +潣 > mĭn; #6F63 +潤 > rùn; #6F64 +潥 > sù; #6F65 +潦 > liáo; #6F66 +潧 > zhēn; #6F67 +潨 > zhōng; #6F68 +潩 > yì; #6F69 +潪 > dí; #6F6A +潫 > wān; #6F6B +潬 > dàn; #6F6C +潭 > tán; #6F6D +潮 > cháo; #6F6E +潯 > xún; #6F6F +潰 > kùi; #6F70 +潱 > yie; #6F71 +潲 > shào; #6F72 +潳 > tú; #6F73 +潴 > zhū; #6F74 +潵 > sàn; 
#6F75 +潶 > hēi; #6F76 +潷 > bĭ; #6F77 +潸 > shān; #6F78 +潹 > chán; #6F79 +潺 > chán; #6F7A +潻 > shŭ; #6F7B +潼 > tóng; #6F7C +潽 > pŭ; #6F7D +潾 > lín; #6F7E +潿 > wéi; #6F7F +澀 > sè; #6F80 +澁 > sè; #6F81 +澂 > chéng; #6F82 +澃 > jìong; #6F83 +澄 > chéng; #6F84 +澅 > huà; #6F85 +澆 > jiāo; #6F86 +澇 > lào; #6F87 +澈 > chè; #6F88 +澉 > găn; #6F89 +澊 > cūn; #6F8A +澋 > hèng; #6F8B +澌 > sī; #6F8C +澍 > shù; #6F8D +澎 > péng; #6F8E +澏 > hàn; #6F8F +澐 > yún; #6F90 +澑 > lìu; #6F91 +澒 > hòng; #6F92 +澓 > fú; #6F93 +澔 > hào; #6F94 +澕 > hé; #6F95 +澖 > xiān; #6F96 +澗 > jiàn; #6F97 +澘 > shān; #6F98 +澙 > xì; #6F99 +澜 > lán; #6F9C +澞 > yú; #6F9E +澟 > lĭn; #6F9F +澠 > mĭn; #6FA0 +澡 > zăo; #6FA1 +澢 > dāng; #6FA2 +澣 > wăn; #6FA3 +澤 > zé; #6FA4 +澥 > xiè; #6FA5 +澦 > yù; #6FA6 +澧 > lĭ; #6FA7 +澨 > shì; #6FA8 +澩 > xué; #6FA9 +澪 > líng; #6FAA +澫 > màn; #6FAB +澬 > zī; #6FAC +澭 > yōng; #6FAD +澮 > kuài; #6FAE +澯 > càn; #6FAF +澰 > liàn; #6FB0 +澱 > diàn; #6FB1 +澲 > yè; #6FB2 +澳 > ào; #6FB3 +澴 > huán; #6FB4 +澵 > zhēn; #6FB5 +澶 > chán; #6FB6 +澷 > màn; #6FB7 +澸 > dăn; #6FB8 +澹 > dàn; #6FB9 +澺 > yì; #6FBA +澻 > sùi; #6FBB +澼 > pì; #6FBC +澽 > jù; #6FBD +澾 > tà; #6FBE +澿 > qín; #6FBF +激 > jī; #6FC0 +濁 > zhúo; #6FC1 +濂 > lián; #6FC2 +濃 > nóng; #6FC3 +濄 > gūo; #6FC4 +濅 > jìn; #6FC5 +濆 > fén; #6FC6 +濇 > sè; #6FC7 +濈 > jí; #6FC8 +濉 > sūi; #6FC9 +濊 > hùi; #6FCA +濋 > chŭ; #6FCB +濌 > tà; #6FCC +濍 > sōng; #6FCD +濎 > dĭng; #6FCE +濐 > zhŭ; #6FD0 +濑 > lài; #6FD1 +濒 > bīn; #6FD2 +濓 > lián; #6FD3 +濔 > mĭ; #6FD4 +濕 > shī; #6FD5 +濖 > shù; #6FD6 +濗 > mì; #6FD7 +濘 > nìng; #6FD8 +濙 > yíng; #6FD9 +濚 > yíng; #6FDA +濛 > méng; #6FDB +濜 > jìn; #6FDC +濝 > qí; #6FDD +濞 > pì; #6FDE +濟 > jì; #6FDF +濠 > háo; #6FE0 +濡 > rú; #6FE1 +濢 > zŭi; #6FE2 +濣 > wò; #6FE3 +濤 > tāo; #6FE4 +濥 > yìn; #6FE5 +濦 > yĭn; #6FE6 +濧 > dùi; #6FE7 +濨 > cí; #6FE8 +濩 > hùo; #6FE9 +濪 > jìng; #6FEA +濫 > làn; #6FEB +濬 > jùn; #6FEC +濭 > ài; #6FED +濮 > pū; #6FEE +濯 > zhúo; #6FEF +濰 > wéi; #6FF0 +濱 > bīn; #6FF1 +濲 > gŭ; #6FF2 +濳 > qián; #6FF3 +濴 > xíng; #6FF4 +濶 > kùo; #6FF6 +濷 
> fèi; #6FF7 +濺 > jiàn; #6FFA +濻 > wĕi; #6FFB +濼 > lùo; #6FFC +濽 > zàn; #6FFD +濾 > lǜ; #6FFE +濿 > lì; #6FFF +瀀 > yōu; #7000 +瀁 > yàng; #7001 +瀂 > lŭ; #7002 +瀃 > sì; #7003 +瀄 > jié; #7004 +瀅 > yìng; #7005 +瀆 > dú; #7006 +瀇 > wăng; #7007 +瀈 > hūi; #7008 +瀉 > xiè; #7009 +瀊 > pán; #700A +瀋 > shĕn; #700B +瀌 > biāo; #700C +瀍 > chán; #700D +瀎 > mò; #700E +瀏 > líu; #700F +瀐 > jiān; #7010 +瀑 > pù; #7011 +瀒 > sè; #7012 +瀓 > chéng; #7013 +瀔 > gŭ; #7014 +瀕 > bīn; #7015 +瀖 > hùo; #7016 +瀗 > xiàn; #7017 +瀘 > lú; #7018 +瀙 > qīn; #7019 +瀚 > hàn; #701A +瀛 > yíng; #701B +瀜 > yōng; #701C +瀝 > lì; #701D +瀞 > jìng; #701E +瀟 > xiāo; #701F +瀠 > yíng; #7020 +瀡 > sŭi; #7021 +瀢 > wéi; #7022 +瀣 > xiè; #7023 +瀤 > huái; #7024 +瀥 > hào; #7025 +瀦 > zhū; #7026 +瀧 > lóng; #7027 +瀨 > lài; #7028 +瀩 > dùi; #7029 +瀪 > fán; #702A +瀫 > hú; #702B +瀬 > lài; #702C +瀯 > yíng; #702F +瀰 > mí; #7030 +瀱 > jì; #7031 +瀲 > liàn; #7032 +瀳 > jiàn; #7033 +瀴 > yĭng; #7034 +瀵 > fèn; #7035 +瀶 > lín; #7036 +瀷 > yì; #7037 +瀸 > jiān; #7038 +瀹 > yuè; #7039 +瀺 > chán; #703A +瀻 > dài; #703B +瀼 > ráng; #703C +瀽 > jiăn; #703D +瀾 > lán; #703E +瀿 > fán; #703F +灀 > shuàng; #7040 +灁 > yuān; #7041 +灂 > zhúo; #7042 +灃 > fēng; #7043 +灄 > shè; #7044 +灅 > lĕi; #7045 +灆 > lán; #7046 +灇 > cóng; #7047 +灈 > qú; #7048 +灉 > yōng; #7049 +灊 > qián; #704A +灋 > fă; #704B +灌 > guàn; #704C +灍 > què; #704D +灎 > yàn; #704E +灏 > hào; #704F +灑 > să; #7051 +灒 > zàn; #7052 +灓 > luán; #7053 +灔 > yàn; #7054 +灕 > lí; #7055 +灖 > mĭ; #7056 +灗 > shàn; #7057 +灘 > tān; #7058 +灙 > dăng; #7059 +灚 > jiăo; #705A +灛 > chăn; #705B +灝 > hào; #705D +灞 > bà; #705E +灟 > zhú; #705F +灠 > lăn; #7060 +灡 > lán; #7061 +灢 > năng; #7062 +灣 > wān; #7063 +灤 > luán; #7064 +灥 > xún; #7065 +灦 > xiăn; #7066 +灧 > yàn; #7067 +灨 > găn; #7068 +灩 > yàn; #7069 +灪 > yù; #706A +火 > hŭo; #706B +灬 > sì' 'diăn' 'hŭo; #706C +灭 > miè; #706D +灮 > guāng; #706E +灯 > dēng; #706F +灰 > hūi; #7070 +灱 > xiāo; #7071 +灲 > xiāo; #7072 +灳 > hū1; #7073 +灴 > hóng; #7074 +灵 > líng; #7075 +灶 > zào; #7076 +灷 > 
zhuàn; #7077 +灸 > jĭu; #7078 +灹 > zhà; #7079 +灺 > xiè; #707A +灻 > chì; #707B +灼 > zhúo; #707C +災 > zāi; #707D +灾 > zāi; #707E +灿 > càn; #707F +炀 > yáng; #7080 +炁 > qì; #7081 +炂 > zhōng; #7082 +炃 > fén; #7083 +炄 > nĭu; #7084 +炅 > jĭong; #7085 +炆 > wén; #7086 +炇 > pò; #7087 +炈 > yì; #7088 +炉 > lú; #7089 +炊 > chūi; #708A +炋 > pī; #708B +炌 > kài; #708C +炍 > pàn; #708D +炎 > yán; #708E +炏 > kài; #708F +炐 > pàng; #7090 +炑 > mù; #7091 +炒 > chăo; #7092 +炓 > liào; #7093 +炔 > gùi; #7094 +炕 > kàng; #7095 +炖 > tūn; #7096 +炗 > guāng; #7097 +炘 > xīn; #7098 +炙 > zhì; #7099 +炚 > guang; #709A +炛 > guāng; #709B +炜 > wĕi; #709C +炝 > qiàng; #709D +炟 > dá; #709F +炠 > xiá; #70A0 +炡 > zhēng; #70A1 +炢 > zhú; #70A2 +炣 > kĕ; #70A3 +炤 > zhào; #70A4 +炥 > fú; #70A5 +炦 > bá; #70A6 +炧 > dùo; #70A7 +炨 > dùo; #70A8 +炩 > lìng; #70A9 +炪 > zhúo; #70AA +炫 > xuàn; #70AB +炬 > jù; #70AC +炭 > tàn; #70AD +炮 > pào; #70AE +炯 > jĭong; #70AF +炰 > páo; #70B0 +炱 > tái; #70B1 +炲 > tái; #70B2 +炳 > bĭng; #70B3 +炴 > yăng; #70B4 +炵 > tōng; #70B5 +炶 > hān; #70B6 +炷 > zhù; #70B7 +炸 > zhà; #70B8 +点 > diăn; #70B9 +為 > wèi; #70BA +炻 > shí; #70BB +炼 > liàn; #70BC +炽 > chì; #70BD +炾 > huăng; #70BE +烀 > hū; #70C0 +烁 > shùo; #70C1 +烂 > làn; #70C2 +烃 > jĭng; #70C3 +烄 > jiăo; #70C4 +烅 > xù; #70C5 +烆 > xíng; #70C6 +烇 > quàn; #70C7 +烈 > liè; #70C8 +烉 > huàn; #70C9 +烊 > yáng; #70CA +烋 > xiāo; #70CB +烌 > xīu; #70CC +烍 > xiăn; #70CD +烎 > yín; #70CE +烏 > wū; #70CF +烐 > zhōu; #70D0 +烑 > yáo; #70D1 +烒 > shì; #70D2 +烓 > wēi; #70D3 +烔 > tóng; #70D4 +烕 > xuè; #70D5 +烖 > zāi; #70D6 +烗 > kài; #70D7 +烘 > hōng; #70D8 +烙 > lùo; #70D9 +烚 > xiá; #70DA +烛 > zhú; #70DB +烜 > xuăn; #70DC +烝 > zhēng; #70DD +烞 > pò; #70DE +烟 > yān; #70DF +烠 > hŭi; #70E0 +烡 > guāng; #70E1 +烢 > zhè; #70E2 +烣 > hūi; #70E3 +烤 > kăo; #70E4 +烦 > fán; #70E6 +烧 > shāo; #70E7 +烨 > yè; #70E8 +烩 > hùi; #70E9 +烫 > tàng; #70EB +烬 > jìn; #70EC +热 > rè; #70ED +烯 > xī; #70EF +烰 > fú; #70F0 +烱 > jĭong; #70F1 +烲 > chè; #70F2 +烳 > pŭ; #70F3 +烴 > jĭng; #70F4 +烵 > zhúo; #70F5 +烶 > tĭng; 
#70F6 +烷 > wán; #70F7 +烸 > hăi; #70F8 +烹 > pēng; #70F9 +烺 > lăng; #70FA +烻 > shān; #70FB +烼 > hū; #70FC +烽 > fēng; #70FD +烾 > chì; #70FE +烿 > róng; #70FF +焀 > hú; #7100 +焁 > xi; #7101 +焂 > shú; #7102 +焃 > hè; #7103 +焄 > xūn; #7104 +焅 > kù; #7105 +焆 > jué; #7106 +焇 > xiāo; #7107 +焈 > xī; #7108 +焉 > yān; #7109 +焊 > hàn; #710A +焋 > zhuàng; #710B +焌 > jùn; #710C +焍 > dì; #710D +焎 > xiè; #710E +焏 > jí; #710F +焐 > wù; #7110 +焓 > hán; #7113 +焔 > yàn; #7114 +焕 > huàn; #7115 +焖 > mèn; #7116 +焗 > jú; #7117 +焘 > chóu; #7118 +焙 > bèi; #7119 +焚 > fén; #711A +焛 > lìn; #711B +焜 > kūn; #711C +焝 > hùn; #711D +焞 > tūn; #711E +焟 > xí; #711F +焠 > cùi; #7120 +無 > wú; #7121 +焢 > hōng; #7122 +焣 > jù; #7123 +焤 > fŭ; #7124 +焥 > wò; #7125 +焦 > jiāo; #7126 +焧 > cōng; #7127 +焨 > fèng; #7128 +焩 > pīng; #7129 +焪 > qīong; #712A +焫 > rùo; #712B +焬 > xí; #712C +焭 > qíong; #712D +焮 > xìn; #712E +焯 > zhúo; #712F +焰 > yàn; #7130 +焱 > yàn; #7131 +焲 > yì; #7132 +焳 > jué; #7133 +焴 > yù; #7134 +焵 > gàng; #7135 +然 > rán; #7136 +焷 > pí; #7137 +焸 > gŭ; #7138 +焺 > shēng; #713A +焻 > chàng; #713B +焼 > shāo; #713C +煁 > chén; #7141 +煂 > hè; #7142 +煃 > kŭi; #7143 +煄 > zhōng; #7144 +煅 > duàn; #7145 +煆 > xiā; #7146 +煇 > hūi; #7147 +煈 > fèng; #7148 +煉 > liàn; #7149 +煊 > xuān; #714A +煋 > xīng; #714B +煌 > huáng; #714C +煍 > jiăo; #714D +煎 > jiān; #714E +煏 > bì; #714F +煐 > yīng; #7150 +煑 > zhŭ; #7151 +煒 > wĕi; #7152 +煓 > tuān; #7153 +煔 > tiàn; #7154 +煕 > xī; #7155 +煖 > nuăn; #7156 +煗 > nuăn; #7157 +煘 > chán; #7158 +煙 > yān; #7159 +煚 > jĭong; #715A +煛 > jĭong; #715B +煜 > yù; #715C +煝 > mèi; #715D +煞 > shà; #715E +煟 > wèi; #715F +煠 > yè; #7160 +煡 > xìn; #7161 +煢 > qíong; #7162 +煣 > rŏu; #7163 +煤 > méi; #7164 +煥 > huàn; #7165 +煦 > xŭ; #7166 +照 > zhào; #7167 +煨 > wēi; #7168 +煩 > fán; #7169 +煪 > qíu; #716A +煫 > sùi; #716B +煬 > yáng; #716C +煭 > liè; #716D +煮 > zhŭ; #716E +煯 > jie; #716F +煰 > gào; #7170 +煱 > guā; #7171 +煲 > bào; #7172 +煳 > hú; #7173 +煴 > yūn; #7174 +煵 > xiā; #7175 +煸 > biān; #7178 +煹 > gòu; #7179 +煺 > tùi; 
#717A +煻 > táng; #717B +煼 > chăo; #717C +煽 > shān; #717D +煾 > n; #717E +煿 > bó; #717F +熀 > huăng; #7180 +熁 > xié; #7181 +熂 > xì; #7182 +熃 > wù; #7183 +熄 > xí; #7184 +熅 > yún; #7185 +熆 > hé; #7186 +熇 > hè; #7187 +熈 > xī; #7188 +熉 > yún; #7189 +熊 > xíong; #718A +熋 > nái; #718B +熌 > shàn; #718C +熍 > qiong; #718D +熎 > yào; #718E +熏 > xūn; #718F +熐 > mì; #7190 +熑 > lián; #7191 +熒 > yíng; #7192 +熓 > wèn; #7193 +熔 > róng; #7194 +熗 > qiàng; #7197 +熘 > līu; #7198 +熙 > xī; #7199 +熚 > bì; #719A +熛 > biāo; #719B +熜 > zŏng; #719C +熝 > lù; #719D +熞 > jiān; #719E +熟 > shóu; #719F +熠 > yì; #71A0 +熡 > lóu; #71A1 +熢 > fēng; #71A2 +熣 > sūi; #71A3 +熤 > yì; #71A4 +熥 > tōng; #71A5 +熦 > jué; #71A6 +熧 > zōng; #71A7 +熨 > yùn; #71A8 +熩 > hù; #71A9 +熪 > yí; #71AA +熫 > zhì; #71AB +熬 > áo; #71AC +熭 > wèi; #71AD +熮 > liáo; #71AE +熯 > hàn; #71AF +熰 > ōu; #71B0 +熱 > rè; #71B1 +熲 > jĭong; #71B2 +熳 > màn; #71B3 +熵 > shāng; #71B5 +熶 > cuàn; #71B6 +熷 > zēng; #71B7 +熸 > jiān; #71B8 +熹 > xī; #71B9 +熺 > xī; #71BA +熻 > xī; #71BB +熼 > yì; #71BC +熽 > xiào; #71BD +熾 > chì; #71BE +熿 > huáng; #71BF +燀 > chăn; #71C0 +燁 > yè; #71C1 +燂 > qián; #71C2 +燃 > rán; #71C3 +燄 > yàn; #71C4 +燅 > xián; #71C5 +燆 > qiáo; #71C6 +燇 > zùn; #71C7 +燈 > dēng; #71C8 +燉 > dùn; #71C9 +燊 > shēn; #71CA +燋 > jiāo; #71CB +燌 > fén; #71CC +燍 > sī; #71CD +燎 > liào; #71CE +燏 > yù; #71CF +燐 > lín; #71D0 +燑 > tóng; #71D1 +燒 > shāo; #71D2 +燓 > fēn; #71D3 +燔 > fán; #71D4 +燕 > yàn; #71D5 +燖 > xún; #71D6 +燗 > làn; #71D7 +燘 > mĕi; #71D8 +燙 > tàng; #71D9 +燚 > yī; #71DA +燛 > jĭng; #71DB +燜 > mèn; #71DC +營 > yíng; #71DF +燠 > yù; #71E0 +燡 > yì; #71E1 +燢 > xué; #71E2 +燣 > lán; #71E3 +燤 > tài; #71E4 +燥 > zào; #71E5 +燦 > càn; #71E6 +燧 > sùi; #71E7 +燨 > xī; #71E8 +燩 > què; #71E9 +燪 > cōng; #71EA +燫 > lián; #71EB +燬 > hŭi; #71EC +燭 > zhú; #71ED +燮 > xiè; #71EE +燯 > líng; #71EF +燰 > wēi; #71F0 +燱 > yì; #71F1 +燲 > xié; #71F2 +燳 > zhào; #71F3 +燴 > hùi; #71F4 +燷 > lán; #71F7 +燸 > rú; #71F8 +燹 > xiăn; #71F9 +燺 > kăo; #71FA +燻 > xūn; #71FB +燼 > jìn; #71FC +燽 > 
chóu; #71FD +燾 > chóu; #71FE +燿 > yào; #71FF +爀 > hè; #7200 +爁 > làn; #7201 +爂 > biāo; #7202 +爃 > róng; #7203 +爄 > lì; #7204 +爅 > mò; #7205 +爆 > bào; #7206 +爇 > rùo; #7207 +爈 > lǘ; #7208 +爉 > là; #7209 +爊 > áo; #720A +爋 > xùn; #720B +爌 > kuàng; #720C +爍 > shùo; #720D +爏 > lì; #720F +爐 > lú; #7210 +爑 > jué; #7211 +爒 > liào; #7212 +爓 > yàn; #7213 +爔 > xī; #7214 +爕 > xiè; #7215 +爖 > lóng; #7216 +爗 > yè; #7217 +爙 > răng; #7219 +爚 > yuè; #721A +爛 > làn; #721B +爜 > cóng; #721C +爝 > jué; #721D +爞 > tóng; #721E +爟 > guàn; #721F +爡 > chè; #7221 +爢 > mí; #7222 +爣 > tăng; #7223 +爤 > làn; #7224 +爥 > zhú; #7225 +爧 > líng; #7227 +爨 > cuàn; #7228 +爩 > yù; #7229 +爪 > zhuă; #722A +爬 > pá; #722C +爭 > zhēng; #722D +爮 > páo; #722E +爯 > chēng; #722F +爰 > yuán; #7230 +爱 > ài; #7231 +爲 > wèi; #7232 +爴 > jué; #7234 +爵 > jué; #7235 +父 > fù; #7236 +爷 > yé; #7237 +爸 > bà; #7238 +爹 > diē; #7239 +爺 > yé; #723A +爻 > yáo; #723B +爼 > zŭ; #723C +爽 > shuăng; #723D +爾 > ĕr; #723E +爿 > qiáng; #723F +牀 > chuáng; #7240 +牁 > gē; #7241 +牂 > zāng; #7242 +牃 > dié; #7243 +牄 > qiāng; #7244 +牅 > yóng; #7245 +牆 > qiáng; #7246 +片 > piàn; #7247 +版 > băn; #7248 +牉 > pàn; #7249 +牊 > sháo; #724A +牋 > jiān; #724B +牌 > pái; #724C +牍 > dú; #724D +牎 > chuāng; #724E +牏 > tóu; #724F +牐 > zhá; #7250 +牑 > biān; #7251 +牒 > dié; #7252 +牓 > băng; #7253 +牔 > bó; #7254 +牕 > chuāng; #7255 +牖 > yŏu; #7256 +牘 > dú; #7258 +牙 > yá; #7259 +牚 > chèng; #725A +牛 > níu; #725B +牝 > pìn; #725D +牞 > jīu; #725E +牟 > móu; #725F +牠 > tūo; #7260 +牡 > mŭ; #7261 +牢 > láo; #7262 +牣 > rèn; #7263 +牤 > máng; #7264 +牥 > fāng; #7265 +牦 > máo; #7266 +牧 > mù; #7267 +牨 > gāng; #7268 +物 > wù; #7269 +牪 > yàn; #726A +牫 > gē; #726B +牬 > bèi; #726C +牭 > sì; #726D +牮 > jiàn; #726E +牯 > gŭ; #726F +牰 > yòu; #7270 +牱 > gē; #7271 +牲 > shēng; #7272 +牳 > mŭ; #7273 +牴 > dĭ; #7274 +牵 > qiān; #7275 +牶 > quàn; #7276 +牷 > quán; #7277 +牸 > zì; #7278 +特 > tè; #7279 +牺 > xī; #727A +牻 > máng; #727B +牼 > kēng; #727C +牽 > qiān; #727D +牾 > wú; #727E +牿 > gù; #727F +犀 > xī; #7280 
+犁 > lí; #7281 +犂 > lí; #7282 +犃 > pŏu; #7283 +犄 > jī; #7284 +犅 > gāng; #7285 +犆 > zhí; #7286 +犇 > bēn; #7287 +犈 > quán; #7288 +犉 > rún; #7289 +犊 > dú; #728A +犋 > jù; #728B +犌 > jiā; #728C +犍 > jiān; #728D +犎 > fēng; #728E +犏 > piān; #728F +犐 > kē; #7290 +犑 > jú; #7291 +犒 > kào; #7292 +犓 > chú; #7293 +犔 > xì; #7294 +犕 > bèi; #7295 +犖 > lùo; #7296 +犗 > jiè; #7297 +犘 > má; #7298 +犙 > sān; #7299 +犚 > wèi; #729A +犛 > lí; #729B +犜 > dūn; #729C +犝 > tóng; #729D +犟 > jiàng; #729F +犡 > lì; #72A1 +犢 > dú; #72A2 +犣 > liè; #72A3 +犤 > pí; #72A4 +犥 > piăo; #72A5 +犦 > bào; #72A6 +犧 > xī; #72A7 +犨 > chōu; #72A8 +犩 > wèi; #72A9 +犪 > kúi; #72AA +犫 > chōu; #72AB +犬 > quăn; #72AC +犭 > făn' 'quăn' 'páng; #72AD +犮 > bá; #72AE +犯 > fàn; #72AF +犰 > qíu; #72B0 +犱 > jĭ; #72B1 +犲 > cái; #72B2 +犳 > chúo; #72B3 +犴 > àn; #72B4 +犵 > jié; #72B5 +状 > zhuàng; #72B6 +犷 > guăng; #72B7 +犸 > mà; #72B8 +犹 > yóu; #72B9 +犺 > kàng; #72BA +犻 > bó; #72BB +犼 > hŏu; #72BC +犽 > yá; #72BD +犾 > yín; #72BE +犿 > huān; #72BF +狀 > zhuàng; #72C0 +狁 > yŭn; #72C1 +狂 > kuáng; #72C2 +狃 > nĭu; #72C3 +狄 > dí; #72C4 +狅 > qīng; #72C5 +狆 > zhòng; #72C6 +狇 > mù; #72C7 +狈 > bèi; #72C8 +狉 > pī; #72C9 +狊 > jú; #72CA +狋 > ní; #72CB +狌 > shēng; #72CC +狍 > páo; #72CD +狎 > xiá; #72CE +狏 > túo; #72CF +狐 > hú; #72D0 +狑 > líng; #72D1 +狒 > fèi; #72D2 +狓 > pī; #72D3 +狔 > nĭ; #72D4 +狕 > ăo; #72D5 +狖 > yòu; #72D6 +狗 > gŏu; #72D7 +狘 > yuè; #72D8 +狙 > jū; #72D9 +狚 > dàn; #72DA +狛 > pò; #72DB +狜 > gŭ; #72DC +狝 > xiăn; #72DD +狞 > níng; #72DE +狟 > huán; #72DF +狠 > hĕn; #72E0 +狡 > jiăo; #72E1 +狢 > hé; #72E2 +狣 > zhào; #72E3 +狤 > jí; #72E4 +狥 > xùn; #72E5 +狦 > shān; #72E6 +狧 > tà; #72E7 +狨 > róng; #72E8 +狩 > shòu; #72E9 +狪 > tōng; #72EA +狫 > lăo; #72EB +独 > dú; #72EC +狭 > xiá; #72ED +狮 > shī; #72EE +狯 > huá; #72EF +狰 > zhēng; #72F0 +狱 > yù; #72F1 +狲 > sūn; #72F2 +狳 > yú; #72F3 +狴 > bì; #72F4 +狵 > máng; #72F5 +狶 > xĭ; #72F6 +狷 > juàn; #72F7 +狸 > lí; #72F8 +狹 > xiá; #72F9 +狺 > yín; #72FA +狻 > suān; #72FB +狼 > láng; #72FC +狽 > bèi; #72FD +狾 > zhì; 
#72FE +狿 > yán; #72FF +猀 > shā; #7300 +猁 > lì; #7301 +猂 > hàn; #7302 +猃 > xiăn; #7303 +猄 > jīng; #7304 +猅 > pái; #7305 +猆 > fēi; #7306 +猇 > yáo; #7307 +猈 > bà; #7308 +猉 > qí; #7309 +猊 > ní; #730A +猋 > biāo; #730B +猌 > yìn; #730C +猍 > lái; #730D +猎 > xí; #730E +猏 > jiān; #730F +猐 > qiāng; #7310 +猑 > kūn; #7311 +猒 > yān; #7312 +猓 > gŭo; #7313 +猔 > zòng; #7314 +猕 > mí; #7315 +猖 > chāng; #7316 +猗 > yī; #7317 +猘 > zhì; #7318 +猙 > zhēng; #7319 +猚 > yá; #731A +猛 > mĕng; #731B +猜 > cāi; #731C +猝 > cù; #731D +猞 > shè; #731E +猡 > lúo; #7321 +猢 > hú; #7322 +猣 > zōng; #7323 +猤 > jì; #7324 +猥 > wĕi; #7325 +猦 > fēng; #7326 +猧 > wō; #7327 +猨 > yuán; #7328 +猩 > xīng; #7329 +猪 > zhū; #732A +猫 > māo; #732B +猬 > wèi; #732C +猭 > yuán; #732D +献 > xiàn; #732E +猯 > tuān; #732F +猰 > yà; #7330 +猱 > náo; #7331 +猲 > xiē; #7332 +猳 > jiā; #7333 +猴 > hóu; #7334 +猵 > biān; #7335 +猶 > yóu; #7336 +猷 > yóu; #7337 +猸 > méi; #7338 +猹 > zhā; #7339 +猺 > yáo; #733A +猻 > sūn; #733B +猼 > bó; #733C +猽 > míng; #733D +猾 > huá; #733E +猿 > yuán; #733F +獀 > sōu; #7340 +獁 > mà; #7341 +獂 > yuán; #7342 +獃 > dāi; #7343 +獄 > yù; #7344 +獅 > shī; #7345 +獆 > háo; #7346 +獈 > yì; #7348 +獉 > zhēn; #7349 +獊 > chuàng; #734A +獋 > háo; #734B +獌 > màn; #734C +獍 > jìng; #734D +獎 > jiăng; #734E +獏 > mú; #734F +獐 > zhāng; #7350 +獑 > chán; #7351 +獒 > áo; #7352 +獓 > áo; #7353 +獔 > háo; #7354 +獕 > cūi; #7355 +獖 > fén; #7356 +獗 > jué; #7357 +獘 > bì; #7358 +獙 > bì; #7359 +獚 > huáng; #735A +獛 > pú; #735B +獜 > lín; #735C +獝 > yù; #735D +獞 > tóng; #735E +獟 > yào; #735F +獠 > liáo; #7360 +獡 > shùo; #7361 +獢 > xiāo; #7362 +獥 > xí; #7365 +獦 > gé; #7366 +獧 > juàn; #7367 +獨 > dú; #7368 +獩 > hùi; #7369 +獪 > kuài; #736A +獫 > xiăn; #736B +獬 > xiè; #736C +獭 > tà; #736D +獮 > xiăn; #736E +獯 > xūn; #736F +獰 > níng; #7370 +獱 > pín; #7371 +獲 > hùo; #7372 +獳 > nòu; #7373 +獴 > méng; #7374 +獵 > liè; #7375 +獶 > náo; #7376 +獷 > guăng; #7377 +獸 > shòu; #7378 +獹 > lú; #7379 +獺 > tà; #737A +獻 > xiàn; #737B +獼 > mí; #737C +獽 > ráng; #737D +獾 > huān; #737E +獿 > 
náo; #737F +玀 > lúo; #7380 +玁 > xiăn; #7381 +玂 > qí; #7382 +玃 > jué; #7383 +玄 > xuán; #7384 +玅 > miào; #7385 +玆 > zī; #7386 +率 > lǜ; #7387 +玈 > lú; #7388 +玉 > yù; #7389 +玊 > sù; #738A +王 > wáng; #738B +玌 > qíu; #738C +玍 > gă; #738D +玎 > dīng; #738E +玏 > lè; #738F +玐 > bā; #7390 +玑 > jī; #7391 +玒 > hóng; #7392 +玓 > dì; #7393 +玔 > quàn; #7394 +玕 > gān; #7395 +玖 > jĭu; #7396 +玗 > yú; #7397 +玘 > jĭ; #7398 +玙 > yú; #7399 +玚 > yáng; #739A +玛 > mă; #739B +玜 > gōng; #739C +玝 > wŭ; #739D +玞 > fū; #739E +玟 > wén; #739F +玠 > jiè; #73A0 +玡 > yà; #73A1 +玢 > fén; #73A2 +玣 > biàn; #73A3 +玤 > bĕng; #73A4 +玥 > yuè; #73A5 +玦 > jué; #73A6 +玧 > yŭn; #73A7 +玨 > jué; #73A8 +玩 > wán; #73A9 +玪 > jiān; #73AA +玫 > méi; #73AB +玬 > dăn; #73AC +玭 > pí; #73AD +玮 > wĕi; #73AE +环 > huán; #73AF +现 > xiàn; #73B0 +玱 > qiāng; #73B1 +玲 > líng; #73B2 +玳 > dài; #73B3 +玴 > yì; #73B4 +玵 > án; #73B5 +玶 > píng; #73B6 +玷 > diàn; #73B7 +玸 > fú; #73B8 +玹 > xuán; #73B9 +玺 > xĭ; #73BA +玻 > bō; #73BB +玼 > cĭ; #73BC +玽 > gŏu; #73BD +玾 > jiă; #73BE +玿 > sháo; #73BF +珀 > pò; #73C0 +珁 > cí; #73C1 +珂 > kē; #73C2 +珃 > răn; #73C3 +珄 > shēng; #73C4 +珅 > shēn; #73C5 +珆 > yí; #73C6 +珇 > zŭ; #73C7 +珈 > jiā; #73C8 +珉 > mín; #73C9 +珊 > shān; #73CA +珋 > lĭu; #73CB +珌 > bì; #73CC +珍 > zhēn; #73CD +珎 > zhēn; #73CE +珏 > jué; #73CF +珐 > fà; #73D0 +珑 > lóng; #73D1 +珒 > jīn; #73D2 +珓 > jiào; #73D3 +珔 > jiàn; #73D4 +珕 > lì; #73D5 +珖 > guāng; #73D6 +珗 > xiān; #73D7 +珘 > zhōu; #73D8 +珙 > gŏng; #73D9 +珚 > yān; #73DA +珛 > xìu; #73DB +珜 > yáng; #73DC +珝 > xŭ; #73DD +珞 > lùo; #73DE +珟 > sù; #73DF +珠 > zhū; #73E0 +珡 > qín; #73E1 +珢 > kèn; #73E2 +珣 > xún; #73E3 +珤 > băo; #73E4 +珥 > ĕr; #73E5 +珦 > xiàng; #73E6 +珧 > yáo; #73E7 +珨 > xiá; #73E8 +珩 > héng; #73E9 +珪 > gūi; #73EA +珫 > chōng; #73EB +珬 > xù; #73EC +班 > bān; #73ED +珮 > pèi; #73EE +珰 > dāng; #73F0 +珲 > hún; #73F2 +珳 > wén; #73F3 +珴 > é; #73F4 +珵 > chéng; #73F5 +珶 > tí; #73F6 +珷 > wŭ; #73F7 +珸 > wú; #73F8 +珹 > chéng; #73F9 +珺 > jùn; #73FA +珻 > méi; #73FB +珼 > bèi; #73FC +珽 > tĭng; #73FD 
+現 > xiàn; #73FE +珿 > chùo; #73FF +琀 > hán; #7400 +琁 > xuan; #7401 +琂 > yán; #7402 +球 > qíu; #7403 +琄 > quăn; #7404 +琅 > láng; #7405 +理 > lĭ; #7406 +琇 > xìu; #7407 +琈 > fú; #7408 +琉 > líu; #7409 +琊 > yé; #740A +琋 > xī; #740B +琌 > líng; #740C +琍 > lì; #740D +琎 > jìn; #740E +琏 > lián; #740F +琐 > sŭo; #7410 +琓 > wán; #7413 +琔 > diàn; #7414 +琕 > pín; #7415 +琖 > zhăn; #7416 +琗 > cùi; #7417 +琘 > mín; #7418 +琙 > yù; #7419 +琚 > jū; #741A +琛 > chēn; #741B +琜 > lái; #741C +琝 > wén; #741D +琞 > shèng; #741E +琟 > wéi; #741F +琠 > diăn; #7420 +琡 > chù; #7421 +琢 > zhúo; #7422 +琣 > pĕi; #7423 +琤 > chēng; #7424 +琥 > hŭ; #7425 +琦 > qí; #7426 +琧 > è; #7427 +琨 > kūn; #7428 +琩 > chāng; #7429 +琪 > qí; #742A +琫 > bĕng; #742B +琬 > wăn; #742C +琭 > lù; #742D +琮 > cóng; #742E +琯 > guăn; #742F +琰 > yăn; #7430 +琱 > diāo; #7431 +琲 > bèi; #7432 +琳 > lín; #7433 +琴 > qín; #7434 +琵 > pí; #7435 +琶 > pá; #7436 +琷 > què; #7437 +琸 > zhúo; #7438 +琹 > qín; #7439 +琺 > fà; #743A +琼 > qíong; #743C +琽 > dŭ; #743D +琾 > jiè; #743E +琿 > hún; #743F +瑀 > yŭ; #7440 +瑁 > mào; #7441 +瑂 > méi; #7442 +瑃 > chun; #7443 +瑄 > xuān; #7444 +瑅 > tí; #7445 +瑆 > xīng; #7446 +瑇 > dài; #7447 +瑈 > róu; #7448 +瑉 > mín; #7449 +瑊 > zhēn; #744A +瑋 > wĕi; #744B +瑌 > ruăn; #744C +瑍 > huàn; #744D +瑎 > jiē; #744E +瑏 > chuān; #744F +瑐 > jiăn; #7450 +瑑 > zhuàn; #7451 +瑒 > yáng; #7452 +瑓 > liàn; #7453 +瑔 > quán; #7454 +瑕 > xiá; #7455 +瑖 > duàn; #7456 +瑗 > yuàn; #7457 +瑘 > yé; #7458 +瑙 > năo; #7459 +瑚 > hú; #745A +瑛 > yīng; #745B +瑜 > yú; #745C +瑝 > huáng; #745D +瑞 > rùi; #745E +瑟 > sè; #745F +瑠 > líu; #7460 +瑡 > shi; #7461 +瑢 > róng; #7462 +瑣 > sŭo; #7463 +瑤 > yáo; #7464 +瑥 > wēn; #7465 +瑦 > wū; #7466 +瑧 > jīn; #7467 +瑨 > jìn; #7468 +瑩 > yíng; #7469 +瑪 > mă; #746A +瑫 > tāo; #746B +瑬 > líu; #746C +瑭 > táng; #746D +瑮 > lì; #746E +瑯 > láng; #746F +瑰 > gūi; #7470 +瑱 > zhèn; #7471 +瑲 > qiāng; #7472 +瑳 > cŭo; #7473 +瑴 > jué; #7474 +瑵 > zhăo; #7475 +瑶 > yáo; #7476 +瑷 > ài; #7477 +瑸 > bīn; #7478 +瑹 > tú; #7479 +瑺 > cháng; #747A +瑻 > kūn; #747B +瑼 > 
zhuān; #747C +瑽 > cōng; #747D +瑾 > jĭn; #747E +瑿 > yī; #747F +璀 > cŭi; #7480 +璁 > cōng; #7481 +璂 > qí; #7482 +璃 > lí; #7483 +璄 > yĭng; #7484 +璅 > sŭo; #7485 +璆 > qíu; #7486 +璇 > xuán; #7487 +璈 > áo; #7488 +璉 > lián; #7489 +璊 > mán; #748A +璋 > zhāng; #748B +璌 > yín; #748C +璎 > yīng; #748E +璏 > zhì; #748F +璐 > lù; #7490 +璑 > wú; #7491 +璒 > dēng; #7492 +璓 > xiòu; #7493 +璔 > zēng; #7494 +璕 > xún; #7495 +璖 > qú; #7496 +璗 > dàng; #7497 +璘 > lín; #7498 +璙 > liáo; #7499 +璚 > qíong; #749A +璛 > sù; #749B +璜 > huáng; #749C +璝 > gūi; #749D +璞 > pú; #749E +璟 > jĭng; #749F +璠 > fán; #74A0 +璡 > jìn; #74A1 +璢 > líu; #74A2 +璣 > jī; #74A3 +璥 > jĭng; #74A5 +璦 > ài; #74A6 +璧 > bì; #74A7 +璨 > càn; #74A8 +璩 > qú; #74A9 +璪 > zăo; #74AA +璫 > dāng; #74AB +璬 > jiăo; #74AC +璭 > gùn; #74AD +璮 > tăn; #74AE +璯 > hùi; #74AF +環 > huán; #74B0 +璱 > sè; #74B1 +璲 > sùi; #74B2 +璳 > tián; #74B3 +璵 > yú; #74B5 +璶 > jìn; #74B6 +璷 > lú; #74B7 +璸 > bīn; #74B8 +璹 > shòu; #74B9 +璺 > wèn; #74BA +璻 > zŭi; #74BB +璼 > lán; #74BC +璽 > xĭ; #74BD +璾 > jì; #74BE +璿 > xuán; #74BF +瓀 > ruăn; #74C0 +瓁 > hùo; #74C1 +瓂 > gài; #74C2 +瓃 > léi; #74C3 +瓄 > dú; #74C4 +瓅 > lì; #74C5 +瓆 > zhí; #74C6 +瓇 > róu; #74C7 +瓈 > lí; #74C8 +瓉 > zàn; #74C9 +瓊 > qíong; #74CA +瓋 > zhé; #74CB +瓌 > gūi; #74CC +瓍 > sùi; #74CD +瓎 > là; #74CE +瓏 > lóng; #74CF +瓐 > lú; #74D0 +瓑 > lì; #74D1 +瓒 > zàn; #74D2 +瓓 > làn; #74D3 +瓔 > yīng; #74D4 +瓕 > mí; #74D5 +瓖 > xiāng; #74D6 +瓗 > xī; #74D7 +瓘 > guàn; #74D8 +瓙 > dào; #74D9 +瓚 > zàn; #74DA +瓛 > huán; #74DB +瓜 > guā; #74DC +瓝 > bó; #74DD +瓞 > dié; #74DE +瓟 > báo; #74DF +瓠 > hù; #74E0 +瓡 > zhí; #74E1 +瓢 > piáo; #74E2 +瓣 > bàn; #74E3 +瓤 > ráng; #74E4 +瓥 > lì; #74E5 +瓦 > wă; #74E6 +瓨 > jiāng; #74E8 +瓩 > qián' 'wă; #74E9 +瓪 > făn; #74EA +瓫 > pén; #74EB +瓬 > făng; #74EC +瓭 > dăn; #74ED +瓮 > wèng; #74EE +瓯 > ōu; #74EF +瓳 > hú; #74F3 +瓴 > líng; #74F4 +瓵 > yí; #74F5 +瓶 > píng; #74F6 +瓷 > cí; #74F7 +瓹 > juàn; #74F9 +瓺 > cháng; #74FA +瓻 > chī; #74FB +瓽 > dàng; #74FD +瓾 > mĕng; #74FE +瓿 > pŏu; #74FF +甀 > zhùi; 
#7500 +甁 > píng; #7501 +甂 > biān; #7502 +甃 > zhòu; #7503 +甄 > zhēn; #7504 +甆 > cí; #7506 +甇 > yīng; #7507 +甈 > qì; #7508 +甉 > xián; #7509 +甊 > lŏu; #750A +甋 > dì; #750B +甌 > ōu; #750C +甍 > méng; #750D +甎 > zhuān; #750E +甏 > pèng; #750F +甐 > lín; #7510 +甑 > zèng; #7511 +甒 > wŭ; #7512 +甓 > pì; #7513 +甔 > dān; #7514 +甕 > wèng; #7515 +甖 > yīng; #7516 +甗 > yăn; #7517 +甘 > gān; #7518 +甙 > dài; #7519 +甚 > shén; #751A +甛 > tián; #751B +甜 > tián; #751C +甝 > hān; #751D +甞 > cháng; #751E +生 > shēng; #751F +甠 > qíng; #7520 +甡 > shēng; #7521 +產 > chăn; #7522 +産 > chăn; #7523 +甤 > rúi; #7524 +甥 > shēng; #7525 +甦 > sū; #7526 +甧 > sēn; #7527 +用 > yòng; #7528 +甩 > shuăi; #7529 +甪 > lù; #752A +甫 > fŭ; #752B +甬 > yŏng; #752C +甭 > béng; #752D +甮 > fèng; #752E +甯 > níng; #752F +田 > tián; #7530 +由 > yóu; #7531 +甲 > jiă; #7532 +申 > shēn; #7533 +甴 > zhá; #7534 +电 > diàn; #7535 +甶 > fú; #7536 +男 > nán; #7537 +甸 > diàn; #7538 +甹 > píng; #7539 +町 > tĭng; #753A +画 > huà; #753B +甼 > tĭng; #753C +甽 > quăn; #753D +甾 > zī; #753E +甿 > méng; #753F +畀 > bì; #7540 +畁 > qí; #7541 +畂 > lìu; #7542 +畃 > xún; #7543 +畄 > líu; #7544 +畅 > chàng; #7545 +畆 > mŭ; #7546 +畇 > yún; #7547 +畈 > fàn; #7548 +畉 > fú; #7549 +畊 > gēng; #754A +畋 > tián; #754B +界 > jiè; #754C +畍 > jiè; #754D +畎 > quăn; #754E +畏 > wèi; #754F +畐 > fú; #7550 +畑 > tián; #7551 +畒 > mŭ; #7552 +畔 > pàn; #7554 +畕 > jiāng; #7555 +畖 > wā; #7556 +畗 > dá; #7557 +畘 > nán; #7558 +留 > líu; #7559 +畚 > bĕn; #755A +畛 > zhĕn; #755B +畜 > chù; #755C +畝 > mŭ; #755D +畞 > mŭ; #755E +畟 > cè; #755F +畡 > gāi; #7561 +畢 > bì; #7562 +畣 > dá; #7563 +畤 > zhì; #7564 +略 > lǜe; #7565 +畦 > qí; #7566 +畧 > lǜe; #7567 +畨 > pān; #7568 +番 > fān; #756A +畫 > huà; #756B +畬 > yú; #756C +畭 > yú; #756D +畮 > mŭ; #756E +畯 > jùn; #756F +異 > yì; #7570 +畱 > líu; #7571 +畲 > yú; #7572 +畳 > dié; #7573 +畴 > chóu; #7574 +畵 > huà; #7575 +當 > dāng; #7576 +畷 > chùo; #7577 +畸 > jī; #7578 +畹 > wăn; #7579 +畺 > jiāng; #757A +畻 > shéng; #757B +畼 > chàng; #757C +畽 > tuăn; #757D +畾 > léi; #757E +畿 > jī; 
#757F +疀 > chā; #7580 +疁 > líu; #7581 +疃 > tuăn; #7583 +疄 > lín; #7584 +疅 > jiāng; #7585 +疆 > jiāng; #7586 +疇 > chóu; #7587 +疈 > bò; #7588 +疉 > dié; #7589 +疊 > dié; #758A +疋 > pĭ; #758B +疌 > niè; #758C +疍 > dàn; #758D +疎 > shū; #758E +疏 > shū; #758F +疐 > zhì; #7590 +疑 > yí; #7591 +疒 > chuáng; #7592 +疓 > năi; #7593 +疔 > dīng; #7594 +疕 > bĭ; #7595 +疖 > jié; #7596 +疗 > liáo; #7597 +疘 > gōng; #7598 +疙 > gē; #7599 +疚 > jìu; #759A +疛 > zhŏu; #759B +疜 > xià; #759C +疝 > shàn; #759D +疞 > xū; #759E +疟 > nǜe; #759F +疠 > lì; #75A0 +疡 > yáng; #75A1 +疢 > chèn; #75A2 +疣 > yóu; #75A3 +疤 > bā; #75A4 +疥 > jiè; #75A5 +疦 > jué; #75A6 +疧 > zhī; #75A7 +疨 > xiā; #75A8 +疩 > cùi; #75A9 +疪 > bì; #75AA +疫 > yì; #75AB +疬 > lì; #75AC +疭 > zòng; #75AD +疮 > chuāng; #75AE +疯 > fēng; #75AF +疰 > zhù; #75B0 +疱 > pào; #75B1 +疲 > pí; #75B2 +疳 > gān; #75B3 +疴 > kē; #75B4 +疵 > cī; #75B5 +疶 > xiè; #75B6 +疷 > qí; #75B7 +疸 > dăn; #75B8 +疹 > zhĕn; #75B9 +疺 > fá; #75BA +疻 > zhĭ; #75BB +疼 > téng; #75BC +疽 > jū; #75BD +疾 > jí; #75BE +疿 > fèi; #75BF +痀 > qú; #75C0 +痁 > diàn; #75C1 +痂 > jiā; #75C2 +痃 > xián; #75C3 +痄 > chá; #75C4 +病 > bìng; #75C5 +痆 > nì; #75C6 +症 > zhèng; #75C7 +痈 > yōng; #75C8 +痉 > jìng; #75C9 +痊 > quán; #75CA +痋 > chóng; #75CB +痌 > tōng; #75CC +痍 > yí; #75CD +痎 > kāi; #75CE +痏 > wĕi; #75CF +痐 > húi; #75D0 +痑 > dŭo; #75D1 +痒 > yăng; #75D2 +痓 > chì; #75D3 +痔 > zhì; #75D4 +痕 > hén; #75D5 +痖 > yă; #75D6 +痗 > mèi; #75D7 +痘 > dòu; #75D8 +痙 > jìng; #75D9 +痚 > xiāo; #75DA +痛 > tòng; #75DB +痜 > tū; #75DC +痝 > máng; #75DD +痞 > pĭ; #75DE +痟 > xiāo; #75DF +痠 > suān; #75E0 +痡 > pū; #75E1 +痢 > lì; #75E2 +痣 > zhì; #75E3 +痤 > cúo; #75E4 +痥 > dúo; #75E5 +痦 > wù; #75E6 +痧 > shā; #75E7 +痨 > láo; #75E8 +痩 > shòu; #75E9 +痪 > huàn; #75EA +痫 > xián; #75EB +痬 > yì; #75EC +痭 > péng; #75ED +痮 > zhàng; #75EE +痯 > guăn; #75EF +痰 > tán; #75F0 +痱 > fèi; #75F1 +痲 > má; #75F2 +痳 > lín; #75F3 +痴 > chī; #75F4 +痵 > jì; #75F5 +痶 > diăn; #75F6 +痷 > ān; #75F7 +痸 > chì; #75F8 +痹 > bì; #75F9 +痺 > bēi; #75FA +痻 > mín; #75FB +痼 > gū; 
#75FC +痽 > dūi; #75FD +痾 > ē; #75FE +痿 > wĕi; #75FF +瘀 > yū; #7600 +瘁 > cùi; #7601 +瘂 > yă; #7602 +瘃 > zhŭ; #7603 +瘄 > cù; #7604 +瘅 > dàn; #7605 +瘆 > shèn; #7606 +瘇 > zhŭng; #7607 +瘈 > jì; #7608 +瘉 > yù; #7609 +瘊 > hóu; #760A +瘋 > fēng; #760B +瘌 > là; #760C +瘍 > yáng; #760D +瘎 > shèn; #760E +瘏 > tú; #760F +瘐 > yŭ; #7610 +瘑 > guā; #7611 +瘒 > wén; #7612 +瘓 > huàn; #7613 +瘔 > kù; #7614 +瘕 > jiă; #7615 +瘖 > yīn; #7616 +瘗 > yì; #7617 +瘘 > lǘ; #7618 +瘙 > sāo; #7619 +瘚 > jué; #761A +瘛 > chì; #761B +瘜 > xí; #761C +瘝 > guān; #761D +瘞 > yì; #761E +瘟 > wēn; #761F +瘠 > jí; #7620 +瘡 > chuāng; #7621 +瘢 > bān; #7622 +瘣 > lĕi; #7623 +瘤 > líu; #7624 +瘥 > chài; #7625 +瘦 > shòu; #7626 +瘧 > nǜe; #7627 +瘨 > diān; #7628 +瘩 > dā; #7629 +瘪 > piē; #762A +瘫 > tān; #762B +瘬 > zhàng; #762C +瘭 > biāo; #762D +瘮 > shen; #762E +瘯 > cù; #762F +瘰 > lŭo; #7630 +瘱 > yì; #7631 +瘲 > zòng; #7632 +瘳 > chōu; #7633 +瘴 > zhàng; #7634 +瘵 > zhài; #7635 +瘶 > sòu; #7636 +瘷 > sŭo; #7637 +瘸 > qué; #7638 +瘹 > diào; #7639 +瘺 > lòu; #763A +瘻 > lǘ; #763B +瘼 > mò; #763C +瘽 > jìn; #763D +瘾 > yĭn; #763E +瘿 > yĭng; #763F +癀 > huáng; #7640 +癁 > fú; #7641 +療 > liáo; #7642 +癃 > lóng; #7643 +癄 > qiáo; #7644 +癅 > líu; #7645 +癆 > láo; #7646 +癇 > xián; #7647 +癈 > fèi; #7648 +癉 > dàn; #7649 +癊 > yìn; #764A +癋 > hè; #764B +癌 > yán; #764C +癍 > bān; #764D +癎 > xián; #764E +癏 > guān; #764F +癐 > guài; #7650 +癑 > nóng; #7651 +癒 > yù; #7652 +癓 > wéi; #7653 +癔 > yì; #7654 +癕 > yōng; #7655 +癖 > pĭ; #7656 +癗 > lĕi; #7657 +癘 > lì; #7658 +癙 > shŭ; #7659 +癚 > dàn; #765A +癛 > lĭn; #765B +癜 > diàn; #765C +癝 > lĭn; #765D +癞 > lài; #765E +癟 > piē; #765F +癠 > jì; #7660 +癡 > chī; #7661 +癢 > yăng; #7662 +癣 > xiăn; #7663 +癤 > jié; #7664 +癥 > zhēng; #7665 +癧 > lì; #7667 +癨 > hùo; #7668 +癩 > lài; #7669 +癫 > diān; #766B +癬 > xiăn; #766C +癭 > yĭng; #766D +癮 > yĭn; #766E +癯 > qú; #766F +癰 > yōng; #7670 +癱 > tān; #7671 +癲 > diān; #7672 +癳 > lŭo; #7673 +癴 > lǘan; #7674 +癵 > luán; #7675 +癶 > bō; #7676 +癸 > gŭi; #7678 +癹 > pō; #7679 +発 > fā; #767A +登 > dēng; 
#767B +發 > fā; #767C +白 > bái; #767D +百 > băi; #767E +癿 > qié; #767F +皀 > bī; #7680 +皁 > zào; #7681 +皂 > zào; #7682 +皃 > mào; #7683 +的 > de; #7684 +皅 > pā; #7685 +皆 > jiē; #7686 +皇 > huáng; #7687 +皈 > gūi; #7688 +皉 > cĭ; #7689 +皊 > líng; #768A +皋 > gāo; #768B +皌 > mò; #768C +皍 > jí; #768D +皎 > jiăo; #768E +皏 > pĕng; #768F +皐 > gāo; #7690 +皑 > ái; #7691 +皒 > é; #7692 +皓 > hào; #7693 +皔 > hàn; #7694 +皕 > bī; #7695 +皖 > wăn; #7696 +皗 > chóu; #7697 +皘 > qiàn; #7698 +皙 > xī; #7699 +皚 > ái; #769A +皛 > jĭong; #769B +皜 > hào; #769C +皝 > huăng; #769D +皞 > hào; #769E +皟 > zé; #769F +皠 > cŭi; #76A0 +皡 > hào; #76A1 +皢 > xiăo; #76A2 +皣 > yè; #76A3 +皤 > pó; #76A4 +皥 > hào; #76A5 +皦 > jiăo; #76A6 +皧 > ài; #76A7 +皨 > xīng; #76A8 +皩 > huàng; #76A9 +皪 > lì; #76AA +皫 > piăo; #76AB +皬 > hè; #76AC +皭 > jiào; #76AD +皮 > pí; #76AE +皯 > găn; #76AF +皰 > pào; #76B0 +皱 > zhòu; #76B1 +皲 > jūn; #76B2 +皳 > qíu; #76B3 +皴 > cūn; #76B4 +皵 > què; #76B5 +皶 > zhā; #76B6 +皷 > gŭ; #76B7 +皸 > jūn; #76B8 +皹 > jūn; #76B9 +皺 > zhòu; #76BA +皻 > zhā; #76BB +皼 > gŭ; #76BC +皽 > zhăn; #76BD +皾 > dú; #76BE +皿 > mĭn; #76BF +盀 > qĭ; #76C0 +盁 > yíng; #76C1 +盂 > yú; #76C2 +盃 > bēi; #76C3 +盄 > zhāo; #76C4 +盅 > zhōng; #76C5 +盆 > pén; #76C6 +盇 > hé; #76C7 +盈 > yíng; #76C8 +盉 > hé; #76C9 +益 > yì; #76CA +盋 > bō; #76CB +盌 > wăn; #76CC +盍 > hé; #76CD +盎 > àng; #76CE +盏 > zhăn; #76CF +盐 > yán; #76D0 +监 > jiān; #76D1 +盒 > hé; #76D2 +盓 > yū; #76D3 +盔 > kūi; #76D4 +盕 > fàn; #76D5 +盖 > gài; #76D6 +盗 > dào; #76D7 +盘 > pán; #76D8 +盙 > fŭ; #76D9 +盚 > qíu; #76DA +盛 > shèng; #76DB +盜 > dào; #76DC +盝 > lù; #76DD +盞 > zhăn; #76DE +盟 > méng; #76DF +盠 > lĭ; #76E0 +盡 > jìn; #76E1 +盢 > xù; #76E2 +監 > jiān; #76E3 +盤 > pán; #76E4 +盥 > guàn; #76E5 +盦 > ān; #76E6 +盧 > lú; #76E7 +盨 > shŭ; #76E8 +盩 > zhōu; #76E9 +盪 > dàng; #76EA +盫 > ān; #76EB +盬 > gŭ; #76EC +盭 > lì; #76ED +目 > mù; #76EE +盯 > chéng; #76EF +盰 > găn; #76F0 +盱 > xū; #76F1 +盲 > máng; #76F2 +盳 > máng; #76F3 +直 > zhí; #76F4 +盵 > qì; #76F5 +盶 > ruăn; #76F6 +盷 > tián; #76F7 +相 > xiāng; 
#76F8 +盹 > dùn; #76F9 +盺 > xīn; #76FA +盻 > xì; #76FB +盼 > pàn; #76FC +盽 > fēng; #76FD +盾 > dùn; #76FE +盿 > mín; #76FF +眀 > míng; #7700 +省 > shĕng; #7701 +眂 > shì; #7702 +眃 > yún; #7703 +眄 > miăn; #7704 +眅 > pān; #7705 +眆 > făng; #7706 +眇 > miăo; #7707 +眈 > dān; #7708 +眉 > méi; #7709 +眊 > mào; #770A +看 > kàn; #770B +県 > xiàn; #770C +眍 > ōu; #770D +眎 > shì; #770E +眏 > yāng; #770F +眐 > zhēng; #7710 +眑 > yăo; #7711 +眒 > shèn; #7712 +眓 > hùo; #7713 +眔 > dà; #7714 +眕 > zhĕn; #7715 +眖 > kuàng; #7716 +眗 > jū; #7717 +眘 > shèn; #7718 +眙 > chì; #7719 +眚 > shĕng; #771A +眛 > mèi; #771B +眜 > mò; #771C +眝 > zhù; #771D +眞 > zhēn; #771E +真 > zhēn; #771F +眠 > mián; #7720 +眡 > dī; #7721 +眢 > yuān; #7722 +眣 > dié; #7723 +眤 > yí; #7724 +眥 > zì; #7725 +眦 > zì; #7726 +眧 > chăo; #7727 +眨 > zhă; #7728 +眩 > xuàn; #7729 +眪 > bĭng; #772A +眫 > mĭ; #772B +眬 > lóng; #772C +眭 > sūi; #772D +眮 > dòng; #772E +眯 > mĭ; #772F +眰 > dié; #7730 +眱 > yí; #7731 +眲 > èr; #7732 +眳 > mĭng; #7733 +眴 > xuàn; #7734 +眵 > chī; #7735 +眶 > kuàng; #7736 +眷 > juàn; #7737 +眸 > móu; #7738 +眹 > zhèn; #7739 +眺 > tiào; #773A +眻 > yáng; #773B +眼 > yăn; #773C +眽 > mò; #773D +眾 > zhòng; #773E +眿 > mài; #773F +着 > zháo; #7740 +睁 > zhēng; #7741 +睂 > méi; #7742 +睃 > jùn; #7743 +睄 > shào; #7744 +睅 > hàn; #7745 +睆 > huăn; #7746 +睇 > dì; #7747 +睈 > chĕng; #7748 +睉 > cūo; #7749 +睊 > juàn; #774A +睋 > é; #774B +睌 > wăn; #774C +睍 > xiàn; #774D +睎 > xī; #774E +睏 > kùn; #774F +睐 > lài; #7750 +睑 > jiăn; #7751 +睒 > shăn; #7752 +睓 > tiăn; #7753 +睔 > hŭn; #7754 +睕 > wăn; #7755 +睖 > líng; #7756 +睗 > shì; #7757 +睘 > qíong; #7758 +睙 > liè; #7759 +睚 > yái; #775A +睛 > jīng; #775B +睜 > zhēng; #775C +睝 > lí; #775D +睞 > lài; #775E +睟 > sùi; #775F +睠 > juàn; #7760 +睡 > shùi; #7761 +睢 > sūi; #7762 +督 > dū; #7763 +睤 > bì; #7764 +睥 > bì; #7765 +睦 > mù; #7766 +睧 > hūn; #7767 +睨 > nì; #7768 +睩 > lù; #7769 +睪 > yì; #776A +睫 > jié; #776B +睬 > căi; #776C +睭 > zhŏu; #776D +睮 > yú; #776E +睯 > hūn; #776F +睰 > mà; #7770 +睱 > xià; #7771 +睲 > xĭng; #7772 +睳 > xī; 
#7773 +睴 > gùn; #7774 +睵 > cai; #7775 +睶 > chŭn; #7776 +睷 > jiān; #7777 +睸 > mèi; #7778 +睹 > dŭ; #7779 +睺 > hóu; #777A +睻 > xuān; #777B +睼 > tì; #777C +睽 > kúi; #777D +睾 > gāo; #777E +睿 > rùi; #777F +瞀 > mòu; #7780 +瞁 > xù; #7781 +瞂 > fā; #7782 +瞃 > wēn; #7783 +瞄 > miáo; #7784 +瞅 > chŏu; #7785 +瞆 > kùi; #7786 +瞇 > mī; #7787 +瞈 > wĕng; #7788 +瞉 > kòu; #7789 +瞊 > dàng; #778A +瞋 > chēn; #778B +瞌 > kē; #778C +瞍 > sŏu; #778D +瞎 > xiā; #778E +瞏 > qíong; #778F +瞐 > mào; #7790 +瞑 > míng; #7791 +瞒 > mán; #7792 +瞓 > shùi; #7793 +瞔 > zé; #7794 +瞕 > zhàng; #7795 +瞖 > yì; #7796 +瞗 > diāo; #7797 +瞘 > ōu; #7798 +瞙 > mò; #7799 +瞚 > shùn; #779A +瞛 > cōng; #779B +瞜 > lōu; #779C +瞝 > chī; #779D +瞞 > mán; #779E +瞟 > piăo; #779F +瞠 > chēng; #77A0 +瞡 > jì; #77A1 +瞢 > méng; #77A2 +瞤 > rún; #77A4 +瞥 > piē; #77A5 +瞦 > xī; #77A6 +瞧 > qiáo; #77A7 +瞨 > pú; #77A8 +瞩 > zhŭ; #77A9 +瞪 > dèng; #77AA +瞫 > shĕn; #77AB +瞬 > shùn; #77AC +瞭 > liăo; #77AD +瞮 > chè; #77AE +瞯 > xián; #77AF +瞰 > kàn; #77B0 +瞱 > yè; #77B1 +瞲 > xù; #77B2 +瞳 > tóng; #77B3 +瞴 > móu; #77B4 +瞵 > lín; #77B5 +瞶 > kùi; #77B6 +瞷 > xián; #77B7 +瞸 > yè; #77B8 +瞹 > ài; #77B9 +瞺 > hùi; #77BA +瞻 > zhān; #77BB +瞼 > jiăn; #77BC +瞽 > gŭ; #77BD +瞾 > zhào; #77BE +瞿 > qū; #77BF +矀 > wéi; #77C0 +矁 > chŏu; #77C1 +矂 > sào; #77C2 +矃 > nĭng; #77C3 +矄 > xūn; #77C4 +矅 > yào; #77C5 +矆 > hùo; #77C6 +矇 > méng; #77C7 +矈 > mián; #77C8 +矉 > bīn; #77C9 +矊 > mián; #77CA +矋 > lì; #77CB +矌 > kuàng; #77CC +矍 > jué; #77CD +矎 > xuān; #77CE +矏 > mián; #77CF +矐 > hùo; #77D0 +矑 > lú; #77D1 +矒 > méng; #77D2 +矓 > lóng; #77D3 +矔 > guàn; #77D4 +矕 > măn; #77D5 +矖 > xĭ; #77D6 +矗 > chù; #77D7 +矘 > tăng; #77D8 +矙 > kàn; #77D9 +矚 > zhŭ; #77DA +矛 > máo; #77DB +矜 > jīn; #77DC +矝 > lín; #77DD +矞 > yù; #77DE +矟 > shùo; #77DF +矠 > cè; #77E0 +矡 > jué; #77E1 +矢 > shĭ; #77E2 +矣 > yĭ; #77E3 +矤 > shĕn; #77E4 +知 > zhī; #77E5 +矦 > hóu; #77E6 +矧 > shĕn; #77E7 +矨 > yĭng; #77E8 +矩 > jŭ; #77E9 +矪 > zhōu; #77EA +矫 > jiăo; #77EB +矬 > cúo; #77EC +短 > duăn; #77ED +矮 > ăi; #77EE +矯 > jiăo; #77EF 
+矰 > zēng; #77F0 +矱 > hùo; #77F1 +矲 > băi; #77F2 +石 > shí; #77F3 +矴 > dìng; #77F4 +矵 > qì; #77F5 +矶 > jī; #77F6 +矷 > zĭ; #77F7 +矸 > gān; #77F8 +矹 > wù; #77F9 +矺 > tūo; #77FA +矻 > kù; #77FB +矼 > qiāng; #77FC +矽 > xì; #77FD +矾 > fán; #77FE +矿 > kuàng; #77FF +砀 > dàng; #7800 +码 > mă; #7801 +砂 > shā; #7802 +砃 > dān; #7803 +砄 > jué; #7804 +砅 > lì; #7805 +砆 > fū; #7806 +砇 > mín; #7807 +砈 > nŭo; #7808 +砉 > hùo; #7809 +砊 > kàng; #780A +砋 > zhĭ; #780B +砌 > qì; #780C +砍 > kăn; #780D +砎 > jiè; #780E +砏 > fēn; #780F +砐 > è; #7810 +砑 > yà; #7811 +砒 > pī; #7812 +砓 > zhé; #7813 +研 > yán; #7814 +砕 > sùi; #7815 +砖 > zhuān; #7816 +砗 > chē; #7817 +砘 > dùn; #7818 +砙 > pān; #7819 +砚 > yàn; #781A +砜 > fēng; #781C +砝 > fá; #781D +砞 > mò; #781E +砟 > zhà; #781F +砠 > qū; #7820 +砡 > yù; #7821 +砢 > lŭo; #7822 +砣 > túo; #7823 +砤 > túo; #7824 +砥 > dĭ; #7825 +砦 > zhài; #7826 +砧 > zhēn; #7827 +砨 > ài; #7828 +砩 > fèi; #7829 +砪 > mŭ; #782A +砫 > zhŭ; #782B +砬 > lì; #782C +砭 > biān; #782D +砮 > nŭ; #782E +砯 > pīng; #782F +砰 > pēng; #7830 +砱 > líng; #7831 +砲 > pào; #7832 +砳 > lè; #7833 +破 > pò; #7834 +砵 > bō; #7835 +砶 > pò; #7836 +砷 > shēn; #7837 +砸 > zá; #7838 +砹 > nŭo; #7839 +砺 > lì; #783A +砻 > lóng; #783B +砼 > tóng; #783C +砾 > lì; #783E +础 > chŭ; #7840 +硁 > kēng; #7841 +硂 > quán; #7842 +硃 > zhū; #7843 +硄 > kuāng; #7844 +硅 > hùo; #7845 +硆 > è; #7846 +硇 > náo; #7847 +硈 > jiá; #7848 +硉 > lù; #7849 +硊 > wĕi; #784A +硋 > ài; #784B +硌 > lùo; #784C +硍 > kèn; #784D +硎 > xíng; #784E +硏 > yán; #784F +硐 > tóng; #7850 +硑 > pēng; #7851 +硒 > xī; #7852 +硔 > hóng; #7854 +硕 > shùo; #7855 +硖 > xiá; #7856 +硗 > qiāo; #7857 +硙 > wèi; #7859 +硚 > qiáo; #785A +硜 > kēng; #785C +硝 > xiāo; #785D +硞 > què; #785E +硟 > chàn; #785F +硠 > lăng; #7860 +硡 > hóng; #7861 +硢 > yú; #7862 +硣 > xiāo; #7863 +硤 > xiá; #7864 +硥 > măng; #7865 +硦 > lòng; #7866 +硧 > ĭong; #7867 +硨 > chē; #7868 +硩 > chè; #7869 +硪 > é; #786A +硫 > líu; #786B +硬 > yìng; #786C +硭 > máng; #786D +确 > què; #786E +硯 > yàn; #786F +硰 > shā; #7870 +硱 > kŭn; #7871 +硲 > yù; 
#7872 +硵 > lŭ; #7875 +硶 > chĕn; #7876 +硷 > jiăn; #7877 +硸 > nuè; #7878 +硹 > sōng; #7879 +硺 > zhúo; #787A +硻 > kēng; #787B +硼 > péng; #787C +硽 > yăn; #787D +硾 > zhùi; #787E +硿 > kōng; #787F +碀 > céng; #7880 +碁 > qí; #7881 +碂 > zòng; #7882 +碃 > qìng; #7883 +碄 > lín; #7884 +碅 > jūn; #7885 +碆 > bō; #7886 +碇 > dìng; #7887 +碈 > mín; #7888 +碉 > diāo; #7889 +碊 > jiān; #788A +碋 > hè; #788B +碌 > lù; #788C +碍 > ài; #788D +碎 > sùi; #788E +碏 > què; #788F +碐 > líng; #7890 +碑 > bēi; #7891 +碒 > yín; #7892 +碓 > dùi; #7893 +碔 > wŭ; #7894 +碕 > qí; #7895 +碖 > lùn; #7896 +碗 > wăn; #7897 +碘 > diăn; #7898 +碙 > gāng; #7899 +碚 > péi; #789A +碛 > qì; #789B +碜 > chĕn; #789C +碝 > ruăn; #789D +碞 > yán; #789E +碟 > dié; #789F +碠 > dìng; #78A0 +碡 > dú; #78A1 +碢 > túo; #78A2 +碣 > jié; #78A3 +碤 > yīng; #78A4 +碥 > biăn; #78A5 +碦 > kè; #78A6 +碧 > bì; #78A7 +碨 > wēi; #78A8 +碩 > shùo; #78A9 +碪 > zhēn; #78AA +碫 > duàn; #78AB +碬 > xiá; #78AC +碭 > dàng; #78AD +碮 > tí; #78AE +碯 > năo; #78AF +碰 > pèng; #78B0 +碱 > jiăn; #78B1 +碲 > dì; #78B2 +碳 > tàn; #78B3 +碴 > chá; #78B4 +碶 > qì; #78B6 +碸 > fēng; #78B8 +碹 > xuàn; #78B9 +確 > què; #78BA +碻 > què; #78BB +碼 > mă; #78BC +碽 > gōng; #78BD +碾 > niàn; #78BE +碿 > sù; #78BF +磀 > é; #78C0 +磁 > cí; #78C1 +磂 > lìu; #78C2 +磃 > sī; #78C3 +磄 > táng; #78C4 +磅 > bàng; #78C5 +磆 > huá; #78C6 +磇 > pī; #78C7 +磈 > wĕi; #78C8 +磉 > săng; #78C9 +磊 > lĕi; #78CA +磋 > cūo; #78CB +磌 > zhēn; #78CC +磍 > xiá; #78CD +磎 > qī; #78CE +磏 > lián; #78CF +磐 > pán; #78D0 +磑 > wèi; #78D1 +磒 > yŭn; #78D2 +磓 > dūi; #78D3 +磔 > zhé; #78D4 +磕 > kē; #78D5 +磖 > lā; #78D6 +磘 > qìng; #78D8 +磙 > gŭn; #78D9 +磚 > zhuān; #78DA +磛 > chán; #78DB +磜 > qì; #78DC +磝 > áo; #78DD +磞 > pēng; #78DE +磟 > lù; #78DF +磠 > lŭ; #78E0 +磡 > kàn; #78E1 +磢 > qiăng; #78E2 +磣 > chĕn; #78E3 +磤 > yĭn; #78E4 +磥 > lĕi; #78E5 +磦 > biāo; #78E6 +磧 > qì; #78E7 +磨 > mó; #78E8 +磩 > qī; #78E9 +磪 > cūi; #78EA +磫 > zōng; #78EB +磬 > qìng; #78EC +磭 > chùo; #78ED +磯 > jī; #78EF +磰 > shàn; #78F0 +磱 > láo; #78F1 +磲 > qú; #78F2 +磳 > zēng; #78F3 +磴 > 
dèng; #78F4 +磵 > jiàn; #78F5 +磶 > xì; #78F6 +磷 > lìn; #78F7 +磸 > dìng; #78F8 +磹 > diàn; #78F9 +磺 > huáng; #78FA +磻 > pán; #78FB +磼 > zá; #78FC +磽 > qiāo; #78FD +磾 > dī; #78FE +磿 > lì; #78FF +礁 > jiāo; #7901 +礃 > zhăng; #7903 +礄 > qiáo; #7904 +礅 > dūn; #7905 +礆 > xiăn; #7906 +礇 > yù; #7907 +礈 > zhùi; #7908 +礉 > hé; #7909 +礊 > hùo; #790A +礋 > zhái; #790B +礌 > lèi; #790C +礍 > kĕ; #790D +礎 > chŭ; #790E +礏 > jí; #790F +礐 > què; #7910 +礑 > dàng; #7911 +礒 > yĭ; #7912 +礓 > jiāng; #7913 +礔 > pì; #7914 +礕 > pī; #7915 +礖 > yù; #7916 +礗 > pīn; #7917 +礘 > qì; #7918 +礙 > ài; #7919 +礚 > kài; #791A +礛 > jiān; #791B +礜 > yù; #791C +礝 > ruăn; #791D +礞 > méng; #791E +礟 > pào; #791F +礠 > cí; #7920 +礣 > miè; #7923 +礤 > că; #7924 +礥 > xián; #7925 +礦 > kuàng; #7926 +礧 > lèi; #7927 +礨 > lĕi; #7928 +礩 > zhì; #7929 +礪 > lì; #792A +礫 > lì; #792B +礬 > fán; #792C +礭 > què; #792D +礮 > pào; #792E +礯 > yīng; #792F +礰 > lì; #7930 +礱 > lóng; #7931 +礲 > lóng; #7932 +礳 > mò; #7933 +礴 > bó; #7934 +礵 > shuāng; #7935 +礶 > guàn; #7936 +礷 > lán; #7937 +礸 > zăn; #7938 +礹 > yán; #7939 +示 > shì; #793A +礻 > shì' 'zì' 'páng; #793B +礼 > lĭ; #793C +礽 > réng; #793D +社 > shè; #793E +礿 > yuè; #793F +祀 > sì; #7940 +祁 > qí; #7941 +祂 > tā; #7942 +祃 > mà; #7943 +祄 > xiè; #7944 +祅 > xiān; #7945 +祆 > xiān; #7946 +祇 > zhī; #7947 +祈 > qí; #7948 +祉 > zhĭ; #7949 +祊 > bēng; #794A +祋 > dùi; #794B +祌 > zhòng; #794C +祎 > yī; #794E +祏 > shí; #794F +祐 > yòu; #7950 +祑 > zhì; #7951 +祒 > tiáo; #7952 +祓 > fú; #7953 +祔 > fù; #7954 +祕 > mì; #7955 +祖 > zŭ; #7956 +祗 > zhī; #7957 +祘 > suàn; #7958 +祙 > mèi; #7959 +祚 > zùo; #795A +祛 > qū; #795B +祜 > hù; #795C +祝 > zhù; #795D +神 > shén; #795E +祟 > sùi; #795F +祠 > cí; #7960 +祡 > chái; #7961 +祢 > mí; #7962 +祣 > lǚ; #7963 +祤 > yŭ; #7964 +祥 > xiáng; #7965 +祦 > wú; #7966 +祧 > tiāo; #7967 +票 > piào; #7968 +祩 > zhū; #7969 +祪 > gŭi; #796A +祫 > xiá; #796B +祬 > zhī; #796C +祭 > jì; #796D +祮 > gào; #796E +祯 > zhēn; #796F +祰 > gào; #7970 +祱 > shùi; #7971 +祲 > jīn; #7972 +祳 > chĕn; #7973 +祴 > gāi; #7974 +祵 
> kŭn; #7975 +祶 > dì; #7976 +祷 > dăo; #7977 +祸 > hùo; #7978 +祹 > táo; #7979 +祺 > qí; #797A +祻 > gù; #797B +祼 > guàn; #797C +祽 > zùi; #797D +祾 > líng; #797E +祿 > lù; #797F +禀 > bĭng; #7980 +禁 > jìn; #7981 +禂 > dăo; #7982 +禃 > zhí; #7983 +禄 > lù; #7984 +禅 > shàn; #7985 +禆 > bēi; #7986 +禇 > zhĕ; #7987 +禈 > hūi; #7988 +禉 > yŏu; #7989 +禊 > xì; #798A +禋 > yīn; #798B +禌 > zī; #798C +禍 > hùo; #798D +禎 > zhēn; #798E +福 > fú; #798F +禐 > yuàn; #7990 +禑 > wú; #7991 +禒 > xiăn; #7992 +禓 > yáng; #7993 +禔 > tí; #7994 +禕 > yī; #7995 +禖 > méi; #7996 +禗 > sī; #7997 +禘 > dì; #7998 +禚 > zhúo; #799A +禛 > zhēn; #799B +禜 > yŏng; #799C +禝 > jí; #799D +禞 > gào; #799E +禟 > táng; #799F +禠 > sī; #79A0 +禡 > mà; #79A1 +禢 > tā; #79A2 +禤 > xuān; #79A4 +禥 > qí; #79A5 +禦 > yù; #79A6 +禧 > xī; #79A7 +禨 > jī; #79A8 +禩 > sì; #79A9 +禪 > chán; #79AA +禫 > tăn; #79AB +禬 > kuài; #79AC +禭 > sùi; #79AD +禮 > lĭ; #79AE +禯 > nóng; #79AF +禰 > nĭ; #79B0 +禱 > dăo; #79B1 +禲 > lì; #79B2 +禳 > ráng; #79B3 +禴 > yuè; #79B4 +禵 > tí; #79B5 +禶 > zăn; #79B6 +禷 > lèi; #79B7 +禸 > róu; #79B8 +禹 > yŭ; #79B9 +禺 > yú; #79BA +离 > chī; #79BB +禼 > xiè; #79BC +禽 > qín; #79BD +禾 > hé; #79BE +禿 > tū; #79BF +秀 > xìu; #79C0 +私 > sī; #79C1 +秂 > rén; #79C2 +秃 > tū; #79C3 +秄 > zĭ; #79C4 +秅 > chá; #79C5 +秆 > găn; #79C6 +秇 > yì; #79C7 +秈 > xiān; #79C8 +秉 > bĭng; #79C9 +秊 > nián; #79CA +秋 > qīu; #79CB +秌 > qīu; #79CC +种 > chóng; #79CD +秎 > fén; #79CE +秏 > hào; #79CF +秐 > yún; #79D0 +科 > kē; #79D1 +秒 > miăo; #79D2 +秓 > zhī; #79D3 +秔 > gēng; #79D4 +秕 > bĭ; #79D5 +秖 > zhī; #79D6 +秗 > yù; #79D7 +秘 > mì; #79D8 +秙 > kù; #79D9 +秚 > bàn; #79DA +秛 > pī; #79DB +秜 > ní; #79DC +秝 > lì; #79DD +秞 > yóu; #79DE +租 > zū; #79DF +秠 > pī; #79E0 +秡 > bá; #79E1 +秢 > líng; #79E2 +秣 > mò; #79E3 +秤 > chèng; #79E4 +秥 > nián; #79E5 +秦 > qín; #79E6 +秧 > yāng; #79E7 +秨 > zúo; #79E8 +秩 > zhì; #79E9 +秪 > zhī; #79EA +秫 > shú; #79EB +秬 > jù; #79EC +秭 > zĭ; #79ED +秮 > húo; #79EE +积 > jī; #79EF +称 > chēng; #79F0 +秱 > tóng; #79F1 +秲 > zhì; #79F2 +秳 > húo; #79F3 +秴 > hé; #79F4 +秵 > 
yīn; #79F5 +秶 > zī; #79F6 +秷 > zhí; #79F7 +秸 > jiē; #79F8 +秹 > rĕn; #79F9 +秺 > dù; #79FA +移 > yí; #79FB +秼 > zhū; #79FC +秽 > hùi; #79FD +秾 > nóng; #79FE +秿 > fŭ; #79FF +稀 > xī; #7A00 +稁 > kăo; #7A01 +稂 > láng; #7A02 +稃 > fū; #7A03 +稄 > zè; #7A04 +稅 > shùi; #7A05 +稆 > lǚ; #7A06 +稇 > kŭn; #7A07 +稈 > găn; #7A08 +稉 > gēng; #7A09 +稊 > tí; #7A0A +程 > chéng; #7A0B +稌 > tú; #7A0C +稍 > shāo; #7A0D +税 > shùi; #7A0E +稏 > yà; #7A0F +稐 > lŭn; #7A10 +稑 > lù; #7A11 +稒 > gù; #7A12 +稓 > zúo; #7A13 +稔 > rĕn; #7A14 +稕 > zhùn; #7A15 +稖 > bàng; #7A16 +稗 > bài; #7A17 +稘 > jī; #7A18 +稙 > zhí; #7A19 +稚 > zhì; #7A1A +稛 > kŭn; #7A1B +稜 > léng; #7A1C +稝 > péng; #7A1D +稞 > kē; #7A1E +稟 > bĭng; #7A1F +稠 > chóu; #7A20 +稡 > zú; #7A21 +稢 > yù; #7A22 +稣 > sū; #7A23 +稤 > lǜe; #7A24 +稦 > yī; #7A26 +稧 > xì; #7A27 +稨 > biān; #7A28 +稩 > jì; #7A29 +稪 > fù; #7A2A +稫 > bī; #7A2B +稬 > nùo; #7A2C +稭 > jiē; #7A2D +種 > zhŏng; #7A2E +稯 > zōng; #7A2F +稰 > xū; #7A30 +稱 > chēng; #7A31 +稲 > dào; #7A32 +稳 > wĕn; #7A33 +稴 > lián; #7A34 +稵 > zī; #7A35 +稶 > yù; #7A36 +稷 > jì; #7A37 +稸 > xù; #7A38 +稹 > zhĕn; #7A39 +稺 > zhì; #7A3A +稻 > dào; #7A3B +稼 > jià; #7A3C +稽 > jī; #7A3D +稾 > găo; #7A3E +稿 > găo; #7A3F +穀 > gŭ; #7A40 +穁 > róng; #7A41 +穂 > sùi; #7A42 +穄 > jì; #7A44 +穅 > kāng; #7A45 +穆 > mù; #7A46 +穇 > shān; #7A47 +穈 > mén; #7A48 +穉 > zhì; #7A49 +穊 > jì; #7A4A +穋 > lù; #7A4B +穌 > sū; #7A4C +積 > jī; #7A4D +穎 > yĭng; #7A4E +穏 > wĕn; #7A4F +穐 > qīu; #7A50 +穑 > sè; #7A51 +穓 > yì; #7A53 +穔 > huáng; #7A54 +穕 > qiè; #7A55 +穖 > jĭ; #7A56 +穗 > sùi; #7A57 +穘 > xiāo; #7A58 +穙 > pú; #7A59 +穚 > jiāo; #7A5A +穛 > zhūo; #7A5B +穜 > tóng; #7A5C +穞 > lǚ; #7A5E +穟 > sùi; #7A5F +穠 > nóng; #7A60 +穡 > sè; #7A61 +穢 > hùi; #7A62 +穣 > ráng; #7A63 +穤 > nùo; #7A64 +穥 > yù; #7A65 +穦 > bin; #7A66 +穧 > jì; #7A67 +穨 > túi; #7A68 +穩 > wĕn; #7A69 +穪 > chēng; #7A6A +穫 > hùo; #7A6B +穬 > gŏng; #7A6C +穭 > lǚ; #7A6D +穮 > biāo; #7A6E +穰 > ráng; #7A70 +穱 > zhūo; #7A71 +穲 > lí; #7A72 +穳 > zàn; #7A73 +穴 > xuè; #7A74 +穵 > wā; #7A75 +究 > jìu; #7A76 +穷 > qíong; 
#7A77 +穸 > xì; #7A78 +穹 > qīong; #7A79 +空 > kōng; #7A7A +穻 > yū; #7A7B +穼 > sēn; #7A7C +穽 > jĭng; #7A7D +穾 > yào; #7A7E +穿 > chuān; #7A7F +窀 > zhūn; #7A80 +突 > tú; #7A81 +窂 > láo; #7A82 +窃 > qiè; #7A83 +窄 > zhăi; #7A84 +窅 > yăo; #7A85 +窆 > biăn; #7A86 +窇 > báo; #7A87 +窈 > yăo; #7A88 +窉 > bĭng; #7A89 +窊 > wā; #7A8A +窋 > zhú; #7A8B +窌 > jiào; #7A8C +窍 > qiào; #7A8D +窎 > diào; #7A8E +窏 > wū; #7A8F +窐 > gūi; #7A90 +窑 > yáo; #7A91 +窒 > zhì; #7A92 +窓 > chuāng; #7A93 +窔 > yăo; #7A94 +窕 > tiăo; #7A95 +窖 > jiào; #7A96 +窗 > chuāng; #7A97 +窘 > jĭong; #7A98 +窙 > xiāo; #7A99 +窚 > chéng; #7A9A +窛 > kòu; #7A9B +窜 > cuàn; #7A9C +窝 > wō; #7A9D +窞 > dàn; #7A9E +窟 > kū; #7A9F +窠 > kē; #7AA0 +窡 > zhùi; #7AA1 +窢 > xù; #7AA2 +窣 > sù; #7AA3 +窤 > guan; #7AA4 +窥 > kūi; #7AA5 +窦 > dòu; #7AA6 +窨 > yìn; #7AA8 +窩 > wō; #7AA9 +窪 > wā; #7AAA +窫 > yà; #7AAB +窬 > yú; #7AAC +窭 > jù; #7AAD +窮 > qíong; #7AAE +窯 > yáo; #7AAF +窰 > yáo; #7AB0 +窱 > tiào; #7AB1 +窲 > cháo; #7AB2 +窳 > yŭ; #7AB3 +窴 > tián; #7AB4 +窵 > diào; #7AB5 +窶 > jù; #7AB6 +窷 > liáo; #7AB7 +窸 > xī; #7AB8 +窹 > wù; #7AB9 +窺 > kūi; #7ABA +窻 > chuāng; #7ABB +窼 > zhāo; #7ABC +窾 > kuăn; #7ABE +窿 > lóng; #7ABF +竀 > chēng; #7AC0 +竁 > cùi; #7AC1 +竂 > piáo; #7AC2 +竃 > zào; #7AC3 +竄 > cuàn; #7AC4 +竅 > qiào; #7AC5 +竆 > qíong; #7AC6 +竇 > dòu; #7AC7 +竈 > zào; #7AC8 +竉 > lŏng; #7AC9 +竊 > qiè; #7ACA +立 > lì; #7ACB +竌 > chù; #7ACC +竍 > shí' 'gōng' 'shēng; #7ACD +竎 > fòu; #7ACE +竏 > qiān' 'gōng' 'shēng; #7ACF +竐 > chù; #7AD0 +竑 > hóng; #7AD1 +竒 > qí; #7AD2 +竓 > qiān' 'fēn' 'zhī' 'yī' 'gōng' 'shēng; #7AD3 +竔 > gōng' 'shēng; #7AD4 +竕 > shí' 'fēn' 'zhī' 'yī' 'gōng' 'shēng; #7AD5 +竖 > shù; #7AD6 +竗 > miào; #7AD7 +竘 > jŭ; #7AD8 +站 > zhàn; #7AD9 +竚 > zhù; #7ADA +竛 > líng; #7ADB +竜 > lóng; #7ADC +竝 > bìng; #7ADD +竞 > jìng; #7ADE +竟 > jìng; #7ADF +章 > zhāng; #7AE0 +竡 > yī' 'gōng' 'shēng' 'de' 'yī' 'băi' 'bèi; #7AE1 +竢 > sì; #7AE2 +竣 > jùn; #7AE3 +竤 > hóng; #7AE4 +童 > tóng; #7AE5 +竦 > sŏng; #7AE6 +竧 > jìng; #7AE7 +竨 > diào; #7AE8 +竩 > yì; #7AE9 +竪 > shù; #7AEA +竫 
> jìng; #7AEB +竬 > qŭ; #7AEC +竭 > jié; #7AED +竮 > píng; #7AEE +端 > duān; #7AEF +竰 > sháo; #7AF0 +竱 > zhuăn; #7AF1 +竲 > céng; #7AF2 +竳 > dēng; #7AF3 +竴 > cūi; #7AF4 +竵 > huāi; #7AF5 +競 > jìng; #7AF6 +竷 > kàn; #7AF7 +竸 > jìng; #7AF8 +竹 > zhú; #7AF9 +竺 > zhú; #7AFA +竻 > lè; #7AFB +竼 > péng; #7AFC +竽 > yú; #7AFD +竾 > chí; #7AFE +竿 > gān; #7AFF +笀 > máng; #7B00 +笁 > zhú; #7B01 +笃 > dŭ; #7B03 +笄 > jī; #7B04 +笅 > xiáo; #7B05 +笆 > bā; #7B06 +笇 > suàn; #7B07 +笈 > jí; #7B08 +笉 > zhĕn; #7B09 +笊 > zhào; #7B0A +笋 > sŭn; #7B0B +笌 > yá; #7B0C +笍 > zhùi; #7B0D +笎 > yuán; #7B0E +笏 > hù; #7B0F +笐 > gāng; #7B10 +笑 > xiào; #7B11 +笒 > cén; #7B12 +笓 > pí; #7B13 +笔 > bĭ; #7B14 +笕 > jiăn; #7B15 +笖 > yĭ; #7B16 +笗 > dōng; #7B17 +笘 > shān; #7B18 +笙 > shēng; #7B19 +笚 > xiá; #7B1A +笛 > dí; #7B1B +笜 > zhú; #7B1C +笝 > nà; #7B1D +笞 > chī; #7B1E +笟 > gū; #7B1F +笠 > lì; #7B20 +笡 > qiè; #7B21 +笢 > mĭn; #7B22 +笣 > bāo; #7B23 +笤 > tiáo; #7B24 +笥 > sì; #7B25 +符 > fú; #7B26 +笧 > cè; #7B27 +笨 > bèn; #7B28 +笩 > pèi; #7B29 +笪 > dá; #7B2A +笫 > zĭ; #7B2B +第 > dì; #7B2C +笭 > líng; #7B2D +笮 > zé; #7B2E +笯 > nú; #7B2F +笰 > fú; #7B30 +笱 > gŏu; #7B31 +笲 > fān; #7B32 +笳 > jiā; #7B33 +笴 > gĕ; #7B34 +笵 > fàn; #7B35 +笶 > shĭ; #7B36 +笷 > măo; #7B37 +笸 > pŏ; #7B38 +笺 > jiān; #7B3A +笻 > qíong; #7B3B +笼 > lóng; #7B3C +笾 > biān; #7B3E +笿 > lùo; #7B3F +筀 > gùi; #7B40 +筁 > qŭ; #7B41 +筂 > chí; #7B42 +筃 > yīn; #7B43 +筄 > yào; #7B44 +筅 > xiăn; #7B45 +筆 > bĭ; #7B46 +筇 > qíong; #7B47 +筈 > guā; #7B48 +等 > dĕng; #7B49 +筊 > jiăo; #7B4A +筋 > jīn; #7B4B +筌 > quán; #7B4C +筍 > sŭn; #7B4D +筎 > rú; #7B4E +筏 > fá; #7B4F +筐 > kuāng; #7B50 +筑 > zhú; #7B51 +筒 > tŏng; #7B52 +筓 > jī; #7B53 +答 > dá; #7B54 +筕 > xíng; #7B55 +策 > cè; #7B56 +筗 > zhòng; #7B57 +筘 > kòu; #7B58 +筙 > lái; #7B59 +筚 > bì; #7B5A +筛 > shāi; #7B5B +筜 > dāng; #7B5C +筝 > zhēng; #7B5D +筞 > cè; #7B5E +筟 > fū; #7B5F +筠 > yún; #7B60 +筡 > tú; #7B61 +筢 > pá; #7B62 +筣 > lì; #7B63 +筤 > láng; #7B64 +筥 > jŭ; #7B65 +筦 > guăn; #7B66 +筧 > jiăn; #7B67 +筨 > hán; #7B68 +筩 > tóng; #7B69 +筪 > 
xiá; #7B6A +筫 > zhì; #7B6B +筬 > chéng; #7B6C +筭 > suàn; #7B6D +筮 > shì; #7B6E +筯 > zhù; #7B6F +筰 > zúo; #7B70 +筱 > xiăo; #7B71 +筲 > shāo; #7B72 +筳 > tíng; #7B73 +筴 > cè; #7B74 +筵 > yán; #7B75 +筶 > găo; #7B76 +筷 > kuài; #7B77 +筸 > gān; #7B78 +筹 > chóu; #7B79 +筻 > gàng; #7B7B +筼 > yún; #7B7C +签 > qiān; #7B7E +筿 > xiăo; #7B7F +简 > jiăn; #7B80 +箁 > pú; #7B81 +箂 > lái; #7B82 +箃 > zōu; #7B83 +箄 > bì; #7B84 +箅 > bì; #7B85 +箆 > bì; #7B86 +箇 > gè; #7B87 +箈 > chí; #7B88 +箉 > guăi; #7B89 +箊 > yū; #7B8A +箋 > jiān; #7B8B +箌 > zhào; #7B8C +箍 > gū; #7B8D +箎 > chí; #7B8E +箏 > zhēng; #7B8F +箐 > jīng; #7B90 +箑 > shà; #7B91 +箒 > zhŏu; #7B92 +箓 > lù; #7B93 +箔 > bó; #7B94 +箕 > jī; #7B95 +箖 > lín; #7B96 +算 > suàn; #7B97 +箘 > jùn; #7B98 +箙 > fú; #7B99 +箚 > zhá; #7B9A +箛 > gū; #7B9B +箜 > kōng; #7B9C +箝 > qián; #7B9D +箞 > quān; #7B9E +箟 > jùn; #7B9F +箠 > chúi; #7BA0 +管 > guăn; #7BA1 +箢 > yuān; #7BA2 +箣 > cè; #7BA3 +箤 > jú; #7BA4 +箥 > bŏ; #7BA5 +箦 > zé; #7BA6 +箧 > qiè; #7BA7 +箨 > tùo; #7BA8 +箩 > lúo; #7BA9 +箪 > dān; #7BAA +箫 > xiāo; #7BAB +箬 > rùo; #7BAC +箭 > jiàn; #7BAD +箮 > xuan; #7BAE +箯 > biān; #7BAF +箰 > sŭn; #7BB0 +箱 > xiāng; #7BB1 +箲 > xiăn; #7BB2 +箳 > píng; #7BB3 +箴 > zhēn; #7BB4 +箵 > shĕng; #7BB5 +箶 > hú; #7BB6 +箷 > shī; #7BB7 +箸 > zhù; #7BB8 +箹 > yuē; #7BB9 +箺 > chŭn; #7BBA +箻 > lǜ; #7BBB +箼 > wū; #7BBC +箽 > dŏng; #7BBD +箾 > xiāo; #7BBE +箿 > jí; #7BBF +節 > jié; #7BC0 +篁 > huáng; #7BC1 +篂 > xīng; #7BC2 +篃 > méi; #7BC3 +範 > fàn; #7BC4 +篅 > chúi; #7BC5 +篆 > zhuàn; #7BC6 +篇 > piān; #7BC7 +篈 > fēng; #7BC8 +築 > zhú; #7BC9 +篊 > hóng; #7BCA +篋 > qiè; #7BCB +篌 > hóu; #7BCC +篍 > qīu; #7BCD +篎 > miăo; #7BCE +篏 > qiàn; #7BCF +篑 > kùi; #7BD1 +篓 > lŏu; #7BD3 +篔 > yún; #7BD4 +篕 > hé; #7BD5 +篖 > táng; #7BD6 +篗 > yuè; #7BD7 +篘 > chōu; #7BD8 +篙 > gāo; #7BD9 +篚 > fĕi; #7BDA +篛 > rùo; #7BDB +篜 > zhēng; #7BDC +篝 > gōu; #7BDD +篞 > niè; #7BDE +篟 > qiàn; #7BDF +篠 > xiăo; #7BE0 +篡 > cuàn; #7BE1 +篢 > gōng; #7BE2 +篣 > páng; #7BE3 +篤 > dŭ; #7BE4 +篥 > lì; #7BE5 +篦 > bì; #7BE6 +篧 > zhúo; #7BE7 +篨 > chú; #7BE8 
+篩 > shāi; #7BE9 +篪 > chí; #7BEA +篫 > zhú; #7BEB +篬 > qiāng; #7BEC +篭 > lóng; #7BED +篮 > lán; #7BEE +篯 > jiān; #7BEF +篰 > bù; #7BF0 +篱 > lí; #7BF1 +篲 > hùi; #7BF2 +篳 > bì; #7BF3 +篴 > dí; #7BF4 +篵 > cōng; #7BF5 +篶 > yān; #7BF6 +篷 > péng; #7BF7 +篸 > sēn; #7BF8 +篹 > zhuàn; #7BF9 +篺 > pái; #7BFA +篻 > piào; #7BFB +篼 > dōu; #7BFC +篽 > yŭ; #7BFD +篾 > miè; #7BFE +篿 > zhuān; #7BFF +簀 > zé; #7C00 +簁 > xĭ; #7C01 +簂 > gúo; #7C02 +簃 > yí; #7C03 +簄 > hù; #7C04 +簅 > chăn; #7C05 +簆 > kòu; #7C06 +簇 > cù; #7C07 +簈 > píng; #7C08 +簉 > chòu; #7C09 +簊 > jī; #7C0A +簋 > gŭi; #7C0B +簌 > sù; #7C0C +簍 > lŏu; #7C0D +簎 > zhà; #7C0E +簏 > lù; #7C0F +簐 > niăn; #7C10 +簑 > sūo; #7C11 +簒 > cuàn; #7C12 +簔 > sūo; #7C14 +簕 > lè; #7C15 +簖 > duàn; #7C16 +簘 > xiāo; #7C18 +簙 > bó; #7C19 +簚 > mì; #7C1A +簛 > sī; #7C1B +簜 > dàng; #7C1C +簝 > liáo; #7C1D +簞 > dān; #7C1E +簟 > diàn; #7C1F +簠 > fŭ; #7C20 +簡 > jiăn; #7C21 +簢 > mĭn; #7C22 +簣 > kùi; #7C23 +簤 > dài; #7C24 +簥 > qiáo; #7C25 +簦 > dēng; #7C26 +簧 > huáng; #7C27 +簨 > sŭn; #7C28 +簩 > láo; #7C29 +簪 > zān; #7C2A +簫 > xiāo; #7C2B +簬 > dù; #7C2C +簭 > shì; #7C2D +簮 > zān; #7C2E +簰 > pái; #7C30 +簲 > pái; #7C32 +簳 > gàn; #7C33 +簴 > jù; #7C34 +簵 > dù; #7C35 +簶 > lù; #7C36 +簷 > yán; #7C37 +簸 > bò; #7C38 +簹 > dāng; #7C39 +簺 > sài; #7C3A +簻 > kē; #7C3B +簼 > lóng; #7C3C +簽 > qiān; #7C3D +簾 > lián; #7C3E +簿 > bó; #7C3F +籀 > zhòu; #7C40 +籁 > lài; #7C41 +籃 > lán; #7C43 +籄 > kùi; #7C44 +籅 > yú; #7C45 +籆 > yuè; #7C46 +籇 > háo; #7C47 +籈 > zhēn; #7C48 +籉 > tái; #7C49 +籊 > tì; #7C4A +籋 > mí; #7C4B +籌 > chóu; #7C4C +籍 > jí; #7C4D +籐 > téng; #7C50 +籑 > zhuàn; #7C51 +籒 > zhòu; #7C52 +籓 > fān; #7C53 +籔 > sŏu; #7C54 +籕 > zhòu; #7C55 +籗 > zhúo; #7C57 +籘 > téng; #7C58 +籙 > lù; #7C59 +籚 > lú; #7C5A +籛 > jiān; #7C5B +籜 > tùo; #7C5C +籝 > yíng; #7C5D +籞 > yù; #7C5E +籟 > lài; #7C5F +籠 > lóng; #7C60 +籢 > lián; #7C62 +籣 > lán; #7C63 +籤 > qiān; #7C64 +籥 > yuè; #7C65 +籦 > zhōng; #7C66 +籧 > qú; #7C67 +籨 > lián; #7C68 +籩 > biān; #7C69 +籪 > duàn; #7C6A +籫 > zuăn; #7C6B +籬 > lí; #7C6C +籭 > sī; 
#7C6D +籮 > lúo; #7C6E +籯 > yíng; #7C6F +籰 > yuè; #7C70 +籱 > zhúo; #7C71 +籲 > xū; #7C72 +米 > mĭ; #7C73 +籴 > dí; #7C74 +籵 > fán; #7C75 +籶 > shēn; #7C76 +籷 > zhé; #7C77 +籸 > shēn; #7C78 +籹 > nǚ; #7C79 +籺 > xié; #7C7A +类 > lèi; #7C7B +籼 > xiān; #7C7C +籽 > zĭ; #7C7D +籾 > ní; #7C7E +籿 > cùn; #7C7F +粁 > qiān; #7C81 +粃 > bĭ; #7C83 +粄 > băn; #7C84 +粅 > wù; #7C85 +粆 > shā; #7C86 +粇 > kāng; #7C87 +粈 > rŏu; #7C88 +粉 > fĕn; #7C89 +粊 > bì; #7C8A +粋 > cùi; #7C8B +粍 > lí; #7C8D +粎 > chĭ; #7C8E +粑 > bā; #7C91 +粒 > lì; #7C92 +粓 > gān; #7C93 +粔 > jù; #7C94 +粕 > pò; #7C95 +粖 > mò; #7C96 +粗 > cū; #7C97 +粘 > nián; #7C98 +粙 > zhòu; #7C99 +粚 > lí; #7C9A +粛 > sù; #7C9B +粜 > tiào; #7C9C +粝 > lì; #7C9D +粞 > qī; #7C9E +粟 > sù; #7C9F +粠 > hóng; #7CA0 +粡 > tóng; #7CA1 +粢 > zī; #7CA2 +粣 > cè; #7CA3 +粤 > yuè; #7CA4 +粥 > zhōu; #7CA5 +粦 > lìn; #7CA6 +粧 > zhuāng; #7CA7 +粨 > băi; #7CA8 +粪 > fèn; #7CAA +粮 > liáng; #7CAE +粯 > xiàn; #7CAF +粰 > fú; #7CB0 +粱 > liáng; #7CB1 +粲 > càn; #7CB2 +粳 > gēng; #7CB3 +粴 > lĭ; #7CB4 +粵 > yuè; #7CB5 +粶 > lù; #7CB6 +粷 > jú; #7CB7 +粸 > qí; #7CB8 +粹 > cùi; #7CB9 +粺 > bài; #7CBA +粻 > zhāng; #7CBB +粼 > lín; #7CBC +粽 > zòng; #7CBD +精 > jīng; #7CBE +粿 > gŭo; #7CBF +糁 > sān; #7CC1 +糂 > săn; #7CC2 +糃 > táng; #7CC3 +糄 > biān; #7CC4 +糅 > rŏu; #7CC5 +糆 > miàn; #7CC6 +糇 > hóu; #7CC7 +糈 > xŭ; #7CC8 +糉 > zòng; #7CC9 +糊 > hú; #7CCA +糋 > jiàn; #7CCB +糌 > zán; #7CCC +糍 > cí; #7CCD +糎 > lí; #7CCE +糏 > xiè; #7CCF +糐 > fū; #7CD0 +糑 > nì; #7CD1 +糒 > bèi; #7CD2 +糓 > gŭ; #7CD3 +糔 > xĭu; #7CD4 +糕 > gāo; #7CD5 +糖 > táng; #7CD6 +糗 > qĭu; #7CD7 +糙 > cāo; #7CD9 +糚 > zhuāng; #7CDA +糛 > táng; #7CDB +糜 > mí; #7CDC +糝 > sān; #7CDD +糞 > fèn; #7CDE +糟 > zāo; #7CDF +糠 > kāng; #7CE0 +糡 > jiàng; #7CE1 +糢 > mó; #7CE2 +糣 > săn; #7CE3 +糤 > săn; #7CE4 +糥 > nùo; #7CE5 +糦 > xī; #7CE6 +糧 > liáng; #7CE7 +糨 > jiàng; #7CE8 +糩 > kuài; #7CE9 +糪 > bó; #7CEA +糫 > huán; #7CEB +糭 > zòng; #7CED +糮 > xiàn; #7CEE +糯 > nùo; #7CEF +糰 > tuán; #7CF0 +糱 > niè; #7CF1 +糲 > lì; #7CF2 +糳 > zùo; #7CF3 +糴 > dí; #7CF4 +糵 > niè; #7CF5 +糶 
> tiào; #7CF6 +糷 > lán; #7CF7 +糸 > mì; #7CF8 +糹 > jiăo' 'sī' 'páng; #7CF9 +糺 > jīu; #7CFA +系 > xì; #7CFB +糼 > gōng; #7CFC +糽 > zhĕng; #7CFD +糾 > jīu; #7CFE +糿 > yòu; #7CFF +紀 > jì; #7D00 +紁 > chà; #7D01 +紂 > zhòu; #7D02 +紃 > xún; #7D03 +約 > yuē; #7D04 +紅 > hóng; #7D05 +紆 > yū; #7D06 +紇 > hé; #7D07 +紈 > wán; #7D08 +紉 > rèn; #7D09 +紊 > wèn; #7D0A +紋 > wén; #7D0B +紌 > qíu; #7D0C +納 > nà; #7D0D +紎 > zī; #7D0E +紏 > tŏu; #7D0F +紐 > nĭu; #7D10 +紑 > fóu; #7D11 +紒 > jiè; #7D12 +紓 > shū; #7D13 +純 > chún; #7D14 +紕 > pí; #7D15 +紖 > yĭn; #7D16 +紗 > shā; #7D17 +紘 > hóng; #7D18 +紙 > zhĭ; #7D19 +級 > jí; #7D1A +紛 > fēn; #7D1B +紜 > yún; #7D1C +紝 > rén; #7D1D +紞 > dăn; #7D1E +紟 > jīn; #7D1F +素 > sù; #7D20 +紡 > făng; #7D21 +索 > sŭo; #7D22 +紣 > cùi; #7D23 +紤 > jĭu; #7D24 +紥 > zhá; #7D25 +紧 > jĭn; #7D27 +紨 > fù; #7D28 +紩 > zhì; #7D29 +紪 > cĭ; #7D2A +紫 > zĭ; #7D2B +紬 > chóu; #7D2C +紭 > hóng; #7D2D +紮 > zhá; #7D2E +累 > lèi; #7D2F +細 > xì; #7D30 +紱 > fú; #7D31 +紲 > xiè; #7D32 +紳 > shēn; #7D33 +紴 > bèi; #7D34 +紵 > zhù; #7D35 +紶 > qŭ; #7D36 +紷 > líng; #7D37 +紸 > zhù; #7D38 +紹 > shào; #7D39 +紺 > gàn; #7D3A +紻 > yāng; #7D3B +紼 > fú; #7D3C +紽 > túo; #7D3D +紾 > zhĕn; #7D3E +紿 > dài; #7D3F +絀 > zhúo; #7D40 +絁 > shī; #7D41 +終 > zhōng; #7D42 +絃 > xián; #7D43 +組 > zŭ; #7D44 +絅 > jĭong; #7D45 +絆 > bàn; #7D46 +絇 > jù; #7D47 +絈 > mò; #7D48 +絉 > shù; #7D49 +絊 > zùi; #7D4A +経 > jīng; #7D4C +絍 > rén; #7D4D +絎 > hèng; #7D4E +絏 > xiè; #7D4F +結 > jié; #7D50 +絑 > zhū; #7D51 +絒 > chóu; #7D52 +絓 > guà; #7D53 +絔 > băi; #7D54 +絕 > jué; #7D55 +絖 > kuàng; #7D56 +絗 > hú; #7D57 +絘 > cì; #7D58 +絙 > gēng; #7D59 +絚 > gēng; #7D5A +絛 > tāo; #7D5B +絜 > xié; #7D5C +絝 > kù; #7D5D +絞 > jiăo; #7D5E +絟 > quān; #7D5F +絠 > găi; #7D60 +絡 > lùo; #7D61 +絢 > xuàn; #7D62 +絣 > bīng; #7D63 +絤 > xiàn; #7D64 +絥 > fú; #7D65 +給 > gĕi; #7D66 +絧 > tóng; #7D67 +絨 > róng; #7D68 +絩 > tiào; #7D69 +絪 > yīn; #7D6A +絫 > lĕi; #7D6B +絬 > xiè; #7D6C +絭 > quàn; #7D6D +絮 > xù; #7D6E +絯 > lǜn; #7D6F +絰 > dié; #7D70 +統 > tŏng; #7D71 +絲 > sī; #7D72 +絳 > 
jiàng; #7D73 +絴 > xiáng; #7D74 +絵 > hùi; #7D75 +絶 > jué; #7D76 +絷 > zhí; #7D77 +絸 > jiăn; #7D78 +絹 > juàn; #7D79 +絺 > chī; #7D7A +絻 > miăn; #7D7B +絼 > zhĕn; #7D7C +絽 > lǚ; #7D7D +絾 > chéng; #7D7E +絿 > qíu; #7D7F +綀 > shū; #7D80 +綁 > băng; #7D81 +綂 > tŏng; #7D82 +綃 > xiāo; #7D83 +綄 > wàn; #7D84 +綅 > qīn; #7D85 +綆 > gĕng; #7D86 +綇 > xĭu; #7D87 +綈 > tí; #7D88 +綉 > xìu; #7D89 +綊 > xié; #7D8A +綋 > hóng; #7D8B +綌 > xì; #7D8C +綍 > fú; #7D8D +綎 > tīng; #7D8E +綏 > sūi; #7D8F +綐 > dùi; #7D90 +綑 > kŭn; #7D91 +綒 > fū; #7D92 +經 > jīng; #7D93 +綔 > hù; #7D94 +綕 > zhī; #7D95 +綖 > yán; #7D96 +綗 > jĭong; #7D97 +綘 > féng; #7D98 +継 > jì; #7D99 +綜 > zòng; #7D9C +綝 > lín; #7D9D +綞 > dŭo; #7D9E +綟 > lì; #7D9F +綠 > lǜ; #7DA0 +綡 > liáng; #7DA1 +綢 > chóu; #7DA2 +綣 > quăn; #7DA3 +綤 > shào; #7DA4 +綥 > qì; #7DA5 +綦 > qí; #7DA6 +綧 > zhŭn; #7DA7 +綨 > qí; #7DA8 +綩 > wăn; #7DA9 +綪 > qiàn; #7DAA +綫 > xiàn; #7DAB +綬 > shòu; #7DAC +維 > wéi; #7DAD +綮 > qĭ; #7DAE +綯 > táo; #7DAF +綰 > wăn; #7DB0 +綱 > gāng; #7DB1 +網 > wăng; #7DB2 +綳 > bēng; #7DB3 +綴 > zhùi; #7DB4 +綵 > căi; #7DB5 +綶 > gŭo; #7DB6 +綷 > cùi; #7DB7 +綸 > lún; #7DB8 +綹 > lĭu; #7DB9 +綺 > qĭ; #7DBA +綻 > zhàn; #7DBB +綼 > bēi; #7DBC +綽 > chùo; #7DBD +綾 > líng; #7DBE +綿 > mián; #7DBF +緀 > qī; #7DC0 +緁 > qiè; #7DC1 +緂 > tān; #7DC2 +緃 > zōng; #7DC3 +緄 > gŭn; #7DC4 +緅 > zōu; #7DC5 +緆 > yì; #7DC6 +緇 > zī; #7DC7 +緈 > xìng; #7DC8 +緉 > liăng; #7DC9 +緊 > jĭn; #7DCA +緋 > fēi; #7DCB +緌 > rúi; #7DCC +緍 > mín; #7DCD +緎 > yù; #7DCE +総 > zŏng; #7DCF +緐 > fán; #7DD0 +緑 > lǜ; #7DD1 +緒 > xù; #7DD2 +緓 > yingl; #7DD3 +緔 > zhàng; #7DD4 +緖 > xù; #7DD6 +緗 > xiāng; #7DD7 +緘 > jiān; #7DD8 +緙 > kè; #7DD9 +線 > xiàn; #7DDA +緛 > ruăn; #7DDB +緜 > mián; #7DDC +緝 > qì; #7DDD +緞 > duàn; #7DDE +緟 > zhòng; #7DDF +締 > dì; #7DE0 +緡 > mín; #7DE1 +緢 > miáo; #7DE2 +緣 > yuán; #7DE3 +緤 > xiè; #7DE4 +緥 > băo; #7DE5 +緦 > sī; #7DE6 +緧 > qīu; #7DE7 +編 > biān; #7DE8 +緩 > huăn; #7DE9 +緪 > gēng; #7DEA +緫 > cōng; #7DEB +緬 > miăn; #7DEC +緭 > wèi; #7DED +緮 > fù; #7DEE +緯 > wĕi; #7DEF +緰 > yú; #7DF0 
+緱 > gōu; #7DF1 +緲 > miăo; #7DF2 +緳 > xié; #7DF3 +練 > liàn; #7DF4 +緵 > zōng; #7DF5 +緶 > biàn; #7DF6 +緷 > yùn; #7DF7 +緸 > yīn; #7DF8 +緹 > tí; #7DF9 +緺 > guā; #7DFA +緻 > zhì; #7DFB +緼 > yūn; #7DFC +緽 > chēng; #7DFD +緾 > chán; #7DFE +緿 > dài; #7DFF +縀 > xiá; #7E00 +縁 > yuán; #7E01 +縂 > zŏng; #7E02 +縃 > xū; #7E03 +縆 > gēng; #7E06 +縈 > yíng; #7E08 +縉 > jìn; #7E09 +縊 > yì; #7E0A +縋 > zhùi; #7E0B +縌 > nì; #7E0C +縍 > bāng; #7E0D +縎 > gŭ; #7E0E +縏 > pán; #7E0F +縐 > zhòu; #7E10 +縑 > jiān; #7E11 +縒 > cŭo; #7E12 +縓 > quăn; #7E13 +縔 > shuăng; #7E14 +縕 > yūn; #7E15 +縖 > xiá; #7E16 +縗 > shuāi; #7E17 +縘 > xī; #7E18 +縙 > róng; #7E19 +縚 > tāo; #7E1A +縛 > fú; #7E1B +縜 > yún; #7E1C +縝 > zhēn; #7E1D +縞 > găo; #7E1E +縟 > rù; #7E1F +縠 > hú; #7E20 +縡 > zăi; #7E21 +縢 > téng; #7E22 +縣 > xiàn; #7E23 +縤 > sù; #7E24 +縥 > zhĕn; #7E25 +縦 > zòng; #7E26 +縧 > tāo; #7E27 +縩 > cài; #7E29 +縪 > bì; #7E2A +縫 > féng; #7E2B +縬 > cù; #7E2C +縭 > lí; #7E2D +縮 > sūo; #7E2E +縯 > yĭn; #7E2F +縰 > xĭ; #7E30 +縱 > zòng; #7E31 +縲 > léi; #7E32 +縳 > zhuàn; #7E33 +縴 > qiān; #7E34 +縵 > màn; #7E35 +縶 > zhí; #7E36 +縷 > lǚ; #7E37 +縸 > mò; #7E38 +縹 > piăo; #7E39 +縺 > lián; #7E3A +縻 > mí; #7E3B +縼 > xuàn; #7E3C +總 > zŏng; #7E3D +績 > jī; #7E3E +縿 > shān; #7E3F +繀 > sùi; #7E40 +繁 > fán; #7E41 +繂 > shuài; #7E42 +繃 > bēng; #7E43 +繄 > yī; #7E44 +繅 > sāo; #7E45 +繆 > móu; #7E46 +繇 > zhòu; #7E47 +繈 > qiăng; #7E48 +繉 > hún; #7E49 +繋 > xì; #7E4B +繍 > xìu; #7E4D +繎 > rán; #7E4E +繏 > xuàn; #7E4F +繐 > hùi; #7E50 +繑 > qiāo; #7E51 +繒 > zēng; #7E52 +繓 > zŭo; #7E53 +織 > zhī; #7E54 +繕 > shàn; #7E55 +繖 > săn; #7E56 +繗 > lín; #7E57 +繘 > yù; #7E58 +繙 > fān; #7E59 +繚 > liáo; #7E5A +繛 > chùo; #7E5B +繜 > zūn; #7E5C +繝 > jiàn; #7E5D +繞 > rào; #7E5E +繟 > chăn; #7E5F +繠 > rŭi; #7E60 +繡 > xìu; #7E61 +繢 > hùi; #7E62 +繣 > huà; #7E63 +繤 > zuăn; #7E64 +繥 > xī; #7E65 +繦 > qiăng; #7E66 +繨 > dá; #7E68 +繩 > shéng; #7E69 +繪 > hùi; #7E6A +繫 > xì; #7E6B +繬 > sè; #7E6C +繭 > jiăn; #7E6D +繮 > jiāng; #7E6E +繯 > huán; #7E6F +繰 > zăo; #7E70 +繱 > cōng; #7E71 +繲 > jiè; 
#7E72 +繳 > jiăo; #7E73 +繴 > bò; #7E74 +繵 > chán; #7E75 +繶 > yì; #7E76 +繷 > náo; #7E77 +繸 > sùi; #7E78 +繹 > yì; #7E79 +繺 > shăi; #7E7A +繻 > xū; #7E7B +繼 > jì; #7E7C +繽 > bīn; #7E7D +繾 > qiăn; #7E7E +繿 > lán; #7E7F +纀 > pú; #7E80 +纁 > xūn; #7E81 +纂 > zuăn; #7E82 +纃 > qí; #7E83 +纄 > péng; #7E84 +纅 > lì; #7E85 +纆 > mò; #7E86 +纇 > lèi; #7E87 +纈 > xié; #7E88 +纉 > zuăn; #7E89 +纊 > kuàng; #7E8A +纋 > yōu; #7E8B +續 > xù; #7E8C +纍 > léi; #7E8D +纎 > xiān; #7E8E +纏 > chán; #7E8F +纑 > lú; #7E91 +纒 > chán; #7E92 +纓 > yīng; #7E93 +纔 > cái; #7E94 +纕 > xiāng; #7E95 +纖 > xiān; #7E96 +纗 > zūi; #7E97 +纘 > zuăn; #7E98 +纙 > lùo; #7E99 +纚 > xĭ; #7E9A +纛 > dào; #7E9B +纜 > làn; #7E9C +纝 > léi; #7E9D +纞 > liàn; #7E9E +纟 > sī; #7E9F +纠 > jīu; #7EA0 +纡 > yū; #7EA1 +红 > hóng; #7EA2 +纣 > zhòu; #7EA3 +纤 > xiān; #7EA4 +纥 > hé; #7EA5 +约 > yuē; #7EA6 +级 > jí; #7EA7 +纨 > wán; #7EA8 +纩 > kuàng; #7EA9 +纪 > jì; #7EAA +纫 > rèn; #7EAB +纬 > wĕi; #7EAC +纭 > yún; #7EAD +纮 > hóng; #7EAE +纯 > chún; #7EAF +纰 > pí; #7EB0 +纱 > shā; #7EB1 +纲 > gāng; #7EB2 +纳 > nà; #7EB3 +纴 > rén; #7EB4 +纵 > zòng; #7EB5 +纶 > lún; #7EB6 +纷 > fēn; #7EB7 +纸 > zhĭ; #7EB8 +纹 > wén; #7EB9 +纺 > făng; #7EBA +纻 > zhù; #7EBB +纼 > yĭn; #7EBC +纽 > nĭu; #7EBD +纾 > shū; #7EBE +线 > xiàn; #7EBF +绀 > gàn; #7EC0 +绁 > xiè; #7EC1 +绂 > fú; #7EC2 +练 > liàn; #7EC3 +组 > zŭ; #7EC4 +绅 > shēn; #7EC5 +细 > xì; #7EC6 +织 > zhī; #7EC7 +终 > zhōng; #7EC8 +绉 > zhòu; #7EC9 +绊 > bàn; #7ECA +绋 > fú; #7ECB +绌 > zhúo; #7ECC +绍 > shào; #7ECD +绎 > yì; #7ECE +经 > jīng; #7ECF +绐 > dài; #7ED0 +绑 > băng; #7ED1 +绒 > róng; #7ED2 +结 > jié; #7ED3 +绔 > kù; #7ED4 +绕 > rào; #7ED5 +绖 > dié; #7ED6 +绗 > hèng; #7ED7 +绘 > hùi; #7ED8 +给 > gĕi; #7ED9 +绚 > xuàn; #7EDA +绛 > jiàng; #7EDB +络 > lùo; #7EDC +绝 > jué; #7EDD +绞 > jiăo; #7EDE +统 > tŏng; #7EDF +绠 > gĕng; #7EE0 +绡 > xiāo; #7EE1 +绢 > juàn; #7EE2 +绣 > xìu; #7EE3 +绤 > xì; #7EE4 +绥 > sūi; #7EE5 +绦 > tāo; #7EE6 +继 > jì; #7EE7 +绨 > tí; #7EE8 +绩 > jī; #7EE9 +绪 > xù; #7EEA +绫 > líng; #7EEB +续 > xù; #7EED +绮 > qĭ; #7EEE +绯 > fēi; #7EEF +绰 > 
chùo; #7EF0 +绱 > zhăng; #7EF1 +绲 > gŭn; #7EF2 +绳 > shéng; #7EF3 +维 > wéi; #7EF4 +绵 > mián; #7EF5 +绶 > shòu; #7EF6 +绷 > bēng; #7EF7 +绸 > chóu; #7EF8 +绹 > táo; #7EF9 +绺 > lĭu; #7EFA +绻 > quăn; #7EFB +综 > zòng; #7EFC +绽 > zhàn; #7EFD +绾 > wăn; #7EFE +绿 > lǜ; #7EFF +缀 > zhùi; #7F00 +缁 > zī; #7F01 +缂 > kè; #7F02 +缃 > xiāng; #7F03 +缄 > jiān; #7F04 +缅 > miăn; #7F05 +缆 > làn; #7F06 +缇 > tí; #7F07 +缈 > miăo; #7F08 +缉 > qì; #7F09 +缊 > yūn; #7F0A +缋 > hùi; #7F0B +缌 > sī; #7F0C +缍 > dŭo; #7F0D +缎 > duàn; #7F0E +缏 > biàn; #7F0F +缐 > xiàn; #7F10 +缑 > gōu; #7F11 +缒 > zhùi; #7F12 +缓 > huăn; #7F13 +缔 > dì; #7F14 +缕 > lǚ; #7F15 +编 > biān; #7F16 +缗 > mín; #7F17 +缘 > yuán; #7F18 +缙 > jìn; #7F19 +缚 > fú; #7F1A +缛 > rù; #7F1B +缜 > zhēn; #7F1C +缝 > féng; #7F1D +缞 > shuāi; #7F1E +缟 > găo; #7F1F +缠 > chán; #7F20 +缡 > lí; #7F21 +缢 > yì; #7F22 +缣 > jiān; #7F23 +缤 > bīn; #7F24 +缥 > piăo; #7F25 +缦 > màn; #7F26 +缧 > léi; #7F27 +缨 > yīng; #7F28 +缩 > sūo; #7F29 +缪 > móu; #7F2A +缫 > sāo; #7F2B +缬 > xié; #7F2C +缭 > liáo; #7F2D +缮 > shàn; #7F2E +缯 > zēng; #7F2F +缰 > jiāng; #7F30 +缱 > qiăn; #7F31 +缲 > zăo; #7F32 +缳 > huán; #7F33 +缴 > jiăo; #7F34 +缵 > zuăn; #7F35 +缶 > fŏu; #7F36 +缷 > xiè; #7F37 +缸 > gāng; #7F38 +缹 > fŏu; #7F39 +缺 > quē; #7F3A +缻 > fŏu; #7F3B +缽 > bō; #7F3D +缾 > píng; #7F3E +缿 > hòu; #7F3F +罁 > gāng; #7F41 +罂 > yīng; #7F42 +罃 > yīng; #7F43 +罄 > qìng; #7F44 +罅 > xià; #7F45 +罆 > guàn; #7F46 +罇 > zūn; #7F47 +罈 > tán; #7F48 +罊 > qì; #7F4A +罋 > wèng; #7F4B +罌 > yīng; #7F4C +罍 > léi; #7F4D +罎 > tán; #7F4E +罏 > lú; #7F4F +罐 > guàn; #7F50 +网 > wăng; #7F51 +罒 > wăng; #7F52 +罓 > gāng; #7F53 +罔 > wăng; #7F54 +罕 > hăn; #7F55 +罗 > lūo; #7F57 +罘 > fú; #7F58 +罙 > mí; #7F59 +罚 > fá; #7F5A +罛 > gū; #7F5B +罜 > zhŭ; #7F5C +罝 > jū; #7F5D +罞 > máo; #7F5E +罟 > gŭ; #7F5F +罠 > mín; #7F60 +罡 > gāng; #7F61 +罢 > bà; #7F62 +罣 > guà; #7F63 +罤 > tí; #7F64 +罥 > juàn; #7F65 +罦 > fū; #7F66 +罧 > lín; #7F67 +罨 > yăn; #7F68 +罩 > zhào; #7F69 +罪 > zùi; #7F6A +罫 > guà; #7F6B +罬 > zhúo; #7F6C +罭 > yù; #7F6D +置 > zhì; #7F6E 
+罯 > ăn; #7F6F +罰 > fá; #7F70 +罱 > năn; #7F71 +署 > shŭ; #7F72 +罳 > sī; #7F73 +罴 > pí; #7F74 +罵 > mà; #7F75 +罶 > lĭu; #7F76 +罷 > bà; #7F77 +罸 > fá; #7F78 +罹 > lí; #7F79 +罺 > chāo; #7F7A +罻 > wèi; #7F7B +罼 > bì; #7F7C +罽 > jì; #7F7D +罾 > zēng; #7F7E +罿 > tóng; #7F7F +羀 > lĭu; #7F80 +羁 > jī; #7F81 +羂 > juàn; #7F82 +羃 > mì; #7F83 +羄 > zhào; #7F84 +羅 > lúo; #7F85 +羆 > pí; #7F86 +羇 > jī; #7F87 +羈 > jī; #7F88 +羉 > luán; #7F89 +羊 > yáng; #7F8A +羋 > miē; #7F8B +羌 > qiāng; #7F8C +羍 > tà; #7F8D +美 > mĕi; #7F8E +羏 > yáng; #7F8F +羐 > yŏu; #7F90 +羑 > yŏu; #7F91 +羒 > fén; #7F92 +羓 > bā; #7F93 +羔 > gāo; #7F94 +羕 > yàng; #7F95 +羖 > gŭ; #7F96 +羗 > qiāng; #7F97 +羘 > zāng; #7F98 +羙 > gāo; #7F99 +羚 > líng; #7F9A +羛 > yì; #7F9B +羜 > zhù; #7F9C +羝 > dī; #7F9D +羞 > xīu; #7F9E +羟 > qiān; #7F9F +羠 > yí; #7FA0 +羡 > xiàn; #7FA1 +羢 > róng; #7FA2 +羣 > qún; #7FA3 +群 > qún; #7FA4 +羥 > qiān; #7FA5 +羦 > huán; #7FA6 +羧 > zūi; #7FA7 +羨 > xiàn; #7FA8 +義 > yì; #7FA9 +羫 > qiāng; #7FAB +羬 > xián; #7FAC +羭 > yú; #7FAD +羮 > gēng; #7FAE +羯 > jié; #7FAF +羰 > tāng; #7FB0 +羱 > yuán; #7FB1 +羲 > xī; #7FB2 +羳 > fán; #7FB3 +羴 > shān; #7FB4 +羵 > fĕn; #7FB5 +羶 > shān; #7FB6 +羷 > liăn; #7FB7 +羸 > léi; #7FB8 +羹 > gēng; #7FB9 +羺 > nóu; #7FBA +羻 > qiàng; #7FBB +羼 > chàn; #7FBC +羽 > yŭ; #7FBD +羾 > gòng; #7FBE +羿 > yì; #7FBF +翀 > chóng; #7FC0 +翁 > wēng; #7FC1 +翂 > fēn; #7FC2 +翃 > hóng; #7FC3 +翄 > chì; #7FC4 +翅 > chì; #7FC5 +翆 > cùi; #7FC6 +翇 > fú; #7FC7 +翈 > xiá; #7FC8 +翉 > pĕn; #7FC9 +翊 > yì; #7FCA +翋 > lā; #7FCB +翌 > yì; #7FCC +翍 > pī; #7FCD +翎 > líng; #7FCE +翏 > lìu; #7FCF +翐 > zhì; #7FD0 +翑 > qú; #7FD1 +習 > xí; #7FD2 +翓 > xié; #7FD3 +翔 > xiáng; #7FD4 +翕 > xì; #7FD5 +翖 > xì; #7FD6 +翗 > qí; #7FD7 +翘 > qiáo; #7FD8 +翙 > hùi; #7FD9 +翚 > hūi; #7FDA +翛 > xiāo; #7FDB +翜 > sè; #7FDC +翝 > hóng; #7FDD +翞 > jiāng; #7FDE +翟 > dí; #7FDF +翠 > cùi; #7FE0 +翡 > fĕi; #7FE1 +翢 > tāo; #7FE2 +翣 > shà; #7FE3 +翤 > chì; #7FE4 +翥 > zhù; #7FE5 +翦 > jiăn; #7FE6 +翧 > xuān; #7FE7 +翨 > shì; #7FE8 +翩 > piān; #7FE9 +翪 > zōng; #7FEA +翫 > wàn; #7FEB +翬 
> hūi; #7FEC +翭 > hóu; #7FED +翮 > hé; #7FEE +翯 > hè; #7FEF +翰 > hàn; #7FF0 +翱 > áo; #7FF1 +翲 > piāo; #7FF2 +翳 > yì; #7FF3 +翴 > lián; #7FF4 +翵 > qú; #7FF5 +翷 > lín; #7FF7 +翸 > pĕn; #7FF8 +翹 > qiáo; #7FF9 +翺 > áo; #7FFA +翻 > fān; #7FFB +翼 > yì; #7FFC +翽 > hùi; #7FFD +翾 > xuān; #7FFE +翿 > dào; #7FFF +耀 > yào; #8000 +老 > lăo; #8001 +考 > kăo; #8003 +耄 > mào; #8004 +者 > zhĕ; #8005 +耆 > qí; #8006 +耇 > gŏu; #8007 +耈 > gŏu; #8008 +耉 > gŏu; #8009 +耊 > diè; #800A +耋 > diè; #800B +而 > ér; #800C +耍 > shuă; #800D +耎 > ruăn; #800E +耏 > ér; #800F +耐 > nài; #8010 +耑 > zhuān; #8011 +耒 > lĕi; #8012 +耓 > tīng; #8013 +耔 > zĭ; #8014 +耕 > gēng; #8015 +耖 > chào; #8016 +耗 > hào; #8017 +耘 > yún; #8018 +耙 > pá; #8019 +耚 > pī; #801A +耛 > chí; #801B +耜 > sì; #801C +耝 > chú; #801D +耞 > jiā; #801E +耟 > jù; #801F +耠 > hé; #8020 +耡 > chú; #8021 +耢 > lào; #8022 +耣 > lŭn; #8023 +耤 > jí; #8024 +耥 > tăng; #8025 +耦 > ŏu; #8026 +耧 > lóu; #8027 +耨 > nòu; #8028 +耩 > gōu; #8029 +耪 > păng; #802A +耫 > zé; #802B +耬 > lóu; #802C +耭 > jī; #802D +耮 > lào; #802E +耯 > hùo; #802F +耰 > yōu; #8030 +耱 > mò; #8031 +耲 > huái; #8032 +耳 > ĕr; #8033 +耴 > zhé; #8034 +耵 > tīng; #8035 +耶 > yé; #8036 +耷 > dā; #8037 +耸 > sŏng; #8038 +耹 > qín; #8039 +耺 > yún; #803A +耻 > chĭ; #803B +耼 > dān; #803C +耽 > dān; #803D +耾 > hóng; #803E +耿 > gĕng; #803F +聀 > zhí; #8040 +聂 > niè; #8042 +聃 > dān; #8043 +聄 > zhĕn; #8044 +聅 > chè; #8045 +聆 > líng; #8046 +聇 > zhēng; #8047 +聈 > yŏu; #8048 +聉 > wā; #8049 +聊 > liáo; #804A +聋 > lóng; #804B +职 > zhí; #804C +聍 > níng; #804D +聎 > tiāo; #804E +聏 > ér; #804F +聐 > yà; #8050 +聑 > dié; #8051 +聒 > guā; #8052 +联 > lián; #8054 +聕 > hào; #8055 +聖 > shèng; #8056 +聗 > liè; #8057 +聘 > pìn; #8058 +聙 > jīng; #8059 +聚 > jù; #805A +聛 > bì; #805B +聜 > dĭ; #805C +聝 > gúo; #805D +聞 > wén; #805E +聟 > xù; #805F +聠 > píng; #8060 +聡 > cōng; #8061 +聤 > tíng; #8064 +聥 > yŭ; #8065 +聦 > cōng; #8066 +聧 > kúi; #8067 +聩 > kùi; #8069 +聪 > cōng; #806A +聫 > lián; #806B +聬 > wĕng; #806C +聭 > kùi; #806D +聮 > lián; #806E +聯 > lián; 
#806F +聰 > cōng; #8070 +聱 > áo; #8071 +聲 > shēng; #8072 +聳 > sŏng; #8073 +聴 > tīng; #8074 +聵 > kùi; #8075 +聶 > niè; #8076 +職 > zhí; #8077 +聸 > dān; #8078 +聹 > níng; #8079 +聺 > qie; #807A +聻 > jī; #807B +聼 > tīng; #807C +聽 > tīng; #807D +聾 > lóng; #807E +聿 > yù; #807F +肀 > yù; #8080 +肁 > zhào; #8081 +肂 > sì; #8082 +肃 > sù; #8083 +肄 > yì; #8084 +肅 > sù; #8085 +肆 > sì; #8086 +肇 > zhào; #8087 +肈 > zhào; #8088 +肉 > ròu; #8089 +肊 > yì; #808A +肋 > lè; #808B +肌 > jī; #808C +肍 > qíu; #808D +肎 > kĕn; #808E +肏 > cào; #808F +肐 > gē; #8090 +肑 > dì; #8091 +肒 > huán; #8092 +肓 > huāng; #8093 +肔 > yĭ; #8094 +肕 > rèn; #8095 +肖 > xiào; #8096 +肗 > rŭ; #8097 +肘 > zhŏu; #8098 +肙 > yuān; #8099 +肚 > dù; #809A +肛 > gāng; #809B +肜 > róng; #809C +肝 > gān; #809D +肞 > chā; #809E +肟 > wò; #809F +肠 > cháng; #80A0 +股 > gŭ; #80A1 +肢 > zhī; #80A2 +肣 > hán; #80A3 +肤 > fū; #80A4 +肥 > féi; #80A5 +肦 > fén; #80A6 +肧 > pēi; #80A7 +肨 > pàng; #80A8 +肩 > jiān; #80A9 +肪 > fáng; #80AA +肫 > zhūn; #80AB +肬 > yóu; #80AC +肭 > nà; #80AD +肮 > háng; #80AE +肯 > kĕn; #80AF +肰 > rán; #80B0 +肱 > gōng; #80B1 +育 > yù; #80B2 +肳 > wĕn; #80B3 +肴 > yáo; #80B4 +肵 > jìn; #80B5 +肶 > pí; #80B6 +肷 > qiān; #80B7 +肸 > xì; #80B8 +肹 > xī; #80B9 +肺 > fèi; #80BA +肻 > kĕn; #80BB +肼 > jĭng; #80BC +肽 > tài; #80BD +肾 > shèn; #80BE +肿 > zhŏng; #80BF +胀 > zhàng; #80C0 +胁 > xié; #80C1 +胂 > shēn; #80C2 +胃 > wèi; #80C3 +胄 > zhòu; #80C4 +胅 > dié; #80C5 +胆 > dăn; #80C6 +胇 > fèi; #80C7 +胈 > bá; #80C8 +胉 > bó; #80C9 +胊 > qú; #80CA +胋 > tián; #80CB +背 > bèi; #80CC +胍 > guā; #80CD +胎 > tāi; #80CE +胏 > zĭ; #80CF +胐 > kū; #80D0 +胑 > zhī; #80D1 +胒 > nì; #80D2 +胓 > píng; #80D3 +胔 > zì; #80D4 +胕 > fù; #80D5 +胖 > pàng; #80D6 +胗 > zhēn; #80D7 +胘 > xián; #80D8 +胙 > zùo; #80D9 +胚 > pēi; #80DA +胛 > jiă; #80DB +胜 > shèng; #80DC +胝 > zhī; #80DD +胞 > bāo; #80DE +胟 > mŭ; #80DF +胠 > qū; #80E0 +胡 > hú; #80E1 +胢 > kē; #80E2 +胣 > yĭ; #80E3 +胤 > yìn; #80E4 +胥 > xū; #80E5 +胦 > yāng; #80E6 +胧 > lóng; #80E7 +胨 > dòng; #80E8 +胩 > kă; #80E9 +胪 > lú; #80EA +胫 > jìng; #80EB +胬 
> nŭ; #80EC +胭 > yān; #80ED +胮 > páng; #80EE +胯 > kuà; #80EF +胰 > yí; #80F0 +胱 > guāng; #80F1 +胲 > gāi; #80F2 +胳 > gē; #80F3 +胴 > dòng; #80F4 +胵 > zhì; #80F5 +胶 > xiáo; #80F6 +胷 > xīong; #80F7 +胸 > xīong; #80F8 +胹 > ér; #80F9 +胺 > è; #80FA +胻 > xíng; #80FB +胼 > pián; #80FC +能 > néng; #80FD +胾 > zì; #80FE +胿 > gui; #80FF +脀 > chéng; #8100 +脁 > tiào; #8101 +脂 > zhī; #8102 +脃 > cùi; #8103 +脄 > méi; #8104 +脅 > xié; #8105 +脆 > cùi; #8106 +脇 > xié; #8107 +脈 > mò; #8108 +脉 > mài; #8109 +脊 > jí; #810A +脍 > kuài; #810D +脎 > sà; #810E +脏 > zāng; #810F +脐 > qí; #8110 +脑 > năo; #8111 +脒 > mĭ; #8112 +脓 > nóng; #8113 +脔 > luán; #8114 +脕 > wăn; #8115 +脖 > bó; #8116 +脗 > wĕn; #8117 +脘 > guăn; #8118 +脙 > qíu; #8119 +脚 > jiăo; #811A +脛 > jìng; #811B +脜 > róu; #811C +脝 > hēng; #811D +脞 > cŭo; #811E +脟 > liè; #811F +脠 > shān; #8120 +脡 > tĭng; #8121 +脢 > méi; #8122 +脣 > chún; #8123 +脤 > shèn; #8124 +脥 > xié; #8125 +脦 > de; #8126 +脧 > zūi; #8127 +脨 > cù; #8128 +脩 > xīu; #8129 +脪 > xìn; #812A +脫 > tūo; #812B +脬 > pāo; #812C +脭 > chéng; #812D +脮 > nĕi; #812E +脯 > fŭ; #812F +脰 > dòu; #8130 +脱 > tūo; #8131 +脲 > niào; #8132 +脴 > pĭ; #8134 +脵 > gŭ; #8135 +脶 > guā; #8136 +脷 > lì; #8137 +脸 > liăn; #8138 +脹 > zhàng; #8139 +脺 > cùi; #813A +脻 > jié; #813B +脼 > liăng; #813C +脽 > zhōu; #813D +脾 > pí; #813E +脿 > biāo; #813F +腀 > lún; #8140 +腁 > pián; #8141 +腂 > gùo; #8142 +腃 > kùi; #8143 +腄 > chúi; #8144 +腅 > dàn; #8145 +腆 > tiăn; #8146 +腇 > nĕi; #8147 +腈 > jīng; #8148 +腉 > jiē; #8149 +腊 > là; #814A +腋 > yì; #814B +腌 > ān; #814C +腍 > rĕn; #814D +腎 > shèn; #814E +腏 > chùo; #814F +腐 > fŭ; #8150 +腑 > fŭ; #8151 +腒 > jū; #8152 +腓 > féi; #8153 +腔 > qiāng; #8154 +腕 > wàn; #8155 +腖 > dòng; #8156 +腗 > pí; #8157 +腘 > gúo; #8158 +腙 > zōng; #8159 +腚 > dìng; #815A +腛 > wū; #815B +腜 > méi; #815C +腝 > ruăn; #815D +腞 > zhuàn; #815E +腟 > zhì; #815F +腠 > còu; #8160 +腡 > guā; #8161 +腢 > ŏu; #8162 +腣 > dì; #8163 +腤 > ān; #8164 +腥 > xīng; #8165 +腦 > năo; #8166 +腧 > yú; #8167 +腨 > chuăn; #8168 +腩 > năn; #8169 +腪 > yùn; 
#816A +腫 > zhŏng; #816B +腬 > róu; #816C +腭 > è; #816D +腮 > sāi; #816E +腯 > tú; #816F +腰 > yāo; #8170 +腱 > jiàn; #8171 +腲 > wĕi; #8172 +腳 > jiăo; #8173 +腴 > yú; #8174 +腵 > jiā; #8175 +腶 > duàn; #8176 +腷 > bì; #8177 +腸 > cháng; #8178 +腹 > fù; #8179 +腺 > xiàn; #817A +腻 > nì; #817B +腼 > miăn; #817C +腽 > wà; #817D +腾 > téng; #817E +腿 > tŭi; #817F +膀 > băng; #8180 +膁 > qiān; #8181 +膂 > lǚ; #8182 +膃 > wà; #8183 +膄 > sòu; #8184 +膅 > táng; #8185 +膆 > sù; #8186 +膇 > zhùi; #8187 +膈 > gé; #8188 +膉 > yì; #8189 +膊 > bó; #818A +膋 > liáo; #818B +膌 > jí; #818C +膍 > pí; #818D +膎 > xié; #818E +膏 > gāo; #818F +膐 > lǚ; #8190 +膑 > bìn; #8191 +膒 > ou; #8192 +膓 > cháng; #8193 +膔 > lù; #8194 +膕 > gúo; #8195 +膖 > pāng; #8196 +膗 > chuái; #8197 +膘 > piăo; #8198 +膙 > jiăng; #8199 +膚 > fū; #819A +膛 > táng; #819B +膜 > mò; #819C +膝 > xī; #819D +膞 > zhuān; #819E +膟 > lǜ; #819F +膠 > jiāo; #81A0 +膡 > yìng; #81A1 +膢 > lǘ; #81A2 +膣 > zhì; #81A3 +膥 > chūn; #81A5 +膦 > liăn; #81A6 +膧 > tóng; #81A7 +膨 > péng; #81A8 +膩 > nì; #81A9 +膪 > zhà; #81AA +膫 > liáo; #81AB +膬 > cùi; #81AC +膭 > gūi; #81AD +膮 > xiāo; #81AE +膯 > tēng; #81AF +膰 > fán; #81B0 +膱 > zhí; #81B1 +膲 > jiāo; #81B2 +膳 > shàn; #81B3 +膴 > wú; #81B4 +膵 > cùi; #81B5 +膶 > rùn; #81B6 +膷 > xiāng; #81B7 +膸 > sŭi; #81B8 +膹 > fèn; #81B9 +膺 > yīng; #81BA +膻 > tăn; #81BB +膼 > zhuā; #81BC +膽 > dăn; #81BD +膾 > kuài; #81BE +膿 > nóng; #81BF +臀 > tún; #81C0 +臁 > lián; #81C1 +臂 > bì; #81C2 +臃 > yŏng; #81C3 +臄 > jué; #81C4 +臅 > chù; #81C5 +臆 > yì; #81C6 +臇 > juăn; #81C7 +臈 > là; #81C8 +臉 > liăn; #81C9 +臊 > sāo; #81CA +臋 > tún; #81CB +臌 > gŭ; #81CC +臍 > qí; #81CD +臎 > cùi; #81CE +臏 > bìn; #81CF +臐 > xūn; #81D0 +臑 > rú; #81D1 +臒 > hùo; #81D2 +臓 > zàng; #81D3 +臔 > xiàn; #81D4 +臕 > biāo; #81D5 +臖 > xìng; #81D6 +臗 > kuān; #81D7 +臘 > là; #81D8 +臙 > yān; #81D9 +臚 > lú; #81DA +臛 > hùo; #81DB +臜 > zāng; #81DC +臝 > lŭo; #81DD +臞 > qú; #81DE +臟 > zàng; #81DF +臠 > luán; #81E0 +臡 > ní; #81E1 +臢 > zāng; #81E2 +臣 > chén; #81E3 +臤 > qiān; #81E4 +臥 > wò; #81E5 +臦 > guàng; #81E6 +臧 
> záng; #81E7 +臨 > lín; #81E8 +臩 > guàng; #81E9 +自 > zì; #81EA +臫 > jiăo; #81EB +臬 > niè; #81EC +臭 > chòu; #81ED +臮 > jì; #81EE +臯 > gāo; #81EF +臰 > chòu; #81F0 +臱 > mián; #81F1 +臲 > niè; #81F2 +至 > zhì; #81F3 +致 > zhì; #81F4 +臵 > gé; #81F5 +臶 > jiàn; #81F6 +臷 > dié; #81F7 +臸 > zhì; #81F8 +臹 > xīu; #81F9 +臺 > tái; #81FA +臻 > zhēn; #81FB +臼 > jìu; #81FC +臽 > xiàn; #81FD +臾 > yú; #81FE +臿 > chā; #81FF +舀 > yăo; #8200 +舁 > yú; #8201 +舂 > chōng; #8202 +舃 > xì; #8203 +舄 > xì; #8204 +舅 > jìu; #8205 +舆 > yú; #8206 +與 > yŭ; #8207 +興 > xīng; #8208 +舉 > jŭ; #8209 +舊 > jìu; #820A +舋 > xìn; #820B +舌 > shé; #820C +舍 > shè; #820D +舏 > jĭu; #820F +舐 > shì; #8210 +舑 > tān; #8211 +舒 > shū; #8212 +舓 > shì; #8213 +舔 > tiăn; #8214 +舕 > dàn; #8215 +舖 > pù; #8216 +舗 > pù; #8217 +舘 > guăn; #8218 +舙 > huà; #8219 +舚 > tān; #821A +舛 > chuăn; #821B +舜 > shùn; #821C +舝 > xiá; #821D +舞 > wŭ; #821E +舟 > zhōu; #821F +舠 > dāo; #8220 +舡 > gāng; #8221 +舢 > shān; #8222 +舣 > yĭ; #8223 +舥 > pā; #8225 +舦 > tài; #8226 +舧 > fán; #8227 +舨 > băn; #8228 +舩 > chuán; #8229 +航 > háng; #822A +舫 > făng; #822B +般 > bān; #822C +舭 > què; #822D +舯 > zhōng; #822F +舰 > jiàn; #8230 +舱 > cāng; #8231 +舲 > líng; #8232 +舳 > zhú; #8233 +舴 > zé; #8234 +舵 > dùo; #8235 +舶 > bó; #8236 +舷 > xián; #8237 +舸 > gĕ; #8238 +船 > chuán; #8239 +舺 > jiá; #823A +舻 > lŭ; #823B +舼 > hóng; #823C +舽 > páng; #823D +舾 > xī; #823E +艀 > fú; #8240 +艁 > zào; #8241 +艂 > féng; #8242 +艃 > lí; #8243 +艄 > shāo; #8244 +艅 > yú; #8245 +艆 > láng; #8246 +艇 > tĭng; #8247 +艉 > wĕi; #8249 +艊 > bó; #824A +艋 > mĕng; #824B +艌 > niàn; #824C +艍 > jū; #824D +艎 > huáng; #824E +艏 > shŏu; #824F +艐 > zōng; #8250 +艑 > biàn; #8251 +艒 > mào; #8252 +艓 > dié; #8253 +艕 > bàng; #8255 +艖 > chā; #8256 +艗 > yì; #8257 +艘 > sāo; #8258 +艙 > cāng; #8259 +艚 > cáo; #825A +艛 > lóu; #825B +艜 > dài; #825C +艞 > yào; #825E +艟 > tóng; #825F +艡 > dāng; #8261 +艢 > tán; #8262 +艣 > lŭ; #8263 +艤 > yĭ; #8264 +艥 > jiè; #8265 +艦 > jiàn; #8266 +艧 > hùo; #8267 +艨 > méng; #8268 +艩 > qí; #8269 +艪 > lŭ; 
#826A +艫 > lú; #826B +艬 > chán; #826C +艭 > shuāng; #826D +艮 > gèn; #826E +良 > liáng; #826F +艰 > jiān; #8270 +艱 > jiān; #8271 +色 > sè; #8272 +艳 > yàn; #8273 +艴 > fú; #8274 +艵 > píng; #8275 +艶 > yàn; #8276 +艷 > yàn; #8277 +艸 > căo; #8278 +艹 > căo' 'zì' 'tóu; #8279 +艺 > yì; #827A +艻 > lè; #827B +艼 > tīng; #827C +艽 > qíu; #827D +艾 > ài; #827E +艿 > năi; #827F +芀 > tiáo; #8280 +芁 > jiāo; #8281 +节 > jié; #8282 +芃 > péng; #8283 +芄 > wán; #8284 +芅 > yì; #8285 +芆 > chāi; #8286 +芇 > mián; #8287 +芈 > miē; #8288 +芉 > gān; #8289 +芊 > qiān; #828A +芋 > yù; #828B +芌 > yù; #828C +芍 > shùo; #828D +芎 > qīong; #828E +芏 > tŭ; #828F +芐 > xià; #8290 +芑 > qĭ; #8291 +芒 > máng; #8292 +芓 > zĭ; #8293 +芔 > hŭi; #8294 +芕 > sūi; #8295 +芖 > zhì; #8296 +芗 > xiāng; #8297 +芘 > bī; #8298 +芙 > fú; #8299 +芚 > tún; #829A +芛 > wĕi; #829B +芜 > wú; #829C +芝 > zhī; #829D +芞 > qĭ; #829E +芟 > shān; #829F +芠 > wén; #82A0 +芡 > qiàn; #82A1 +芢 > rén; #82A2 +芣 > fŏu; #82A3 +芤 > kōu; #82A4 +芥 > jiè; #82A5 +芦 > lú; #82A6 +芧 > xù; #82A7 +芨 > jí; #82A8 +芩 > qín; #82A9 +芪 > qí; #82AA +芫 > yuán; #82AB +芬 > fēn; #82AC +芭 > bā; #82AD +芮 > rùi; #82AE +芯 > xīn; #82AF +芰 > jì; #82B0 +花 > huā; #82B1 +芲 > huā; #82B2 +芳 > fāng; #82B3 +芴 > wù; #82B4 +芵 > jué; #82B5 +芶 > gōu; #82B6 +芷 > zhĭ; #82B7 +芸 > yún; #82B8 +芹 > qín; #82B9 +芺 > ăo; #82BA +芻 > chú; #82BB +芼 > mào; #82BC +芽 > yá; #82BD +芾 > fèi; #82BE +芿 > rèng; #82BF +苀 > háng; #82C0 +苁 > cōng; #82C1 +苂 > yín; #82C2 +苃 > yŏu; #82C3 +苄 > biàn; #82C4 +苅 > yì; #82C5 +苇 > wĕi; #82C7 +苈 > lì; #82C8 +苉 > pĭ; #82C9 +苊 > è; #82CA +苋 > xiàn; #82CB +苌 > cháng; #82CC +苍 > cāng; #82CD +苎 > méng; #82CE +苏 > sū; #82CF +苐 > yí; #82D0 +苑 > yuàn; #82D1 +苒 > răn; #82D2 +苓 > líng; #82D3 +苔 > tái; #82D4 +苕 > tiáo; #82D5 +苖 > dĭ; #82D6 +苗 > miáo; #82D7 +苘 > qĭong; #82D8 +苙 > lì; #82D9 +苚 > yòng; #82DA +苛 > kē; #82DB +苜 > mù; #82DC +苝 > pèi; #82DD +苞 > bāo; #82DE +苟 > gŏu; #82DF +苠 > mín; #82E0 +苡 > yĭ; #82E1 +苢 > yĭ; #82E2 +苣 > jù; #82E3 +苤 > pĭ; #82E4 +若 > rùo; #82E5 +苦 > kŭ; #82E6 +苧 > zhù; 
#82E7 +苨 > nĭ; #82E8 +苩 > bó; #82E9 +苪 > bĭng; #82EA +苫 > shān; #82EB +苬 > qíu; #82EC +苭 > yăo; #82ED +苮 > xiān; #82EE +苯 > bĕn; #82EF +苰 > hóng; #82F0 +英 > yīng; #82F1 +苲 > zhă; #82F2 +苳 > dōng; #82F3 +苴 > jū; #82F4 +苵 > dié; #82F5 +苶 > nié; #82F6 +苷 > gān; #82F7 +苸 > hū; #82F8 +苹 > píng; #82F9 +苺 > méi; #82FA +苻 > fú; #82FB +苼 > shēng; #82FC +苽 > gū; #82FD +苾 > bì; #82FE +苿 > wèi; #82FF +茀 > fú; #8300 +茁 > zhúo; #8301 +茂 > mào; #8302 +范 > fàn; #8303 +茄 > qié; #8304 +茅 > máo; #8305 +茆 > măo; #8306 +茇 > bá; #8307 +茈 > zĭ; #8308 +茉 > mò; #8309 +茊 > zī; #830A +茋 > dĭ; #830B +茌 > chí; #830C +茍 > jì; #830D +茎 > jīng; #830E +茏 > lóng; #830F +茑 > niăo; #8311 +茓 > xué; #8313 +茔 > yíng; #8314 +茕 > qíong; #8315 +茖 > gé; #8316 +茗 > mĭng; #8317 +茘 > lì; #8318 +茙 > róng; #8319 +茚 > yìn; #831A +茛 > gèn; #831B +茜 > qiàn; #831C +茝 > chăi; #831D +茞 > chén; #831E +茟 > yù; #831F +茠 > xīu; #8320 +茡 > zì; #8321 +茢 > liè; #8322 +茣 > wú; #8323 +茤 > jì; #8324 +茥 > kūi; #8325 +茦 > cè; #8326 +茧 > chóng; #8327 +茨 > cí; #8328 +茩 > gŏu; #8329 +茪 > guāng; #832A +茫 > máng; #832B +茬 > chí; #832C +茭 > jiāo; #832D +茮 > jiāo; #832E +茯 > fú; #832F +茰 > yú; #8330 +茱 > zhū; #8331 +茲 > zī; #8332 +茳 > jiāng; #8333 +茴 > húi; #8334 +茵 > yīn; #8335 +茶 > chá; #8336 +茷 > fá; #8337 +茸 > róng; #8338 +茹 > rú; #8339 +茺 > chōng; #833A +茻 > măng; #833B +茼 > tóng; #833C +茽 > zhòng; #833D +茿 > zhú; #833F +荀 > xún; #8340 +荁 > huán; #8341 +荂 > kuā; #8342 +荃 > quán; #8343 +荄 > gāi; #8344 +荅 > dā; #8345 +荆 > jīng; #8346 +荇 > xìng; #8347 +荈 > quàn; #8348 +草 > căo; #8349 +荊 > jīng; #834A +荋 > ér; #834B +荌 > àn; #834C +荍 > shōu; #834D +荎 > chí; #834E +荏 > rĕn; #834F +荐 > jiàn; #8350 +荑 > tí; #8351 +荒 > huāng; #8352 +荓 > píng; #8353 +荔 > lì; #8354 +荕 > jīn; #8355 +荖 > lăo; #8356 +荗 > shù; #8357 +荘 > zhuāng; #8358 +荙 > dá; #8359 +荚 > jiá; #835A +荛 > ráo; #835B +荜 > bì; #835C +荝 > zé; #835D +荞 > qiáo; #835E +荟 > hùi; #835F +荠 > qí; #8360 +荡 > dàng; #8361 +荣 > róng; #8363 +荤 > hūn; #8364 +荥 > yíng; #8365 +荦 > lùo; #8366 +荧 > 
yíng; #8367 +荨 > xún; #8368 +荩 > jìn; #8369 +荪 > sūn; #836A +荫 > yìn; #836B +荬 > măi; #836C +荭 > hóng; #836D +荮 > zhòu; #836E +药 > yào; #836F +荰 > dù; #8370 +荱 > wĕi; #8371 +荲 > chù; #8372 +荳 > dòu; #8373 +荴 > fū; #8374 +荵 > rĕn; #8375 +荶 > yín; #8376 +荷 > hé; #8377 +荸 > bí; #8378 +荹 > bù; #8379 +荺 > yún; #837A +荻 > dí; #837B +荼 > tú; #837C +荽 > sūi; #837D +荾 > sūi; #837E +荿 > chéng; #837F +莀 > chén; #8380 +莁 > wú; #8381 +莂 > bié; #8382 +莃 > xī; #8383 +莄 > gĕng; #8384 +莅 > lì; #8385 +莆 > fŭ; #8386 +莇 > zhù; #8387 +莈 > mò; #8388 +莉 > lì; #8389 +莊 > zhuāng; #838A +莋 > jí; #838B +莌 > dúo; #838C +莍 > qíu; #838D +莎 > shā; #838E +莏 > sūo; #838F +莐 > chén; #8390 +莑 > fēng; #8391 +莒 > jŭ; #8392 +莓 > méi; #8393 +莔 > méng; #8394 +莕 > xìng; #8395 +莖 > jīng; #8396 +莗 > chē; #8397 +莘 > xīn; #8398 +莙 > jūn; #8399 +莚 > yán; #839A +莛 > tíng; #839B +莜 > diào; #839C +莝 > cùo; #839D +莞 > wăn; #839E +莟 > hàn; #839F +莠 > yŏu; #83A0 +莡 > cùo; #83A1 +莢 > jiá; #83A2 +莣 > wáng; #83A3 +莤 > yóu; #83A4 +莥 > nĭu; #83A5 +莦 > shāo; #83A6 +莧 > xiàn; #83A7 +莨 > láng; #83A8 +莩 > fú; #83A9 +莪 > é; #83AA +莫 > mò; #83AB +莬 > wèn; #83AC +莭 > jié; #83AD +莮 > nán; #83AE +莯 > mù; #83AF +莰 > kăn; #83B0 +莱 > lái; #83B1 +莲 > lián; #83B2 +莳 > shí; #83B3 +莴 > wō; #83B4 +莶 > liăn; #83B6 +获 > hùo; #83B7 +莸 > yóu; #83B8 +莹 > yíng; #83B9 +莺 > yīng; #83BA +莼 > chún; #83BC +莽 > măng; #83BD +莾 > măng; #83BE +莿 > cì; #83BF +菀 > wăn; #83C0 +菁 > jīng; #83C1 +菂 > dī; #83C2 +菃 > qú; #83C3 +菄 > dōng; #83C4 +菅 > jiān; #83C5 +菆 > zōu; #83C6 +菇 > gū; #83C7 +菈 > lā; #83C8 +菉 > lù; #83C9 +菊 > jú; #83CA +菋 > wèi; #83CB +菌 > jùn; #83CC +菍 > niè; #83CD +菎 > kūn; #83CE +菏 > hé; #83CF +菐 > pú; #83D0 +菑 > zī; #83D1 +菒 > găo; #83D2 +菓 > gŭo; #83D3 +菔 > fú; #83D4 +菕 > lún; #83D5 +菖 > chāng; #83D6 +菗 > chóu; #83D7 +菘 > sōng; #83D8 +菙 > chúi; #83D9 +菚 > zhàn; #83DA +菛 > mén; #83DB +菜 > cài; #83DC +菝 > bá; #83DD +菞 > lí; #83DE +菟 > tù; #83DF +菠 > bō; #83E0 +菡 > hàn; #83E1 +菢 > bào; #83E2 +菣 > qìn; #83E3 +菤 > juăn; #83E4 +菥 > xī; #83E5 +菦 
> qín; #83E6 +菧 > dĭ; #83E7 +菨 > jiē; #83E8 +菩 > pú; #83E9 +菪 > dàng; #83EA +菫 > jĭn; #83EB +菬 > zhăo; #83EC +菭 > tái; #83ED +菮 > gēng; #83EE +華 > huá; #83EF +菰 > gū; #83F0 +菱 > líng; #83F1 +菲 > fēi; #83F2 +菳 > jīn; #83F3 +菴 > ān; #83F4 +菵 > wăng; #83F5 +菶 > bĕng; #83F6 +菷 > zhŏu; #83F7 +菸 > yān; #83F8 +菹 > jū; #83F9 +菺 > jiān; #83FA +菻 > lĭn; #83FB +菼 > tăn; #83FC +菽 > shú; #83FD +菾 > tián; #83FE +菿 > dào; #83FF +萀 > hŭ; #8400 +萁 > qí; #8401 +萂 > hé; #8402 +萃 > cùi; #8403 +萄 > táo; #8404 +萅 > chūn; #8405 +萆 > bēi; #8406 +萇 > cháng; #8407 +萈 > huán; #8408 +萉 > féi; #8409 +萊 > lái; #840A +萋 > qī; #840B +萌 > méng; #840C +萍 > píng; #840D +萎 > wēi; #840E +萏 > dàn; #840F +萐 > shà; #8410 +萑 > huán; #8411 +萒 > yăn; #8412 +萓 > yí; #8413 +萔 > tiáo; #8414 +萕 > qí; #8415 +萖 > wăn; #8416 +萗 > cè; #8417 +萘 > nài; #8418 +萚 > tùo; #841A +萛 > jīu; #841B +萜 > tiē; #841C +萝 > lúo; #841D +萠 > méng; #8420 +萤 > yíng; #8424 +营 > yíng; #8425 +萦 > yíng; #8426 +萧 > xiāo; #8427 +萨 > sà; #8428 +萩 > qīu; #8429 +萪 > kē; #842A +萫 > xiàng; #842B +萬 > wàn; #842C +萭 > yŭ; #842D +萮 > yù; #842E +萯 > fù; #842F +萰 > liàn; #8430 +萱 > xuān; #8431 +萲 > yuán; #8432 +萳 > nán; #8433 +萴 > zé; #8434 +萵 > wō; #8435 +萶 > chŭn; #8436 +萷 > xiāo; #8437 +萸 > yú; #8438 +萹 > piān; #8439 +萺 > mào; #843A +萻 > ān; #843B +萼 > è; #843C +落 > lùo; #843D +萾 > yíng; #843E +萿 > húo; #843F +葀 > guā; #8440 +葁 > jiāng; #8441 +葂 > miăn; #8442 +葃 > zúo; #8443 +葄 > zùo; #8444 +葅 > jū; #8445 +葆 > băo; #8446 +葇 > róu; #8447 +葈 > xĭ; #8448 +葉 > xié; #8449 +葊 > ān; #844A +葋 > qú; #844B +葌 > jiān; #844C +葍 > fú; #844D +葎 > lǜ; #844E +葏 > jīng; #844F +葐 > pén; #8450 +葑 > fēng; #8451 +葒 > hóng; #8452 +葓 > hóng; #8453 +葔 > hóu; #8454 +葕 > yán; #8455 +葖 > tú; #8456 +著 > zhù; #8457 +葘 > zī; #8458 +葙 > xiāng; #8459 +葚 > shèn; #845A +葛 > gĕ; #845B +葜 > jié; #845C +葝 > jìng; #845D +葞 > mĭ; #845E +葟 > huáng; #845F +葠 > shēn; #8460 +葡 > pú; #8461 +葢 > gài; #8462 +董 > dŏng; #8463 +葤 > zhòu; #8464 +葥 > qián; #8465 +葦 > wĕi; #8466 +葧 > bó; #8467 +葨 > 
wēi; #8468 +葩 > pā; #8469 +葪 > jì; #846A +葫 > hú; #846B +葬 > zàng; #846C +葭 > jiā; #846D +葮 > duàn; #846E +葯 > yào; #846F +葰 > jùn; #8470 +葱 > cōng; #8471 +葲 > quán; #8472 +葳 > wēi; #8473 +葴 > xián; #8474 +葵 > kúi; #8475 +葶 > tíng; #8476 +葷 > hūn; #8477 +葸 > xĭ; #8478 +葹 > shī; #8479 +葺 > qì; #847A +葻 > lán; #847B +葼 > zōng; #847C +葽 > yāo; #847D +葾 > yuān; #847E +葿 > méi; #847F +蒀 > yūn; #8480 +蒁 > shù; #8481 +蒂 > dì; #8482 +蒃 > zhuàn; #8483 +蒄 > guān; #8484 +蒆 > xuē; #8486 +蒇 > chăn; #8487 +蒈 > kăi; #8488 +蒉 > kùi; #8489 +蒋 > jiăng; #848B +蒌 > lóu; #848C +蒍 > wéi; #848D +蒎 > pài; #848E +蒐 > sōu; #8490 +蒑 > yīn; #8491 +蒒 > shī; #8492 +蒓 > chún; #8493 +蒔 > shí; #8494 +蒕 > yūn; #8495 +蒖 > zhēn; #8496 +蒗 > làng; #8497 +蒘 > nú; #8498 +蒙 > méng; #8499 +蒚 > hé; #849A +蒛 > quē; #849B +蒜 > suàn; #849C +蒝 > yuán; #849D +蒞 > lì; #849E +蒟 > jŭ; #849F +蒠 > xí; #84A0 +蒡 > páng; #84A1 +蒢 > chú; #84A2 +蒣 > xú; #84A3 +蒤 > tú; #84A4 +蒥 > líu; #84A5 +蒦 > wò; #84A6 +蒧 > zhēn; #84A7 +蒨 > qiàn; #84A8 +蒩 > zū; #84A9 +蒪 > pò; #84AA +蒫 > cūo; #84AB +蒬 > yuān; #84AC +蒭 > chú; #84AD +蒮 > yù; #84AE +蒯 > kuăi; #84AF +蒰 > pán; #84B0 +蒱 > pú; #84B1 +蒲 > pú; #84B2 +蒳 > nà; #84B3 +蒴 > shùo; #84B4 +蒵 > xī; #84B5 +蒶 > fén; #84B6 +蒷 > yún; #84B7 +蒸 > zhēng; #84B8 +蒹 > jiān; #84B9 +蒺 > jí; #84BA +蒻 > rùo; #84BB +蒼 > cāng; #84BC +蒽 > ēn; #84BD +蒾 > mí; #84BE +蒿 > hāo; #84BF +蓀 > sūn; #84C0 +蓁 > zhēn; #84C1 +蓂 > míng; #84C2 +蓃 > sou; #84C3 +蓄 > xù; #84C4 +蓅 > líu; #84C5 +蓆 > xí; #84C6 +蓇 > gŭ; #84C7 +蓈 > láng; #84C8 +蓉 > róng; #84C9 +蓊 > wĕng; #84CA +蓋 > gài; #84CB +蓌 > cùo; #84CC +蓍 > shī; #84CD +蓎 > táng; #84CE +蓏 > lŭo; #84CF +蓐 > rù; #84D0 +蓑 > sūo; #84D1 +蓒 > xiān; #84D2 +蓓 > bèi; #84D3 +蓔 > yăo; #84D4 +蓕 > gùi; #84D5 +蓖 > bī; #84D6 +蓗 > zŏng; #84D7 +蓘 > gŭn; #84D8 +蓚 > xīu; #84DA +蓛 > cè; #84DB +蓝 > lán; #84DD +蓟 > jì; #84DF +蓠 > lí; #84E0 +蓡 > cān; #84E1 +蓢 > láng; #84E2 +蓣 > yù; #84E3 +蓥 > yìng; #84E5 +蓦 > mò; #84E6 +蓧 > diào; #84E7 +蓨 > tiāo; #84E8 +蓩 > mào; #84E9 +蓪 > tōng; #84EA +蓫 > zhú; 
#84EB +蓬 > péng; #84EC +蓭 > ān; #84ED +蓮 > lián; #84EE +蓯 > cōng; #84EF +蓰 > xĭ; #84F0 +蓱 > píng; #84F1 +蓲 > qīu; #84F2 +蓳 > jìn; #84F3 +蓴 > chún; #84F4 +蓵 > jié; #84F5 +蓶 > wĕi; #84F6 +蓷 > tūi; #84F7 +蓸 > cáo; #84F8 +蓹 > yŭ; #84F9 +蓺 > yì; #84FA +蓻 > jí; #84FB +蓼 > liăo; #84FC +蓽 > bì; #84FD +蓾 > lŭ; #84FE +蓿 > sù; #84FF +蔀 > bù; #8500 +蔁 > zhāng; #8501 +蔂 > lúo; #8502 +蔃 > jiàng; #8503 +蔄 > màn; #8504 +蔅 > yán; #8505 +蔆 > líng; #8506 +蔇 > jì; #8507 +蔈 > piăo; #8508 +蔉 > gŭn; #8509 +蔊 > hăn; #850A +蔋 > dí; #850B +蔌 > sù; #850C +蔍 > lù; #850D +蔎 > shè; #850E +蔏 > shāng; #850F +蔐 > dí; #8510 +蔑 > miè; #8511 +蔒 > xūn; #8512 +蔓 > màn; #8513 +蔔 > bó; #8514 +蔕 > dì; #8515 +蔖 > cúo; #8516 +蔗 > zhè; #8517 +蔘 > sēn; #8518 +蔙 > xuàn; #8519 +蔚 > wèi; #851A +蔛 > hú; #851B +蔜 > áo; #851C +蔝 > mĭ; #851D +蔞 > lóu; #851E +蔟 > cù; #851F +蔠 > zhōng; #8520 +蔡 > cài; #8521 +蔢 > pó; #8522 +蔣 > jiăng; #8523 +蔤 > mì; #8524 +蔥 > cōng; #8525 +蔦 > niăo; #8526 +蔧 > hùi; #8527 +蔨 > jùn; #8528 +蔩 > yín; #8529 +蔪 > jiàn; #852A +蔫 > yān; #852B +蔬 > shū; #852C +蔭 > yìn; #852D +蔮 > kùi; #852E +蔯 > chén; #852F +蔰 > hù; #8530 +蔱 > shā; #8531 +蔲 > kòu; #8532 +蔳 > qiàn; #8533 +蔴 > má; #8534 +蔵 > zāng; #8535 +蔷 > qiáng; #8537 +蔸 > dōu; #8538 +蔹 > liàn; #8539 +蔺 > lìn; #853A +蔻 > kòu; #853B +蔼 > ăi; #853C +蔽 > bì; #853D +蔾 > lí; #853E +蔿 > wéi; #853F +蕀 > jí; #8540 +蕁 > xún; #8541 +蕂 > shèng; #8542 +蕃 > fán; #8543 +蕄 > méng; #8544 +蕅 > ŏu; #8545 +蕆 > chăn; #8546 +蕇 > diăn; #8547 +蕈 > xùn; #8548 +蕉 > jiāo; #8549 +蕊 > rŭi; #854A +蕋 > rŭi; #854B +蕌 > lĕi; #854C +蕍 > yú; #854D +蕎 > qiáo; #854E +蕏 > chú; #854F +蕐 > huá; #8550 +蕑 > jiān; #8551 +蕒 > măi; #8552 +蕓 > yún; #8553 +蕔 > bāo; #8554 +蕕 > yóu; #8555 +蕖 > qú; #8556 +蕗 > lù; #8557 +蕘 > ráo; #8558 +蕙 > hùi; #8559 +蕚 > è; #855A +蕛 > téng; #855B +蕜 > fĕi; #855C +蕝 > jué; #855D +蕞 > zùi; #855E +蕟 > fà; #855F +蕠 > rú; #8560 +蕡 > fén; #8561 +蕢 > kùi; #8562 +蕣 > shùn; #8563 +蕤 > rúi; #8564 +蕥 > yă; #8565 +蕦 > xū; #8566 +蕧 > fù; #8567 +蕨 > jué; #8568 +蕩 > dàng; 
#8569 +蕪 > wú; #856A +蕫 > tóng; #856B +蕬 > sī; #856C +蕭 > xiāo; #856D +蕮 > xì; #856E +蕯 > lóng; #856F +蕰 > yùn; #8570 +蕲 > qí; #8572 +蕳 > jiān; #8573 +蕴 > yùn; #8574 +蕵 > sūn; #8575 +蕶 > líng; #8576 +蕷 > yù; #8577 +蕸 > xiá; #8578 +蕹 > yōng; #8579 +蕺 > jí; #857A +蕻 > hòng; #857B +蕼 > sì; #857C +蕽 > nóng; #857D +蕾 > lĕi; #857E +蕿 > xuān; #857F +薀 > yùn; #8580 +薁 > yù; #8581 +薂 > xí; #8582 +薃 > hào; #8583 +薄 > bó; #8584 +薅 > hāo; #8585 +薆 > ài; #8586 +薇 > wéi; #8587 +薈 > hùi; #8588 +薉 > wèi; #8589 +薊 > jì; #858A +薋 > cī; #858B +薌 > xiāng; #858C +薍 > luàn; #858D +薎 > miè; #858E +薏 > yì; #858F +薐 > léng; #8590 +薑 > jiāng; #8591 +薒 > càn; #8592 +薓 > shēn; #8593 +薔 > qiáng; #8594 +薕 > lián; #8595 +薖 > kē; #8596 +薗 > yuán; #8597 +薘 > dá; #8598 +薙 > tì; #8599 +薚 > táng; #859A +薛 > xiē; #859B +薜 > bì; #859C +薝 > zhán; #859D +薞 > sūn; #859E +薟 > liăn; #859F +薠 > fán; #85A0 +薡 > dĭng; #85A1 +薢 > jiē; #85A2 +薣 > gŭ; #85A3 +薤 > xiè; #85A4 +薥 > shŭ; #85A5 +薦 > jiàn; #85A6 +薧 > kăo; #85A7 +薨 > hōng; #85A8 +薩 > sà; #85A9 +薪 > xīn; #85AA +薫 > xūn; #85AB +薬 > yào; #85AC +薮 > sŏu; #85AE +薯 > shŭ; #85AF +薰 > xūn; #85B0 +薱 > dùi; #85B1 +薲 > pín; #85B2 +薳 > wĕi; #85B3 +薴 > néng; #85B4 +薵 > chóu; #85B5 +薶 > mái; #85B6 +薷 > rú; #85B7 +薸 > piāo; #85B8 +薹 > tái; #85B9 +薺 > qí; #85BA +薻 > zăo; #85BB +薼 > chén; #85BC +薽 > zhēn; #85BD +薾 > ĕr; #85BE +薿 > nĭ; #85BF +藀 > yíng; #85C0 +藁 > găo; #85C1 +藂 > còng; #85C2 +藃 > xiāo; #85C3 +藄 > qí; #85C4 +藅 > fá; #85C5 +藆 > jiăn; #85C6 +藇 > xù; #85C7 +藈 > kūi; #85C8 +藉 > jiè; #85C9 +藊 > biăn; #85CA +藋 > diào; #85CB +藌 > mì; #85CC +藍 > lán; #85CD +藎 > jìn; #85CE +藏 > cáng; #85CF +藐 > miăo; #85D0 +藑 > qíong; #85D1 +藒 > qiè; #85D2 +藓 > xiăn; #85D3 +藕 > ŏu; #85D5 +藖 > xián; #85D6 +藗 > sù; #85D7 +藘 > lǘ; #85D8 +藙 > yì; #85D9 +藚 > xù; #85DA +藛 > xiĕ; #85DB +藜 > lí; #85DC +藝 > yì; #85DD +藞 > lă; #85DE +藟 > lĕi; #85DF +藠 > xiào; #85E0 +藡 > dí; #85E1 +藢 > zhĭ; #85E2 +藣 > bēi; #85E3 +藤 > téng; #85E4 +藥 > yào; #85E5 +藦 > mò; #85E6 +藧 > huăn; #85E7 +藨 > piăo; #85E8 
+藩 > fán; #85E9 +藪 > sŏu; #85EA +藫 > tán; #85EB +藬 > tūi; #85EC +藭 > qíong; #85ED +藮 > qiáo; #85EE +藯 > wèi; #85EF +藰 > líu; #85F0 +藱 > hùi; #85F1 +藳 > găo; #85F3 +藴 > yùn; #85F4 +藶 > lì; #85F6 +藷 > shŭ; #85F7 +藸 > chú; #85F8 +藹 > ăi; #85F9 +藺 > lìn; #85FA +藻 > zăo; #85FB +藼 > xuān; #85FC +藽 > chèn; #85FD +藾 > lài; #85FE +藿 > hùo; #85FF +蘀 > tùo; #8600 +蘁 > wù; #8601 +蘂 > rŭi; #8602 +蘃 > rŭi; #8603 +蘄 > qí; #8604 +蘅 > héng; #8605 +蘆 > lú; #8606 +蘇 > sū; #8607 +蘈 > túi; #8608 +蘉 > máng; #8609 +蘊 > yùn; #860A +蘋 > pín; #860B +蘌 > yŭ; #860C +蘍 > xūn; #860D +蘎 > jì; #860E +蘏 > jīong; #860F +蘐 > xiān; #8610 +蘑 > mó; #8611 +蘓 > sū; #8613 +蘔 > jīong; #8614 +蘖 > niè; #8616 +蘗 > bò; #8617 +蘘 > ráng; #8618 +蘙 > yì; #8619 +蘚 > xiăn; #861A +蘛 > yú; #861B +蘜 > jú; #861C +蘝 > liàn; #861D +蘞 > liàn; #861E +蘟 > yĭn; #861F +蘠 > qiáng; #8620 +蘡 > yīng; #8621 +蘢 > lóng; #8622 +蘣 > tòng; #8623 +蘤 > wĕi; #8624 +蘥 > yuè; #8625 +蘦 > líng; #8626 +蘧 > qú; #8627 +蘨 > yáo; #8628 +蘩 > fán; #8629 +蘪 > mí; #862A +蘫 > lán; #862B +蘬 > kūi; #862C +蘭 > lán; #862D +蘮 > jì; #862E +蘯 > dàng; #862F +蘱 > lèi; #8631 +蘲 > léi; #8632 +蘳 > huă; #8633 +蘴 > fēng; #8634 +蘵 > zhí; #8635 +蘶 > wèi; #8636 +蘷 > kúi; #8637 +蘸 > zhàn; #8638 +蘹 > huài; #8639 +蘺 > lí; #863A +蘻 > jì; #863B +蘼 > mí; #863C +蘽 > lĕi; #863D +蘾 > huài; #863E +蘿 > lúo; #863F +虀 > jī; #8640 +虁 > kúi; #8641 +虂 > lù; #8642 +虃 > jiān; #8643 +虆 > léi; #8646 +虇 > quăn; #8647 +虈 > xiāo; #8648 +虉 > yì; #8649 +虊 > luán; #864A +虋 > mén; #864B +虌 > biē; #864C +虍 > hū; #864D +虎 > hŭ; #864E +虏 > lŭ; #864F +虐 > nǜe; #8650 +虑 > lǜ; #8651 +虒 > sī; #8652 +虓 > xiāo; #8653 +虔 > qián; #8654 +處 > chù; #8655 +虖 > hū; #8656 +虗 > xū; #8657 +虘 > cúo; #8658 +虙 > fú; #8659 +虚 > xū; #865A +虛 > xū; #865B +虜 > lŭ; #865C +虝 > hŭ; #865D +虞 > yú; #865E +號 > hào; #865F +虠 > jiăo; #8660 +虡 > jù; #8661 +虢 > gúo; #8662 +虣 > bào; #8663 +虤 > yán; #8664 +虥 > zhàn; #8665 +虦 > zhàn; #8666 +虧 > kūi; #8667 +虨 > bān; #8668 +虩 > xì; #8669 +虪 > shú; #866A +虫 > chóng; #866B +虬 > qíu; #866C 
+虭 > diāo; #866D +虮 > jī; #866E +虯 > qíu; #866F +虰 > chéng; #8670 +虱 > shī; #8671 +虳 > dì; #8673 +虴 > zhé; #8674 +虵 > shé; #8675 +虶 > yū; #8676 +虷 > gān; #8677 +虸 > zĭ; #8678 +虹 > hóng; #8679 +虺 > hŭi; #867A +虻 > méng; #867B +虼 > gè; #867C +虽 > sūi; #867D +虾 > xiā; #867E +虿 > chài; #867F +蚀 > shí; #8680 +蚁 > yĭ; #8681 +蚂 > mă; #8682 +蚃 > xiàng; #8683 +蚄 > fāng; #8684 +蚅 > è; #8685 +蚆 > pā; #8686 +蚇 > chĭ; #8687 +蚈 > qiān; #8688 +蚉 > wén; #8689 +蚊 > wén; #868A +蚋 > rùi; #868B +蚌 > bàng; #868C +蚍 > bĭ; #868D +蚎 > yuè; #868E +蚏 > yuè; #868F +蚐 > jūn; #8690 +蚑 > qí; #8691 +蚒 > rán; #8692 +蚓 > yĭn; #8693 +蚔 > qí; #8694 +蚕 > tiăn; #8695 +蚖 > yuán; #8696 +蚗 > jué; #8697 +蚘 > húi; #8698 +蚙 > qín; #8699 +蚚 > qí; #869A +蚛 > zhòng; #869B +蚜 > yá; #869C +蚝 > cì; #869D +蚞 > mù; #869E +蚟 > wáng; #869F +蚠 > fén; #86A0 +蚡 > fén; #86A1 +蚢 > háng; #86A2 +蚣 > gōng; #86A3 +蚤 > zăo; #86A4 +蚥 > fŭ; #86A5 +蚦 > rán; #86A6 +蚧 > jiè; #86A7 +蚨 > fú; #86A8 +蚩 > chī; #86A9 +蚪 > dŏu; #86AA +蚫 > piáo; #86AB +蚬 > xiàn; #86AC +蚭 > ní; #86AD +蚮 > tè; #86AE +蚯 > qīu; #86AF +蚰 > yóu; #86B0 +蚱 > zhà; #86B1 +蚲 > píng; #86B2 +蚳 > chí; #86B3 +蚴 > yŏu; #86B4 +蚵 > hé; #86B5 +蚶 > hān; #86B6 +蚷 > jù; #86B7 +蚸 > lì; #86B8 +蚹 > fù; #86B9 +蚺 > rán; #86BA +蚻 > zhá; #86BB +蚼 > gŏu; #86BC +蚽 > pí; #86BD +蚾 > bŏ; #86BE +蚿 > xián; #86BF +蛀 > zhù; #86C0 +蛁 > diāo; #86C1 +蛂 > biĕ; #86C2 +蛃 > bĭng; #86C3 +蛄 > gū; #86C4 +蛅 > rán; #86C5 +蛆 > qū; #86C6 +蛇 > shé; #86C7 +蛈 > tiè; #86C8 +蛉 > líng; #86C9 +蛊 > gŭ; #86CA +蛋 > dàn; #86CB +蛌 > gŭ; #86CC +蛍 > yíng; #86CD +蛎 > lì; #86CE +蛏 > chēng; #86CF +蛐 > qū; #86D0 +蛑 > móu; #86D1 +蛒 > gé; #86D2 +蛓 > cì; #86D3 +蛔 > húi; #86D4 +蛕 > húi; #86D5 +蛖 > máng; #86D6 +蛗 > fù; #86D7 +蛘 > yáng; #86D8 +蛙 > wā; #86D9 +蛚 > liè; #86DA +蛛 > zhū; #86DB +蛜 > yī; #86DC +蛝 > xián; #86DD +蛞 > kùo; #86DE +蛟 > jiāo; #86DF +蛠 > lì; #86E0 +蛡 > yì; #86E1 +蛢 > píng; #86E2 +蛣 > jī; #86E3 +蛤 > há; #86E4 +蛥 > shé; #86E5 +蛦 > yí; #86E6 +蛧 > wăng; #86E7 +蛨 > mò; #86E8 +蛩 > qíong; #86E9 +蛪 > qiè; #86EA +蛫 > 
gŭi; #86EB +蛬 > gŏng; #86EC +蛭 > zhì; #86ED +蛮 > mán; #86EE +蛰 > zhí; #86F0 +蛱 > jiá; #86F1 +蛲 > ráo; #86F2 +蛳 > sī; #86F3 +蛴 > qí; #86F4 +蛵 > xīng; #86F5 +蛶 > liè; #86F6 +蛷 > qíu; #86F7 +蛸 > shāo; #86F8 +蛹 > yŏng; #86F9 +蛺 > jiá; #86FA +蛻 > shùi; #86FB +蛼 > chē; #86FC +蛽 > bài; #86FD +蛾 > é; #86FE +蛿 > hàn; #86FF +蜀 > shŭ; #8700 +蜁 > xuán; #8701 +蜂 > fēng; #8702 +蜃 > shèn; #8703 +蜄 > zhèn; #8704 +蜅 > fŭ; #8705 +蜆 > xiàn; #8706 +蜇 > zhé; #8707 +蜈 > wú; #8708 +蜉 > fú; #8709 +蜊 > lí; #870A +蜋 > láng; #870B +蜌 > bì; #870C +蜍 > chú; #870D +蜎 > yuān; #870E +蜏 > yŏu; #870F +蜐 > jié; #8710 +蜑 > dàn; #8711 +蜒 > yán; #8712 +蜓 > tíng; #8713 +蜔 > diàn; #8714 +蜕 > shùi; #8715 +蜖 > húi; #8716 +蜗 > guā; #8717 +蜘 > zhī; #8718 +蜙 > sōng; #8719 +蜚 > fēi; #871A +蜛 > jū; #871B +蜜 > mì; #871C +蜝 > qí; #871D +蜞 > qí; #871E +蜟 > yù; #871F +蜠 > jŭn; #8720 +蜡 > zhà; #8721 +蜢 > mĕng; #8722 +蜣 > qiāng; #8723 +蜤 > sī; #8724 +蜥 > xī; #8725 +蜦 > lún; #8726 +蜧 > lì; #8727 +蜨 > dié; #8728 +蜩 > tiáo; #8729 +蜪 > tāo; #872A +蜫 > kūn; #872B +蜬 > gān; #872C +蜭 > hàn; #872D +蜮 > yù; #872E +蜯 > bàng; #872F +蜰 > féi; #8730 +蜱 > pí; #8731 +蜲 > wĕi; #8732 +蜳 > dūn; #8733 +蜴 > yì; #8734 +蜵 > yuān; #8735 +蜶 > sù; #8736 +蜷 > quán; #8737 +蜸 > qiăn; #8738 +蜹 > rùi; #8739 +蜺 > ní; #873A +蜻 > qīng; #873B +蜼 > wèi; #873C +蜽 > liăng; #873D +蜾 > gŭo; #873E +蜿 > wān; #873F +蝀 > dōng; #8740 +蝁 > è; #8741 +蝂 > băn; #8742 +蝃 > dì; #8743 +蝄 > wăng; #8744 +蝅 > cán; #8745 +蝆 > yăng; #8746 +蝇 > yíng; #8747 +蝈 > gūo; #8748 +蝉 > chán; #8749 +蝋 > là; #874B +蝌 > kē; #874C +蝍 > jí; #874D +蝎 > hé; #874E +蝏 > tíng; #874F +蝐 > mài; #8750 +蝑 > xū; #8751 +蝒 > mián; #8752 +蝓 > yú; #8753 +蝔 > jiē; #8754 +蝕 > shí; #8755 +蝖 > xuān; #8756 +蝗 > huáng; #8757 +蝘 > yăn; #8758 +蝙 > biān; #8759 +蝚 > róu; #875A +蝛 > wēi; #875B +蝜 > fù; #875C +蝝 > yuán; #875D +蝞 > mèi; #875E +蝟 > wèi; #875F +蝠 > fú; #8760 +蝡 > ruăn; #8761 +蝢 > xié; #8762 +蝣 > yóu; #8763 +蝤 > qíu; #8764 +蝥 > máo; #8765 +蝦 > xiā; #8766 +蝧 > yīng; #8767 +蝨 > shī; #8768 +蝩 > chóng; 
#8769 +蝪 > tāng; #876A +蝫 > zhū; #876B +蝬 > zōng; #876C +蝭 > tí; #876D +蝮 > fù; #876E +蝯 > yuán; #876F +蝰 > hŭi; #8770 +蝱 > méng; #8771 +蝲 > là; #8772 +蝳 > dú; #8773 +蝴 > hú; #8774 +蝵 > qīu; #8775 +蝶 > dié; #8776 +蝷 > lì; #8777 +蝸 > guā; #8778 +蝹 > yūn; #8779 +蝺 > jŭ; #877A +蝻 > năn; #877B +蝼 > lóu; #877C +蝽 > qŭn; #877D +蝾 > róng; #877E +蝿 > yíng; #877F +螀 > jiāng; #8780 +螂 > láng; #8782 +螃 > páng; #8783 +螄 > sī; #8784 +螅 > xī; #8785 +螆 > cì; #8786 +螇 > xī; #8787 +螈 > yuán; #8788 +螉 > wēng; #8789 +螊 > lián; #878A +螋 > sōu; #878B +螌 > bān; #878C +融 > róng; #878D +螎 > róng; #878E +螏 > jí; #878F +螐 > wū; #8790 +螑 > qìu; #8791 +螒 > hàn; #8792 +螓 > qín; #8793 +螔 > yí; #8794 +螕 > bī; #8795 +螖 > huá; #8796 +螗 > táng; #8797 +螘 > yĭ; #8798 +螙 > dù; #8799 +螚 > nài; #879A +螛 > hé; #879B +螜 > hú; #879C +螝 > hùi; #879D +螞 > mă; #879E +螟 > míng; #879F +螠 > yì; #87A0 +螡 > wén; #87A1 +螢 > yíng; #87A2 +螣 > téng; #87A3 +螤 > yŭ; #87A4 +螥 > cāng; #87A5 +螨 > măn; #87A8 +螪 > shāng; #87AA +螫 > zhē; #87AB +螬 > cáo; #87AC +螭 > chī; #87AD +螮 > dì; #87AE +螯 > áo; #87AF +螰 > lù; #87B0 +螱 > wèi; #87B1 +螲 > zhì; #87B2 +螳 > táng; #87B3 +螴 > chén; #87B4 +螵 > piāo; #87B5 +螶 > qú; #87B6 +螷 > pí; #87B7 +螸 > yú; #87B8 +螹 > jiàn; #87B9 +螺 > lúo; #87BA +螻 > lóu; #87BB +螼 > qĭn; #87BC +螽 > zhōng; #87BD +螾 > yĭn; #87BE +螿 > jiāng; #87BF +蟀 > shuài; #87C0 +蟁 > wén; #87C1 +蟂 > jiāo; #87C2 +蟃 > wàn; #87C3 +蟄 > zhí; #87C4 +蟅 > zhè; #87C5 +蟆 > má; #87C6 +蟇 > má; #87C7 +蟈 > gūo; #87C8 +蟉 > líu; #87C9 +蟊 > máo; #87CA +蟋 > xī; #87CB +蟌 > cōng; #87CC +蟍 > lí; #87CD +蟎 > măn; #87CE +蟏 > xiāo; #87CF +蟑 > zhāng; #87D1 +蟒 > măng; #87D2 +蟓 > xiàng; #87D3 +蟔 > mò; #87D4 +蟕 > zūi; #87D5 +蟖 > sī; #87D6 +蟗 > qīu; #87D7 +蟘 > tè; #87D8 +蟙 > zhí; #87D9 +蟚 > péng; #87DA +蟛 > péng; #87DB +蟜 > jiăo; #87DC +蟝 > qú; #87DD +蟞 > bié; #87DE +蟟 > liáo; #87DF +蟠 > pán; #87E0 +蟡 > gŭi; #87E1 +蟢 > xĭ; #87E2 +蟣 > jĭ; #87E3 +蟤 > zhuān; #87E4 +蟥 > huáng; #87E5 +蟦 > fèi; #87E6 +蟧 > láo; #87E7 +蟨 > jué; #87E8 +蟩 > jué; #87E9 +蟪 > hùi; #87EA 
+蟫 > yín; #87EB +蟬 > chán; #87EC +蟭 > jiāo; #87ED +蟮 > shàn; #87EE +蟯 > ráo; #87EF +蟰 > xiāo; #87F0 +蟱 > móu; #87F1 +蟲 > chóng; #87F2 +蟳 > xún; #87F3 +蟴 > sī; #87F4 +蟶 > chēng; #87F6 +蟷 > dāng; #87F7 +蟸 > lĭ; #87F8 +蟹 > xiè; #87F9 +蟺 > shàn; #87FA +蟻 > yĭ; #87FB +蟼 > jĭng; #87FC +蟽 > dá; #87FD +蟾 > chán; #87FE +蟿 > qì; #87FF +蠀 > cī; #8800 +蠁 > xiàng; #8801 +蠂 > shè; #8802 +蠃 > lŭo; #8803 +蠄 > qín; #8804 +蠅 > yíng; #8805 +蠆 > chài; #8806 +蠇 > lì; #8807 +蠈 > zé; #8808 +蠉 > xuān; #8809 +蠊 > lián; #880A +蠋 > zhú; #880B +蠌 > zé; #880C +蠍 > xiē; #880D +蠎 > măng; #880E +蠏 > xiè; #880F +蠐 > qí; #8810 +蠑 > róng; #8811 +蠒 > jiăn; #8812 +蠓 > mĕng; #8813 +蠔 > háo; #8814 +蠕 > ruăn; #8815 +蠖 > hùo; #8816 +蠗 > zhúo; #8817 +蠘 > jié; #8818 +蠙 > bīn; #8819 +蠚 > hè; #881A +蠛 > miè; #881B +蠜 > fán; #881C +蠝 > léi; #881D +蠞 > jié; #881E +蠟 > là; #881F +蠠 > mì; #8820 +蠡 > lĭ; #8821 +蠢 > chŭn; #8822 +蠣 > lì; #8823 +蠤 > qīu; #8824 +蠥 > niè; #8825 +蠦 > lú; #8826 +蠧 > dù; #8827 +蠨 > xiāo; #8828 +蠩 > zhū; #8829 +蠪 > lóng; #882A +蠫 > lì; #882B +蠬 > lóng; #882C +蠭 > fēng; #882D +蠮 > yē; #882E +蠯 > bèng; #882F +蠰 > shàng; #8830 +蠱 > gŭ; #8831 +蠲 > juān; #8832 +蠳 > yīng; #8833 +蠵 > xī; #8835 +蠶 > cán; #8836 +蠷 > qú; #8837 +蠸 > quán; #8838 +蠹 > dù; #8839 +蠺 > cán; #883A +蠻 > mán; #883B +蠼 > jué; #883C +蠽 > jié; #883D +蠾 > zhú; #883E +蠿 > zhá; #883F +血 > xiĕ; #8840 +衁 > huāng; #8841 +衂 > nìu; #8842 +衃 > pēi; #8843 +衄 > nǜ; #8844 +衅 > xìn; #8845 +衆 > zhòng; #8846 +衇 > mò; #8847 +衈 > èr; #8848 +衉 > kè; #8849 +衊 > miè; #884A +衋 > xì; #884B +行 > xíng; #884C +衍 > yăn; #884D +衎 > kàn; #884E +衏 > yuàn; #884F +衑 > líng; #8851 +衒 > xuàn; #8852 +術 > shù; #8853 +衔 > xián; #8854 +衕 > tòng; #8855 +衖 > lòng; #8856 +街 > jiē; #8857 +衘 > xián; #8858 +衙 > yá; #8859 +衚 > hú; #885A +衛 > wèi; #885B +衜 > dào; #885C +衝 > chōng; #885D +衞 > wèi; #885E +衟 > dào; #885F +衠 > zhūn; #8860 +衡 > héng; #8861 +衢 > qú; #8862 +衣 > yī; #8863 +衤 > yī' 'zì' 'páng; #8864 +补 > bŭ; #8865 +衦 > găn; #8866 +衧 > yú; #8867 +表 > biăo; #8868 +衩 
> chà; #8869 +衪 > yĭ; #886A +衫 > shān; #886B +衬 > chèn; #886C +衭 > fū; #886D +衮 > gŭn; #886E +衯 > fēn; #886F +衰 > shuāi; #8870 +衱 > jié; #8871 +衲 > nà; #8872 +衳 > zhōng; #8873 +衴 > dăn; #8874 +衵 > rì; #8875 +衶 > zhòng; #8876 +衷 > zhōng; #8877 +衸 > xiè; #8878 +衹 > qí; #8879 +衺 > xié; #887A +衻 > rán; #887B +衼 > zhī; #887C +衽 > rèn; #887D +衾 > qīn; #887E +衿 > jīn; #887F +袀 > jūn; #8880 +袁 > yuán; #8881 +袂 > mèi; #8882 +袃 > chài; #8883 +袄 > ăo; #8884 +袅 > niăo; #8885 +袆 > hūi; #8886 +袇 > rán; #8887 +袈 > jiā; #8888 +袉 > túo; #8889 +袊 > lĭng; #888A +袋 > dài; #888B +袌 > bào; #888C +袍 > páo; #888D +袎 > yào; #888E +袏 > zùo; #888F +袐 > bì; #8890 +袑 > shào; #8891 +袒 > tăn; #8892 +袓 > jŭ; #8893 +袔 > hè; #8894 +袕 > shù; #8895 +袖 > xìu; #8896 +袗 > zhĕn; #8897 +袘 > yí; #8898 +袙 > pà; #8899 +袚 > bō; #889A +袛 > dī; #889B +袜 > wà; #889C +袝 > fù; #889D +袞 > gŭn; #889E +袟 > zhì; #889F +袠 > zhì; #88A0 +袡 > rán; #88A1 +袢 > pàn; #88A2 +袣 > yì; #88A3 +袤 > mào; #88A4 +袥 > tuo; #88A5 +袦 > nà; #88A6 +袧 > kōu; #88A7 +袨 > xiàn; #88A8 +袩 > chān; #88A9 +袪 > qū; #88AA +被 > bèi; #88AB +袬 > gŭn; #88AC +袭 > xí; #88AD +袯 > bó; #88AF +袱 > fú; #88B1 +袲 > yí; #88B2 +袳 > chĭ; #88B3 +袴 > kù; #88B4 +袵 > rèn; #88B5 +袶 > jiàng; #88B6 +袷 > jiá; #88B7 +袸 > cún; #88B8 +袹 > mò; #88B9 +袺 > jié; #88BA +袻 > ér; #88BB +袼 > lùo; #88BC +袽 > rú; #88BD +袾 > zhū; #88BE +袿 > gūi; #88BF +裀 > yīn; #88C0 +裁 > cái; #88C1 +裂 > liè; #88C2 +装 > zhuāng; #88C5 +裆 > dāng; #88C6 +裈 > kūn; #88C8 +裉 > kèn; #88C9 +裊 > niăo; #88CA +裋 > shù; #88CB +裌 > jiá; #88CC +裍 > kŭn; #88CD +裎 > chéng; #88CE +裏 > lĭ; #88CF +裐 > juān; #88D0 +裑 > shēn; #88D1 +裒 > póu; #88D2 +裓 > gé; #88D3 +裔 > yì; #88D4 +裕 > yù; #88D5 +裖 > zhĕn; #88D6 +裗 > líu; #88D7 +裘 > qíu; #88D8 +裙 > qún; #88D9 +裚 > jì; #88DA +裛 > yì; #88DB +補 > bŭ; #88DC +裝 > zhuāng; #88DD +裞 > shùi; #88DE +裟 > shā; #88DF +裠 > qún; #88E0 +裡 > lĭ; #88E1 +裢 > lián; #88E2 +裣 > liàn; #88E3 +裤 > kù; #88E4 +裥 > jiăn; #88E5 +裦 > fóu; #88E6 +裧 > chān; #88E7 +裨 > bì; #88E8 +裩 > gūn; #88E9 +裪 > táo; #88EA 
+裫 > yuàn; #88EB +裬 > líng; #88EC +裭 > chĭ; #88ED +裮 > chāng; #88EE +裯 > chóu; #88EF +裰 > dúo; #88F0 +裱 > biăo; #88F1 +裲 > liăng; #88F2 +裳 > cháng; #88F3 +裴 > péi; #88F4 +裵 > péi; #88F5 +裶 > fēi; #88F6 +裷 > yuān; #88F7 +裸 > lŭo; #88F8 +裹 > gŭo; #88F9 +裺 > yăn; #88FA +裻 > dŭ; #88FB +裼 > xí; #88FC +製 > zhì; #88FD +裾 > jū; #88FE +裿 > qĭ; #88FF +褀 > jì; #8900 +褁 > zhí; #8901 +褂 > guà; #8902 +褃 > kèn; #8903 +褅 > tì; #8905 +褆 > tí; #8906 +複 > fù; #8907 +褈 > chóng; #8908 +褉 > xiē; #8909 +褊 > biăn; #890A +褋 > dié; #890B +褌 > kūn; #890C +褍 > duān; #890D +褎 > xìu; #890E +褏 > xìu; #890F +褐 > hé; #8910 +褑 > yuàn; #8911 +褒 > bāo; #8912 +褓 > băo; #8913 +褔 > fù; #8914 +褕 > yú; #8915 +褖 > tuàn; #8916 +褗 > yăn; #8917 +褘 > hūi; #8918 +褙 > bèi; #8919 +褚 > chŭ; #891A +褛 > lǚ; #891B +褞 > yŭn; #891E +褟 > dá; #891F +褠 > gōu; #8920 +褡 > dā; #8921 +褢 > huái; #8922 +褣 > róng; #8923 +褤 > yuàn; #8924 +褥 > rù; #8925 +褦 > nài; #8926 +褧 > jĭong; #8927 +褨 > sŭo; #8928 +褩 > bān; #8929 +褪 > tùn; #892A +褫 > chĭ; #892B +褬 > săng; #892C +褭 > niăo; #892D +褮 > yīng; #892E +褯 > jiè; #892F +褰 > qiān; #8930 +褱 > huái; #8931 +褲 > kù; #8932 +褳 > lián; #8933 +褴 > băo; #8934 +褵 > lí; #8935 +褶 > zhé; #8936 +褷 > shī; #8937 +褸 > lǚ; #8938 +褹 > yì; #8939 +褺 > dié; #893A +褻 > xiè; #893B +褼 > xiān; #893C +褽 > wèi; #893D +褾 > biăo; #893E +褿 > cáo; #893F +襀 > jī; #8940 +襁 > jiăng; #8941 +襂 > sēn; #8942 +襃 > bāo; #8943 +襄 > xiāng; #8944 +襆 > pú; #8946 +襇 > jiăn; #8947 +襈 > zhuàn; #8948 +襉 > jiàn; #8949 +襊 > zùi; #894A +襋 > jí; #894B +襌 > dān; #894C +襍 > zá; #894D +襎 > fán; #894E +襏 > bó; #894F +襐 > xiàng; #8950 +襑 > xín; #8951 +襒 > bié; #8952 +襓 > ráo; #8953 +襔 > măn; #8954 +襕 > lán; #8955 +襖 > ăo; #8956 +襗 > dúo; #8957 +襘 > gùi; #8958 +襙 > cào; #8959 +襚 > sùi; #895A +襛 > nóng; #895B +襜 > chān; #895C +襝 > liàn; #895D +襞 > bì; #895E +襟 > jīn; #895F +襠 > dāng; #8960 +襡 > shú; #8961 +襢 > tăn; #8962 +襣 > bì; #8963 +襤 > lán; #8964 +襥 > pú; #8965 +襦 > rú; #8966 +襧 > zhĭ; #8967 +襩 > shŭ; #8969 +襪 > wà; #896A +襫 > shì; #896B 
+襬 > băi; #896C +襭 > xié; #896D +襮 > bó; #896E +襯 > chèn; #896F +襰 > lài; #8970 +襱 > lóng; #8971 +襲 > xí; #8972 +襳 > xiān; #8973 +襴 > lán; #8974 +襵 > zhé; #8975 +襶 > dài; #8976 +襸 > zàn; #8978 +襹 > shī; #8979 +襺 > jiăn; #897A +襻 > pàn; #897B +襼 > yì; #897C +襾 > yà; #897E +西 > xī; #897F +覀 > xī; #8980 +要 > yào; #8981 +覂 > fĕng; #8982 +覃 > tán; #8983 +覅 > biào; #8985 +覆 > fù; #8986 +覇 > bà; #8987 +覈 > hé; #8988 +覉 > jī; #8989 +覊 > jī; #898A +見 > jiàn; #898B +覌 > guān; #898C +覍 > biàn; #898D +覎 > yàn; #898E +規 > gūi; #898F +覐 > jué; #8990 +覑 > piăn; #8991 +覒 > máo; #8992 +覓 > mì; #8993 +覔 > mì; #8994 +覕 > miè; #8995 +視 > shì; #8996 +覗 > sī; #8997 +覘 > zhān; #8998 +覙 > lúo; #8999 +覚 > jué; #899A +覛 > mì; #899B +覜 > tiào; #899C +覝 > lián; #899D +覞 > yào; #899E +覟 > zhì; #899F +覠 > jūn; #89A0 +覡 > xí; #89A1 +覢 > shăn; #89A2 +覣 > wēi; #89A3 +覤 > xì; #89A4 +覥 > tiăn; #89A5 +覦 > yú; #89A6 +覧 > lăn; #89A7 +覨 > è; #89A8 +覩 > dŭ; #89A9 +親 > qīn; #89AA +覫 > păng; #89AB +覬 > jì; #89AC +覭 > míng; #89AD +覮 > yíng; #89AE +覯 > gòu; #89AF +覰 > qù; #89B0 +覱 > zhàn; #89B1 +覲 > jĭn; #89B2 +観 > guān; #89B3 +覴 > dēng; #89B4 +覵 > jiàn; #89B5 +覶 > lúo; #89B6 +覷 > qù; #89B7 +覸 > jiàn; #89B8 +覹 > wéi; #89B9 +覺 > jué; #89BA +覻 > qù; #89BB +覼 > lúo; #89BC +覽 > lăn; #89BD +覾 > shĕn; #89BE +覿 > dí; #89BF +觀 > guān; #89C0 +见 > jiàn; #89C1 +观 > guān; #89C2 +觃 > yàn; #89C3 +规 > gūi; #89C4 +觅 > mì; #89C5 +视 > shì; #89C6 +觇 > zhān; #89C7 +览 > lăn; #89C8 +觉 > jué; #89C9 +觊 > jì; #89CA +觋 > xí; #89CB +觌 > dí; #89CC +觍 > tiăn; #89CD +觎 > yú; #89CE +觏 > gòu; #89CF +觐 > jĭn; #89D0 +觑 > qù; #89D1 +角 > jiăo; #89D2 +觓 > jīu; #89D3 +觔 > jīn; #89D4 +觕 > cū; #89D5 +觖 > jué; #89D6 +觗 > zhì; #89D7 +觘 > chào; #89D8 +觙 > jí; #89D9 +觚 > gū; #89DA +觛 > dàn; #89DB +觜 > zŭi; #89DC +觝 > dĭ; #89DD +觞 > shāng; #89DE +觟 > huà; #89DF +觠 > quán; #89E0 +觡 > gé; #89E1 +觢 > chì; #89E2 +解 > jiĕ; #89E3 +觤 > gŭi; #89E4 +觥 > gōng; #89E5 +触 > hóng; #89E6 +觧 > jiĕ; #89E7 +觨 > hùn; #89E8 +觩 > qíu; #89E9 +觪 > xīng; #89EA +觫 > sù; #89EB 
+觬 > ní; #89EC +觭 > jī; #89ED +觮 > lù; #89EE +觯 > zhì; #89EF +觰 > zhā; #89F0 +觱 > bì; #89F1 +觲 > xīng; #89F2 +觳 > hú; #89F3 +觴 > shāng; #89F4 +觵 > gōng; #89F5 +觶 > zhì; #89F6 +觷 > xué; #89F7 +觸 > chù; #89F8 +觹 > xī; #89F9 +觺 > yí; #89FA +觻 > lù; #89FB +觼 > jué; #89FC +觽 > xī; #89FD +觾 > yàn; #89FE +觿 > xī; #89FF +言 > yán; #8A00 +訁 > yán' 'zì' 'páng; #8A01 +訂 > dìng; #8A02 +訃 > fù; #8A03 +訄 > qíu; #8A04 +訅 > qíu; #8A05 +訆 > jiào; #8A06 +訇 > hōng; #8A07 +計 > jì; #8A08 +訉 > fàn; #8A09 +訊 > xùn; #8A0A +訋 > diào; #8A0B +訌 > hóng; #8A0C +訍 > chà; #8A0D +討 > tăo; #8A0E +訏 > xū; #8A0F +訐 > jié; #8A10 +訑 > yí; #8A11 +訒 > rèn; #8A12 +訓 > xùn; #8A13 +訔 > yín; #8A14 +訕 > shàn; #8A15 +訖 > qì; #8A16 +託 > tūo; #8A17 +記 > jì; #8A18 +訙 > xùn; #8A19 +訚 > yín; #8A1A +訛 > é; #8A1B +訜 > fēn; #8A1C +訝 > yà; #8A1D +訞 > yāo; #8A1E +訟 > sòng; #8A1F +訠 > shĕn; #8A20 +訡 > yín; #8A21 +訢 > xīn; #8A22 +訣 > jué; #8A23 +訤 > xiáo; #8A24 +訥 > nè; #8A25 +訦 > chén; #8A26 +訧 > yóu; #8A27 +訨 > zhĭ; #8A28 +訩 > xīong; #8A29 +訪 > făng; #8A2A +訫 > xìn; #8A2B +訬 > chāo; #8A2C +設 > shè; #8A2D +訮 > xiān; #8A2E +訯 > shă; #8A2F +訰 > tún; #8A30 +許 > xŭ; #8A31 +訲 > yì; #8A32 +訳 > yì; #8A33 +訴 > sù; #8A34 +訵 > chī; #8A35 +訶 > hē; #8A36 +訷 > shēn; #8A37 +訸 > hé; #8A38 +訹 > xù; #8A39 +診 > zhĕn; #8A3A +註 > zhù; #8A3B +証 > zhèng; #8A3C +訽 > gòu; #8A3D +訾 > zĭ; #8A3E +訿 > zĭ; #8A3F +詀 > zhān; #8A40 +詁 > gŭ; #8A41 +詂 > fù; #8A42 +詃 > quăn; #8A43 +詄 > dié; #8A44 +詅 > líng; #8A45 +詆 > dĭ; #8A46 +詇 > yàng; #8A47 +詈 > lì; #8A48 +詉 > náo; #8A49 +詊 > pàn; #8A4A +詋 > zhòu; #8A4B +詌 > gàn; #8A4C +詍 > yì; #8A4D +詎 > jù; #8A4E +詏 > ào; #8A4F +詐 > zhà; #8A50 +詑 > túo; #8A51 +詒 > yí; #8A52 +詓 > qŭ; #8A53 +詔 > zhào; #8A54 +評 > píng; #8A55 +詖 > bì; #8A56 +詗 > xìong; #8A57 +詘 > qù; #8A58 +詙 > bá; #8A59 +詚 > dá; #8A5A +詛 > zŭ; #8A5B +詜 > tāo; #8A5C +詝 > zhŭ; #8A5D +詞 > cí; #8A5E +詟 > zhé; #8A5F +詠 > yŏng; #8A60 +詡 > xŭ; #8A61 +詢 > xún; #8A62 +詣 > yì; #8A63 +詤 > huăng; #8A64 +詥 > hé; #8A65 +試 > shì; #8A66 +詧 > chá; #8A67 +詨 > jiāo; 
#8A68 +詩 > shī; #8A69 +詪 > hĕn; #8A6A +詫 > chà; #8A6B +詬 > gòu; #8A6C +詭 > gŭi; #8A6D +詮 > quán; #8A6E +詯 > hùi; #8A6F +詰 > jié; #8A70 +話 > huà; #8A71 +該 > gāi; #8A72 +詳 > xiáng; #8A73 +詴 > wēi; #8A74 +詵 > shēn; #8A75 +詶 > chóu; #8A76 +詷 > tóng; #8A77 +詸 > mí; #8A78 +詹 > zhān; #8A79 +詺 > mìng; #8A7A +詻 > è; #8A7B +詼 > hūi; #8A7C +詽 > yán; #8A7D +詾 > xīong; #8A7E +詿 > guà; #8A7F +誀 > èr; #8A80 +誁 > bĕng; #8A81 +誂 > tiăo; #8A82 +誃 > chĭ; #8A83 +誄 > lĕi; #8A84 +誅 > zhū; #8A85 +誆 > kuāng; #8A86 +誇 > kuā; #8A87 +誈 > wú; #8A88 +誉 > yù; #8A89 +誊 > téng; #8A8A +誋 > jì; #8A8B +誌 > zhì; #8A8C +認 > rèn; #8A8D +誎 > sù; #8A8E +誏 > lăng; #8A8F +誐 > é; #8A90 +誑 > kuáng; #8A91 +誒 > è; #8A92 +誓 > shì; #8A93 +誔 > tĭng; #8A94 +誕 > dàn; #8A95 +誖 > bó; #8A96 +誗 > chán; #8A97 +誘 > yòu; #8A98 +誙 > héng; #8A99 +誚 > qiào; #8A9A +誛 > qīn; #8A9B +誜 > shuà; #8A9C +誝 > ān; #8A9D +語 > yŭ; #8A9E +誟 > xiào; #8A9F +誠 > chéng; #8AA0 +誡 > jiè; #8AA1 +誢 > xiàn; #8AA2 +誣 > wú; #8AA3 +誤 > wù; #8AA4 +誥 > gào; #8AA5 +誦 > sòng; #8AA6 +誧 > pŭ; #8AA7 +誨 > hùi; #8AA8 +誩 > jìng; #8AA9 +說 > shūo; #8AAA +誫 > zhèn; #8AAB +説 > shūo; #8AAC +読 > dú; #8AAD +誯 > chàng; #8AAF +誰 > shúi; #8AB0 +誱 > jié; #8AB1 +課 > kè; #8AB2 +誳 > qū; #8AB3 +誴 > cóng; #8AB4 +誵 > xiáo; #8AB5 +誶 > sùi; #8AB6 +誷 > wăng; #8AB7 +誸 > xuán; #8AB8 +誹 > fĕi; #8AB9 +誺 > chī; #8ABA +誻 > tà; #8ABB +誼 > yí; #8ABC +誽 > ná; #8ABD +誾 > yín; #8ABE +調 > diào; #8ABF +諀 > pĭ; #8AC0 +諁 > chùo; #8AC1 +諂 > chăn; #8AC2 +諃 > chēn; #8AC3 +諄 > zhūn; #8AC4 +諅 > jī; #8AC5 +諆 > qī; #8AC6 +談 > tán; #8AC7 +諈 > zhùi; #8AC8 +諉 > wĕi; #8AC9 +諊 > jú; #8ACA +請 > qĭng; #8ACB +諌 > jiàn; #8ACC +諍 > zhēng; #8ACD +諎 > zé; #8ACE +諏 > zōu; #8ACF +諐 > qiān; #8AD0 +諑 > zhúo; #8AD1 +諒 > liàng; #8AD2 +諓 > jiàn; #8AD3 +諔 > zhù; #8AD4 +諕 > háo; #8AD5 +論 > lùn; #8AD6 +諗 > shĕn; #8AD7 +諘 > biăo; #8AD8 +諙 > huài; #8AD9 +諚 > pián; #8ADA +諛 > yú; #8ADB +諜 > dié; #8ADC +諝 > xŭ; #8ADD +諞 > pián; #8ADE +諟 > shì; #8ADF +諠 > xuān; #8AE0 +諡 > shì; #8AE1 +諢 > hùn; #8AE2 +諣 > huà; #8AE3 +諤 > è; 
#8AE4 +諥 > zhòng; #8AE5 +諦 > dì; #8AE6 +諧 > xié; #8AE7 +諨 > fú; #8AE8 +諩 > pŭ; #8AE9 +諪 > tíng; #8AEA +諫 > jiàn; #8AEB +諬 > qĭ; #8AEC +諭 > yù; #8AED +諮 > zī; #8AEE +諯 > chuán; #8AEF +諰 > xĭ; #8AF0 +諱 > hùi; #8AF1 +諲 > yīn; #8AF2 +諳 > ān; #8AF3 +諴 > xián; #8AF4 +諵 > nán; #8AF5 +諶 > chén; #8AF6 +諷 > fēng; #8AF7 +諸 > zhū; #8AF8 +諹 > yáng; #8AF9 +諺 > yàn; #8AFA +諻 > hēng; #8AFB +諼 > xuān; #8AFC +諽 > gé; #8AFD +諾 > nùo; #8AFE +諿 > qì; #8AFF +謀 > móu; #8B00 +謁 > yè; #8B01 +謂 > wèi; #8B02 +謄 > téng; #8B04 +謅 > zōu; #8B05 +謆 > shàn; #8B06 +謇 > jiăn; #8B07 +謈 > bó; #8B08 +謉 > kù1; #8B09 +謊 > huăng; #8B0A +謋 > hùo; #8B0B +謌 > gē; #8B0C +謍 > yíng; #8B0D +謎 > mí; #8B0E +謏 > xiăo; #8B0F +謐 > mì; #8B10 +謑 > xì; #8B11 +謒 > qiāng; #8B12 +謓 > chēn; #8B13 +謔 > nǜe; #8B14 +謕 > tí; #8B15 +謖 > sù; #8B16 +謗 > bàng; #8B17 +謘 > chí; #8B18 +謙 > qiān; #8B19 +謚 > shì; #8B1A +講 > jiăng; #8B1B +謜 > yuàn; #8B1C +謝 > xiè; #8B1D +謞 > xuè; #8B1E +謟 > tāo; #8B1F +謠 > yáo; #8B20 +謡 > yáo; #8B21 +謣 > yú; #8B23 +謤 > biāo; #8B24 +謥 > còng; #8B25 +謦 > qìng; #8B26 +謧 > lí; #8B27 +謨 > mó; #8B28 +謩 > mò; #8B29 +謪 > shāng; #8B2A +謫 > zhé; #8B2B +謬 > mìu; #8B2C +謭 > jiăn; #8B2D +謮 > zé; #8B2E +謯 > jiē; #8B2F +謰 > lián; #8B30 +謱 > lóu; #8B31 +謲 > cān; #8B32 +謳 > ōu; #8B33 +謴 > guàn; #8B34 +謵 > xí; #8B35 +謶 > zhúo; #8B36 +謷 > áo; #8B37 +謸 > áo; #8B38 +謹 > jĭn; #8B39 +謺 > zhé; #8B3A +謻 > yí; #8B3B +謼 > hù; #8B3C +謽 > jiàng; #8B3D +謾 > mán; #8B3E +謿 > cháo; #8B3F +譀 > hàn; #8B40 +譁 > huá; #8B41 +譂 > chăn; #8B42 +譃 > xū; #8B43 +譄 > zēng; #8B44 +譅 > sè; #8B45 +譆 > xī; #8B46 +譇 > shē; #8B47 +譈 > dùi; #8B48 +證 > zhèng; #8B49 +譊 > náo; #8B4A +譋 > lán; #8B4B +譌 > é; #8B4C +譍 > yìng; #8B4D +譎 > jué; #8B4E +譏 > jī; #8B4F +譐 > zŭn; #8B50 +譑 > jiăo; #8B51 +譒 > bò; #8B52 +譓 > hùi; #8B53 +譔 > zhuàn; #8B54 +譕 > mú; #8B55 +譖 > zèn; #8B56 +譗 > zhá; #8B57 +識 > shì; #8B58 +譙 > qiáo; #8B59 +譚 > tán; #8B5A +譛 > zèn; #8B5B +譜 > pŭ; #8B5C +譝 > shéng; #8B5D +譞 > xuān; #8B5E +譟 > zào; #8B5F +譠 > tān; #8B60 +譡 > dăng; #8B61 +譢 > sùi; 
#8B62 +譣 > qiān; #8B63 +譤 > jī; #8B64 +譥 > jiào; #8B65 +警 > jĭng; #8B66 +譧 > lián; #8B67 +譨 > nóu; #8B68 +譩 > yī; #8B69 +譪 > ài; #8B6A +譫 > zhān; #8B6B +譬 > pì; #8B6C +譭 > hŭi; #8B6D +譮 > huà; #8B6E +譯 > yì; #8B6F +議 > yì; #8B70 +譱 > shàn; #8B71 +譲 > ràng; #8B72 +譳 > nòu; #8B73 +譴 > qiăn; #8B74 +譵 > zhùi; #8B75 +譶 > tà; #8B76 +護 > hù; #8B77 +譸 > zhōu; #8B78 +譹 > háo; #8B79 +譺 > yè; #8B7A +譻 > yīng; #8B7B +譼 > jiàn; #8B7C +譽 > yù; #8B7D +譾 > jiăn; #8B7E +譿 > hùi; #8B7F +讀 > dú; #8B80 +讁 > zhé; #8B81 +讂 > xuàn; #8B82 +讃 > zàn; #8B83 +讄 > lĕi; #8B84 +讅 > shĕn; #8B85 +讆 > wèi; #8B86 +讇 > chăn; #8B87 +讈 > lì; #8B88 +讉 > yí; #8B89 +變 > biàn; #8B8A +讋 > zhé; #8B8B +讌 > yàn; #8B8C +讍 > è; #8B8D +讎 > chóu; #8B8E +讏 > wèi; #8B8F +讐 > chóu; #8B90 +讑 > yào; #8B91 +讒 > chán; #8B92 +讓 > ràng; #8B93 +讔 > yĭn; #8B94 +讕 > lán; #8B95 +讖 > chèn; #8B96 +讗 > hùo; #8B97 +讘 > zhé; #8B98 +讙 > huān; #8B99 +讚 > zàn; #8B9A +讛 > yì; #8B9B +讜 > dăng; #8B9C +讝 > zhān; #8B9D +讞 > yàn; #8B9E +讟 > dú; #8B9F +讠 > yán; #8BA0 +计 > jì; #8BA1 +订 > dìng; #8BA2 +讣 > fù; #8BA3 +认 > rèn; #8BA4 +讥 > jī; #8BA5 +讦 > jié; #8BA6 +讧 > hóng; #8BA7 +讨 > tăo; #8BA8 +让 > ràng; #8BA9 +讪 > shàn; #8BAA +讫 > qì; #8BAB +讬 > tūo; #8BAC +训 > xùn; #8BAD +议 > yì; #8BAE +讯 > xùn; #8BAF +记 > jì; #8BB0 +讱 > rèn; #8BB1 +讲 > jiăng; #8BB2 +讳 > hùi; #8BB3 +讴 > ōu; #8BB4 +讵 > jù; #8BB5 +讶 > yà; #8BB6 +讷 > nè; #8BB7 +许 > xŭ; #8BB8 +讹 > é; #8BB9 +论 > lùn; #8BBA +讻 > xīong; #8BBB +讼 > sòng; #8BBC +讽 > fēng; #8BBD +设 > shè; #8BBE +访 > făng; #8BBF +诀 > jué; #8BC0 +证 > zhèng; #8BC1 +诂 > gŭ; #8BC2 +诃 > hē; #8BC3 +评 > píng; #8BC4 +诅 > zŭ; #8BC5 +识 > shì; #8BC6 +诇 > xìong; #8BC7 +诈 > zhà; #8BC8 +诉 > sù; #8BC9 +诊 > zhĕn; #8BCA +诋 > dĭ; #8BCB +诌 > zōu; #8BCC +词 > cí; #8BCD +诎 > qù; #8BCE +诏 > zhào; #8BCF +诐 > bì; #8BD0 +译 > yì; #8BD1 +诒 > yí; #8BD2 +诓 > kuāng; #8BD3 +诔 > lĕi; #8BD4 +试 > shì; #8BD5 +诖 > guà; #8BD6 +诗 > shī; #8BD7 +诘 > jié; #8BD8 +诙 > hūi; #8BD9 +诚 > chéng; #8BDA +诛 > zhū; #8BDB +诜 > shēn; #8BDC +话 > huà; #8BDD +诞 > dàn; #8BDE 
+诟 > gòu; #8BDF +诠 > quán; #8BE0 +诡 > gŭi; #8BE1 +询 > xún; #8BE2 +诣 > yì; #8BE3 +诤 > zhēng; #8BE4 +该 > gāi; #8BE5 +详 > xiáng; #8BE6 +诧 > chà; #8BE7 +诨 > hùn; #8BE8 +诩 > xŭ; #8BE9 +诪 > zhōu; #8BEA +诫 > jiè; #8BEB +诬 > wú; #8BEC +语 > yŭ; #8BED +诮 > qiào; #8BEE +误 > wù; #8BEF +诰 > gào; #8BF0 +诱 > yòu; #8BF1 +诲 > hùi; #8BF2 +诳 > kuáng; #8BF3 +说 > shūo; #8BF4 +诵 > sòng; #8BF5 +诶 > āi; #8BF6 +请 > qĭng; #8BF7 +诸 > zhū; #8BF8 +诹 > zōu; #8BF9 +诺 > nùo; #8BFA +读 > dú; #8BFB +诼 > zhúo; #8BFC +诽 > fĕi; #8BFD +课 > kè; #8BFE +诿 > wĕi; #8BFF +谀 > yú; #8C00 +谁 > shúi; #8C01 +谂 > shĕn; #8C02 +调 > diào; #8C03 +谄 > chăn; #8C04 +谅 > liàng; #8C05 +谆 > zhūn; #8C06 +谇 > sùi; #8C07 +谈 > tán; #8C08 +谉 > shĕn; #8C09 +谊 > yí; #8C0A +谋 > móu; #8C0B +谌 > chén; #8C0C +谍 > dié; #8C0D +谎 > huăng; #8C0E +谏 > jiàn; #8C0F +谐 > xié; #8C10 +谑 > nǜe; #8C11 +谒 > yè; #8C12 +谓 > wèi; #8C13 +谔 > è; #8C14 +谕 > yù; #8C15 +谖 > xuān; #8C16 +谗 > chán; #8C17 +谘 > zī; #8C18 +谙 > ān; #8C19 +谚 > yàn; #8C1A +谛 > dì; #8C1B +谜 > mí; #8C1C +谝 > pián; #8C1D +谞 > xŭ; #8C1E +谟 > mó; #8C1F +谠 > dăng; #8C20 +谡 > sù; #8C21 +谢 > xiè; #8C22 +谣 > yáo; #8C23 +谤 > bàng; #8C24 +谥 > shì; #8C25 +谦 > qiān; #8C26 +谧 > mì; #8C27 +谨 > jĭn; #8C28 +谩 > mán; #8C29 +谪 > zhé; #8C2A +谫 > jiăn; #8C2B +谬 > mìu; #8C2C +谭 > tán; #8C2D +谮 > zèn; #8C2E +谯 > qiáo; #8C2F +谰 > lán; #8C30 +谱 > pŭ; #8C31 +谲 > jué; #8C32 +谳 > yàn; #8C33 +谴 > qiăn; #8C34 +谵 > zhān; #8C35 +谶 > chèn; #8C36 +谷 > gŭ; #8C37 +谸 > qiān; #8C38 +谹 > hóng; #8C39 +谺 > xiā; #8C3A +谻 > jué; #8C3B +谼 > hóng; #8C3C +谽 > hān; #8C3D +谾 > hōng; #8C3E +谿 > xī; #8C3F +豀 > xī; #8C40 +豁 > hùo; #8C41 +豂 > liáo; #8C42 +豃 > hăn; #8C43 +豄 > dú; #8C44 +豅 > lóng; #8C45 +豆 > dòu; #8C46 +豇 > jiāng; #8C47 +豈 > qĭ; #8C48 +豉 > shì; #8C49 +豊 > lĭ; #8C4A +豋 > dēng; #8C4B +豌 > wān; #8C4C +豍 > bī; #8C4D +豎 > shù; #8C4E +豏 > xiàn; #8C4F +豐 > fēng; #8C50 +豑 > zhì; #8C51 +豒 > zhì; #8C52 +豓 > yàn; #8C53 +豔 > yàn; #8C54 +豕 > shĭ; #8C55 +豖 > chù; #8C56 +豗 > hūi; #8C57 +豘 > tún; #8C58 +豙 > yì; #8C59 +豚 > tún; #8C5A 
+豛 > yì; #8C5B +豜 > jiān; #8C5C +豝 > bā; #8C5D +豞 > hòu; #8C5E +豟 > è; #8C5F +豠 > cú; #8C60 +象 > xiàng; #8C61 +豢 > huàn; #8C62 +豣 > jiān; #8C63 +豤 > kĕn; #8C64 +豥 > gāi; #8C65 +豦 > qú; #8C66 +豧 > fū; #8C67 +豨 > xī; #8C68 +豩 > bīn; #8C69 +豪 > háo; #8C6A +豫 > yù; #8C6B +豬 > zhū; #8C6C +豭 > jiā; #8C6D +豯 > xī; #8C6F +豰 > bó; #8C70 +豱 > wēn; #8C71 +豲 > huán; #8C72 +豳 > bīn; #8C73 +豴 > dí; #8C74 +豵 > zōng; #8C75 +豶 > fén; #8C76 +豷 > yì; #8C77 +豸 > zhì; #8C78 +豹 > bào; #8C79 +豺 > chái; #8C7A +豻 > hàn; #8C7B +豼 > pí; #8C7C +豽 > nà; #8C7D +豾 > pī; #8C7E +豿 > gŏu; #8C7F +貀 > nà; #8C80 +貁 > yòu; #8C81 +貂 > diāo; #8C82 +貃 > mò; #8C83 +貄 > sì; #8C84 +貅 > xīu; #8C85 +貆 > huán; #8C86 +貇 > kūn; #8C87 +貈 > hé; #8C88 +貉 > hé; #8C89 +貊 > mò; #8C8A +貋 > hàn; #8C8B +貌 > mào; #8C8C +貍 > lí; #8C8D +貎 > ní; #8C8E +貏 > bĭ; #8C8F +貐 > yŭ; #8C90 +貑 > jiā; #8C91 +貒 > tuān; #8C92 +貓 > māo; #8C93 +貔 > pí; #8C94 +貕 > xī; #8C95 +貖 > è; #8C96 +貗 > jù; #8C97 +貘 > mò; #8C98 +貙 > chū; #8C99 +貚 > tán; #8C9A +貛 > huān; #8C9B +貜 > jué; #8C9C +貝 > bèi; #8C9D +貞 > zhēn; #8C9E +貟 > yuán; #8C9F +負 > fù; #8CA0 +財 > cái; #8CA1 +貢 > gòng; #8CA2 +貣 > tè; #8CA3 +貤 > yí; #8CA4 +貥 > háng; #8CA5 +貦 > wàn; #8CA6 +貧 > pín; #8CA7 +貨 > hùo; #8CA8 +販 > fàn; #8CA9 +貪 > tān; #8CAA +貫 > guàn; #8CAB +責 > zé; #8CAC +貭 > zhí; #8CAD +貮 > èr; #8CAE +貯 > zhŭ; #8CAF +貰 > shì; #8CB0 +貱 > bì; #8CB1 +貲 > zī; #8CB2 +貳 > èr; #8CB3 +貴 > gùi; #8CB4 +貵 > piăn; #8CB5 +貶 > biăn; #8CB6 +買 > măi; #8CB7 +貸 > dài; #8CB8 +貹 > shèng; #8CB9 +貺 > kuàng; #8CBA +費 > fèi; #8CBB +貼 > tiē; #8CBC +貽 > yí; #8CBD +貾 > chí; #8CBE +貿 > mào; #8CBF +賀 > hè; #8CC0 +賁 > bì; #8CC1 +賂 > lù; #8CC2 +賃 > rèn; #8CC3 +賄 > hùi; #8CC4 +賅 > gāi; #8CC5 +賆 > pián; #8CC6 +資 > zī; #8CC7 +賈 > jiă; #8CC8 +賉 > xù; #8CC9 +賊 > zéi; #8CCA +賋 > jiăo; #8CCB +賌 > gài; #8CCC +賍 > zāng; #8CCD +賎 > jiàn; #8CCE +賏 > yìng; #8CCF +賐 > xùn; #8CD0 +賑 > zhèn; #8CD1 +賒 > shē; #8CD2 +賓 > bīn; #8CD3 +賔 > bīn; #8CD4 +賕 > qíu; #8CD5 +賖 > shē; #8CD6 +賗 > chuàn; #8CD7 +賘 > zāng; #8CD8 +賙 > zhōu; 
#8CD9 +賚 > lài; #8CDA +賛 > zàn; #8CDB +賜 > sì; #8CDC +賝 > chēn; #8CDD +賞 > shăng; #8CDE +賟 > tiăn; #8CDF +賠 > péi; #8CE0 +賡 > gēng; #8CE1 +賢 > xián; #8CE2 +賣 > mài; #8CE3 +賤 > jiàn; #8CE4 +賥 > sùi; #8CE5 +賦 > fù; #8CE6 +賧 > tàn; #8CE7 +賨 > cóng; #8CE8 +賩 > cóng; #8CE9 +質 > zhí; #8CEA +賫 > jī; #8CEB +賬 > zhàng; #8CEC +賭 > dŭ; #8CED +賮 > jìn; #8CEE +賯 > xīong; #8CEF +賰 > shŭn; #8CF0 +賱 > yŭn; #8CF1 +賲 > băo; #8CF2 +賳 > zāi; #8CF3 +賴 > lài; #8CF4 +賵 > fèng; #8CF5 +賶 > càng; #8CF6 +賷 > jī; #8CF7 +賸 > shèng; #8CF8 +賹 > ài; #8CF9 +賺 > zhuàn; #8CFA +賻 > fù; #8CFB +購 > gòu; #8CFC +賽 > sài; #8CFD +賾 > zé; #8CFE +賿 > liáo; #8CFF +贀 > wèi; #8D00 +贁 > bài; #8D01 +贂 > chĕn; #8D02 +贃 > zhuàn; #8D03 +贄 > zhì; #8D04 +贅 > zhùi; #8D05 +贆 > biāo; #8D06 +贇 > yūn; #8D07 +贈 > zèng; #8D08 +贉 > tăn; #8D09 +贊 > zàn; #8D0A +贋 > yàn; #8D0B +贍 > shàn; #8D0D +贎 > wàn; #8D0E +贏 > yíng; #8D0F +贐 > jìn; #8D10 +贑 > găn; #8D11 +贒 > xián; #8D12 +贓 > zāng; #8D13 +贔 > bì; #8D14 +贕 > dú; #8D15 +贖 > shú; #8D16 +贗 > yàn; #8D17 +贙 > xuàn; #8D19 +贚 > lòng; #8D1A +贛 > gàn; #8D1B +贜 > zāng; #8D1C +贝 > bèi; #8D1D +贞 > zhēn; #8D1E +负 > fù; #8D1F +贠 > yuán; #8D20 +贡 > gòng; #8D21 +财 > cái; #8D22 +责 > zé; #8D23 +贤 > xián; #8D24 +败 > bài; #8D25 +账 > zhàng; #8D26 +货 > hùo; #8D27 +质 > zhí; #8D28 +贩 > fàn; #8D29 +贪 > tān; #8D2A +贫 > pín; #8D2B +贬 > biăn; #8D2C +购 > gòu; #8D2D +贮 > zhŭ; #8D2E +贯 > guàn; #8D2F +贰 > èr; #8D30 +贱 > jiàn; #8D31 +贲 > bì; #8D32 +贳 > shì; #8D33 +贴 > tiē; #8D34 +贵 > gùi; #8D35 +贶 > kuàng; #8D36 +贷 > dài; #8D37 +贸 > mào; #8D38 +费 > fèi; #8D39 +贺 > hè; #8D3A +贻 > yí; #8D3B +贼 > zéi; #8D3C +贽 > zhì; #8D3D +贾 > jiă; #8D3E +贿 > hùi; #8D3F +赀 > zī; #8D40 +赁 > rèn; #8D41 +赂 > lù; #8D42 +赃 > zāng; #8D43 +资 > zī; #8D44 +赅 > gāi; #8D45 +赆 > jìn; #8D46 +赇 > qíu; #8D47 +赈 > zhèn; #8D48 +赉 > lài; #8D49 +赊 > shē; #8D4A +赋 > fù; #8D4B +赌 > dŭ; #8D4C +赍 > jī; #8D4D +赎 > shú; #8D4E +赏 > shăng; #8D4F +赐 > sì; #8D50 +赑 > bì; #8D51 +赒 > zhōu; #8D52 +赓 > gēng; #8D53 +赔 > péi; #8D54 +赕 > tàn; #8D55 +赖 > lài; 
#8D56 +赗 > fèng; #8D57 +赘 > zhùi; #8D58 +赙 > fù; #8D59 +赚 > zhuàn; #8D5A +赛 > sài; #8D5B +赜 > zé; #8D5C +赝 > yàn; #8D5D +赞 > zàn; #8D5E +赟 > yūn; #8D5F +赠 > zèng; #8D60 +赡 > shàn; #8D61 +赢 > yíng; #8D62 +赣 > gàn; #8D63 +赤 > chì; #8D64 +赥 > xì; #8D65 +赦 > shè; #8D66 +赧 > năn; #8D67 +赨 > xíong; #8D68 +赩 > xì; #8D69 +赪 > chēng; #8D6A +赫 > hè; #8D6B +赬 > chēng; #8D6C +赭 > zhĕ; #8D6D +赮 > xiá; #8D6E +赯 > táng; #8D6F +走 > zŏu; #8D70 +赱 > zŏu; #8D71 +赲 > lì; #8D72 +赳 > jĭu; #8D73 +赴 > fù; #8D74 +赵 > zhào; #8D75 +赶 > găn; #8D76 +起 > qĭ; #8D77 +赸 > shàn; #8D78 +赹 > qíong; #8D79 +赺 > qín; #8D7A +赻 > xiăn; #8D7B +赼 > cī; #8D7C +赽 > jué; #8D7D +赾 > qĭn; #8D7E +赿 > chí; #8D7F +趀 > cī; #8D80 +趁 > chèn; #8D81 +趂 > chèn; #8D82 +趃 > dié; #8D83 +趄 > jū; #8D84 +超 > chāo; #8D85 +趆 > dī; #8D86 +趇 > sè; #8D87 +趈 > zhān; #8D88 +趉 > zhú; #8D89 +越 > yuè; #8D8A +趋 > qū; #8D8B +趌 > jié; #8D8C +趍 > chí; #8D8D +趎 > chú; #8D8E +趏 > guā; #8D8F +趐 > xuè; #8D90 +趑 > cī; #8D91 +趒 > tiáo; #8D92 +趓 > dŭo; #8D93 +趔 > liè; #8D94 +趕 > găn; #8D95 +趖 > sūo; #8D96 +趗 > cù; #8D97 +趘 > xí; #8D98 +趙 > zhào; #8D99 +趚 > sù; #8D9A +趛 > yĭn; #8D9B +趜 > jú; #8D9C +趝 > jiàn; #8D9D +趞 > què; #8D9E +趟 > tàng; #8D9F +趠 > chùo; #8DA0 +趡 > cŭi; #8DA1 +趢 > lù; #8DA2 +趣 > qù; #8DA3 +趤 > dàng; #8DA4 +趥 > qīu; #8DA5 +趦 > zī; #8DA6 +趧 > tí; #8DA7 +趨 > qū; #8DA8 +趩 > chì; #8DA9 +趪 > huáng; #8DAA +趫 > qiáo; #8DAB +趬 > qiáo; #8DAC +趭 > yào; #8DAD +趮 > zào; #8DAE +趯 > tì; #8DAF +趱 > zăn; #8DB1 +趲 > zăn; #8DB2 +足 > zú; #8DB3 +趴 > pā; #8DB4 +趵 > bào; #8DB5 +趶 > kù; #8DB6 +趷 > kē; #8DB7 +趸 > dŭn; #8DB8 +趹 > jué; #8DB9 +趺 > fū; #8DBA +趻 > chĕn; #8DBB +趼 > jiăn; #8DBC +趽 > fàng; #8DBD +趾 > zhĭ; #8DBE +趿 > sà; #8DBF +跀 > yuè; #8DC0 +跁 > pá; #8DC1 +跂 > qí; #8DC2 +跃 > yuè; #8DC3 +跄 > qiāng; #8DC4 +跅 > tùo; #8DC5 +跆 > tái; #8DC6 +跇 > yì; #8DC7 +跈 > niăn; #8DC8 +跉 > líng; #8DC9 +跊 > mèi; #8DCA +跋 > bá; #8DCB +跌 > diē; #8DCC +跍 > kū; #8DCD +跎 > túo; #8DCE +跏 > jiā; #8DCF +跐 > cĭ; #8DD0 +跑 > păo; #8DD1 +跒 > qiă; #8DD2 +跓 > zhù; #8DD3 +跔 > 
jū; #8DD4 +跕 > dié; #8DD5 +跖 > zhī; #8DD6 +跗 > fū; #8DD7 +跘 > pán; #8DD8 +跙 > jŭ; #8DD9 +跚 > shān; #8DDA +跛 > bŏ; #8DDB +跜 > ní; #8DDC +距 > jù; #8DDD +跞 > lì; #8DDE +跟 > gēn; #8DDF +跠 > yí; #8DE0 +跡 > jī; #8DE1 +跢 > dài; #8DE2 +跣 > xiăn; #8DE3 +跤 > jiāo; #8DE4 +跥 > dùo; #8DE5 +跦 > zhū; #8DE6 +跧 > zhuān; #8DE7 +跨 > kuà; #8DE8 +跩 > zhuăi; #8DE9 +跪 > gùi; #8DEA +跫 > qíong; #8DEB +跬 > kŭi; #8DEC +跭 > xiáng; #8DED +跮 > chì; #8DEE +路 > lù; #8DEF +跰 > bèng; #8DF0 +跱 > zhì; #8DF1 +跲 > jiá; #8DF2 +跳 > tiào; #8DF3 +跴 > căi; #8DF4 +践 > jiàn; #8DF5 +跶 > tà; #8DF6 +跷 > qiāo; #8DF7 +跸 > bì; #8DF8 +跹 > xiān; #8DF9 +跺 > dùo; #8DFA +跻 > jī; #8DFB +跼 > jú; #8DFC +跽 > jì; #8DFD +跾 > shú; #8DFE +跿 > tú; #8DFF +踀 > chù; #8E00 +踁 > jìng; #8E01 +踂 > niè; #8E02 +踃 > xiāo; #8E03 +踄 > bó; #8E04 +踅 > chì; #8E05 +踆 > qūn; #8E06 +踇 > mŏu; #8E07 +踈 > shū; #8E08 +踉 > láng; #8E09 +踊 > yŏng; #8E0A +踋 > jiăo; #8E0B +踌 > chóu; #8E0C +踍 > qiāo; #8E0D +踏 > tà; #8E0F +踐 > jiàn; #8E10 +踑 > qí; #8E11 +踒 > wō; #8E12 +踓 > wĕi; #8E13 +踔 > zhúo; #8E14 +踕 > jié; #8E15 +踖 > jí; #8E16 +踗 > niē; #8E17 +踘 > jú; #8E18 +踙 > jū; #8E19 +踚 > lún; #8E1A +踛 > lù; #8E1B +踜 > lèng; #8E1C +踝 > huái; #8E1D +踞 > jù; #8E1E +踟 > chí; #8E1F +踠 > wăn; #8E20 +踡 > quán; #8E21 +踢 > tī; #8E22 +踣 > bó; #8E23 +踤 > zú; #8E24 +踥 > qiè; #8E25 +踦 > jĭ; #8E26 +踧 > cù; #8E27 +踨 > zōng; #8E28 +踩 > căi; #8E29 +踪 > zōng; #8E2A +踫 > pèng; #8E2B +踬 > zhì; #8E2C +踭 > zhēng; #8E2D +踮 > diăn; #8E2E +踯 > zhí; #8E2F +踰 > yú; #8E30 +踱 > dùo; #8E31 +踲 > dùn; #8E32 +踳 > chŭn; #8E33 +踴 > yŏng; #8E34 +踵 > zhŏng; #8E35 +踶 > dì; #8E36 +踷 > zhĕ; #8E37 +踸 > chĕn; #8E38 +踹 > chuài; #8E39 +踺 > jiàn; #8E3A +踻 > guā; #8E3B +踼 > táng; #8E3C +踽 > jŭ; #8E3D +踾 > fú; #8E3E +踿 > zú; #8E3F +蹀 > dié; #8E40 +蹁 > pián; #8E41 +蹂 > róu; #8E42 +蹃 > nùo; #8E43 +蹄 > tí; #8E44 +蹅 > chă; #8E45 +蹆 > tŭi; #8E46 +蹇 > jiăn; #8E47 +蹈 > dào; #8E48 +蹉 > cūo; #8E49 +蹊 > xī; #8E4A +蹋 > tà; #8E4B +蹌 > qiāng; #8E4C +蹍 > zhăn; #8E4D +蹎 > diān; #8E4E +蹏 > tí; #8E4F +蹐 > jí; #8E50 +蹑 > niè; 
#8E51 +蹒 > mán; #8E52 +蹓 > līu; #8E53 +蹔 > zhàn; #8E54 +蹕 > bì; #8E55 +蹖 > chōng; #8E56 +蹗 > lù; #8E57 +蹘 > liáo; #8E58 +蹙 > cù; #8E59 +蹚 > tāng; #8E5A +蹛 > dài; #8E5B +蹜 > sūo; #8E5C +蹝 > xĭ; #8E5D +蹞 > kŭi; #8E5E +蹟 > jī; #8E5F +蹠 > zhí; #8E60 +蹡 > qiāng; #8E61 +蹢 > dí; #8E62 +蹣 > mán; #8E63 +蹤 > zōng; #8E64 +蹥 > lián; #8E65 +蹦 > bèng; #8E66 +蹧 > zāo; #8E67 +蹨 > niăn; #8E68 +蹩 > bié; #8E69 +蹪 > túi; #8E6A +蹫 > jú; #8E6B +蹬 > dèng; #8E6C +蹭 > cèng; #8E6D +蹮 > xiān; #8E6E +蹯 > fán; #8E6F +蹰 > chú; #8E70 +蹱 > zhōng; #8E71 +蹲 > dūn; #8E72 +蹳 > bō; #8E73 +蹴 > cù; #8E74 +蹵 > zú; #8E75 +蹶 > jué; #8E76 +蹷 > jué; #8E77 +蹸 > lìn; #8E78 +蹹 > tà; #8E79 +蹺 > qiāo; #8E7A +蹻 > qiāo; #8E7B +蹼 > pú; #8E7C +蹽 > liāo; #8E7D +蹾 > dūn; #8E7E +蹿 > cuān; #8E7F +躀 > kuàng; #8E80 +躁 > zào; #8E81 +躂 > tà; #8E82 +躃 > bì; #8E83 +躄 > bì; #8E84 +躅 > zhú; #8E85 +躆 > jù; #8E86 +躇 > chú; #8E87 +躈 > qiào; #8E88 +躉 > dŭn; #8E89 +躊 > chóu; #8E8A +躋 > jī; #8E8B +躌 > wŭ; #8E8C +躍 > yuè; #8E8D +躎 > niăn; #8E8E +躏 > lìn; #8E8F +躐 > liè; #8E90 +躑 > zhí; #8E91 +躒 > lì; #8E92 +躓 > zhì; #8E93 +躔 > chán; #8E94 +躕 > chú; #8E95 +躖 > duàn; #8E96 +躗 > wèi; #8E97 +躘 > lóng; #8E98 +躙 > lìn; #8E99 +躚 > xiān; #8E9A +躛 > wèi; #8E9B +躜 > zuān; #8E9C +躝 > lán; #8E9D +躞 > xiè; #8E9E +躟 > ráng; #8E9F +躠 > xiĕ; #8EA0 +躡 > niè; #8EA1 +躢 > tà; #8EA2 +躣 > qú; #8EA3 +躤 > jiè; #8EA4 +躥 > cuān; #8EA5 +躦 > zuān; #8EA6 +躧 > xĭ; #8EA7 +躨 > kúi; #8EA8 +躩 > jué; #8EA9 +躪 > lìn; #8EAA +身 > shēn; #8EAB +躬 > gōng; #8EAC +躭 > dān; #8EAD +躯 > qū; #8EAF +躰 > tĭ; #8EB0 +躱 > dŭo; #8EB1 +躲 > dŭo; #8EB2 +躳 > gōng; #8EB3 +躴 > láng; #8EB4 +躶 > lŭo; #8EB6 +躷 > ăi; #8EB7 +躸 > jī; #8EB8 +躹 > jú; #8EB9 +躺 > tăng; #8EBA +躽 > yăn; #8EBD +躿 > kāng; #8EBF +軀 > qū; #8EC0 +軁 > lóu; #8EC1 +軂 > lào; #8EC2 +軃 > tŭo; #8EC3 +軄 > zhí; #8EC4 +軆 > tĭ; #8EC6 +軇 > dào; #8EC7 +軉 > yù; #8EC9 +車 > chē; #8ECA +軋 > yà; #8ECB +軌 > gŭi; #8ECC +軍 > jūn; #8ECD +軎 > wèi; #8ECE +軏 > yuè; #8ECF +軐 > xìn; #8ED0 +軑 > dì; #8ED1 +軒 > xuān; #8ED2 +軓 > fàn; #8ED3 +軔 > rèn; #8ED4 
+軕 > shān; #8ED5 +軖 > qiáng; #8ED6 +軗 > shū; #8ED7 +軘 > tún; #8ED8 +軙 > chén; #8ED9 +軚 > dài; #8EDA +軛 > è; #8EDB +軜 > nà; #8EDC +軝 > qí; #8EDD +軞 > máo; #8EDE +軟 > ruăn; #8EDF +軠 > rèn; #8EE0 +軡 > făn; #8EE1 +転 > zhuăn; #8EE2 +軣 > hōng; #8EE3 +軤 > hū; #8EE4 +軥 > qú; #8EE5 +軦 > huàng; #8EE6 +軧 > dĭ; #8EE7 +軨 > líng; #8EE8 +軩 > dài; #8EE9 +軪 > āo; #8EEA +軫 > zhĕn; #8EEB +軬 > fàn; #8EEC +軭 > kuāng; #8EED +軮 > ăng; #8EEE +軯 > pēng; #8EEF +軰 > bèi; #8EF0 +軱 > gū; #8EF1 +軲 > kū; #8EF2 +軳 > páo; #8EF3 +軴 > zhù; #8EF4 +軵 > rŏng; #8EF5 +軶 > è; #8EF6 +軷 > bá; #8EF7 +軸 > zhóu; #8EF8 +軹 > zhĭ; #8EF9 +軺 > yáo; #8EFA +軻 > kē; #8EFB +軼 > yì; #8EFC +軽 > qīng; #8EFD +軾 > shì; #8EFE +軿 > píng; #8EFF +輀 > ér; #8F00 +輁 > qíong; #8F01 +輂 > jú; #8F02 +較 > jiào; #8F03 +輄 > guāng; #8F04 +輅 > lù; #8F05 +輆 > kăi; #8F06 +輇 > quán; #8F07 +輈 > zhōu; #8F08 +載 > zài; #8F09 +輊 > zhì; #8F0A +輋 > shē; #8F0B +輌 > liàng; #8F0C +輍 > yù; #8F0D +輎 > shāo; #8F0E +輏 > yóu; #8F0F +輐 > huăn; #8F10 +輑 > yŭn; #8F11 +輒 > zhé; #8F12 +輓 > wăn; #8F13 +輔 > fŭ; #8F14 +輕 > qīng; #8F15 +輖 > zhōu; #8F16 +輗 > ní; #8F17 +輘 > líng; #8F18 +輙 > zhé; #8F19 +輚 > zhàn; #8F1A +輛 > liàng; #8F1B +輜 > zī; #8F1C +輝 > hūi; #8F1D +輞 > wăng; #8F1E +輟 > chùo; #8F1F +輠 > gŭo; #8F20 +輡 > kăn; #8F21 +輢 > yĭ; #8F22 +輣 > péng; #8F23 +輤 > qiàn; #8F24 +輥 > gŭn; #8F25 +輦 > niăn; #8F26 +輧 > pián; #8F27 +輨 > guăn; #8F28 +輩 > bèi; #8F29 +輪 > lún; #8F2A +輫 > pái; #8F2B +輬 > liáng; #8F2C +輭 > ruăn; #8F2D +輮 > róu; #8F2E +輯 > jí; #8F2F +輰 > yáng; #8F30 +輱 > xián; #8F31 +輲 > chuán; #8F32 +輳 > còu; #8F33 +輴 > qūn; #8F34 +輵 > gé; #8F35 +輶 > yóu; #8F36 +輷 > hōng; #8F37 +輸 > shū; #8F38 +輹 > fù; #8F39 +輺 > zī; #8F3A +輻 > fú; #8F3B +輼 > wēn; #8F3C +輽 > bèn; #8F3D +輾 > zhăn; #8F3E +輿 > yú; #8F3F +轀 > wēn; #8F40 +轁 > tāo; #8F41 +轂 > gŭ; #8F42 +轃 > zhēn; #8F43 +轄 > xiá; #8F44 +轅 > yuán; #8F45 +轆 > lù; #8F46 +轇 > jīu; #8F47 +轈 > cháo; #8F48 +轉 > zhuăn; #8F49 +轊 > wèi; #8F4A +轋 > hún; #8F4B +轍 > chè; #8F4D +轎 > jiào; #8F4E +轏 > zhàn; #8F4F +轐 > pú; #8F50 +轑 > 
lăo; #8F51 +轒 > fén; #8F52 +轓 > fān; #8F53 +轔 > lín; #8F54 +轕 > gé; #8F55 +轖 > sè; #8F56 +轗 > kăn; #8F57 +轘 > huàn; #8F58 +轙 > yĭ; #8F59 +轚 > jí; #8F5A +轛 > dùi; #8F5B +轜 > ér; #8F5C +轝 > yú; #8F5D +轞 > xiàn; #8F5E +轟 > hōng; #8F5F +轠 > lĕi; #8F60 +轡 > pèi; #8F61 +轢 > lì; #8F62 +轣 > lì; #8F63 +轤 > lú; #8F64 +轥 > lìn; #8F65 +车 > chē; #8F66 +轧 > yà; #8F67 +轨 > gŭi; #8F68 +轩 > xuān; #8F69 +轪 > dì; #8F6A +轫 > rèn; #8F6B +转 > zhuăn; #8F6C +轭 > è; #8F6D +轮 > lún; #8F6E +软 > ruăn; #8F6F +轰 > hōng; #8F70 +轱 > kū; #8F71 +轲 > kē; #8F72 +轳 > lú; #8F73 +轴 > zhóu; #8F74 +轵 > zhĭ; #8F75 +轶 > yì; #8F76 +轷 > hū; #8F77 +轸 > zhĕn; #8F78 +轹 > lì; #8F79 +轺 > yáo; #8F7A +轻 > qīng; #8F7B +轼 > shì; #8F7C +载 > zài; #8F7D +轾 > zhì; #8F7E +轿 > jiào; #8F7F +辀 > zhōu; #8F80 +辁 > quán; #8F81 +辂 > lù; #8F82 +较 > jiào; #8F83 +辄 > zhé; #8F84 +辅 > fŭ; #8F85 +辆 > liàng; #8F86 +辇 > niăn; #8F87 +辈 > bèi; #8F88 +辉 > hūi; #8F89 +辊 > gŭn; #8F8A +辋 > wăng; #8F8B +辌 > liáng; #8F8C +辍 > chùo; #8F8D +辎 > zī; #8F8E +辏 > còu; #8F8F +辐 > fú; #8F90 +辑 > jí; #8F91 +辒 > wēn; #8F92 +输 > shū; #8F93 +辔 > pèi; #8F94 +辕 > yuán; #8F95 +辖 > xiá; #8F96 +辗 > zhăn; #8F97 +辘 > lù; #8F98 +辙 > chè; #8F99 +辚 > lín; #8F9A +辛 > xīn; #8F9B +辜 > gū; #8F9C +辝 > cí; #8F9D +辞 > cí; #8F9E +辟 > pì; #8F9F +辠 > zùi; #8FA0 +辡 > biàn; #8FA1 +辢 > là; #8FA2 +辣 > là; #8FA3 +辤 > cí; #8FA4 +辥 > xuē; #8FA5 +辦 > bàn; #8FA6 +辧 > biàn; #8FA7 +辨 > biàn; #8FA8 +辩 > biàn; #8FA9 +辫 > biàn; #8FAB +辬 > bān; #8FAC +辭 > cí; #8FAD +辮 > biàn; #8FAE +辯 > biàn; #8FAF +辰 > chén; #8FB0 +辱 > rù; #8FB1 +農 > nóng; #8FB2 +辳 > nóng; #8FB3 +辴 > zhĕn; #8FB4 +辵 > chùo; #8FB5 +辶 > chùo; #8FB6 +辸 > réng; #8FB8 +边 > biān; #8FB9 +辺 > biān; #8FBA +辽 > liáo; #8FBD +达 > dá; #8FBE +辿 > chān; #8FBF +迀 > gān; #8FC0 +迁 > qiān; #8FC1 +迂 > yū; #8FC2 +迃 > yū; #8FC3 +迄 > qì; #8FC4 +迅 > xùn; #8FC5 +迆 > yĭ; #8FC6 +过 > gùo; #8FC7 +迈 > mài; #8FC8 +迉 > qí; #8FC9 +迊 > zā; #8FCA +迋 > wàng; #8FCB +迌 > jia; #8FCC +迍 > zhūn; #8FCD +迎 > yíng; #8FCE +迏 > tì; #8FCF +运 > yùn; #8FD0 +近 > jìn; #8FD1 
+迒 > háng; #8FD2 +迓 > yà; #8FD3 +返 > făn; #8FD4 +迕 > wù; #8FD5 +迖 > dá; #8FD6 +迗 > é; #8FD7 +还 > huán; #8FD8 +这 > zhè; #8FD9 +进 > jìn; #8FDB +远 > yuăn; #8FDC +违 > wéi; #8FDD +连 > lián; #8FDE +迟 > chí; #8FDF +迠 > chè; #8FE0 +迡 > nì; #8FE1 +迢 > tiáo; #8FE2 +迣 > zhì; #8FE3 +迤 > yĭ; #8FE4 +迥 > jĭong; #8FE5 +迦 > jiā; #8FE6 +迧 > chén; #8FE7 +迨 > dài; #8FE8 +迩 > ĕr; #8FE9 +迪 > dí; #8FEA +迫 > pò; #8FEB +迬 > wăng; #8FEC +迭 > dié; #8FED +迮 > zé; #8FEE +迯 > táo; #8FEF +述 > shù; #8FF0 +迱 > túo; #8FF1 +迳 > jìng; #8FF3 +迴 > húi; #8FF4 +迵 > tóng; #8FF5 +迶 > yòu; #8FF6 +迷 > mí; #8FF7 +迸 > bèng; #8FF8 +迹 > jī; #8FF9 +迺 > năi; #8FFA +迻 > yí; #8FFB +迼 > jié; #8FFC +追 > zhūi; #8FFD +迾 > liè; #8FFE +迿 > xùn; #8FFF +退 > tùi; #9000 +送 > sòng; #9001 +适 > guā; #9002 +逃 > táo; #9003 +逄 > páng; #9004 +逅 > hòu; #9005 +逆 > nì; #9006 +逇 > dùn; #9007 +逈 > jĭong; #9008 +选 > xuăn; #9009 +逊 > xùn; #900A +逋 > bū; #900B +逌 > yóu; #900C +逍 > xiāo; #900D +逎 > qíu; #900E +透 > tòu; #900F +逐 > zhú; #9010 +逑 > qíu; #9011 +递 > dì; #9012 +逓 > dì; #9013 +途 > tú; #9014 +逕 > jìng; #9015 +逖 > tì; #9016 +逗 > dòu; #9017 +逘 > yĭ; #9018 +這 > zhè; #9019 +通 > tōng; #901A +逛 > guàng; #901B +逜 > wù; #901C +逝 > shì; #901D +逞 > chĕng; #901E +速 > sù; #901F +造 > zào; #9020 +逡 > qūn; #9021 +逢 > féng; #9022 +連 > lián; #9023 +逤 > sùo; #9024 +逥 > húi; #9025 +逦 > lĭ; #9026 +逨 > lái; #9028 +逩 > bèn; #9029 +逪 > cùo; #902A +逫 > jué; #902B +逬 > bèng; #902C +逭 > huàn; #902D +逮 > dài; #902E +逯 > lù; #902F +逰 > yóu; #9030 +週 > zhōu; #9031 +進 > jìn; #9032 +逳 > yù; #9033 +逴 > chùo; #9034 +逵 > kúi; #9035 +逶 > wēi; #9036 +逷 > tì; #9037 +逸 > yì; #9038 +逹 > dá; #9039 +逺 > yuăn; #903A +逻 > lúo; #903B +逼 > bī; #903C +逽 > nùo; #903D +逾 > yú; #903E +逿 > dàng; #903F +遀 > súi; #9040 +遁 > dùn; #9041 +遂 > sùi; #9042 +遃 > yăn; #9043 +遄 > chuán; #9044 +遅 > chí; #9045 +遆 > tí; #9046 +遇 > yù; #9047 +遈 > shí; #9048 +遉 > zhēn; #9049 +遊 > yóu; #904A +運 > yùn; #904B +遌 > è; #904C +遍 > biàn; #904D +過 > gùo; #904E +遏 > è; #904F +遐 > xiá; #9050 +遑 > huáng; 
#9051 +遒 > qíu; #9052 +道 > dào; #9053 +達 > dá; #9054 +違 > wéi; #9055 +遗 > yí; #9057 +遘 > gòu; #9058 +遙 > yáo; #9059 +遚 > chù; #905A +遛 > líu; #905B +遜 > xùn; #905C +遝 > tà; #905D +遞 > dì; #905E +遟 > chí; #905F +遠 > yuăn; #9060 +遡 > sù; #9061 +遢 > tà; #9062 +遣 > qiăn; #9063 +遥 > yáo; #9065 +遦 > guàn; #9066 +遧 > zhāng; #9067 +遨 > áo; #9068 +適 > shì; #9069 +遪 > cè; #906A +遫 > chì; #906B +遬 > sù; #906C +遭 > zāo; #906D +遮 > zhē; #906E +遯 > dùn; #906F +遰 > dì; #9070 +遱 > lóu; #9071 +遲 > chí; #9072 +遳 > cūo; #9073 +遴 > lín; #9074 +遵 > zūn; #9075 +遶 > rào; #9076 +遷 > qiān; #9077 +選 > xuăn; #9078 +遹 > yù; #9079 +遺 > yí; #907A +遻 > wù; #907B +遼 > liáo; #907C +遽 > jù; #907D +遾 > shì; #907E +避 > bì; #907F +邀 > yāo; #9080 +邁 > mài; #9081 +邂 > xiè; #9082 +邃 > sùi; #9083 +還 > huán; #9084 +邅 > zhān; #9085 +邆 > téng; #9086 +邇 > ĕr; #9087 +邈 > miăo; #9088 +邉 > biān; #9089 +邊 > biān; #908A +邋 > lá; #908B +邌 > lí; #908C +邍 > yuán; #908D +邎 > yáo; #908E +邏 > lúo; #908F +邐 > lĭ; #9090 +邑 > yì; #9091 +邒 > tíng; #9092 +邓 > dèng; #9093 +邔 > qĭ; #9094 +邕 > yōng; #9095 +邖 > shān; #9096 +邗 > hán; #9097 +邘 > yú; #9098 +邙 > máng; #9099 +邚 > rú; #909A +邛 > qíong; #909B +邝 > kuàng; #909D +邞 > fū; #909E +邟 > kàng; #909F +邠 > bīn; #90A0 +邡 > fāng; #90A1 +邢 > xíng; #90A2 +那 > nà; #90A3 +邤 > xin; #90A4 +邥 > shĕn; #90A5 +邦 > bāng; #90A6 +邧 > yuán; #90A7 +邨 > cūn; #90A8 +邩 > hŭo; #90A9 +邪 > xié; #90AA +邫 > bāng; #90AB +邬 > wū; #90AC +邭 > jù; #90AD +邮 > yóu; #90AE +邯 > hán; #90AF +邰 > tái; #90B0 +邱 > qīu; #90B1 +邲 > bì; #90B2 +邳 > péi; #90B3 +邴 > bĭng; #90B4 +邵 > shào; #90B5 +邶 > bèi; #90B6 +邷 > wă; #90B7 +邸 > dĭ; #90B8 +邹 > zōu; #90B9 +邺 > yè; #90BA +邻 > lín; #90BB +邼 > kuāng; #90BC +邽 > gūi; #90BD +邾 > zhū; #90BE +邿 > shī; #90BF +郀 > kū; #90C0 +郁 > yù; #90C1 +郂 > gāi; #90C2 +郃 > gé; #90C3 +郄 > xì; #90C4 +郅 > zhì; #90C5 +郆 > jí; #90C6 +郇 > xún; #90C7 +郈 > hòu; #90C8 +郉 > xíng; #90C9 +郊 > jiāo; #90CA +郋 > xí; #90CB +郌 > gūi; #90CC +郍 > núo; #90CD +郎 > láng; #90CE +郏 > jiá; #90CF +郐 > kuài; #90D0 +郑 > 
zhèng; #90D1 +郓 > yùn; #90D3 +郔 > yán; #90D4 +郕 > chéng; #90D5 +郖 > dōu; #90D6 +郗 > chī; #90D7 +郘 > lǚ; #90D8 +郙 > fŭ; #90D9 +郚 > wú; #90DA +郛 > fú; #90DB +郜 > gào; #90DC +郝 > hăo; #90DD +郞 > láng; #90DE +郟 > jiá; #90DF +郠 > gĕng; #90E0 +郡 > jùn; #90E1 +郢 > yĭng; #90E2 +郣 > bó; #90E3 +郤 > xì; #90E4 +郥 > bèi; #90E5 +郦 > lì; #90E6 +郧 > yún; #90E7 +部 > bù; #90E8 +郩 > xiáo; #90E9 +郪 > qī; #90EA +郫 > pí; #90EB +郬 > qīng; #90EC +郭 > gūo; #90ED +郮 > zhou; #90EE +郯 > tán; #90EF +郰 > zōu; #90F0 +郱 > píng; #90F1 +郲 > lái; #90F2 +郳 > ní; #90F3 +郴 > chēn; #90F4 +郵 > yóu; #90F5 +郶 > bù; #90F6 +郷 > xiāng; #90F7 +郸 > dān; #90F8 +郹 > jú; #90F9 +郺 > yōng; #90FA +郻 > qiāo; #90FB +郼 > yī; #90FC +都 > dū; #90FD +郾 > yăn; #90FE +郿 > méi; #90FF +鄀 > rùo; #9100 +鄁 > bèi; #9101 +鄂 > è; #9102 +鄃 > yú; #9103 +鄄 > juàn; #9104 +鄅 > yŭ; #9105 +鄆 > yùn; #9106 +鄇 > hòu; #9107 +鄈 > kúi; #9108 +鄉 > xiāng; #9109 +鄊 > xiāng; #910A +鄋 > sōu; #910B +鄌 > táng; #910C +鄍 > míng; #910D +鄎 > xì; #910E +鄏 > rù; #910F +鄐 > chù; #9110 +鄑 > zī; #9111 +鄒 > zōu; #9112 +鄓 > jú; #9113 +鄔 > wū; #9114 +鄕 > xiāng; #9115 +鄖 > yún; #9116 +鄗 > hào; #9117 +鄘 > yōng; #9118 +鄙 > bĭ; #9119 +鄚 > mò; #911A +鄛 > cháo; #911B +鄜 > fū; #911C +鄝 > liăo; #911D +鄞 > yín; #911E +鄟 > zhuān; #911F +鄠 > hù; #9120 +鄡 > qiāo; #9121 +鄢 > yān; #9122 +鄣 > zhāng; #9123 +鄤 > fàn; #9124 +鄥 > qiāo; #9125 +鄦 > xŭ; #9126 +鄧 > dèng; #9127 +鄨 > bì; #9128 +鄩 > xín; #9129 +鄪 > bì; #912A +鄫 > céng; #912B +鄬 > wéi; #912C +鄭 > zhèng; #912D +鄮 > mào; #912E +鄯 > shàn; #912F +鄰 > lín; #9130 +鄱 > pó; #9131 +鄲 > dān; #9132 +鄳 > méng; #9133 +鄴 > yè; #9134 +鄵 > cāo; #9135 +鄶 > kuài; #9136 +鄷 > fēng; #9137 +鄸 > méng; #9138 +鄹 > zōu; #9139 +鄺 > kuàng; #913A +鄻 > lián; #913B +鄼 > zàn; #913C +鄽 > chán; #913D +鄾 > yōu; #913E +鄿 > qí; #913F +酀 > yān; #9140 +酁 > chán; #9141 +酂 > zàn; #9142 +酃 > líng; #9143 +酄 > huān; #9144 +酅 > xī; #9145 +酆 > fēng; #9146 +酇 > zàn; #9147 +酈 > lì; #9148 +酉 > yŏu; #9149 +酊 > dĭng; #914A +酋 > qíu; #914B +酌 > zhúo; #914C +配 > pèi; #914D +酎 
> zhòu; #914E +酏 > yí; #914F +酐 > hăng; #9150 +酑 > yŭ; #9151 +酒 > jĭu; #9152 +酓 > yăn; #9153 +酔 > zùi; #9154 +酕 > máo; #9155 +酖 > dān; #9156 +酗 > xù; #9157 +酘 > tóu; #9158 +酙 > zhēn; #9159 +酚 > fēn; #915A +酝 > yùn; #915D +酞 > tài; #915E +酟 > tiān; #915F +酠 > qiă; #9160 +酡 > túo; #9161 +酢 > zùo; #9162 +酣 > hān; #9163 +酤 > gū; #9164 +酥 > sū; #9165 +酦 > pò; #9166 +酧 > chóu; #9167 +酨 > zài; #9168 +酩 > míng; #9169 +酪 > lùo; #916A +酫 > chùo; #916B +酬 > chóu; #916C +酭 > yòu; #916D +酮 > tóng; #916E +酯 > zhĭ; #916F +酰 > xiān; #9170 +酱 > jiàng; #9171 +酲 > chéng; #9172 +酳 > yìn; #9173 +酴 > tú; #9174 +酵 > xiào; #9175 +酶 > méi; #9176 +酷 > kù; #9177 +酸 > suān; #9178 +酹 > lèi; #9179 +酺 > pú; #917A +酻 > zùi; #917B +酼 > hăi; #917C +酽 > yàn; #917D +酾 > xĭ; #917E +酿 > niàng; #917F +醀 > wéi; #9180 +醁 > lù; #9181 +醂 > lăn; #9182 +醃 > yān; #9183 +醄 > táo; #9184 +醅 > pēi; #9185 +醆 > zhăn; #9186 +醇 > chún; #9187 +醈 > tán; #9188 +醉 > zùi; #9189 +醊 > chùo; #918A +醋 > cù; #918B +醌 > kūn; #918C +醍 > tí; #918D +醎 > mián; #918E +醏 > dū; #918F +醐 > hú; #9190 +醑 > xŭ; #9191 +醒 > xĭng; #9192 +醓 > tăn; #9193 +醔 > jīu; #9194 +醕 > chún; #9195 +醖 > yùn; #9196 +醗 > pò; #9197 +醘 > kè; #9198 +醙 > sōu; #9199 +醚 > mí; #919A +醛 > quán; #919B +醜 > chŏu; #919C +醝 > cúo; #919D +醞 > yùn; #919E +醟 > yòng; #919F +醠 > àng; #91A0 +醡 > zhà; #91A1 +醢 > hăi; #91A2 +醣 > táng; #91A3 +醤 > jiàng; #91A4 +醥 > piăo; #91A5 +醦 > shăn; #91A6 +醧 > yù; #91A7 +醨 > lí; #91A8 +醩 > záo; #91A9 +醪 > láo; #91AA +醫 > yī; #91AB +醬 > jiàng; #91AC +醭 > pū; #91AD +醮 > jiào; #91AE +醯 > xī; #91AF +醰 > tán; #91B0 +醱 > pò; #91B1 +醲 > nóng; #91B2 +醳 > yì; #91B3 +醴 > lĭ; #91B4 +醵 > jù; #91B5 +醶 > jiào; #91B6 +醷 > yì; #91B7 +醸 > niàng; #91B8 +醹 > rú; #91B9 +醺 > xūn; #91BA +醻 > chóu; #91BB +醼 > yàn; #91BC +醽 > líng; #91BD +醾 > mí; #91BE +醿 > mí; #91BF +釀 > niàng; #91C0 +釁 > xìn; #91C1 +釂 > jiào; #91C2 +釃 > xĭ; #91C3 +釄 > mí; #91C4 +釅 > yàn; #91C5 +釆 > biàn; #91C6 +采 > căi; #91C7 +釈 > shì; #91C8 +釉 > yòu; #91C9 +释 > shì; #91CA +釋 > shì; #91CB +里 > 
lĭ; #91CC +重 > zhòng; #91CD +野 > yĕ; #91CE +量 > liàng; #91CF +釐 > lí; #91D0 +金 > jīn; #91D1 +釒 > jīn' 'zì' 'páng; #91D2 +釓 > qíu; #91D3 +釔 > yĭ; #91D4 +釕 > diăo; #91D5 +釖 > dāo; #91D6 +釗 > zhāo; #91D7 +釘 > dīng; #91D8 +釙 > pò; #91D9 +釚 > qíu; #91DA +釛 > hé; #91DB +釜 > fŭ; #91DC +針 > zhēn; #91DD +釞 > zhí; #91DE +釟 > bā; #91DF +釠 > luàn; #91E0 +釡 > fŭ; #91E1 +釢 > nái; #91E2 +釣 > diào; #91E3 +釤 > shàn; #91E4 +釥 > qiăo; #91E5 +釦 > kòu; #91E6 +釧 > chuàn; #91E7 +釨 > zĭ; #91E8 +釩 > fán; #91E9 +釪 > yú; #91EA +釫 > huá; #91EB +釬 > hàn; #91EC +釭 > gōng; #91ED +釮 > qí; #91EE +釯 > máng; #91EF +釰 > rì; #91F0 +釱 > dì; #91F1 +釲 > sì; #91F2 +釳 > xì; #91F3 +釴 > yì; #91F4 +釵 > chāi; #91F5 +釶 > shī; #91F6 +釷 > tŭ; #91F7 +釸 > xì; #91F8 +釹 > nǚ; #91F9 +釺 > qiān; #91FA +釼 > jiàn; #91FC +釽 > pī; #91FD +釾 > yé; #91FE +釿 > yín; #91FF +鈀 > bă; #9200 +鈁 > fāng; #9201 +鈂 > chén; #9202 +鈃 > xíng; #9203 +鈄 > tŏu; #9204 +鈅 > yuè; #9205 +鈆 > yán; #9206 +鈇 > fū; #9207 +鈈 > pī; #9208 +鈉 > nà; #9209 +鈊 > xīn; #920A +鈋 > é; #920B +鈌 > jué; #920C +鈍 > dùn; #920D +鈎 > gōu; #920E +鈏 > yĭn; #920F +鈐 > qián; #9210 +鈑 > băn; #9211 +鈒 > jí; #9212 +鈓 > rén; #9213 +鈔 > chāo; #9214 +鈕 > nĭu; #9215 +鈖 > fēn; #9216 +鈗 > yŭn; #9217 +鈘 > jĭ; #9218 +鈙 > qín; #9219 +鈚 > pí; #921A +鈛 > gūo; #921B +鈜 > hóng; #921C +鈝 > yín; #921D +鈞 > jūn; #921E +鈟 > shī; #921F +鈠 > yì; #9220 +鈡 > zhōng; #9221 +鈢 > niē; #9222 +鈣 > gài; #9223 +鈤 > rì; #9224 +鈥 > húo; #9225 +鈦 > tài; #9226 +鈧 > kàng; #9227 +鈬 > dúo; #922C +鈭 > zī; #922D +鈮 > nĭ; #922E +鈯 > tú; #922F +鈰 > shì; #9230 +鈱 > mín; #9231 +鈲 > gū; #9232 +鈳 > ē; #9233 +鈴 > líng; #9234 +鈵 > bìng; #9235 +鈶 > yí; #9236 +鈷 > gū; #9237 +鈸 > bá; #9238 +鈹 > pī; #9239 +鈺 > yù; #923A +鈻 > sì; #923B +鈼 > zúo; #923C +鈽 > bù; #923D +鈾 > yóu; #923E +鈿 > diàn; #923F +鉀 > jiă; #9240 +鉁 > zhēn; #9241 +鉂 > shĭ; #9242 +鉃 > shì; #9243 +鉄 > tiĕ; #9244 +鉅 > jù; #9245 +鉆 > zhān; #9246 +鉇 > shī; #9247 +鉈 > shé; #9248 +鉉 > xuàn; #9249 +鉊 > zhāo; #924A +鉋 > bào; #924B +鉌 > hé; #924C +鉍 > bì; #924D +鉎 > 
shēng; #924E +鉏 > chú; #924F +鉐 > shí; #9250 +鉑 > bó; #9251 +鉒 > zhù; #9252 +鉓 > chì; #9253 +鉔 > zā; #9254 +鉕 > pō; #9255 +鉖 > tóng; #9256 +鉗 > qián; #9257 +鉘 > fú; #9258 +鉙 > zhăi; #9259 +鉚 > lĭu; #925A +鉛 > qiān; #925B +鉜 > fú; #925C +鉝 > lì; #925D +鉞 > yuè; #925E +鉟 > pī; #925F +鉠 > yāng; #9260 +鉡 > bàn; #9261 +鉢 > bō; #9262 +鉣 > jié; #9263 +鉤 > gōu; #9264 +鉥 > shù; #9265 +鉦 > zhēng; #9266 +鉧 > mŭ; #9267 +鉨 > nĭ; #9268 +鉩 > niē; #9269 +鉪 > dì; #926A +鉫 > jiā; #926B +鉬 > mù; #926C +鉭 > dàn; #926D +鉮 > shēn; #926E +鉯 > yĭ; #926F +鉰 > sī; #9270 +鉱 > kuàng; #9271 +鉲 > kă; #9272 +鉳 > bĕi; #9273 +鉴 > jiàn; #9274 +鉵 > tóng; #9275 +鉶 > xíng; #9276 +鉷 > hóng; #9277 +鉸 > jiăo; #9278 +鉹 > chĭ; #9279 +鉺 > èr; #927A +鉻 > gè; #927B +鉼 > bĭng; #927C +鉽 > shì; #927D +鉾 > móu; #927E +鉿 > jiá; #927F +銀 > yín; #9280 +銁 > jūn; #9281 +銂 > zhōu; #9282 +銃 > chòng; #9283 +銄 > shàng; #9284 +銅 > tóng; #9285 +銆 > mò; #9286 +銇 > lèi; #9287 +銈 > jī; #9288 +銉 > yù; #9289 +銊 > xù; #928A +銋 > rén; #928B +銌 > zùn; #928C +銍 > zhì; #928D +銎 > qīong; #928E +銏 > shàn; #928F +銐 > chì; #9290 +銑 > xiăn; #9291 +銒 > xíng; #9292 +銓 > quán; #9293 +銔 > pī; #9294 +銕 > tiĕ; #9295 +銖 > zhū; #9296 +銗 > hóu; #9297 +銘 > míng; #9298 +銙 > kuă; #9299 +銚 > yáo; #929A +銛 > xiān; #929B +銜 > xián; #929C +銝 > xīu; #929D +銞 > jūn; #929E +銟 > chā; #929F +銠 > lăo; #92A0 +銡 > jí; #92A1 +銢 > pĭ; #92A2 +銣 > rŭ; #92A3 +銤 > mĭ; #92A4 +銥 > yĭ; #92A5 +銦 > yīn; #92A6 +銧 > guāng; #92A7 +銨 > ān; #92A8 +銩 > diōu; #92A9 +銪 > yŏu; #92AA +銫 > sè; #92AB +銬 > kào; #92AC +銭 > qián; #92AD +銮 > luán; #92AE +銰 > āi; #92B0 +銱 > diào; #92B1 +銲 > hàn; #92B2 +銳 > rùi; #92B3 +銴 > shì; #92B4 +銵 > kēng; #92B5 +銶 > qíu; #92B6 +銷 > xiāo; #92B7 +銸 > zhé; #92B8 +銹 > xìu; #92B9 +銺 > zàng; #92BA +銻 > tì; #92BB +銼 > cùo; #92BC +銽 > guā; #92BD +銾 > gŏng; #92BE +銿 > zhōng; #92BF +鋀 > dòu; #92C0 +鋁 > lǚ; #92C1 +鋂 > méi; #92C2 +鋃 > láng; #92C3 +鋄 > wăn; #92C4 +鋅 > xīn; #92C5 +鋆 > yún; #92C6 +鋇 > bèi; #92C7 +鋈 > wù; #92C8 +鋉 > sù; #92C9 +鋊 > yù; #92CA +鋋 > 
chán; #92CB +鋌 > tĭng; #92CC +鋍 > bó; #92CD +鋎 > hàn; #92CE +鋏 > jiá; #92CF +鋐 > hóng; #92D0 +鋑 > cuān; #92D1 +鋒 > fēng; #92D2 +鋓 > chān; #92D3 +鋔 > wăn; #92D4 +鋕 > zhì; #92D5 +鋖 > sī; #92D6 +鋗 > xuān; #92D7 +鋘 > wú; #92D8 +鋙 > wú; #92D9 +鋚 > tiáo; #92DA +鋛 > gŏng; #92DB +鋜 > zhúo; #92DC +鋝 > lǜe; #92DD +鋞 > xíng; #92DE +鋟 > qiān; #92DF +鋠 > shèn; #92E0 +鋡 > hán; #92E1 +鋢 > lǜe; #92E2 +鋣 > xié; #92E3 +鋤 > chú; #92E4 +鋥 > zhèng; #92E5 +鋦 > jú; #92E6 +鋧 > xiàn; #92E7 +鋨 > tiĕ; #92E8 +鋩 > máng; #92E9 +鋪 > pū; #92EA +鋫 > lí; #92EB +鋬 > pàn; #92EC +鋭 > rùi; #92ED +鋮 > chéng; #92EE +鋯 > gào; #92EF +鋰 > lĭ; #92F0 +鋱 > tè; #92F1 +鋳 > zhù; #92F3 +鋵 > tū; #92F5 +鋶 > lĭu; #92F6 +鋷 > zùi; #92F7 +鋸 > jù; #92F8 +鋹 > chăng; #92F9 +鋺 > yuān; #92FA +鋻 > jiàn; #92FB +鋼 > gāng; #92FC +鋽 > diào; #92FD +鋾 > táo; #92FE +鋿 > cháng; #92FF +錀 > lún; #9300 +錁 > kuă; #9301 +錂 > líng; #9302 +錃 > bēi; #9303 +錄 > lù; #9304 +錅 > lí; #9305 +錆 > qiāng; #9306 +錇 > póu; #9307 +錈 > juàn; #9308 +錉 > mín; #9309 +錊 > zùi; #930A +錋 > péng; #930B +錌 > àn; #930C +錍 > pí; #930D +錎 > xiàn; #930E +錏 > yà; #930F +錐 > zhūi; #9310 +錑 > lèi; #9311 +錒 > ā; #9312 +錓 > kōng; #9313 +錔 > tà; #9314 +錕 > kūn; #9315 +錖 > dŭ; #9316 +錗 > wèi; #9317 +錘 > chúi; #9318 +錙 > zī; #9319 +錚 > zhēng; #931A +錛 > bēn; #931B +錜 > niē; #931C +錝 > cóng; #931D +錞 > qún; #931E +錟 > tán; #931F +錠 > dìng; #9320 +錡 > qí; #9321 +錢 > qián; #9322 +錣 > zhúo; #9323 +錤 > qí; #9324 +錥 > yù; #9325 +錦 > jĭn; #9326 +錧 > guăn; #9327 +錨 > máo; #9328 +錩 > chāng; #9329 +錪 > tiăn; #932A +錫 > xí; #932B +錬 > liàn; #932C +錭 > táo; #932D +錮 > gù; #932E +錯 > cùo; #932F +錰 > shù; #9330 +錱 > zhēn; #9331 +録 > lù; #9332 +錳 > mĕng; #9333 +錴 > lù; #9334 +錵 > huā; #9335 +錶 > biăo; #9336 +錷 > gá; #9337 +錸 > lái; #9338 +錹 > kĕn; #9339 +錼 > nài; #933C +錽 > wăn; #933D +錾 > zàn; #933E +鍀 > dé; #9340 +鍁 > xiān; #9341 +鍃 > hūo; #9343 +鍄 > liàng; #9344 +鍆 > mén; #9346 +鍇 > kăi; #9347 +鍈 > yīng; #9348 +鍉 > dī; #9349 +鍊 > liàn; #934A +鍋 > gūo; #934B +鍌 > xiăn; #934C +鍍 > dù; 
#934D +鍎 > tú; #934E +鍏 > wéi; #934F +鍐 > cōng; #9350 +鍑 > fù; #9351 +鍒 > róu; #9352 +鍓 > jí; #9353 +鍔 > è; #9354 +鍕 > róu; #9355 +鍖 > chĕn; #9356 +鍗 > tí; #9357 +鍘 > zhá; #9358 +鍙 > hòng; #9359 +鍚 > yáng; #935A +鍛 > duàn; #935B +鍜 > xiā; #935C +鍝 > yú; #935D +鍞 > kēng; #935E +鍟 > xīng; #935F +鍠 > huáng; #9360 +鍡 > wĕi; #9361 +鍢 > fù; #9362 +鍣 > zhāo; #9363 +鍤 > chá; #9364 +鍥 > qiè; #9365 +鍦 > shé; #9366 +鍧 > hōng; #9367 +鍨 > kúi; #9368 +鍩 > tiăn; #9369 +鍪 > móu; #936A +鍫 > qiāo; #936B +鍬 > qiāo; #936C +鍭 > hóu; #936D +鍮 > tōu; #936E +鍯 > cōng; #936F +鍰 > huán; #9370 +鍱 > yè; #9371 +鍲 > mín; #9372 +鍳 > jiàn; #9373 +鍴 > duān; #9374 +鍵 > jiàn; #9375 +鍶 > sōng; #9376 +鍷 > kūi; #9377 +鍸 > hú; #9378 +鍹 > xuān; #9379 +鍺 > dŭo; #937A +鍻 > jié; #937B +鍼 > zhēn; #937C +鍽 > biān; #937D +鍾 > zhōng; #937E +鍿 > zī; #937F +鎀 > xīu; #9380 +鎁 > yé; #9381 +鎂 > mĕi; #9382 +鎃 > pài; #9383 +鎄 > āi; #9384 +鎅 > jiè; #9385 +鎇 > méi; #9387 +鎈 > chūo; #9388 +鎉 > tà; #9389 +鎊 > bàng; #938A +鎋 > xiá; #938B +鎌 > lián; #938C +鎍 > sŭo; #938D +鎎 > xì; #938E +鎏 > líu; #938F +鎐 > zú; #9390 +鎑 > yè; #9391 +鎒 > nòu; #9392 +鎓 > wēng; #9393 +鎔 > róng; #9394 +鎕 > táng; #9395 +鎖 > sŭo; #9396 +鎗 > qiāng; #9397 +鎘 > gé; #9398 +鎙 > shùo; #9399 +鎚 > chúi; #939A +鎛 > bó; #939B +鎜 > pán; #939C +鎝 > sà; #939D +鎞 > bì; #939E +鎟 > săng; #939F +鎠 > gāng; #93A0 +鎡 > zī; #93A1 +鎢 > wù; #93A2 +鎣 > yìng; #93A3 +鎤 > huăng; #93A4 +鎥 > tiáo; #93A5 +鎦 > líu; #93A6 +鎧 > kăi; #93A7 +鎨 > sŭn; #93A8 +鎩 > shā; #93A9 +鎪 > sōu; #93AA +鎫 > wàn; #93AB +鎬 > hào; #93AC +鎭 > zhèn; #93AD +鎮 > zhèn; #93AE +鎯 > lŭo; #93AF +鎰 > yì; #93B0 +鎱 > yuán; #93B1 +鎲 > tăng; #93B2 +鎳 > niè; #93B3 +鎴 > xí; #93B4 +鎵 > jiā; #93B5 +鎶 > gē; #93B6 +鎷 > mă; #93B7 +鎸 > juān; #93B8 +鎻 > sŭo; #93BB +鎿 > ná; #93BF +鏀 > lŭ; #93C0 +鏁 > sŭo; #93C1 +鏂 > ōu; #93C2 +鏃 > zú; #93C3 +鏄 > tuán; #93C4 +鏅 > xīu; #93C5 +鏆 > guàn; #93C6 +鏇 > xuàn; #93C7 +鏈 > liàn; #93C8 +鏉 > shòu; #93C9 +鏊 > áo; #93CA +鏋 > măn; #93CB +鏌 > mò; #93CC +鏍 > lúo; #93CD +鏎 > bì; #93CE +鏏 > 
wèi; #93CF +鏐 > líu; #93D0 +鏑 > dí; #93D1 +鏒 > qiāo; #93D2 +鏓 > cōng; #93D3 +鏔 > yí; #93D4 +鏕 > lù; #93D5 +鏖 > áo; #93D6 +鏗 > kēng; #93D7 +鏘 > qiāng; #93D8 +鏙 > cūi; #93D9 +鏚 > qì; #93DA +鏛 > cháng; #93DB +鏜 > tāng; #93DC +鏝 > màn; #93DD +鏞 > yōng; #93DE +鏟 > chăn; #93DF +鏠 > fēng; #93E0 +鏡 > jìng; #93E1 +鏢 > biāo; #93E2 +鏣 > shù; #93E3 +鏤 > lòu; #93E4 +鏥 > xìu; #93E5 +鏦 > cōng; #93E6 +鏧 > lóng; #93E7 +鏨 > zàn; #93E8 +鏩 > jiàn; #93E9 +鏪 > cáo; #93EA +鏫 > lí; #93EB +鏬 > xià; #93EC +鏭 > xī; #93ED +鏮 > kāng; #93EE +鏰 > bèng; #93F0 +鏳 > zhēng; #93F3 +鏴 > lù; #93F4 +鏵 > huá; #93F5 +鏶 > jí; #93F6 +鏷 > pú; #93F7 +鏸 > hùi; #93F8 +鏹 > qiāng; #93F9 +鏺 > pō; #93FA +鏻 > lín; #93FB +鏼 > sŭo; #93FC +鏽 > xìu; #93FD +鏾 > săn; #93FE +鏿 > chēng; #93FF +鐀 > kùi; #9400 +鐁 > sī; #9401 +鐂 > lìu; #9402 +鐃 > náo; #9403 +鐄 > héng; #9404 +鐅 > piĕ; #9405 +鐆 > sùi; #9406 +鐇 > fán; #9407 +鐈 > qiáo; #9408 +鐉 > quān; #9409 +鐊 > yáng; #940A +鐋 > tàng; #940B +鐌 > xiàng; #940C +鐍 > jué; #940D +鐎 > jiāo; #940E +鐏 > zūn; #940F +鐐 > liáo; #9410 +鐑 > jié; #9411 +鐒 > láo; #9412 +鐓 > dùi; #9413 +鐔 > tán; #9414 +鐕 > zān; #9415 +鐖 > jī; #9416 +鐗 > jiăn; #9417 +鐘 > zhōng; #9418 +鐙 > dēng; #9419 +鐚 > yà; #941A +鐛 > yìng; #941B +鐜 > dùi; #941C +鐝 > jué; #941D +鐞 > nòu; #941E +鐟 > tì; #941F +鐠 > pŭ; #9420 +鐡 > tiĕ; #9421 +鐤 > dĭng; #9424 +鐥 > shàn; #9425 +鐦 > kāi; #9426 +鐧 > jiăn; #9427 +鐨 > fèi; #9428 +鐩 > sùi; #9429 +鐪 > lŭ; #942A +鐫 > juān; #942B +鐬 > hùi; #942C +鐭 > yù; #942D +鐮 > lián; #942E +鐯 > zhúo; #942F +鐰 > qiāo; #9430 +鐱 > qiān; #9431 +鐲 > zhúo; #9432 +鐳 > léi; #9433 +鐴 > bì; #9434 +鐵 > tiĕ; #9435 +鐶 > huán; #9436 +鐷 > yè; #9437 +鐸 > dúo; #9438 +鐹 > gŭo; #9439 +鐺 > dāng; #943A +鐻 > jù; #943B +鐼 > fén; #943C +鐽 > dá; #943D +鐾 > bèi; #943E +鐿 > yì; #943F +鑀 > ài; #9440 +鑁 > zōng; #9441 +鑂 > xùn; #9442 +鑃 > diào; #9443 +鑄 > zhù; #9444 +鑅 > héng; #9445 +鑆 > zhùi; #9446 +鑇 > jī; #9447 +鑈 > niē; #9448 +鑉 > tà; #9449 +鑊 > hùo; #944A +鑋 > qìng; #944B +鑌 > bīn; #944C +鑍 > yīng; #944D +鑎 > kùi; #944E +鑏 > 
níng; #944F +鑐 > xū; #9450 +鑑 > jiàn; #9451 +鑒 > jiàn; #9452 +鑔 > chă; #9454 +鑕 > zhì; #9455 +鑖 > miè; #9456 +鑗 > lí; #9457 +鑘 > léi; #9458 +鑙 > jī; #9459 +鑚 > zuàn; #945A +鑛 > kuàng; #945B +鑜 > shàng; #945C +鑝 > péng; #945D +鑞 > là; #945E +鑟 > dú; #945F +鑠 > shùo; #9460 +鑡 > chùo; #9461 +鑢 > lǜ; #9462 +鑣 > biāo; #9463 +鑤 > bào; #9464 +鑥 > lŭ; #9465 +鑨 > lóng; #9468 +鑩 > è; #9469 +鑪 > lú; #946A +鑫 > xīn; #946B +鑬 > jiàn; #946C +鑭 > làn; #946D +鑮 > bó; #946E +鑯 > jiān; #946F +鑰 > yào; #9470 +鑱 > chán; #9471 +鑲 > xiāng; #9472 +鑳 > jiàn; #9473 +鑴 > xī; #9474 +鑵 > guàn; #9475 +鑶 > cáng; #9476 +鑷 > niè; #9477 +鑸 > lĕi; #9478 +鑹 > cuàn; #9479 +鑺 > qú; #947A +鑻 > pàn; #947B +鑼 > lúo; #947C +鑽 > zuàn; #947D +鑾 > luán; #947E +鑿 > záo; #947F +钀 > niè; #9480 +钁 > jué; #9481 +钂 > tăng; #9482 +钃 > shŭ; #9483 +钄 > lán; #9484 +钅 > jīn; #9485 +钆 > qíu; #9486 +钇 > yĭ; #9487 +针 > zhēn; #9488 +钉 > dīng; #9489 +钊 > zhāo; #948A +钋 > pò; #948B +钌 > diăo; #948C +钍 > tŭ; #948D +钎 > qiān; #948E +钏 > chuàn; #948F +钐 > shàn; #9490 +钑 > jí; #9491 +钒 > fán; #9492 +钓 > diào; #9493 +钔 > mén; #9494 +钕 > nǚ; #9495 +钖 > xí; #9496 +钗 > chāi; #9497 +钘 > xíng; #9498 +钙 > gài; #9499 +钚 > bù; #949A +钛 > tài; #949B +钜 > jù; #949C +钝 > dùn; #949D +钞 > chāo; #949E +钟 > zhōng; #949F +钠 > nà; #94A0 +钡 > bèi; #94A1 +钢 > gāng; #94A2 +钣 > băn; #94A3 +钤 > qián; #94A4 +钥 > yào; #94A5 +钦 > qīn; #94A6 +钧 > jūn; #94A7 +钨 > wù; #94A8 +钩 > gōu; #94A9 +钪 > kàng; #94AA +钫 > fāng; #94AB +钬 > húo; #94AC +钭 > tŏu; #94AD +钮 > nĭu; #94AE +钯 > bă; #94AF +钰 > yù; #94B0 +钱 > qián; #94B1 +钲 > zhēng; #94B2 +钳 > qián; #94B3 +钴 > gū; #94B4 +钵 > bō; #94B5 +钶 > ē; #94B6 +钷 > pō; #94B7 +钸 > bù; #94B8 +钹 > bá; #94B9 +钺 > yuè; #94BA +钻 > zuàn; #94BB +钼 > mù; #94BC +钽 > dàn; #94BD +钾 > jiă; #94BE +钿 > diàn; #94BF +铀 > yóu; #94C0 +铁 > tiĕ; #94C1 +铂 > bó; #94C2 +铃 > líng; #94C3 +铄 > shùo; #94C4 +铅 > qiān; #94C5 +铆 > lĭu; #94C6 +铇 > bào; #94C7 +铈 > shì; #94C8 +铉 > xuàn; #94C9 +铊 > shé; #94CA +铋 > bì; #94CB +铌 > nĭ; #94CC +铍 > pī; #94CD +铎 > 
dúo; #94CE +铏 > xíng; #94CF +铐 > kào; #94D0 +铑 > lăo; #94D1 +铒 > èr; #94D2 +铓 > máng; #94D3 +铔 > yà; #94D4 +铕 > yŏu; #94D5 +铖 > chéng; #94D6 +铗 > jiá; #94D7 +铘 > yé; #94D8 +铙 > náo; #94D9 +铚 > zhì; #94DA +铛 > dāng; #94DB +铜 > tóng; #94DC +铝 > lǚ; #94DD +铞 > diào; #94DE +铟 > yīn; #94DF +铠 > kăi; #94E0 +铡 > zhá; #94E1 +铢 > zhū; #94E2 +铣 > xiăn; #94E3 +铤 > tĭng; #94E4 +铥 > dīu; #94E5 +铦 > xiān; #94E6 +铧 > huá; #94E7 +铨 > quán; #94E8 +铩 > shā; #94E9 +铪 > jiá; #94EA +铫 > yáo; #94EB +铬 > gè; #94EC +铭 > míng; #94ED +铮 > zhēng; #94EE +铯 > sè; #94EF +铰 > jiăo; #94F0 +铱 > yĭ; #94F1 +铲 > chăn; #94F2 +铳 > chòng; #94F3 +铴 > tàng; #94F4 +铵 > ān; #94F5 +银 > yín; #94F6 +铷 > rŭ; #94F7 +铸 > zhù; #94F8 +铹 > láo; #94F9 +铺 > pū; #94FA +铻 > wú; #94FB +铼 > lái; #94FC +铽 > tè; #94FD +链 > liàn; #94FE +铿 > kēng; #94FF +销 > xiāo; #9500 +锁 > sŭo; #9501 +锂 > lĭ; #9502 +锃 > zhèng; #9503 +锄 > chú; #9504 +锅 > gūo; #9505 +锆 > gào; #9506 +锇 > tiĕ; #9507 +锈 > xìu; #9508 +锉 > cùo; #9509 +锊 > lǜe; #950A +锋 > fēng; #950B +锌 > xīn; #950C +锍 > lĭu; #950D +锎 > kāi; #950E +锏 > jiăn; #950F +锐 > rùi; #9510 +锑 > tì; #9511 +锒 > láng; #9512 +锓 > qiān; #9513 +锔 > jú; #9514 +锕 > ā; #9515 +锖 > qiāng; #9516 +锗 > dŭo; #9517 +锘 > tiăn; #9518 +错 > cùo; #9519 +锚 > máo; #951A +锛 > bēn; #951B +锜 > qí; #951C +锝 > dé; #951D +锞 > kuă; #951E +锟 > kūn; #951F +锠 > chāng; #9520 +锡 > xí; #9521 +锢 > gù; #9522 +锣 > lúo; #9523 +锤 > chúi; #9524 +锥 > zhūi; #9525 +锦 > jĭn; #9526 +锧 > zhì; #9527 +锨 > xiān; #9528 +锩 > juàn; #9529 +锪 > hūo; #952A +锫 > póu; #952B +锬 > tán; #952C +锭 > dìng; #952D +键 > jiàn; #952E +锯 > jù; #952F +锰 > mĕng; #9530 +锱 > zī; #9531 +锲 > qiè; #9532 +锳 > yīng; #9533 +锴 > kăi; #9534 +锵 > qiāng; #9535 +锶 > sōng; #9536 +锷 > è; #9537 +锸 > chá; #9538 +锹 > qiāo; #9539 +锺 > zhōng; #953A +锻 > duàn; #953B +锼 > sōu; #953C +锽 > huáng; #953D +锾 > huán; #953E +锿 > āi; #953F +镀 > dù; #9540 +镁 > mĕi; #9541 +镂 > lòu; #9542 +镃 > zī; #9543 +镄 > fèi; #9544 +镅 > méi; #9545 +镆 > mò; #9546 +镇 > zhèn; #9547 +镈 > bó; #9548 +镉 > gé; #9549 
+镊 > niè; #954A +镋 > tăng; #954B +镌 > juān; #954C +镍 > niè; #954D +镎 > ná; #954E +镏 > líu; #954F +镐 > hào; #9550 +镑 > bàng; #9551 +镒 > yì; #9552 +镓 > jiā; #9553 +镔 > bīn; #9554 +镕 > róng; #9555 +镖 > biāo; #9556 +镗 > tāng; #9557 +镘 > màn; #9558 +镙 > lúo; #9559 +镚 > bèng; #955A +镛 > yōng; #955B +镜 > jìng; #955C +镝 > dí; #955D +镞 > zú; #955E +镟 > xuàn; #955F +镠 > líu; #9560 +镡 > tán; #9561 +镢 > jué; #9562 +镣 > liáo; #9563 +镤 > pú; #9564 +镥 > lŭ; #9565 +镦 > dùi; #9566 +镧 > làn; #9567 +镨 > pŭ; #9568 +镩 > cuàn; #9569 +镪 > qiāng; #956A +镫 > dēng; #956B +镬 > hùo; #956C +镭 > léi; #956D +镮 > huán; #956E +镯 > zhúo; #956F +镰 > lián; #9570 +镱 > yì; #9571 +镲 > chă; #9572 +镳 > biāo; #9573 +镴 > là; #9574 +镵 > chán; #9575 +镶 > xiāng; #9576 +長 > cháng; #9577 +镸 > cháng; #9578 +镹 > jĭu; #9579 +镺 > ăo; #957A +镻 > dié; #957B +镼 > qū; #957C +镽 > liăo; #957D +镾 > mí; #957E +长 > cháng; #957F +門 > mén; #9580 +閁 > mà; #9581 +閂 > shuān; #9582 +閃 > shăn; #9583 +閄 > hùo; #9584 +閅 > mén; #9585 +閆 > yàn; #9586 +閇 > bì; #9587 +閈 > hàn; #9588 +閉 > bì; #9589 +開 > kāi; #958B +閌 > kàng; #958C +閍 > bēng; #958D +閎 > hóng; #958E +閏 > rùn; #958F +閐 > sàn; #9590 +閑 > xián; #9591 +閒 > xián; #9592 +間 > jiān; #9593 +閔 > mĭn; #9594 +閕 > xiā; #9595 +閗 > dòu; #9597 +閘 > zhá; #9598 +閙 > nào; #9599 +閚 > jian; #959A +閛 > pēng; #959B +閜 > xiă; #959C +閝 > líng; #959D +閞 > biàn; #959E +閟 > bì; #959F +閠 > rùn; #95A0 +閡 > hé; #95A1 +関 > guān; #95A2 +閣 > gé; #95A3 +閤 > gé; #95A4 +閥 > fá; #95A5 +閦 > chù; #95A6 +閧 > hòng; #95A7 +閨 > gūi; #95A8 +閩 > mĭn; #95A9 +閫 > kŭn; #95AB +閬 > lăng; #95AC +閭 > lǘ; #95AD +閮 > tíng; #95AE +閯 > shà; #95AF +閰 > jú; #95B0 +閱 > yuè; #95B1 +閲 > yuè; #95B2 +閳 > chăn; #95B3 +閴 > qù; #95B4 +閵 > lìn; #95B5 +閶 > chāng; #95B6 +閷 > shài; #95B7 +閸 > kŭn; #95B8 +閹 > yān; #95B9 +閺 > mín; #95BA +閻 > yán; #95BB +閼 > è; #95BC +閽 > hūn; #95BD +閾 > yù; #95BE +閿 > wén; #95BF +闀 > xiàng; #95C0 +闁 > bao; #95C1 +闂 > xiàng; #95C2 +闃 > qù; #95C3 +闄 > yăo; #95C4 +闅 > wén; #95C5 +闆 > băn; #95C6 +闇 > àn; #95C7 +闈 > 
wéi; #95C8 +闉 > yīn; #95C9 +闊 > kùo; #95CA +闋 > què; #95CB +闌 > lán; #95CC +闍 > dū; #95CD +闐 > tián; #95D0 +闑 > niè; #95D1 +闒 > tà; #95D2 +闓 > kăi; #95D3 +闔 > hé; #95D4 +闕 > què; #95D5 +闖 > chuăng; #95D6 +闗 > guān; #95D7 +闘 > dòu; #95D8 +闙 > qĭ; #95D9 +闚 > kūi; #95DA +闛 > táng; #95DB +關 > guān; #95DC +闝 > piáo; #95DD +闞 > kàn; #95DE +闟 > xì; #95DF +闠 > hùi; #95E0 +闡 > chăn; #95E1 +闢 > pì; #95E2 +闣 > dàng; #95E3 +闤 > huán; #95E4 +闥 > tà; #95E5 +闦 > wén; #95E6 +门 > mén; #95E8 +闩 > shuān; #95E9 +闪 > shăn; #95EA +闫 > yàn; #95EB +闬 > hàn; #95EC +闭 > bì; #95ED +问 > wèn; #95EE +闯 > chuăng; #95EF +闰 > rùn; #95F0 +闱 > wéi; #95F1 +闲 > xián; #95F2 +闳 > hóng; #95F3 +间 > jiān; #95F4 +闵 > mĭn; #95F5 +闶 > kàng; #95F6 +闷 > mèn; #95F7 +闸 > zhá; #95F8 +闹 > nào; #95F9 +闺 > gūi; #95FA +闻 > wén; #95FB +闼 > tà; #95FC +闽 > mĭn; #95FD +闾 > lǘ; #95FE +闿 > kăi; #95FF +阀 > fá; #9600 +阁 > gé; #9601 +阂 > hé; #9602 +阃 > kŭn; #9603 +阄 > jīu; #9604 +阅 > yuè; #9605 +阆 > lăng; #9606 +阇 > dū; #9607 +阈 > yù; #9608 +阉 > yān; #9609 +阊 > chāng; #960A +阋 > xì; #960B +阌 > wén; #960C +阍 > hūn; #960D +阎 > yán; #960E +阏 > è; #960F +阐 > chăn; #9610 +阑 > lán; #9611 +阒 > qù; #9612 +阓 > hùi; #9613 +阔 > kùo; #9614 +阕 > què; #9615 +阖 > gé; #9616 +阗 > tián; #9617 +阘 > tà; #9618 +阙 > què; #9619 +阚 > kàn; #961A +阛 > huán; #961B +阜 > fù; #961C +阝 > fù; #961D +阞 > lè; #961E +队 > dùi; #961F +阠 > xìn; #9620 +阡 > qiān; #9621 +阢 > wù; #9622 +阣 > yì; #9623 +阤 > túo; #9624 +阥 > yīn; #9625 +阦 > yáng; #9626 +阧 > dŏu; #9627 +阨 > è; #9628 +阩 > shēng; #9629 +阪 > băn; #962A +阫 > péi; #962B +阬 > kēng; #962C +阭 > yŭn; #962D +阮 > ruăn; #962E +阯 > zhĭ; #962F +阰 > pí; #9630 +阱 > jĭng; #9631 +防 > fáng; #9632 +阳 > yáng; #9633 +阴 > yīn; #9634 +阵 > zhèn; #9635 +阶 > jiē; #9636 +阷 > chēng; #9637 +阸 > è; #9638 +阹 > qū; #9639 +阺 > dĭ; #963A +阻 > zŭ; #963B +阼 > zùo; #963C +阽 > diàn; #963D +阾 > lĭng; #963E +阿 > ā; #963F +陀 > túo; #9640 +陁 > túo; #9641 +陂 > pō; #9642 +陃 > bĭng; #9643 +附 > fù; #9644 +际 > jì; #9645 +陆 > lù; #9646 +陇 > lŏng; #9647 
+陈 > chén; #9648 +陉 > xíng; #9649 +陊 > dùo; #964A +陋 > lòu; #964B +陌 > mò; #964C +降 > jiàng; #964D +陎 > shū; #964E +陏 > dùo; #964F +限 > xiàn; #9650 +陑 > ér; #9651 +陒 > gŭi; #9652 +陓 > yū; #9653 +陔 > gāi; #9654 +陕 > shăn; #9655 +陖 > xùn; #9656 +陗 > qiào; #9657 +陘 > xíng; #9658 +陙 > chún; #9659 +陚 > fù; #965A +陛 > bì; #965B +陜 > xiá; #965C +陝 > shăn; #965D +陞 > shēng; #965E +陟 > zhì; #965F +陠 > pū; #9660 +陡 > dŏu; #9661 +院 > yuàn; #9662 +陣 > zhèn; #9663 +除 > chú; #9664 +陥 > xiàn; #9665 +陧 > niè; #9667 +陨 > yŭn; #9668 +险 > xiăn; #9669 +陪 > péi; #966A +陫 > péi; #966B +陬 > zōu; #966C +陭 > yī; #966D +陮 > dŭi; #966E +陯 > lún; #966F +陰 > yīn; #9670 +陱 > jū; #9671 +陲 > chúi; #9672 +陳 > chén; #9673 +陴 > pí; #9674 +陵 > líng; #9675 +陶 > táo; #9676 +陷 > xiàn; #9677 +陸 > lù; #9678 +陹 > sheng; #9679 +険 > xiăn; #967A +陻 > yīn; #967B +陼 > zhŭ; #967C +陽 > yáng; #967D +陾 > réng; #967E +陿 > shăn; #967F +隀 > chóng; #9680 +隁 > yàn; #9681 +隂 > yīn; #9682 +隃 > yú; #9683 +隄 > tí; #9684 +隅 > yú; #9685 +隆 > lóng; #9686 +隇 > wēi; #9687 +隈 > wēi; #9688 +隉 > niè; #9689 +隊 > dùi; #968A +隋 > súi; #968B +隌 > ăn; #968C +隍 > huáng; #968D +階 > jiē; #968E +随 > súi; #968F +隐 > yĭn; #9690 +隑 > gāi; #9691 +隒 > yăn; #9692 +隓 > hūi; #9693 +隔 > gé; #9694 +隕 > yŭn; #9695 +隖 > wù; #9696 +隗 > wĕi; #9697 +隘 > ài; #9698 +隙 > xì; #9699 +隚 > táng; #969A +際 > jì; #969B +障 > zhàng; #969C +隝 > dăo; #969D +隞 > áo; #969E +隟 > xì; #969F +隠 > yĭn; #96A0 +隢 > rào; #96A2 +隣 > lín; #96A3 +隤 > túi; #96A4 +隥 > dèng; #96A5 +隦 > pĭ; #96A6 +隧 > sùi; #96A7 +隨 > súi; #96A8 +隩 > yù; #96A9 +險 > xiăn; #96AA +隫 > fēn; #96AB +隬 > nĭ; #96AC +隭 > ér; #96AD +隮 > jī; #96AE +隯 > dăo; #96AF +隰 > xí; #96B0 +隱 > yĭn; #96B1 +隲 > é; #96B2 +隳 > hūi; #96B3 +隴 > lŏng; #96B4 +隵 > xī; #96B5 +隶 > lì; #96B6 +隷 > lì; #96B7 +隸 > lì; #96B8 +隹 > zhūi; #96B9 +隺 > hè; #96BA +隻 > zhī; #96BB +隼 > zhŭn; #96BC +隽 > jùn; #96BD +难 > nán; #96BE +隿 > yì; #96BF +雀 > què; #96C0 +雁 > yàn; #96C1 +雂 > qián; #96C2 +雃 > yă; #96C3 +雄 > xíong; #96C4 +雅 > yă; #96C5 +集 > jí; 
#96C6 +雇 > gù; #96C7 +雈 > huán; #96C8 +雉 > zhì; #96C9 +雊 > gòu; #96CA +雋 > jùn; #96CB +雌 > cí; #96CC +雍 > yōng; #96CD +雎 > jū; #96CE +雏 > chú; #96CF +雐 > hū; #96D0 +雑 > zá; #96D1 +雒 > lùo; #96D2 +雓 > yú; #96D3 +雔 > chóu; #96D4 +雕 > diāo; #96D5 +雖 > sūi; #96D6 +雗 > hàn; #96D7 +雘 > hùo; #96D8 +雙 > shuāng; #96D9 +雚 > guàn; #96DA +雛 > chú; #96DB +雜 > zá; #96DC +雝 > yōng; #96DD +雞 > jī; #96DE +雟 > xī; #96DF +雠 > chóu; #96E0 +雡 > lìu; #96E1 +離 > lí; #96E2 +難 > nán; #96E3 +雤 > xué; #96E4 +雥 > zá; #96E5 +雦 > jí; #96E6 +雧 > jí; #96E7 +雨 > yŭ; #96E8 +雩 > yú; #96E9 +雪 > xuĕ; #96EA +雫 > nă; #96EB +雬 > fŏu; #96EC +雭 > sè; #96ED +雮 > mù; #96EE +雯 > wén; #96EF +雰 > fēn; #96F0 +雱 > páng; #96F1 +雲 > yún; #96F2 +雳 > lì; #96F3 +雴 > lì; #96F4 +雵 > ăng; #96F5 +零 > líng; #96F6 +雷 > léi; #96F7 +雸 > án; #96F8 +雹 > báo; #96F9 +雺 > méng; #96FA +電 > diàn; #96FB +雼 > dàng; #96FC +雽 > xíng; #96FD +雾 > wù; #96FE +雿 > zhào; #96FF +需 > xū; #9700 +霁 > jì; #9701 +霂 > mù; #9702 +霃 > chén; #9703 +霄 > xiāo; #9704 +霅 > zhá; #9705 +霆 > tíng; #9706 +震 > zhèn; #9707 +霈 > pèi; #9708 +霉 > méi; #9709 +霊 > líng; #970A +霋 > qī; #970B +霌 > chōu; #970C +霍 > hùo; #970D +霎 > shà; #970E +霏 > fēi; #970F +霐 > wēng; #9710 +霑 > zhān; #9711 +霒 > yīn; #9712 +霓 > ní; #9713 +霔 > chòu; #9714 +霕 > tún; #9715 +霖 > lín; #9716 +霘 > dòng; #9718 +霙 > yīng; #9719 +霚 > wù; #971A +霛 > líng; #971B +霜 > shuāng; #971C +霝 > líng; #971D +霞 > xiá; #971E +霟 > hóng; #971F +霠 > yīn; #9720 +霡 > mò; #9721 +霢 > mài; #9722 +霣 > yŭn; #9723 +霤 > lìu; #9724 +霥 > mèng; #9725 +霦 > bīn; #9726 +霧 > wù; #9727 +霨 > wèi; #9728 +霩 > hùo; #9729 +霪 > yín; #972A +霫 > xí; #972B +霬 > yì; #972C +霭 > ăi; #972D +霮 > dàn; #972E +霯 > dèng; #972F +霰 > xiàn; #9730 +霱 > yù; #9731 +露 > lù; #9732 +霳 > lóng; #9733 +霴 > dài; #9734 +霵 > jí; #9735 +霶 > páng; #9736 +霷 > yáng; #9737 +霸 > bà; #9738 +霹 > pī; #9739 +霺 > wéi; #973A +霼 > xĭ; #973C +霽 > jì; #973D +霾 > mái; #973E +霿 > mèng; #973F +靀 > méng; #9740 +靁 > léi; #9741 +靂 > lì; #9742 +靃 > hùo; #9743 +靄 > ăi; #9744 +靅 > fèi; 
#9745 +靆 > dài; #9746 +靇 > lóng; #9747 +靈 > líng; #9748 +靉 > ài; #9749 +靊 > fēng; #974A +靋 > lì; #974B +靌 > băo; #974C +靎 > hè; #974E +靏 > hè; #974F +靐 > bìng; #9750 +靑 > qīng; #9751 +青 > qīng; #9752 +靓 > jìng; #9753 +靔 > tiān; #9754 +靕 > zhēn; #9755 +靖 > jìng; #9756 +靗 > chèng; #9757 +靘 > qìng; #9758 +静 > jìng; #9759 +靚 > jìng; #975A +靛 > diàn; #975B +靜 > jìng; #975C +靝 > tiān; #975D +非 > fēi; #975E +靟 > fēi; #975F +靠 > kào; #9760 +靡 > mĭ; #9761 +面 > miàn; #9762 +靣 > miàn; #9763 +靤 > pào; #9764 +靥 > yè; #9765 +靦 > tiăn; #9766 +靧 > hùi; #9767 +靨 > yè; #9768 +革 > gé; #9769 +靪 > dīng; #976A +靫 > chā; #976B +靬 > jiān; #976C +靭 > rèn; #976D +靮 > dí; #976E +靯 > dù; #976F +靰 > wù; #9770 +靱 > rèn; #9771 +靲 > qín; #9772 +靳 > jìn; #9773 +靴 > xuē; #9774 +靵 > nĭu; #9775 +靶 > bă; #9776 +靷 > yĭn; #9777 +靸 > să; #9778 +靹 > nà; #9779 +靺 > mò; #977A +靻 > zŭ; #977B +靼 > dá; #977C +靽 > bàn; #977D +靾 > yì; #977E +靿 > yào; #977F +鞀 > táo; #9780 +鞁 > túo; #9781 +鞂 > jiá; #9782 +鞃 > hóng; #9783 +鞄 > páo; #9784 +鞅 > yăng; #9785 +鞇 > yīn; #9787 +鞈 > jiá; #9788 +鞉 > táo; #9789 +鞊 > jí; #978A +鞋 > xié; #978B +鞌 > ān; #978C +鞍 > ān; #978D +鞎 > hén; #978E +鞏 > gŏng; #978F +鞑 > dá; #9791 +鞒 > qiāo; #9792 +鞓 > tīng; #9793 +鞔 > wăn; #9794 +鞕 > yìng; #9795 +鞖 > sūi; #9796 +鞗 > tiáo; #9797 +鞘 > qiào; #9798 +鞙 > xuàn; #9799 +鞚 > kòng; #979A +鞛 > bĕng; #979B +鞜 > tà; #979C +鞝 > zhăng; #979D +鞞 > bĭng; #979E +鞟 > kùo; #979F +鞠 > jú; #97A0 +鞡 > la; #97A1 +鞢 > xiè; #97A2 +鞣 > róu; #97A3 +鞤 > bāng; #97A4 +鞥 > yì; #97A5 +鞦 > qīu; #97A6 +鞧 > qīu; #97A7 +鞨 > hé; #97A8 +鞩 > xiào; #97A9 +鞪 > mù; #97AA +鞫 > jú; #97AB +鞬 > jiān; #97AC +鞭 > biān; #97AD +鞮 > dī; #97AE +鞯 > jiān; #97AF +鞱 > tāo; #97B1 +鞲 > gōu; #97B2 +鞳 > tà; #97B3 +鞴 > bèi; #97B4 +鞵 > xié; #97B5 +鞶 > pán; #97B6 +鞷 > gé; #97B7 +鞸 > bì; #97B8 +鞹 > kùo; #97B9 +鞺 > tang; #97BA +鞻 > lóu; #97BB +鞼 > gùi; #97BC +鞽 > qiáo; #97BD +鞾 > xuē; #97BE +鞿 > jī; #97BF +韀 > jiān; #97C0 +韁 > jiāng; #97C1 +韂 > chàn; #97C2 +韃 > dá; #97C3 +韄 > hùo; #97C4 +韅 > xiăn; 
#97C5 +韆 > qiān; #97C6 +韇 > dú; #97C7 +韈 > wà; #97C8 +韉 > jiān; #97C9 +韊 > lán; #97CA +韋 > wéi; #97CB +韌 > rèn; #97CC +韍 > fú; #97CD +韎 > mèi; #97CE +韏 > juàn; #97CF +韐 > gé; #97D0 +韑 > wĕi; #97D1 +韒 > qiào; #97D2 +韓 > hán; #97D3 +韔 > chàng; #97D4 +韖 > róu; #97D6 +韗 > xùn; #97D7 +韘 > shè; #97D8 +韙 > wĕi; #97D9 +韚 > gé; #97DA +韛 > bèi; #97DB +韜 > tāo; #97DC +韝 > gōu; #97DD +韞 > yùn; #97DE +韠 > bì; #97E0 +韡 > wĕi; #97E1 +韢 > hùi; #97E2 +韣 > dú; #97E3 +韤 > wà; #97E4 +韥 > dú; #97E5 +韦 > wéi; #97E6 +韧 > rèn; #97E7 +韨 > fú; #97E8 +韩 > hán; #97E9 +韪 > wĕi; #97EA +韫 > yùn; #97EB +韬 > tāo; #97EC +韭 > jĭu; #97ED +韮 > jĭu; #97EE +韯 > xiān; #97EF +韰 > xiè; #97F0 +韱 > xiān; #97F1 +韲 > jī; #97F2 +音 > yīn; #97F3 +韴 > zá; #97F4 +韵 > yùn; #97F5 +韶 > sháo; #97F6 +韷 > lè; #97F7 +韸 > péng; #97F8 +韹 > héng; #97F9 +韺 > yīng; #97FA +韻 > yùn; #97FB +韼 > péng; #97FC +韽 > yīn; #97FD +韾 > yīn; #97FE +響 > xiăng; #97FF +頀 > hù; #9800 +頁 > yè; #9801 +頂 > dĭng; #9802 +頃 > qĭng; #9803 +頄 > pàn; #9804 +項 > xiàng; #9805 +順 > shùn; #9806 +頇 > hān; #9807 +須 > xū; #9808 +頉 > yí; #9809 +頊 > xù; #980A +頋 > gù; #980B +頌 > sòng; #980C +頍 > kŭi; #980D +頎 > qí; #980E +頏 > háng; #980F +預 > yù; #9810 +頑 > wán; #9811 +頒 > bān; #9812 +頓 > dùn; #9813 +頔 > dí; #9814 +頕 > dān; #9815 +頖 > pàn; #9816 +頗 > pŏ; #9817 +領 > lĭng; #9818 +頙 > cè; #9819 +頚 > jĭng; #981A +頛 > lĕi; #981B +頜 > hé; #981C +頝 > qiāo; #981D +頞 > è; #981E +頟 > é; #981F +頠 > wĕi; #9820 +頡 > jié; #9821 +頢 > guā; #9822 +頣 > shĕn; #9823 +頤 > yí; #9824 +頥 > shĕn; #9825 +頦 > hái; #9826 +頧 > dūi; #9827 +頨 > piān; #9828 +頩 > pīng; #9829 +頪 > lèi; #982A +頫 > fŭ; #982B +頬 > jiá; #982C +頭 > tóu; #982D +頮 > hùi; #982E +頯 > kúi; #982F +頰 > jiá; #9830 +頱 > lè; #9831 +頲 > tian; #9832 +頳 > chēng; #9833 +頴 > yĭng; #9834 +頵 > jūn; #9835 +頶 > hú; #9836 +頷 > hàn; #9837 +頸 > jĭng; #9838 +頹 > túi; #9839 +頺 > túi; #983A +頻 > pín; #983B +頼 > lài; #983C +頽 > túi; #983D +頾 > zī; #983E +頿 > zī; #983F +顀 > chúi; #9840 +顁 > dìng; #9841 +顂 > lài; #9842 +顃 > yán; #9843 +顄 > 
hàn; #9844 +顅 > jiān; #9845 +顆 > kē; #9846 +顇 > cùi; #9847 +顈 > jĭong; #9848 +顉 > qīn; #9849 +顊 > yí; #984A +顋 > sāi; #984B +題 > tí; #984C +額 > é; #984D +顎 > è; #984E +顏 > yán; #984F +顐 > hún; #9850 +顑 > kăn; #9851 +顒 > yóng; #9852 +顓 > zhuān; #9853 +顔 > yán; #9854 +顕 > xiăn; #9855 +顖 > xìn; #9856 +顗 > yĭ; #9857 +願 > yuàn; #9858 +顙 > săng; #9859 +顚 > diān; #985A +顛 > diān; #985B +顜 > jiăng; #985C +顝 > kū; #985D +類 > lèi; #985E +顟 > liáo; #985F +顠 > piào; #9860 +顡 > yì; #9861 +顢 > mán; #9862 +顣 > qī; #9863 +顤 > rào; #9864 +顥 > hào; #9865 +顦 > qiáo; #9866 +顧 > gù; #9867 +顨 > xùn; #9868 +顩 > qiān; #9869 +顪 > hūi; #986A +顫 > zhàn; #986B +顬 > rú; #986C +顭 > hōng; #986D +顮 > bīn; #986E +顯 > xiăn; #986F +顰 > pín; #9870 +顱 > lú; #9871 +顲 > lăn; #9872 +顳 > niè; #9873 +顴 > quán; #9874 +页 > yè; #9875 +顶 > dĭng; #9876 +顷 > qĭng; #9877 +顸 > hān; #9878 +项 > xiàng; #9879 +顺 > shùn; #987A +须 > xū; #987B +顼 > xù; #987C +顽 > wán; #987D +顾 > gù; #987E +顿 > dùn; #987F +颀 > qí; #9880 +颁 > bān; #9881 +颂 > sòng; #9882 +颃 > háng; #9883 +预 > yù; #9884 +颅 > lú; #9885 +领 > lĭng; #9886 +颇 > pŏ; #9887 +颈 > jĭng; #9888 +颉 > jié; #9889 +颊 > jiá; #988A +颋 > tian; #988B +颌 > hàn; #988C +颍 > yĭng; #988D +颎 > jĭong; #988E +颏 > hái; #988F +颐 > yí; #9890 +频 > pín; #9891 +颒 > hùi; #9892 +颓 > túi; #9893 +颔 > hàn; #9894 +颕 > yĭng; #9895 +颖 > yĭng; #9896 +颗 > kē; #9897 +题 > tí; #9898 +颙 > yóng; #9899 +颚 > è; #989A +颛 > zhuān; #989B +颜 > yán; #989C +额 > é; #989D +颞 > niè; #989E +颟 > mán; #989F +颠 > diān; #98A0 +颡 > săng; #98A1 +颢 > hào; #98A2 +颣 > lèi; #98A3 +颤 > zhàn; #98A4 +颥 > rú; #98A5 +颦 > pín; #98A6 +颧 > quán; #98A7 +風 > fēng; #98A8 +颩 > biāo; #98A9 +颫 > fú; #98AB +颬 > xiā; #98AC +颭 > zhăn; #98AD +颮 > biāo; #98AE +颯 > sà; #98AF +颰 > bá; #98B0 +颱 > tái; #98B1 +颲 > liè; #98B2 +颳 > guā; #98B3 +颴 > xuàn; #98B4 +颵 > shào; #98B5 +颶 > jù; #98B6 +颷 > bī; #98B7 +颸 > sī; #98B8 +颹 > wĕi; #98B9 +颺 > yáng; #98BA +颻 > yáo; #98BB +颼 > sōu; #98BC +颽 > kăi; #98BD +颾 > sāo; #98BE +颿 > fán; #98BF +飀 > líu; #98C0 +飁 > 
xí; #98C1 +飂 > liáo; #98C2 +飃 > piāo; #98C3 +飄 > piāo; #98C4 +飅 > líu; #98C5 +飆 > biāo; #98C6 +飇 > biāo; #98C7 +飈 > biăo; #98C8 +飉 > liáo; #98C9 +飋 > sè; #98CB +飌 > fēng; #98CC +飍 > biāo; #98CD +风 > fēng; #98CE +飏 > yáng; #98CF +飐 > zhăn; #98D0 +飑 > biāo; #98D1 +飒 > sà; #98D2 +飓 > jù; #98D3 +飔 > sī; #98D4 +飕 > sōu; #98D5 +飖 > yáo; #98D6 +飗 > líu; #98D7 +飘 > piāo; #98D8 +飙 > biāo; #98D9 +飚 > biāo; #98DA +飛 > fēi; #98DB +飜 > fān; #98DC +飝 > fēi; #98DD +飞 > fēi; #98DE +食 > shí; #98DF +飠 > shí; #98E0 +飡 > cān; #98E1 +飢 > jī; #98E2 +飣 > dìng; #98E3 +飤 > sì; #98E4 +飥 > tūo; #98E5 +飦 > zhān; #98E6 +飧 > sūn; #98E7 +飨 > xiăng; #98E8 +飩 > tún; #98E9 +飪 > rèn; #98EA +飫 > yù; #98EB +飬 > juàn; #98EC +飭 > chì; #98ED +飮 > yĭn; #98EE +飯 > fàn; #98EF +飰 > fàn; #98F0 +飱 > sūn; #98F1 +飲 > yĭn; #98F2 +飳 > zhù; #98F3 +飴 > yí; #98F4 +飵 > zhăi; #98F5 +飶 > bì; #98F6 +飷 > jiĕ; #98F7 +飸 > tāo; #98F8 +飹 > lĭu; #98F9 +飺 > cí; #98FA +飻 > tiè; #98FB +飼 > sì; #98FC +飽 > băo; #98FD +飾 > shì; #98FE +飿 > dùo; #98FF +餀 > hài; #9900 +餁 > rèn; #9901 +餂 > tiăn; #9902 +餃 > jiăo; #9903 +餄 > jiá; #9904 +餅 > bĭng; #9905 +餆 > yáo; #9906 +餇 > tóng; #9907 +餈 > cí; #9908 +餉 > xiăng; #9909 +養 > yăng; #990A +餋 > yăng; #990B +餌 > ĕr; #990C +餍 > yàn; #990D +餎 > le; #990E +餏 > yī; #990F +餐 > cān; #9910 +餑 > bó; #9911 +餒 > nĕi; #9912 +餓 > è; #9913 +餔 > bū; #9914 +餕 > jùn; #9915 +餖 > dòu; #9916 +餗 > sù; #9917 +餘 > yú; #9918 +餙 > shì; #9919 +餚 > yáo; #991A +餛 > hún; #991B +餜 > gŭo; #991C +餝 > shì; #991D +餞 > jiàn; #991E +餟 > zhùi; #991F +餠 > bĭng; #9920 +餡 > xiàn; #9921 +餢 > bù; #9922 +餣 > yè; #9923 +餤 > tán; #9924 +餥 > fĕi; #9925 +餦 > zhāng; #9926 +餧 > wèi; #9927 +館 > guăn; #9928 +餩 > è; #9929 +餪 > nuăn; #992A +餫 > hún; #992B +餬 > hú; #992C +餭 > huáng; #992D +餮 > tiè; #992E +餯 > hùi; #992F +餰 > jiān; #9930 +餱 > hóu; #9931 +餲 > hé; #9932 +餳 > xíng; #9933 +餴 > fēn; #9934 +餵 > wèi; #9935 +餶 > gŭ; #9936 +餷 > chā; #9937 +餸 > sòng; #9938 +餹 > táng; #9939 +餺 > bó; #993A +餻 > gāo; #993B +餼 > xì; #993C +餽 > kùi; #993D +餾 > 
lìu; #993E +餿 > sōu; #993F +饀 > táo; #9940 +饁 > yè; #9941 +饂 > yún; #9942 +饃 > mó; #9943 +饄 > táng; #9944 +饅 > mán; #9945 +饆 > bì; #9946 +饇 > yù; #9947 +饈 > xīu; #9948 +饉 > jĭn; #9949 +饊 > săn; #994A +饋 > kùi; #994B +饌 > zhuàn; #994C +饍 > shàn; #994D +饎 > chì; #994E +饏 > dàn; #994F +饐 > yì; #9950 +饑 > jī; #9951 +饒 > ráo; #9952 +饓 > chēng; #9953 +饔 > yōng; #9954 +饕 > tāo; #9955 +饖 > hùi; #9956 +饗 > xiăng; #9957 +饘 > zhān; #9958 +饙 > fēn; #9959 +饚 > hài; #995A +饛 > méng; #995B +饜 > yàn; #995C +饝 > mó; #995D +饞 > chán; #995E +饟 > xiăng; #995F +饠 > lúo; #9960 +饡 > zuàn; #9961 +饢 > năng; #9962 +饣 > shí; #9963 +饤 > dìng; #9964 +饥 > jī; #9965 +饦 > tūo; #9966 +饧 > xíng; #9967 +饨 > tún; #9968 +饩 > xì; #9969 +饪 > rèn; #996A +饫 > yù; #996B +饬 > chì; #996C +饭 > fàn; #996D +饮 > yĭn; #996E +饯 > jiàn; #996F +饰 > shì; #9970 +饱 > băo; #9971 +饲 > sì; #9972 +饳 > dùo; #9973 +饴 > yí; #9974 +饵 > ĕr; #9975 +饶 > ráo; #9976 +饷 > xiăng; #9977 +饸 > jiá; #9978 +饹 > le; #9979 +饺 > jiăo; #997A +饻 > yī; #997B +饼 > bĭng; #997C +饽 > bó; #997D +饾 > dòu; #997E +饿 > è; #997F +馀 > yú; #9980 +馁 > nĕi; #9981 +馂 > jùn; #9982 +馃 > gŭo; #9983 +馄 > hún; #9984 +馅 > xiàn; #9985 +馆 > guăn; #9986 +馇 > chā; #9987 +馈 > kùi; #9988 +馉 > gŭ; #9989 +馊 > sōu; #998A +馋 > chán; #998B +馌 > yè; #998C +馍 > mó; #998D +馎 > bó; #998E +馏 > lìu; #998F +馐 > xīu; #9990 +馑 > jĭn; #9991 +馒 > mán; #9992 +馓 > săn; #9993 +馔 > zhuàn; #9994 +馕 > năng; #9995 +首 > shŏu; #9996 +馗 > kúi; #9997 +馘 > gúo; #9998 +香 > xiāng; #9999 +馚 > fén; #999A +馛 > bá; #999B +馜 > nĭ; #999C +馝 > bì; #999D +馞 > bó; #999E +馟 > tú; #999F +馠 > hān; #99A0 +馡 > fēi; #99A1 +馢 > jiān; #99A2 +馣 > ān; #99A3 +馤 > ăi; #99A4 +馥 > fù; #99A5 +馦 > xiān; #99A6 +馧 > wēn; #99A7 +馨 > xīn; #99A8 +馩 > fén; #99A9 +馪 > bīn; #99AA +馫 > xīng; #99AB +馬 > mă; #99AC +馭 > yù; #99AD +馮 > féng; #99AE +馯 > hàn; #99AF +馰 > dì; #99B0 +馱 > túo; #99B1 +馲 > tūo; #99B2 +馳 > chí; #99B3 +馴 > xún; #99B4 +馵 > zhù; #99B5 +馶 > zhī; #99B6 +馷 > pèi; #99B7 +馸 > xìn; #99B8 +馹 > rì; #99B9 +馺 > sà; #99BA +馻 > 
yĭn; #99BB +馼 > wén; #99BC +馽 > zhí; #99BD +馾 > dàn; #99BE +馿 > lǘ; #99BF +駀 > yóu; #99C0 +駁 > bó; #99C1 +駂 > băo; #99C2 +駃 > kuài; #99C3 +駄 > túo; #99C4 +駅 > yì; #99C5 +駆 > qū; #99C6 +駈 > qū; #99C8 +駉 > jīong; #99C9 +駊 > bŏ; #99CA +駋 > zhāo; #99CB +駌 > yuān; #99CC +駍 > pēng; #99CD +駎 > zhòu; #99CE +駏 > jù; #99CF +駐 > zhù; #99D0 +駑 > nú; #99D1 +駒 > jū; #99D2 +駓 > pí; #99D3 +駔 > zăng; #99D4 +駕 > jià; #99D5 +駖 > líng; #99D6 +駗 > zhēn; #99D7 +駘 > tái; #99D8 +駙 > fù; #99D9 +駚 > yăng; #99DA +駛 > shĭ; #99DB +駜 > bì; #99DC +駝 > túo; #99DD +駞 > túo; #99DE +駟 > sì; #99DF +駠 > líu; #99E0 +駡 > mà; #99E1 +駢 > pián; #99E2 +駣 > táo; #99E3 +駤 > zhì; #99E4 +駥 > róng; #99E5 +駦 > téng; #99E6 +駧 > dòng; #99E7 +駨 > xún; #99E8 +駩 > quán; #99E9 +駪 > shēn; #99EA +駫 > jīong; #99EB +駬 > ĕr; #99EC +駭 > hài; #99ED +駮 > bó; #99EE +駯 > zhu; #99EF +駰 > yīn; #99F0 +駱 > lùo; #99F1 +駳 > dàn; #99F3 +駴 > xiè; #99F4 +駵 > líu; #99F5 +駶 > jú; #99F6 +駷 > sŏng; #99F7 +駸 > qīn; #99F8 +駹 > máng; #99F9 +駺 > liáng; #99FA +駻 > hàn; #99FB +駼 > tú; #99FC +駽 > xuàn; #99FD +駾 > tùi; #99FE +駿 > jùn; #99FF +騀 > é; #9A00 +騁 > chĕng; #9A01 +騂 > xīn; #9A02 +騃 > ái; #9A03 +騄 > lù; #9A04 +騅 > zhūi; #9A05 +騆 > zhōu; #9A06 +騇 > shĕ; #9A07 +騈 > pián; #9A08 +騉 > kūn; #9A09 +騊 > táo; #9A0A +騋 > lái; #9A0B +騌 > zōng; #9A0C +騍 > kè; #9A0D +騎 > qí; #9A0E +騏 > qí; #9A0F +騐 > yàn; #9A10 +騑 > fēi; #9A11 +騒 > sāo; #9A12 +験 > yăn; #9A13 +騔 > jié; #9A14 +騕 > yăo; #9A15 +騖 > wù; #9A16 +騗 > piàn; #9A17 +騘 > cōng; #9A18 +騙 > piàn; #9A19 +騚 > qián; #9A1A +騛 > fēi; #9A1B +騜 > huáng; #9A1C +騝 > jiān; #9A1D +騞 > hùo; #9A1E +騟 > yù; #9A1F +騠 > tí; #9A20 +騡 > quán; #9A21 +騢 > xiá; #9A22 +騣 > zōng; #9A23 +騤 > kúi; #9A24 +騥 > róu; #9A25 +騦 > sī; #9A26 +騧 > guā; #9A27 +騨 > túo; #9A28 +騩 > kùi; #9A29 +騪 > sōu; #9A2A +騫 > qiān; #9A2B +騬 > chéng; #9A2C +騭 > zhì; #9A2D +騮 > líu; #9A2E +騯 > páng; #9A2F +騰 > téng; #9A30 +騱 > xī; #9A31 +騲 > căo; #9A32 +騳 > dú; #9A33 +騴 > yàn; #9A34 +騵 > yuán; #9A35 +騶 > zōu; #9A36 +騷 > sāo; #9A37 +騸 > shàn; #9A38 +騹 > 
lí; #9A39 +騺 > zhì; #9A3A +騻 > shuăng; #9A3B +騼 > lù; #9A3C +騽 > xí; #9A3D +騾 > lúo; #9A3E +騿 > zhāng; #9A3F +驀 > mò; #9A40 +驁 > áo; #9A41 +驂 > cān; #9A42 +驃 > piào; #9A43 +驄 > cōng; #9A44 +驅 > qū; #9A45 +驆 > bì; #9A46 +驇 > zhì; #9A47 +驈 > yù; #9A48 +驉 > xū; #9A49 +驊 > huá; #9A4A +驋 > bō; #9A4B +驌 > sù; #9A4C +驍 > xiāo; #9A4D +驎 > lín; #9A4E +驏 > chăn; #9A4F +驐 > dūn; #9A50 +驑 > líu; #9A51 +驒 > túo; #9A52 +驓 > zēng; #9A53 +驔 > tán; #9A54 +驕 > jiāo; #9A55 +驖 > tiĕ; #9A56 +驗 > yàn; #9A57 +驘 > lúo; #9A58 +驙 > zhān; #9A59 +驚 > jīng; #9A5A +驛 > yì; #9A5B +驜 > yè; #9A5C +驝 > tūo; #9A5D +驞 > bīn; #9A5E +驟 > zòu; #9A5F +驠 > yàn; #9A60 +驡 > péng; #9A61 +驢 > lǘ; #9A62 +驣 > téng; #9A63 +驤 > xiāng; #9A64 +驥 > jì; #9A65 +驦 > shuāng; #9A66 +驧 > jú; #9A67 +驨 > xī; #9A68 +驩 > huān; #9A69 +驪 > lí; #9A6A +驫 > biāo; #9A6B +马 > mă; #9A6C +驭 > yù; #9A6D +驮 > túo; #9A6E +驯 > xún; #9A6F +驰 > chí; #9A70 +驱 > qū; #9A71 +驲 > rì; #9A72 +驳 > bó; #9A73 +驴 > lǘ; #9A74 +驵 > zăng; #9A75 +驶 > shĭ; #9A76 +驷 > sì; #9A77 +驸 > fù; #9A78 +驹 > jū; #9A79 +驺 > zōu; #9A7A +驻 > zhù; #9A7B +驼 > túo; #9A7C +驽 > nú; #9A7D +驾 > jià; #9A7E +驿 > yì; #9A7F +骀 > tái; #9A80 +骁 > xiāo; #9A81 +骂 > mà; #9A82 +骃 > yīn; #9A83 +骄 > jiāo; #9A84 +骅 > huá; #9A85 +骆 > lùo; #9A86 +骇 > hài; #9A87 +骈 > pián; #9A88 +骉 > biāo; #9A89 +骊 > lí; #9A8A +骋 > chĕng; #9A8B +验 > yàn; #9A8C +骍 > xīn; #9A8D +骎 > qīn; #9A8E +骏 > jùn; #9A8F +骐 > qí; #9A90 +骑 > qí; #9A91 +骒 > kè; #9A92 +骓 > zhūi; #9A93 +骔 > zōng; #9A94 +骕 > sù; #9A95 +骖 > cān; #9A96 +骗 > piàn; #9A97 +骘 > zhì; #9A98 +骙 > kúi; #9A99 +骚 > sāo; #9A9A +骛 > wù; #9A9B +骜 > áo; #9A9C +骝 > líu; #9A9D +骞 > qiān; #9A9E +骟 > shàn; #9A9F +骠 > piào; #9AA0 +骡 > lúo; #9AA1 +骢 > cōng; #9AA2 +骣 > chăn; #9AA3 +骤 > zòu; #9AA4 +骥 > jì; #9AA5 +骦 > shuāng; #9AA6 +骧 > xiāng; #9AA7 +骨 > gŭ; #9AA8 +骩 > wĕi; #9AA9 +骪 > wĕi; #9AAA +骫 > wĕi; #9AAB +骬 > yú; #9AAC +骭 > gàn; #9AAD +骮 > yì; #9AAE +骯 > āng; #9AAF +骰 > tóu; #9AB0 +骱 > xiè; #9AB1 +骲 > bāo; #9AB2 +骳 > bì; #9AB3 +骴 > chī; #9AB4 +骵 > tĭ; #9AB5 +骶 > 
dĭ; #9AB6 +骷 > kū; #9AB7 +骸 > hái; #9AB8 +骹 > qiāo; #9AB9 +骺 > gòu; #9ABA +骻 > kuà; #9ABB +骼 > gé; #9ABC +骽 > tŭi; #9ABD +骾 > gĕng; #9ABE +骿 > pián; #9ABF +髀 > bì; #9AC0 +髁 > kē; #9AC1 +髂 > kà; #9AC2 +髃 > yú; #9AC3 +髄 > sŭi; #9AC4 +髅 > lóu; #9AC5 +髆 > bó; #9AC6 +髇 > xiāo; #9AC7 +髈 > páng; #9AC8 +髉 > bō; #9AC9 +髊 > cī; #9ACA +髋 > kuān; #9ACB +髌 > bìn; #9ACC +髍 > mó; #9ACD +髎 > liáo; #9ACE +髏 > lóu; #9ACF +髐 > náo; #9AD0 +髑 > dú; #9AD1 +髒 > zāng; #9AD2 +髓 > sŭi; #9AD3 +體 > tĭ; #9AD4 +髕 > bìn; #9AD5 +髖 > kuān; #9AD6 +髗 > lú; #9AD7 +高 > gāo; #9AD8 +髙 > gāo; #9AD9 +髚 > qiào; #9ADA +髛 > kāo; #9ADB +髜 > qiāo; #9ADC +髝 > lào; #9ADD +髞 > zào; #9ADE +髟 > biāo; #9ADF +髠 > kūn; #9AE0 +髡 > kūn; #9AE1 +髢 > tì; #9AE2 +髣 > făng; #9AE3 +髤 > xīu; #9AE4 +髥 > rán; #9AE5 +髦 > máo; #9AE6 +髧 > dàn; #9AE7 +髨 > kūn; #9AE8 +髩 > bìn; #9AE9 +髪 > fà; #9AEA +髫 > tiáo; #9AEB +髬 > peng; #9AEC +髭 > zī; #9AED +髮 > fă; #9AEE +髯 > rán; #9AEF +髰 > tì; #9AF0 +髱 > pào; #9AF1 +髲 > pī; #9AF2 +髳 > máo; #9AF3 +髴 > fú; #9AF4 +髵 > ér; #9AF5 +髶 > róng; #9AF6 +髷 > qū; #9AF7 +髸 > gong; #9AF8 +髹 > xīu; #9AF9 +髺 > guà; #9AFA +髻 > jì; #9AFB +髼 > péng; #9AFC +髽 > zhuā; #9AFD +髾 > shāo; #9AFE +髿 > shā; #9AFF +鬀 > tì; #9B00 +鬁 > lì; #9B01 +鬂 > bìn; #9B02 +鬃 > zōng; #9B03 +鬄 > tì; #9B04 +鬅 > péng; #9B05 +鬆 > sōng; #9B06 +鬇 > zhēng; #9B07 +鬈 > quán; #9B08 +鬉 > zōng; #9B09 +鬊 > shùn; #9B0A +鬋 > jiān; #9B0B +鬌 > dŭo; #9B0C +鬍 > hú; #9B0D +鬎 > là; #9B0E +鬏 > jīu; #9B0F +鬐 > qí; #9B10 +鬑 > lián; #9B11 +鬒 > zhĕn; #9B12 +鬓 > bìn; #9B13 +鬔 > péng; #9B14 +鬕 > mò; #9B15 +鬖 > sān; #9B16 +鬗 > màn; #9B17 +鬘 > mán; #9B18 +鬙 > sēng; #9B19 +鬚 > xū; #9B1A +鬛 > liè; #9B1B +鬜 > qiān; #9B1C +鬝 > qiān; #9B1D +鬞 > nóng; #9B1E +鬟 > huán; #9B1F +鬠 > kuài; #9B20 +鬡 > níng; #9B21 +鬢 > bìn; #9B22 +鬣 > liè; #9B23 +鬤 > ráng; #9B24 +鬥 > dòu; #9B25 +鬦 > dòu; #9B26 +鬧 > nào; #9B27 +鬨 > hōng; #9B28 +鬩 > xì; #9B29 +鬪 > dòu; #9B2A +鬫 > hăn; #9B2B +鬬 > dòu; #9B2C +鬭 > dòu; #9B2D +鬮 > jīu; #9B2E +鬯 > chàng; #9B2F +鬰 > yù; #9B30 +鬱 > yù; #9B31 +鬲 > lì; 
#9B32 +鬳 > juàn; #9B33 +鬴 > fŭ; #9B34 +鬵 > qián; #9B35 +鬶 > gūi; #9B36 +鬷 > zōng; #9B37 +鬸 > lìu; #9B38 +鬹 > gūi; #9B39 +鬺 > shāng; #9B3A +鬻 > yù; #9B3B +鬼 > gŭi; #9B3C +鬽 > mèi; #9B3D +鬾 > jì; #9B3E +鬿 > qí; #9B3F +魀 > jiè; #9B40 +魁 > kúi; #9B41 +魂 > hún; #9B42 +魃 > bá; #9B43 +魄 > pò; #9B44 +魅 > mèi; #9B45 +魆 > xù; #9B46 +魇 > yăn; #9B47 +魈 > xiāo; #9B48 +魉 > liăng; #9B49 +魊 > yù; #9B4A +魋 > túi; #9B4B +魌 > qī; #9B4C +魍 > wăng; #9B4D +魎 > liăng; #9B4E +魏 > wèi; #9B4F +魐 > jiān; #9B50 +魑 > chī; #9B51 +魒 > piāo; #9B52 +魓 > bì; #9B53 +魔 > mó; #9B54 +魕 > jĭ; #9B55 +魖 > xū; #9B56 +魗 > chŏu; #9B57 +魘 > yăn; #9B58 +魙 > zhăn; #9B59 +魚 > yú; #9B5A +魛 > dāo; #9B5B +魜 > rén; #9B5C +魝 > jì; #9B5D +魟 > gōng; #9B5F +魠 > túo; #9B60 +魡 > diào; #9B61 +魢 > jĭ; #9B62 +魣 > xù; #9B63 +魤 > é; #9B64 +魥 > è; #9B65 +魦 > shā; #9B66 +魧 > háng; #9B67 +魨 > tún; #9B68 +魩 > mò; #9B69 +魪 > jiè; #9B6A +魫 > shĕn; #9B6B +魬 > făn; #9B6C +魭 > yuán; #9B6D +魮 > bí; #9B6E +魯 > lŭ; #9B6F +魰 > wén; #9B70 +魱 > hú; #9B71 +魲 > lú; #9B72 +魳 > zá; #9B73 +魴 > fáng; #9B74 +魵 > fén; #9B75 +魶 > nà; #9B76 +魷 > yóu; #9B77 +魺 > hé; #9B7A +魻 > xiá; #9B7B +魼 > qū; #9B7C +魽 > hān; #9B7D +魾 > pí; #9B7E +魿 > líng; #9B7F +鮀 > túo; #9B80 +鮁 > bō; #9B81 +鮂 > qíu; #9B82 +鮃 > píng; #9B83 +鮄 > fú; #9B84 +鮅 > bì; #9B85 +鮆 > jì; #9B86 +鮇 > wèi; #9B87 +鮈 > jū; #9B88 +鮉 > diāo; #9B89 +鮊 > bó; #9B8A +鮋 > yóu; #9B8B +鮌 > gŭn; #9B8C +鮍 > pī; #9B8D +鮎 > nián; #9B8E +鮏 > xīng; #9B8F +鮐 > tái; #9B90 +鮑 > bào; #9B91 +鮒 > fù; #9B92 +鮓 > zhă; #9B93 +鮔 > jù; #9B94 +鮕 > gū; #9B95 +鮙 > tà; #9B99 +鮚 > jié; #9B9A +鮛 > shù; #9B9B +鮜 > hòu; #9B9C +鮝 > xiăng; #9B9D +鮞 > ér; #9B9E +鮟 > àn; #9B9F +鮠 > wéi; #9BA0 +鮡 > tiāo; #9BA1 +鮢 > zhū; #9BA2 +鮣 > yìn; #9BA3 +鮤 > liè; #9BA4 +鮥 > lùo; #9BA5 +鮦 > tóng; #9BA6 +鮧 > yí; #9BA7 +鮨 > qí; #9BA8 +鮩 > bìng; #9BA9 +鮪 > wĕi; #9BAA +鮫 > jiăo; #9BAB +鮬 > bù; #9BAC +鮭 > gūi; #9BAD +鮮 > xiān; #9BAE +鮯 > gé; #9BAF +鮰 > húi; #9BB0 +鮳 > kăo; #9BB3 +鮵 > dúo; #9BB5 +鮶 > jūn; #9BB6 +鮷 > tí; #9BB7 +鮸 > măn; #9BB8 +鮹 > 
xiāo; #9BB9 +鮺 > ză; #9BBA +鮻 > shā; #9BBB +鮼 > qīn; #9BBC +鮽 > yú; #9BBD +鮾 > nĕi; #9BBE +鮿 > zhé; #9BBF +鯀 > gŭn; #9BC0 +鯁 > gĕng; #9BC1 +鯂 > su; #9BC2 +鯃 > wú; #9BC3 +鯄 > qíu; #9BC4 +鯅 > tíng; #9BC5 +鯆 > fŭ; #9BC6 +鯇 > wăn; #9BC7 +鯈 > yóu; #9BC8 +鯉 > lĭ; #9BC9 +鯊 > shā; #9BCA +鯋 > shā; #9BCB +鯌 > gào; #9BCC +鯍 > méng; #9BCD +鯒 > yŏng; #9BD2 +鯓 > ní; #9BD3 +鯔 > zī; #9BD4 +鯕 > qí; #9BD5 +鯖 > qīng; #9BD6 +鯗 > xiăng; #9BD7 +鯘 > nĕi; #9BD8 +鯙 > chún; #9BD9 +鯚 > jì; #9BDA +鯛 > diāo; #9BDB +鯜 > qiè; #9BDC +鯝 > gù; #9BDD +鯞 > zhŏu; #9BDE +鯟 > dōng; #9BDF +鯠 > lái; #9BE0 +鯡 > fēi; #9BE1 +鯢 > ní; #9BE2 +鯣 > yì; #9BE3 +鯤 > kūn; #9BE4 +鯥 > lù; #9BE5 +鯦 > jìu; #9BE6 +鯧 > chāng; #9BE7 +鯨 > jīng; #9BE8 +鯩 > lún; #9BE9 +鯪 > líng; #9BEA +鯫 > zōu; #9BEB +鯬 > lí; #9BEC +鯭 > mĕng; #9BED +鯮 > zōng; #9BEE +鯯 > zhì; #9BEF +鯰 > nián; #9BF0 +鯴 > shī; #9BF4 +鯵 > shēn; #9BF5 +鯶 > hŭn; #9BF6 +鯷 > shì; #9BF7 +鯸 > hóu; #9BF8 +鯹 > xīng; #9BF9 +鯺 > zhū; #9BFA +鯻 > là; #9BFB +鯼 > zōng; #9BFC +鯽 > jì; #9BFD +鯾 > biān; #9BFE +鯿 > biān; #9BFF +鰀 > huàn; #9C00 +鰁 > quán; #9C01 +鰂 > zé; #9C02 +鰃 > wēi; #9C03 +鰄 > wēi; #9C04 +鰅 > yú; #9C05 +鰆 > qūn; #9C06 +鰇 > róu; #9C07 +鰈 > dié; #9C08 +鰉 > huáng; #9C09 +鰊 > liàn; #9C0A +鰋 > yăn; #9C0B +鰌 > qíu; #9C0C +鰍 > qīu; #9C0D +鰎 > jiàn; #9C0E +鰏 > bì; #9C0F +鰐 > è; #9C10 +鰑 > yáng; #9C11 +鰒 > fù; #9C12 +鰓 > sāi; #9C13 +鰔 > jiăn; #9C14 +鰕 > xiá; #9C15 +鰖 > tŭo; #9C16 +鰗 > hú; #9C17 +鰙 > rùo; #9C19 +鰛 > wēn; #9C1B +鰜 > jiān; #9C1C +鰝 > hào; #9C1D +鰞 > wū; #9C1E +鰟 > fáng; #9C1F +鰠 > sāo; #9C20 +鰡 > líu; #9C21 +鰢 > mă; #9C22 +鰣 > shí; #9C23 +鰤 > shī; #9C24 +鰥 > yín; #9C25 +鰦 > z̄; #9C26 +鰧 > téng; #9C27 +鰨 > tà; #9C28 +鰩 > yáo; #9C29 +鰪 > gé; #9C2A +鰫 > róng; #9C2B +鰬 > qián; #9C2C +鰭 > qí; #9C2D +鰮 > wēn; #9C2E +鰯 > rùo; #9C2F +鰱 > lián; #9C31 +鰲 > áo; #9C32 +鰳 > lè; #9C33 +鰴 > hūi; #9C34 +鰵 > mĭn; #9C35 +鰶 > jì; #9C36 +鰷 > tiáo; #9C37 +鰸 > qū; #9C38 +鰹 > jiān; #9C39 +鰺 > sāo; #9C3A +鰻 > mán; #9C3B +鰼 > xí; #9C3C +鰽 > qíu; #9C3D +鰾 > biào; #9C3E +鰿 > jī; #9C3F 
+鱀 > jì; #9C40 +鱁 > zhú; #9C41 +鱂 > jiāng; #9C42 +鱃 > qīu; #9C43 +鱄 > zhuān; #9C44 +鱅 > yóng; #9C45 +鱆 > zhāng; #9C46 +鱇 > kāng; #9C47 +鱈 > xuĕ; #9C48 +鱉 > biē; #9C49 +鱊 > jué; #9C4A +鱋 > qū; #9C4B +鱌 > xiàng; #9C4C +鱍 > bō; #9C4D +鱎 > jiāo; #9C4E +鱏 > xún; #9C4F +鱐 > sù; #9C50 +鱑 > huáng; #9C51 +鱒 > zùn; #9C52 +鱓 > shàn; #9C53 +鱔 > shàn; #9C54 +鱕 > fān; #9C55 +鱖 > jué; #9C56 +鱗 > lín; #9C57 +鱘 > xún; #9C58 +鱙 > miáo; #9C59 +鱚 > xĭ; #9C5A +鱝 > fèn; #9C5D +鱞 > guān; #9C5E +鱟 > hòu; #9C5F +鱠 > kuài; #9C60 +鱡 > zéi; #9C61 +鱢 > sāo; #9C62 +鱣 > zhān; #9C63 +鱤 > găn; #9C64 +鱥 > gùi; #9C65 +鱦 > shéng; #9C66 +鱧 > lĭ; #9C67 +鱨 > cháng; #9C68 +鱬 > rú; #9C6C +鱭 > jì; #9C6D +鱮 > xù; #9C6E +鱯 > hùo; #9C6F +鱱 > lì; #9C71 +鱲 > liè; #9C72 +鱳 > lì; #9C73 +鱴 > miè; #9C74 +鱵 > zhēn; #9C75 +鱶 > xiăng; #9C76 +鱷 > è; #9C77 +鱸 > lú; #9C78 +鱹 > guàn; #9C79 +鱺 > lí; #9C7A +鱻 > xiān; #9C7B +鱼 > yú; #9C7C +鱽 > dāo; #9C7D +鱾 > jĭ; #9C7E +鱿 > yóu; #9C7F +鲀 > tún; #9C80 +鲁 > lŭ; #9C81 +鲂 > fáng; #9C82 +鲃 > bā; #9C83 +鲄 > hé; #9C84 +鲅 > bō; #9C85 +鲆 > píng; #9C86 +鲇 > nián; #9C87 +鲈 > lú; #9C88 +鲉 > yóu; #9C89 +鲊 > zhă; #9C8A +鲋 > fù; #9C8B +鲌 > bó; #9C8C +鲍 > bào; #9C8D +鲎 > hòu; #9C8E +鲏 > pī; #9C8F +鲐 > tái; #9C90 +鲑 > gūi; #9C91 +鲒 > jié; #9C92 +鲓 > kăo; #9C93 +鲔 > wĕi; #9C94 +鲕 > ér; #9C95 +鲖 > tóng; #9C96 +鲗 > zé; #9C97 +鲘 > hòu; #9C98 +鲙 > kuài; #9C99 +鲚 > jì; #9C9A +鲛 > jiăo; #9C9B +鲜 > xiān; #9C9C +鲝 > ză; #9C9D +鲞 > xiăng; #9C9E +鲟 > xún; #9C9F +鲠 > gĕng; #9CA0 +鲡 > lí; #9CA1 +鲢 > lián; #9CA2 +鲣 > jiān; #9CA3 +鲤 > lĭ; #9CA4 +鲥 > shí; #9CA5 +鲦 > tiáo; #9CA6 +鲧 > gŭn; #9CA7 +鲨 > shā; #9CA8 +鲩 > wăn; #9CA9 +鲪 > jūn; #9CAA +鲫 > jì; #9CAB +鲬 > yŏng; #9CAC +鲭 > qīng; #9CAD +鲮 > líng; #9CAE +鲯 > qí; #9CAF +鲰 > zōu; #9CB0 +鲱 > fēi; #9CB1 +鲲 > kūn; #9CB2 +鲳 > chāng; #9CB3 +鲴 > gù; #9CB4 +鲵 > ní; #9CB5 +鲶 > nián; #9CB6 +鲷 > diāo; #9CB7 +鲸 > jīng; #9CB8 +鲹 > shēn; #9CB9 +鲺 > shī; #9CBA +鲻 > zī; #9CBB +鲼 > fèn; #9CBC +鲽 > dié; #9CBD +鲾 > bì; #9CBE +鲿 > cháng; #9CBF +鳀 > shì; #9CC0 +鳁 > wēn; #9CC1 
+鳂 > wēi; #9CC2 +鳃 > sāi; #9CC3 +鳄 > è; #9CC4 +鳅 > qīu; #9CC5 +鳆 > fù; #9CC6 +鳇 > huáng; #9CC7 +鳈 > quán; #9CC8 +鳉 > jiāng; #9CC9 +鳊 > biān; #9CCA +鳋 > sāo; #9CCB +鳌 > áo; #9CCC +鳍 > qí; #9CCD +鳎 > tà; #9CCE +鳏 > yín; #9CCF +鳐 > yáo; #9CD0 +鳑 > fáng; #9CD1 +鳒 > jiān; #9CD2 +鳓 > lè; #9CD3 +鳔 > biào; #9CD4 +鳕 > xuĕ; #9CD5 +鳖 > biē; #9CD6 +鳗 > mán; #9CD7 +鳘 > mĭn; #9CD8 +鳙 > yóng; #9CD9 +鳚 > wèi; #9CDA +鳛 > xí; #9CDB +鳜 > jué; #9CDC +鳝 > shàn; #9CDD +鳞 > lín; #9CDE +鳟 > zùn; #9CDF +鳠 > hùo; #9CE0 +鳡 > găn; #9CE1 +鳢 > lĭ; #9CE2 +鳣 > zhān; #9CE3 +鳤 > guăn; #9CE4 +鳥 > niăo; #9CE5 +鳦 > yĭ; #9CE6 +鳧 > fú; #9CE7 +鳨 > lì; #9CE8 +鳩 > jīu; #9CE9 +鳪 > bŭ; #9CEA +鳫 > yàn; #9CEB +鳬 > fú; #9CEC +鳭 > diāo; #9CED +鳮 > jī; #9CEE +鳯 > fèng; #9CEF +鳱 > gān; #9CF1 +鳲 > shī; #9CF2 +鳳 > fèng; #9CF3 +鳴 > míng; #9CF4 +鳵 > băo; #9CF5 +鳶 > yuān; #9CF6 +鳷 > zhī; #9CF7 +鳸 > hù; #9CF8 +鳹 > qín; #9CF9 +鳺 > fū; #9CFA +鳻 > fēn; #9CFB +鳼 > wén; #9CFC +鳽 > jiān; #9CFD +鳾 > shī; #9CFE +鳿 > yù; #9CFF +鴀 > fŏu; #9D00 +鴁 > yiāo; #9D01 +鴂 > juè; #9D02 +鴃 > jué; #9D03 +鴄 > pī; #9D04 +鴅 > huān; #9D05 +鴆 > zhèn; #9D06 +鴇 > băo; #9D07 +鴈 > yàn; #9D08 +鴉 > yā; #9D09 +鴊 > zhèng; #9D0A +鴋 > fāng; #9D0B +鴌 > fèng; #9D0C +鴍 > wén; #9D0D +鴎 > ōu; #9D0E +鴏 > tè; #9D0F +鴐 > jiā; #9D10 +鴑 > nú; #9D11 +鴒 > líng; #9D12 +鴓 > miè; #9D13 +鴔 > fú; #9D14 +鴕 > túo; #9D15 +鴖 > wén; #9D16 +鴗 > lì; #9D17 +鴘 > biàn; #9D18 +鴙 > zhì; #9D19 +鴚 > gē; #9D1A +鴛 > yuān; #9D1B +鴜 > zī; #9D1C +鴝 > qú; #9D1D +鴞 > xiāo; #9D1E +鴟 > zhī; #9D1F +鴠 > dàn; #9D20 +鴡 > jū; #9D21 +鴢 > yòu; #9D22 +鴣 > gū; #9D23 +鴤 > zhōng; #9D24 +鴥 > yù; #9D25 +鴦 > yāng; #9D26 +鴧 > ròng; #9D27 +鴨 > yā; #9D28 +鴩 > tiĕ; #9D29 +鴪 > yù; #9D2A +鴬 > yīng; #9D2C +鴭 > zhūi; #9D2D +鴮 > wū; #9D2E +鴯 > ér; #9D2F +鴰 > guā; #9D30 +鴱 > ài; #9D31 +鴲 > zhī; #9D32 +鴳 > yàn; #9D33 +鴴 > héng; #9D34 +鴵 > jiāo; #9D35 +鴶 > jí; #9D36 +鴷 > liè; #9D37 +鴸 > zhū; #9D38 +鴹 > rén; #9D39 +鴺 > yí; #9D3A +鴻 > hóng; #9D3B +鴼 > lùo; #9D3C +鴽 > rú; #9D3D +鴾 > móu; #9D3E +鴿 > gē; #9D3F +鵀 > rèn; #9D40 
+鵁 > jiāo; #9D41 +鵂 > xīu; #9D42 +鵃 > zhōu; #9D43 +鵄 > zhī; #9D44 +鵅 > lùo; #9D45 +鵉 > luán; #9D49 +鵊 > jiá; #9D4A +鵋 > jì; #9D4B +鵌 > yú; #9D4C +鵍 > huān; #9D4D +鵎 > tŭo; #9D4E +鵏 > bū; #9D4F +鵐 > wú; #9D50 +鵑 > juān; #9D51 +鵒 > yù; #9D52 +鵓 > bó; #9D53 +鵔 > xùn; #9D54 +鵕 > xùn; #9D55 +鵖 > bì; #9D56 +鵗 > xī; #9D57 +鵘 > jùn; #9D58 +鵙 > jú; #9D59 +鵚 > tú; #9D5A +鵛 > jīng; #9D5B +鵜 > tí; #9D5C +鵝 > é; #9D5D +鵞 > é; #9D5E +鵟 > kuáng; #9D5F +鵠 > hú; #9D60 +鵡 > wŭ; #9D61 +鵢 > shēn; #9D62 +鵣 > lài; #9D63 +鵦 > lù; #9D66 +鵧 > píng; #9D67 +鵨 > shū; #9D68 +鵩 > fú; #9D69 +鵪 > ān; #9D6A +鵫 > zhào; #9D6B +鵬 > péng; #9D6C +鵭 > qín; #9D6D +鵮 > qiān; #9D6E +鵯 > bēi; #9D6F +鵰 > diāo; #9D70 +鵱 > lù; #9D71 +鵲 > què; #9D72 +鵳 > jiān; #9D73 +鵴 > jú; #9D74 +鵵 > tù; #9D75 +鵶 > yā; #9D76 +鵷 > yuān; #9D77 +鵸 > qí; #9D78 +鵹 > lí; #9D79 +鵺 > yè; #9D7A +鵻 > zhūi; #9D7B +鵼 > kōng; #9D7C +鵽 > zhùi; #9D7D +鵾 > kūn; #9D7E +鵿 > shēng; #9D7F +鶀 > qí; #9D80 +鶁 > jīng; #9D81 +鶂 > yì; #9D82 +鶃 > yì; #9D83 +鶄 > jīng; #9D84 +鶅 > zī; #9D85 +鶆 > lái; #9D86 +鶇 > dōng; #9D87 +鶈 > qī; #9D88 +鶉 > chún; #9D89 +鶊 > gēng; #9D8A +鶋 > jū; #9D8B +鶌 > qū; #9D8C +鶏 > jī; #9D8F +鶐 > shù; #9D90 +鶒 > chì; #9D92 +鶓 > miáo; #9D93 +鶔 > róu; #9D94 +鶕 > ān; #9D95 +鶖 > qīu; #9D96 +鶗 > tí; #9D97 +鶘 > hú; #9D98 +鶙 > tí; #9D99 +鶚 > è; #9D9A +鶛 > jiē; #9D9B +鶜 > máo; #9D9C +鶝 > fú; #9D9D +鶞 > chūn; #9D9E +鶟 > tú; #9D9F +鶠 > yăn; #9DA0 +鶡 > hé; #9DA1 +鶢 > yuán; #9DA2 +鶣 > piān; #9DA3 +鶤 > yùn; #9DA4 +鶥 > méi; #9DA5 +鶦 > hú; #9DA6 +鶧 > yīng; #9DA7 +鶨 > dùn; #9DA8 +鶩 > mù; #9DA9 +鶪 > jú; #9DAA +鶬 > cāng; #9DAC +鶭 > făng; #9DAD +鶮 > gù; #9DAE +鶯 > yīng; #9DAF +鶰 > yuán; #9DB0 +鶱 > xuān; #9DB1 +鶲 > wēng; #9DB2 +鶳 > shī; #9DB3 +鶴 > hè; #9DB4 +鶵 > chú; #9DB5 +鶶 > táng; #9DB6 +鶷 > xià; #9DB7 +鶸 > rùo; #9DB8 +鶹 > líu; #9DB9 +鶺 > jí; #9DBA +鶻 > gú; #9DBB +鶼 > jiān; #9DBC +鶽 > zhŭn; #9DBD +鶾 > hàn; #9DBE +鶿 > zī; #9DBF +鷀 > zī; #9DC0 +鷁 > nì; #9DC1 +鷂 > yào; #9DC2 +鷃 > yàn; #9DC3 +鷄 > jī; #9DC4 +鷅 > lì; #9DC5 +鷆 > tián; #9DC6 +鷇 > kòu; 
#9DC7 +鷈 > tī; #9DC8 +鷉 > tī; #9DC9 +鷊 > nì; #9DCA +鷋 > tú; #9DCB +鷌 > mă; #9DCC +鷍 > jiāo; #9DCD +鷎 > gāo; #9DCE +鷏 > tián; #9DCF +鷐 > chén; #9DD0 +鷑 > lì; #9DD1 +鷒 > zhuān; #9DD2 +鷓 > zhè; #9DD3 +鷔 > áo; #9DD4 +鷕 > yăo; #9DD5 +鷖 > yī; #9DD6 +鷗 > ōu; #9DD7 +鷘 > chì; #9DD8 +鷙 > zhì; #9DD9 +鷚 > liáo; #9DDA +鷛 > róng; #9DDB +鷜 > lóu; #9DDC +鷝 > bì; #9DDD +鷞 > shuāng; #9DDE +鷟 > zhúo; #9DDF +鷠 > yú; #9DE0 +鷡 > wú; #9DE1 +鷢 > jué; #9DE2 +鷣 > yín; #9DE3 +鷤 > quán; #9DE4 +鷥 > sī; #9DE5 +鷦 > jiāo; #9DE6 +鷧 > yì; #9DE7 +鷨 > huā; #9DE8 +鷩 > bì; #9DE9 +鷪 > yīng; #9DEA +鷫 > sù; #9DEB +鷬 > huáng; #9DEC +鷭 > fán; #9DED +鷮 > jiāo; #9DEE +鷯 > liáo; #9DEF +鷰 > yàn; #9DF0 +鷱 > kāo; #9DF1 +鷲 > jìu; #9DF2 +鷳 > xián; #9DF3 +鷴 > xián; #9DF4 +鷵 > tú; #9DF5 +鷶 > măi; #9DF6 +鷷 > zūn; #9DF7 +鷸 > yù; #9DF8 +鷹 > yīng; #9DF9 +鷺 > lù; #9DFA +鷻 > tuán; #9DFB +鷼 > xián; #9DFC +鷽 > xué; #9DFD +鷾 > yì; #9DFE +鷿 > pì; #9DFF +鸀 > shú; #9E00 +鸁 > lúo; #9E01 +鸂 > qī; #9E02 +鸃 > yí; #9E03 +鸄 > jí; #9E04 +鸅 > zhé; #9E05 +鸆 > yú; #9E06 +鸇 > zhān; #9E07 +鸈 > yè; #9E08 +鸉 > yáng; #9E09 +鸊 > pì; #9E0A +鸋 > níng; #9E0B +鸌 > hùo; #9E0C +鸍 > mí; #9E0D +鸎 > yīng; #9E0E +鸏 > méng; #9E0F +鸐 > dí; #9E10 +鸑 > yuè; #9E11 +鸒 > yú; #9E12 +鸓 > lĕi; #9E13 +鸔 > bào; #9E14 +鸕 > lú; #9E15 +鸖 > hè; #9E16 +鸗 > lóng; #9E17 +鸘 > shuāng; #9E18 +鸙 > yuè; #9E19 +鸚 > yīng; #9E1A +鸛 > guàn; #9E1B +鸜 > qú; #9E1C +鸝 > lí; #9E1D +鸞 > luán; #9E1E +鸟 > niăo; #9E1F +鸠 > jīu; #9E20 +鸡 > jī; #9E21 +鸢 > yuān; #9E22 +鸣 > míng; #9E23 +鸤 > shī; #9E24 +鸥 > ōu; #9E25 +鸦 > yā; #9E26 +鸧 > cāng; #9E27 +鸨 > băo; #9E28 +鸩 > zhèn; #9E29 +鸪 > gū; #9E2A +鸫 > dōng; #9E2B +鸬 > lú; #9E2C +鸭 > yā; #9E2D +鸮 > xiāo; #9E2E +鸯 > yāng; #9E2F +鸰 > líng; #9E30 +鸱 > zhī; #9E31 +鸲 > qú; #9E32 +鸳 > yuān; #9E33 +鸴 > xué; #9E34 +鸵 > túo; #9E35 +鸶 > sī; #9E36 +鸷 > zhì; #9E37 +鸸 > ér; #9E38 +鸹 > guā; #9E39 +鸺 > xīu; #9E3A +鸻 > héng; #9E3B +鸼 > zhōu; #9E3C +鸽 > gē; #9E3D +鸾 > luán; #9E3E +鸿 > hóng; #9E3F +鹀 > wú; #9E40 +鹁 > bó; #9E41 +鹂 > lí; #9E42 +鹃 > juān; #9E43 +鹄 > 
hú; #9E44 +鹅 > é; #9E45 +鹆 > yù; #9E46 +鹇 > xián; #9E47 +鹈 > tí; #9E48 +鹉 > wŭ; #9E49 +鹊 > què; #9E4A +鹋 > miáo; #9E4B +鹌 > ān; #9E4C +鹍 > kūn; #9E4D +鹎 > bēi; #9E4E +鹏 > péng; #9E4F +鹐 > qiān; #9E50 +鹑 > chún; #9E51 +鹒 > gēng; #9E52 +鹓 > yuān; #9E53 +鹔 > sù; #9E54 +鹕 > hú; #9E55 +鹖 > hé; #9E56 +鹗 > è; #9E57 +鹘 > gú; #9E58 +鹙 > qīu; #9E59 +鹚 > zī; #9E5A +鹛 > méi; #9E5B +鹜 > mù; #9E5C +鹝 > nì; #9E5D +鹞 > yào; #9E5E +鹟 > wēng; #9E5F +鹠 > líu; #9E60 +鹡 > jí; #9E61 +鹢 > nì; #9E62 +鹣 > jiān; #9E63 +鹤 > hè; #9E64 +鹥 > yī; #9E65 +鹦 > yīng; #9E66 +鹧 > zhè; #9E67 +鹨 > liáo; #9E68 +鹩 > liáo; #9E69 +鹪 > jiāo; #9E6A +鹫 > jìu; #9E6B +鹬 > yù; #9E6C +鹭 > lù; #9E6D +鹮 > xuán; #9E6E +鹯 > zhān; #9E6F +鹰 > yīng; #9E70 +鹱 > hùo; #9E71 +鹲 > méng; #9E72 +鹳 > guàn; #9E73 +鹴 > shuāng; #9E74 +鹵 > lŭ; #9E75 +鹶 > jīn; #9E76 +鹷 > líng; #9E77 +鹸 > jiăn; #9E78 +鹹 > xián; #9E79 +鹺 > cúo; #9E7A +鹻 > jiăn; #9E7B +鹼 > jiăn; #9E7C +鹽 > yán; #9E7D +鹾 > cúo; #9E7E +鹿 > lù; #9E7F +麀 > yōu; #9E80 +麁 > cū; #9E81 +麂 > jĭ; #9E82 +麃 > biāo; #9E83 +麄 > cū; #9E84 +麅 > biāo; #9E85 +麆 > zhù; #9E86 +麇 > jūn; #9E87 +麈 > zhŭ; #9E88 +麉 > jiān; #9E89 +麊 > mí; #9E8A +麋 > mí; #9E8B +麌 > wú; #9E8C +麍 > líu; #9E8D +麎 > chén; #9E8E +麏 > jūn; #9E8F +麐 > lín; #9E90 +麑 > ní; #9E91 +麒 > qí; #9E92 +麓 > lù; #9E93 +麔 > jìu; #9E94 +麕 > jūn; #9E95 +麖 > jīng; #9E96 +麗 > lì; #9E97 +麘 > xiāng; #9E98 +麙 > yán; #9E99 +麚 > jiā; #9E9A +麛 > mí; #9E9B +麜 > lì; #9E9C +麝 > shè; #9E9D +麞 > zhāng; #9E9E +麟 > lín; #9E9F +麠 > jīng; #9EA0 +麡 > jī; #9EA1 +麢 > líng; #9EA2 +麣 > yán; #9EA3 +麤 > cū; #9EA4 +麥 > mài; #9EA5 +麦 > mài; #9EA6 +麧 > gē; #9EA7 +麨 > chăo; #9EA8 +麩 > fū; #9EA9 +麪 > miăn; #9EAA +麫 > miăn; #9EAB +麬 > fū; #9EAC +麭 > pào; #9EAD +麮 > qù; #9EAE +麯 > qú; #9EAF +麰 > móu; #9EB0 +麱 > fū; #9EB1 +麲 > xiàn; #9EB2 +麳 > lái; #9EB3 +麴 > qú; #9EB4 +麵 > miàn; #9EB5 +麷 > fēng; #9EB7 +麸 > fū; #9EB8 +麹 > qú; #9EB9 +麺 > miàn; #9EBA +麻 > má; #9EBB +麼 > mo; #9EBC +麽 > mo; #9EBD +麾 > hūi; #9EBE +黀 > zōu; #9EC0 +黁 > nēn; #9EC1 +黂 > fén; #9EC2 +黃 > 
huáng; #9EC3 +黄 > huáng; #9EC4 +黅 > jīn; #9EC5 +黆 > guāng; #9EC6 +黇 > tiān; #9EC7 +黈 > tŏu; #9EC8 +黉 > héng; #9EC9 +黊 > xī; #9ECA +黋 > kuăng; #9ECB +黌 > héng; #9ECC +黍 > shŭ; #9ECD +黎 > lí; #9ECE +黏 > nián; #9ECF +黐 > chī; #9ED0 +黑 > hēi; #9ED1 +黒 > hēi; #9ED2 +黓 > yì; #9ED3 +黔 > qián; #9ED4 +黕 > dān; #9ED5 +黖 > xì; #9ED6 +黗 > tuăn; #9ED7 +默 > mò; #9ED8 +黙 > mò; #9ED9 +黚 > qián; #9EDA +黛 > dài; #9EDB +黜 > chù; #9EDC +黝 > yŏu; #9EDD +點 > diăn; #9EDE +黟 > yī; #9EDF +黠 > xiá; #9EE0 +黡 > yăn; #9EE1 +黢 > qū; #9EE2 +黣 > mĕi; #9EE3 +黤 > yăn; #9EE4 +黥 > jīng; #9EE5 +黦 > yù; #9EE6 +黧 > lí; #9EE7 +黨 > dăng; #9EE8 +黩 > dú; #9EE9 +黪 > căn; #9EEA +黫 > yīn; #9EEB +黬 > àn; #9EEC +黭 > yān; #9EED +黮 > tăn; #9EEE +黯 > àn; #9EEF +黰 > zhĕn; #9EF0 +黱 > dài; #9EF1 +黲 > căn; #9EF2 +黳 > yī; #9EF3 +黴 > méi; #9EF4 +黵 > dăn; #9EF5 +黶 > yăn; #9EF6 +黷 > dú; #9EF7 +黸 > lú; #9EF8 +黹 > zhĭ; #9EF9 +黺 > fĕn; #9EFA +黻 > fù; #9EFB +黼 > fŭ; #9EFC +黽 > mĭn; #9EFD +黾 > mĭn; #9EFE +黿 > yuán; #9EFF +鼀 > cù; #9F00 +鼁 > qù; #9F01 +鼂 > cháo; #9F02 +鼃 > wā; #9F03 +鼄 > zhū; #9F04 +鼅 > zhī; #9F05 +鼆 > máng; #9F06 +鼇 > áo; #9F07 +鼈 > biē; #9F08 +鼉 > túo; #9F09 +鼊 > bì; #9F0A +鼋 > yuán; #9F0B +鼌 > cháo; #9F0C +鼍 > túo; #9F0D +鼎 > dĭng; #9F0E +鼏 > mì; #9F0F +鼐 > nài; #9F10 +鼑 > dĭng; #9F11 +鼒 > zī; #9F12 +鼓 > gŭ; #9F13 +鼔 > gŭ; #9F14 +鼕 > dōng; #9F15 +鼖 > fén; #9F16 +鼗 > táo; #9F17 +鼘 > yuān; #9F18 +鼙 > pí; #9F19 +鼚 > chāng; #9F1A +鼛 > gāo; #9F1B +鼜 > qì; #9F1C +鼝 > yuān; #9F1D +鼞 > tāng; #9F1E +鼟 > tēng; #9F1F +鼠 > shŭ; #9F20 +鼡 > shŭ; #9F21 +鼢 > fén; #9F22 +鼣 > fèi; #9F23 +鼤 > wén; #9F24 +鼥 > bá; #9F25 +鼦 > diāo; #9F26 +鼧 > túo; #9F27 +鼨 > tóng; #9F28 +鼩 > qú; #9F29 +鼪 > shēng; #9F2A +鼫 > shí; #9F2B +鼬 > yòu; #9F2C +鼭 > shí; #9F2D +鼮 > tíng; #9F2E +鼯 > wú; #9F2F +鼰 > niàn; #9F30 +鼱 > jīng; #9F31 +鼲 > hún; #9F32 +鼳 > jú; #9F33 +鼴 > yăn; #9F34 +鼵 > tú; #9F35 +鼶 > tí; #9F36 +鼷 > xī; #9F37 +鼸 > xiăn; #9F38 +鼹 > yăn; #9F39 +鼺 > léi; #9F3A +鼻 > bí; #9F3B +鼼 > yăo; #9F3C +鼽 > qíu; #9F3D +鼾 > hān; #9F3E +鼿 > wū; #9F3F 
+齀 > wù; #9F40 +齁 > hóu; #9F41 +齂 > xì; #9F42 +齃 > gé; #9F43 +齄 > zhā; #9F44 +齅 > xìu; #9F45 +齆 > wèng; #9F46 +齇 > zhā; #9F47 +齈 > nóng; #9F48 +齉 > nàng; #9F49 +齊 > qí; #9F4A +齋 > zhāi; #9F4B +齌 > jì; #9F4C +齍 > zī; #9F4D +齎 > jī; #9F4E +齏 > jī; #9F4F +齐 > qí; #9F50 +齑 > jī; #9F51 +齒 > chĭ; #9F52 +齓 > chèn; #9F53 +齔 > chèn; #9F54 +齕 > hé; #9F55 +齖 > yá; #9F56 +齗 > kĕn; #9F57 +齘 > xiè; #9F58 +齙 > páo; #9F59 +齚 > cùo; #9F5A +齛 > shì; #9F5B +齜 > zī; #9F5C +齝 > chī; #9F5D +齞 > niàn; #9F5E +齟 > jŭ; #9F5F +齠 > tiáo; #9F60 +齡 > líng; #9F61 +齢 > líng; #9F62 +齣 > chū; #9F63 +齤 > quán; #9F64 +齥 > xiè; #9F65 +齦 > kĕn; #9F66 +齧 > niè; #9F67 +齨 > jìu; #9F68 +齩 > yăo; #9F69 +齪 > chùo; #9F6A +齫 > kŭn; #9F6B +齬 > yŭ; #9F6C +齭 > chŭ; #9F6D +齮 > yĭ; #9F6E +齯 > ní; #9F6F +齰 > cùo; #9F70 +齱 > zōu; #9F71 +齲 > qŭ; #9F72 +齳 > nĕn; #9F73 +齴 > xiăn; #9F74 +齵 > óu; #9F75 +齶 > è; #9F76 +齷 > wò; #9F77 +齸 > yì; #9F78 +齹 > chūo; #9F79 +齺 > zōu; #9F7A +齻 > diān; #9F7B +齼 > chŭ; #9F7C +齽 > jìn; #9F7D +齾 > yà; #9F7E +齿 > chĭ; #9F7F +龀 > chèn; #9F80 +龁 > hé; #9F81 +龂 > kĕn; #9F82 +龃 > jŭ; #9F83 +龄 > líng; #9F84 +龅 > páo; #9F85 +龆 > tiáo; #9F86 +龇 > zī; #9F87 +龈 > kĕn; #9F88 +龉 > yŭ; #9F89 +龊 > chùo; #9F8A +龋 > qŭ; #9F8B +龌 > wò; #9F8C +龍 > lóng; #9F8D +龎 > páng; #9F8E +龏 > gōng; #9F8F +龐 > páng; #9F90 +龑 > yăn; #9F91 +龒 > lóng; #9F92 +龓 > lóng; #9F93 +龔 > gōng; #9F94 +龕 > kān; #9F95 +龖 > tà; #9F96 +龗 > líng; #9F97 +龘 > tà; #9F98 +龙 > lóng; #9F99 +龚 > gōng; #9F9A +龛 > kān; #9F9B +龜 > gūi; #9F9C +龝 > qīu; #9F9D +龞 > biē; #9F9E +龟 > gūi; #9F9F +龠 > yuè; #9FA0 +龡 > chùi; #9FA1 +龢 > hé; #9FA2 +龣 > jué; #9FA3 +龤 > xié; #9FA4 +龥 > yù; #9FA5 +癩 > là; #F90E +兀 > wù; #FA0C +嗀 > hùo; #FA0D +塚 > zhŏng; #FA10 +晴 > qíng; #FA12 +凞 > xī; #FA15 +猪 > zhū; #FA16 +益 > yì; #FA17 +礼 > lĭ; #FA18 +神 > shén; #FA19 +祥 > xiáng; #FA1A +福 > fú; #FA1B +靖 > jìng; #FA1C +精 > jīng; #FA1D +羽 > yŭ; #FA1E +諸 > zhū; #FA22 +逸 > yì; #FA25 +都 > dū; #FA26 +飯 > fàn; #FA2A +飼 > sì; #FA2B +館 > guăn; #FA2C +鶴 > hè; #FA2D + +# eof diff --git 
a/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt new file mode 100644 index 00000000000..fe353f3a024 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt @@ -0,0 +1,6366 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2001, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +# Date: Tue Jan 23 12:42:02 2001 +#-------------------------------------------------------------------- + +# Kanji-English + +丁>'[male adult]'; +七>'[seven]'; +万>'[ten thousand]'; +丈>'[unit of length equal 3.3 meters]'; +三>'[three]'; +上>'[top]'; +下>'[under]'; +不>'[no]'; +与>'[and]'; +丐>'[beggar]'; +丑>'[clown]'; +且>'[moreover]'; +丕>'[great]'; +世>'[generation]'; +丗>'[thirty]'; +丘>'[hill]'; +丙>'[third of heavenly stems]'; +丞>'[assist]'; +両>'[two]'; +並>'[equal to]'; +个>'[numerary adjunct]'; +中>'[central]'; +丱>'[child''s hairstyle bound in two tufts]'; +串>'[string]'; +丶>'[dot]'; +丸>'[small round object]'; +丹>'[cinnabar (native HgS)]'; +主>'[master]'; +丼>'[bowl of food]'; +丿>'[line]'; +乂>'[govern]'; +乃>'[then]'; +久>'[long time (ago)]'; +之>'[''s (marks preceding phrase as modifier of following phrase)]'; +乍>'[suddenly]'; +乎>'[interrogative or exclamatory final particle]'; +乏>'[lack]'; +乕>'[tiger]'; +乖>'[rebel]'; +乗>'[ride]'; +乘>'[ride]'; +乙>'[second heaven''s stem]'; +九>'[nine]'; +乞>'[beg]'; +也>'[also]'; +乢>'[lid]'; +乱>'[confusion]'; +乳>'[breast]'; +乾>'[dry]'; +亀>'[turtle or tortoise]'; +亂>'[confusion]'; +亅>'[hook]'; +了>'[to finish]'; +予>'[I]'; +争>'[dispute]'; +亊>'[affair]'; +事>'[affair]'; +二>'[two]'; +于>'[in]'; +云>'[say]'; +互>'[mutually]'; +五>'[five]'; +井>'[well]'; +亘>'[extend across]'; +亙>'[extend across]'; +些>'[little]'; +亜>'[asia]'; +亞>'[asia]'; +亟>'[urgently]'; +亠>'[head]'; 
+亡>'[death]'; +亢>'[high]'; +交>'[mix]'; +亥>'[last of 12 earth branches]'; +亦>'[also]'; +亨>'[smoothly]'; +享>'[enjoy]'; +京>'[capital city]'; +亭>'[pavilion]'; +亮>'[bright]'; +亰>'[capital city]'; +亳>'[name of district in Anhui]'; +亶>'[sincere]'; +人>'[man]'; +什>'[file of ten soldiers]'; +仁>'[humaneness]'; +仂>'[surplus or excess]'; +仄>'[slanting]'; +仆>'[fall forward]'; +仇>'[enemy]'; +今>'[now]'; +介>'[forerunner]'; +仍>'[yet]'; +从>'[from]'; +仏>'[buddha]'; +仔>'[small thing]'; +仕>'[official]'; +他>'[other]'; +仗>'[rely upon]'; +付>'[give]'; +仙>'[Taoist super-being]'; +仝>'[together]'; +仞>'[ancient unit of measure (8 feet)]'; +仟>'[one thousand]'; +代>'[replace]'; +令>'[command]'; +以>'[by means of]'; +仭>'[ancient unit of measure (8 feet)]'; +仮>'[falsehood]'; +仰>'[raise the head to look]'; +仲>'[middle brother]'; +件>'[numerary adjunct for article]'; +价>'[price]'; +任>'[trust to]'; +企>'[plan a project]'; +伉>'[compare]'; +伊>'[third person pronoun]'; +伍>'[five]'; +伎>'[talent]'; +伏>'[crouch]'; +伐>'[cut down]'; +休>'[rest]'; +会>'[assemble]'; +伜>'[deputy]'; +伝>'[summon]'; +伯>'[older brother]'; +估>'[merchant]'; +伴>'[companion]'; +伶>'[lonely]'; +伸>'[extend]'; +伺>'[serve]'; +似>'[resemble]'; +伽>'[transcription of sanskrit gha in buddhist texts ('ëmâæ' \"samgha\")]'; +佃>'[tenant farmer]'; +但>'[only]'; +佇>'[wait]'; +位>'[throne]'; +低>'[low]'; +住>'[reside]'; +佐>'[assist]'; +佑>'[help]'; +体>'[body]'; +何>'[what]'; +佗>'[other]'; +余>'[I]'; +佚>'[indulge in pleasures]'; +佛>'[buddha (contraction of MC 'bhiêtdha')]'; +作>'[make]'; +佝>'[rickets]'; +佞>'[flattery]'; +佩>'[belt ornament]'; +佯>'[pretend]'; +佰>'[hundred]'; +佳>'[good]'; +併>'[combine]'; +佶>'[strong]'; +佻>'[frivolous]'; +佼>'[beautiful]'; +使>'[cause]'; +侃>'[upright and strong]'; +來>'[come]'; +侈>'[luxurious]'; +例>'[precedent]'; +侍>'[serve]'; +侏>'[small]'; +侑>'[help]'; +侖>'[logical reasons]'; +侘>'[disappointed]'; +供>'[supply]'; +依>'[rely on]'; +侠>'[chivalrous person]'; +価>'[price]'; +侫>'[flattery]'; +侭>'[complete]'; +侮>'[insult]'; +侯>'[marquis]'; 
+侵>'[invade]'; +侶>'[companion]'; +便>'[convenience]'; +係>'[bind]'; +促>'[urge]'; +俄>'[sudden(ly)]'; +俊>'[talented]'; +俎>'[chopping board or block]'; +俐>'[smooth]'; +俑>'[wooden figure buried with dead]'; +俔>'[like]'; +俗>'[social customs]'; +俘>'[prisoner of war]'; +俚>'[rustic]'; +俛>'[make effort]'; +保>'[protect]'; +俟>'[wait for]'; +信>'[trust]'; +俣>'[big]'; +#"俣>'[big]'", +俥>'[rickshaw]'; +修>'[study]'; +俯>'[bow down]'; +俳>'[actor]'; +俵>'[divide]'; +俶>'[start]'; +俸>'[wages]'; +俺>'[personal pronoun]'; +俾>'[so that]'; +倅>'[deputy]'; +倆>'[clever]'; +倉>'[granary]'; +個>'[numerary adjunct]'; +倍>'[times]'; +倏>'[hastily]'; +們>'[adjunct pronoun indicate plural]'; +倒>'[fall over]'; +倔>'[stubborn]'; +倖>'[lucky]'; +候>'[wait]'; +倚>'[rely on]'; +借>'[borrow]'; +倡>'[guide]'; +倣>'[imitate]'; +値>'[price]'; +倥>'[boorish]'; +倦>'[be tired of]'; +倨>'[arrogant]'; +倩>'[beautiful]'; +倪>'[feeble]'; +倫>'[normal human relationships]'; +倬>'[noticeable]'; +倭>'[dwarf]'; +倶>'[all]'; +倹>'[temperate]'; +偃>'[cease]'; +假>'[falsehood]'; +偈>'[brave]'; +偉>'[great]'; +偏>'[inclined one side]'; +偐>'[false]'; +偕>'[together]'; +偖>'[rip up]'; +做>'[work]'; +停>'[stop]'; +健>'[strong]'; +偬>'[urgent]'; +偲>'[talented]'; +側>'[side]'; +偵>'[spy]'; +偶>'[accidentally]'; +偸>'[to steal]'; +偽>'[false]'; +傀>'[great]'; +傅>'[tutor]'; +傍>'[by side of]'; +傑>'[hero]'; +傘>'[umbrella]'; +備>'[prepare]'; +傚>'[imitate]'; +催>'[press]'; +傭>'[hire]'; +傲>'[proud]'; +傳>'[summon]'; +傴>'[humpback]'; +債>'[debt]'; +傷>'[wound]'; +傾>'[upset]'; +僂>'[humpback]'; +僅>'[only]'; +僉>'[all]'; +僊>'[Taoist super-being]'; +働>'[labor]'; +像>'[picture]'; +僑>'[sojourn]'; +僕>'[slave]'; +僖>'[joy]'; +僚>'[companion]'; +僞>'[false]'; +僣>'[assume]'; +僥>'[be lucky]'; +僧>'[buddhist priest]'; +僭>'[assume]'; +僮>'[page]'; +僵>'[stiff and motionless]'; +價>'[price]'; +僻>'[out-of-the-way]'; +儀>'[ceremony]'; +儁>'[outstanding]'; +儂>'[I]'; +億>'[hundred million]'; +儉>'[temperate]'; +儒>'[confucian scholar]'; +儔>'[companion]'; +儕>'[a company]'; +#"儕>'[a company]'", +儘>'[utmost]'; 
+#"儘>'[utmost]'", +償>'[repay]'; +儡>'[puppet]'; +優>'[superior]'; +儲>'[save money]'; +儷>'[spouse]'; +儺>'[rich]'; +儻>'[if]'; +儼>'[grave]'; +儿>'[son]'; +兀>'[to cut off the feet]'; +允>'[to grant]'; +元>'[first]'; +兄>'[elder brother]'; +充>'[fill]'; +兆>'[omen]'; +兇>'[atrocious]'; +先>'[first]'; +光>'[light]'; +克>'[gram]'; +兌>'[cash]'; +免>'[spare]'; +兎>'[rabbit]'; +児>'[son]'; +兒>'[son]'; +兔>'[rabbit]'; +党>'[political party]'; +兜>'[pouch]'; +兢>'[fearful]'; +入>'[enter]'; +全>'[maintain]'; +兩>'[two]'; +兪>'[surname]'; +八>'[eight]'; +公>'[fair]'; +六>'[number six]'; +兮>'[exclamatory particle]'; +共>'[together with]'; +兵>'[soldier]'; +其>'[his]'; +具>'[tool]'; +典>'[law]'; +兼>'[unite]'; +冀>'[hope for]'; +冂>'[wide]'; +内>'[inside]'; +円>'[yen]'; +冉>'[tender]'; +冊>'[book]'; +册>'[book]'; +再>'[again]'; +冏>'[[not found in dictionary]]'; +冐>'[risk]'; +冑>'[helmet]'; +冒>'[risk]'; +冓>'[a secluded place]'; +冕>'[crown]'; +冖>'[cover]'; +冗>'[excessive]'; +写>'[write]'; +冠>'[cap]'; +冢>'[burial mound]'; +冤>'[grievance]'; +冥>'[dark]'; +冦>'[bandits]'; +冨>'[abundant]'; +冩>'[write]'; +冪>'[cover-cloth]'; +冫>'[ice]'; +冬>'[winter]'; +冰>'[ice]'; +冱>'[freezing]'; +冲>'[soar]'; +决>'[decide]'; +冴>'[freezing]'; +况>'[condition]'; +冶>'[smelt]'; +冷>'[cold]'; +冽>'[cold and raw]'; +凄>'[bitter cold]'; +凅>'[dried up]'; +准>'[approve]'; +凉>'[cool]'; +凋>'[be withered]'; +凌>'[pure]'; +凍>'[freeze]'; +凖>'[rule]'; +凛>'[to shiver with cold or fear]'; +凜>'[shiver with cold or fear]'; +凝>'[coagulate]'; +几>'[small table]'; +凡>'[all]'; +処>'[place]'; +凧>'[kite]'; +凩>'[wintry wind]'; +凪>'[calm]'; +凭>'[lean on]'; +凰>'[female phoenix]'; +凱>'[triumphant]'; +凵>'[receptacle]'; +凶>'[culprit]'; +凸>'[protrude]'; +凹>'[concave]'; +出>'[go out]'; +函>'[correspondence]'; +凾>'[correspondence]'; +刀>'[knife]'; +刃>'[edged tool]'; +刄>'[edged tool]'; +分>'[divide]'; +切>'[cut]'; +刈>'[cut off]'; +刊>'[publication]'; +刋>'[publication]'; +刎>'[behead]'; +刑>'[punishment]'; +刔>'[scoop out]'; +列>'[line]'; +初>'[beginning]'; +判>'[judge]'; +別>'[separate]'; 
+刧>'[disaster]'; +利>'[gains]'; +刪>'[to cut]'; +刮>'[shave]'; +到>'[go to]'; +刳>'[cut out]'; +制>'[system]'; +刷>'[brush]'; +券>'[certificate]'; +刹>'[temple]'; +刺>'[stab]'; +刻>'[carve]'; +剃>'[shave]'; +剄>'[cut throat]'; +則>'[rule]'; +削>'[scrape off]'; +剋>'[subdue]'; +剌>'[slash]'; +前>'[in front]'; +剏>'[establish]'; +剔>'[pick out]'; +剖>'[split in two]'; +剛>'[hard]'; +剞>'[carving or engraving knife]'; +剣>'[sword]'; +剤>'[medicinal preparation]'; +剥>'[peel]'; +剩>'[leftovers]'; +剪>'[scissors]'; +副>'[assist]'; +剰>'[leftovers]'; +剱>'[sword]'; +割>'[cut]'; +剳>'[brief note]'; +剴>'[sharpen]'; +創>'[establish]'; +剽>'[rob]'; +剿>'[destroy]'; +劃>'[divide]'; +劇>'[theatrical plays]'; +劈>'[cut apart]'; +劉>'[surname]'; +劍>'[sword]'; +劑>'[medicinal preparation]'; +劒>'[sword]'; +劔>'[sword]'; +力>'[power]'; +功>'[achievement]'; +加>'[add to]'; +劣>'[bad]'; +助>'[help]'; +努>'[exert]'; +劫>'[take by force]'; +劬>'[be diligent]'; +劭>'[encourage]'; +励>'[strive]'; +労>'[labor]'; +劵>'[certificate]'; +効>'[efficacious]'; +劼>'[be discreet]'; +劾>'[examine into]'; +勁>'[strong]'; +勃>'[suddenly]'; +勅>'[imperial degree]'; +勇>'[brave]'; +勉>'[endeavor]'; +勍>'[strong]'; +勒>'[strangle]'; +動>'[move]'; +勗>'[enjoin]'; +勘>'[investigate]'; +務>'[affairs]'; +勝>'[victory]'; +勞>'[labor]'; +募>'[levy]'; +勠>'[join forces]'; +勢>'[power]'; +勣>'[achievements]'; +勤>'[industrious]'; +勦>'[destroy]'; +勧>'[recommend]'; +勲>'[meritorious deed]'; +勳>'[meritorious deed]'; +勵>'[strive]'; +勸>'[recommend]'; +勹>'[wrap]'; +勺>'[spoon]'; +勾>'[hook]'; +勿>'[must not]'; +匁>'[Japanese unit of weight (1/1000 of a kan)]'; +匂>'[fragrance]'; +包>'[wrap]'; +匆>'[hastily]'; +匈>'[breast]'; +匍>'[crawl]'; +匏>'[gourd]'; +匐>'[fall prostrate]'; +匕>'[spoon]'; +化>'[change]'; +北>'[north]'; +匙>'[spoon]'; +匚>'[box]'; +匝>'[full circle]'; +匠>'[craftsman]'; +匡>'[correct]'; +匣>'[small box]'; +匪>'[bandits]'; +匯>'[concourse]'; +匱>'[to lack]'; +匳>'[ladies toilet case with mirror]'; +匸>'[box]'; +匹>'[bolt of cloth]'; +区>'[area]'; +医>'[cure]'; +匿>'[hide]'; +區>'[area]'; +十>'[ten]'; 
+千>'[thousand]'; +卅>'[thirty]'; +卆>'[soldier]'; +升>'[arise]'; +午>'[noon]'; +卉>'[general term for plants]'; +半>'[half]'; +卍>'[swastika - fourth of auspicious]'; +卑>'[humble]'; +卒>'[soldier]'; +卓>'[profound]'; +協>'[be united]'; +南>'[south]'; +#"南>'[south]'", +博>'[gamble]'; +卜>'[fortune telling]'; +卞>'[be impatient]'; +占>'[divine]'; +卦>'[fortune telling]'; +卩>'[seal]'; +卮>'[measuring cup]'; +卯>'[4th of Earth Branches]'; +印>'[print]'; +危>'[dangerous]'; +即>'[promptly]'; +却>'[still]'; +卵>'[egg]'; +卷>'[scroll]'; +卸>'[lay down]'; +卻>'[still]'; +卿>'[noble]'; +厂>'[factory]'; +厄>'[adversity]'; +厖>'[bulky]'; +厘>'[thousandth part of tael]'; +厚>'[thick]'; +原>'[source]'; +厠>'[mingle with]'; +厥>'[personal pronoun - he]'; +厦>'[big building]'; +厨>'[kitchen]'; +厩>'[stable]'; +厭>'[dislike]'; +厮>'[servant]'; +厰>'[factory]'; +厳>'[strict]'; +厶>'[private]'; +去>'[go away]'; +参>'[take part in]'; +參>'[take part in]'; +又>'[and]'; +叉>'[crotch]'; +及>'[extend]'; +友>'[friend]'; +双>'[set of two]'; +反>'[reverse]'; +収>'[gather together]'; +叔>'[father''s younger brother]'; +取>'[take]'; +受>'[receive]'; +叙>'[express]'; +叛>'[rebel]'; +叟>'[old man]'; +叡>'[astute]'; +叢>'[bush]'; +口>'[mouth]'; +古>'[old]'; +句>'[sentence]'; +叨>'[talkative]'; +叩>'[knock]'; +只>'[only]'; +叫>'[cry]'; +召>'[imperial decree]'; +叭>'[trumpet]'; +叮>'[exhort or enjoin repeatedly]'; +可>'[may]'; +台>'[platform]'; +叱>'[scold]'; +史>'[history]'; +右>'[right]'; +叶>'[to harmonize]'; +号>'[mark]'; +司>'[take charge of]'; +#"叹>'[sigh]'", +吁>'[interjection \"Alas!\"]'; +吃>'[eat]'; +各>'[each]'; +合>'[combine]'; +吉>'[lucky]'; +吊>'[condole]'; +吋>'[inch]'; +同>'[same]'; +名>'[name]'; +后>'[queen]'; +吏>'[government official]'; +吐>'[vomit]'; +向>'[toward]'; +君>'[sovereign]'; +吝>'[stingy]'; +吟>'[sing]'; +吠>'[bark]'; +否>'[not]'; +吩>'[order]'; +含>'[hold in mouth]'; +听>'[hear]'; +吭>'[throat]'; +吮>'[suck with mouth]'; +吶>'[raise voice]'; +吸>'[inhale]'; +吹>'[blow]'; +吻>'[kiss]'; +吼>'[roar]'; +吽>'[\"OM\"]'; +吾>'[i]'; +呀>'[particle used express surprise]'; 
+呂>'[surname]'; +呆>'[dull]'; +呈>'[submit]'; +呉>'[one of warring states]'; +告>'[tell]'; +呎>'[foot]'; +呑>'[swallow]'; +#"呜>'[sound of crying]'", +周>'[zhou dynasty]'; +呪>'[curse]'; +#"呰>'[................................]'", +呱>'[wail]'; +味>'[taste]'; +呵>'[scold]'; +呶>'[talkative]'; +呷>'[suck]'; +呻>'[groan]'; +呼>'[breathe sigh]'; +命>'[life]'; +咀>'[suck]'; +咄>'[noise of rage]'; +咆>'[roar]'; +咋>'[why? how? what?]'; +和>'[harmony]'; +咎>'[fault]'; +咏>'[sing song or poem]'; +咐>'[instruct]'; +咒>'[curse]'; +咢>'[sound]'; +咤>'[scold]'; +咥>'[sound of cat]'; +咨>'[inquire]'; +咫>'[foot measure of Zhou dynasty]'; +咬>'[bite]'; +咯>'[final particle]'; +咲>'[smile]'; +咳>'[cough]'; +咸>'[together]'; +咼>'[chat]'; +咽>'[throat]'; +咾>'[a noise]'; +哀>'[sad]'; +品>'[article]'; +哂>'[smile]'; +哄>'[coax]'; +哇>'[vomit]'; +哈>'[sound of laughter]'; +哉>'[final exclamatory particle]'; +#"哗>'[rushing sound]'", +員>'[member]'; +哢>'[syllable]'; +哥>'[elder brother]'; +哦>'[oh? really? is that so?]'; +哨>'[whistle]'; +哩>'[mile]'; +哭>'[weep]'; +哮>'[cough]'; +哲>'[wise]'; +哺>'[chew food]'; +哽>'[choke]'; +唄>'[final particle of assertion pathaka]'; +唆>'[make mischief]'; +唇>'[lips]'; +唏>'[weep or sob]'; +唐>'[tang dynasty]'; +唔>'[hold in mouth]'; +唖>'[dumb]'; +售>'[sell]'; +唯>'[only]'; +唱>'[sing]'; +唳>'[cry of bird]'; +唸>'[recite]'; +唹>'[to smile at]'; +唾>'[spit]'; +啀>'[gnaw]'; +啄>'[to peck]'; +#"啄>'[to peck]'", +商>'[commerce]'; +啌>'[animal disease]'; +問>'[ask (about)]'; +啓>'[open]'; +啖>'[eat]'; +啗>'[eat]'; +啜>'[sip]'; +#"啜>'[sip]'", +啣>'[hold in mouth]'; +啻>'[only]'; +啼>'[weep]'; +啾>'[wailing of child]'; +喀>'[vomit]'; +喃>'[keep talking]'; +善>'[good]'; +喇>'[horn]'; +喉>'[throat]'; +喊>'[shout]'; +喋>'[nag]'; +喘>'[pant]'; +喙>'[beak]'; +喚>'[call]'; +喜>'[like]'; +喝>'[drink]'; +喞>'[chirping of insects]'; +喟>'[heave sigh]'; +喧>'[lively]'; +喨>'[wail]'; +喩>'[metaphor]'; +喪>'[mourning]'; +喫>'[eat]'; +喬>'[tall]'; +單>'[single]'; +喰>'[to eat]'; +営>'[encampment]'; +嗄>'[hoarse of voice]'; +嗅>'[smell]'; +嗇>'[miserly]'; +嗔>'[be angry 
at]'; +嗚>'[sound of crying]'; +嗜>'[be fond of]'; +嗟>'[sigh]'; +嗣>'[to connect]'; +嗤>'[laugh at]'; +嗷>'[loud clamor]'; +嗹>'[chatter]'; +嗽>'[cough]'; +嗾>'[to set a dog on]'; +嘆>'[sigh]'; +嘉>'[excellent]'; +嘔>'[vomit]'; +嘖>'[interjection of approval or admi]'; +嘗>'[taste]'; +嘘>'[exhale]'; +嘛>'[final exclamatory particle]'; +嘩>'[rushing sound]'; +嘯>'[roar]'; +嘱>'[order]'; +嘲>'[ridicule]'; +嘴>'[mouth]'; +嘶>'[neighing of a horse]'; +嘸>'[unclear]'; +噂>'[meet]'; +噌>'[scold]'; +噎>'[choke]'; +噐>'[receptacle]'; +噛>'[bite]'; +噤>'[close]'; +器>'[receptacle]'; +噪>'[be noisy]'; +噫>'[belch]'; +噬>'[bite]'; +噴>'[spurt]'; +噸>'[metric ton]'; +噺>'[story]'; +嚀>'[enjoin]'; +嚆>'[give forth sound]'; +嚇>'[scare]'; +嚊>'[to pant]'; +嚏>'[sneeze]'; +嚔>'[sneeze]'; +#"嚜>'[be silent]'", +嚢>'[bag]'; +嚥>'[swallow]'; +嚮>'[guide]'; +嚴>'[strict]'; +嚶>'[seek friends]'; +嚼>'[prattle]'; +囀>'[sing]'; +囁>'[move lip when speaking]'; +囂>'[be noisy]'; +#"囂>'[be noisy]'", +囈>'[talk in one''s sleep]'; +#"囍>'[double happiness]'", +囑>'[order]'; +囓>'[gnaw]'; +囗>'[erect]'; +囘>'[return]'; +囚>'[prisoner]'; +四>'[four]'; +回>'[return]'; +因>'[cause]'; +団>'[sphere]'; +囮>'[inveigle]'; +困>'[surround]'; +囲>'[surround]'; +図>'[diagram]'; +囹>'[prison]'; +固>'[become solid]'; +国>'[nation]'; +囿>'[pen up]'; +圀>'[nation]'; +圃>'[garden]'; +圄>'[prison]'; +圈>'[to circle]'; +圉>'[stable]'; +國>'[nation]'; +圍>'[surround]'; +圏>'[to circle]'; +園>'[garden]'; +圓>'[circle]'; +圖>'[diagram]'; +團>'[sphere]'; +圜>'[circle]'; +土>'[soil]'; +圦>'[(kokuji) water gate]'; +#"圦>'[(kokuji) water gate]'", +在>'[be at]'; +圭>'[jade pointed at top]'; +地>'[earth]'; +#"圳>'[furrow in field]'", +#"圳>'[furrow in field]'", +圻>'[border]'; +址>'[site]'; +坂>'[hillside]'; +均>'[equal]'; +坊>'[neighborhood]'; +坎>'[pit]'; +坏>'[rotten]'; +坐>'[sit]'; +坑>'[pit]'; +坡>'[slope]'; +坤>'[earth]'; +坦>'[flat]'; +坩>'[earthenware]'; +坪>'[level ground]'; +坿>'[mound]'; +垂>'[let down]'; +#"垆>'[black clods of earth]'", +#"垉>'[................................]'", +型>'[pattern]'; +垓>'[border]'; 
+垠>'[boundary]'; +垢>'[dirt]'; +垣>'[low wall]'; +垤>'[ant-hill]'; +#"垩>'[holy]'", +#"垮>'[be defeated]'", +#"垲>'[high and dry place]'", +埀>'[let down]'; +埃>'[fine dust]'; +埆>'[stony]'; +埋>'[bury]'; +城>'[castle]'; +埒>'[enclosure]'; +埓>'[enclosure]'; +埔>'[plain]'; +#"埔>'[plain]'", +埜>'[open country]'; +域>'[district]'; +埠>'[port city]'; +#"埣>'[................................]'", +埴>'[soil with large clay content]'; +執>'[hold in hand]'; +培>'[bank up with dirt]'; +基>'[foundation]'; +埼>'[headland]'; +堀>'[cave]'; +堂>'[hall]'; +堅>'[hard]'; +堆>'[heap]'; +堊>'[white earth]'; +堋>'[bury]'; +堕>'[fall]'; +堙>'[bury]'; +堝>'[crucible]'; +堡>'[fort]'; +堤>'[dike]'; +堪>'[adequately capable of]'; +堯>'[a legendary ancient emperor-sage]'; +堰>'[dam]'; +報>'[report]'; +場>'[open space]'; +堵>'[wall]'; +堺>'[person''s name]'; +堽>'[mound]'; +塀>'[wall]'; +塁>'[rampart]'; +塊>'[piece]'; +塋>'[grave]'; +塑>'[model in clay]'; +塒>'[roost]'; +塔>'[tower]'; +塗>'[smear]'; +塘>'[pond]'; +塙>'[truly]'; +塚>'[cemetery]'; +塞>'[stop up]'; +塢>'[entrenchment]'; +塩>'[salt]'; +填>'[fill in]'; +#"塭>'[[not found in any dictionary]]'", +塲>'[open space]'; +塵>'[dust]'; +塹>'[moat]'; +塾>'[village school]'; +境>'[boundery]'; +墅>'[villa]'; +墓>'[grave]'; +増>'[increase]'; +墜>'[fall down]'; +墟>'[high mound]'; +墨>'[ink]'; +墫>'[cup]'; +墮>'[fall]'; +墳>'[grave]'; +#"墳>'[grave]'", +#"墳>'[grave]'", +墺>'[4 walls]'; +墻>'[wall]'; +墾>'[cultivate]'; +壁>'[partition wall]'; +壅>'[to obstruct]'; +壇>'[altar]'; +壊>'[bad]'; +壌>'[soil]'; +壑>'[bed of torrent]'; +壓>'[press]'; +壕>'[trench]'; +#"壖>'[open space along water]'", +壘>'[rampart]'; +壙>'[tomb]'; +壜>'[earthen jar or jug]'; +壞>'[bad]'; +壟>'[grave]'; +壤>'[soil]'; +#"壥>'[................................]'", +士>'[scholar]'; +壬>'[ninth of ten celestial stems]'; +壮>'[big]'; +壯>'[big]'; +声>'[sound]'; +壱>'[number one]'; +売>'[sell]'; +壷>'[jar]'; +壹>'[number one]'; +壺>'[jar]'; +壻>'[son-in-law]'; +壼>'[palace corridor or passageway]'; +壽>'[old age]'; +夂>'[go]'; +変>'[change]'; +夊>'[Radical No. 
35]'; +夏>'[summer]'; +夐>'[long]'; +夕>'[evening]'; +外>'[out]'; +夘>'[4th of Earth Branches]'; +夙>'[early in morning]'; +多>'[much]'; +夛>'[much]'; +夜>'[night]'; +夢>'[dream]'; +夥>'[companion]'; +大>'[big]'; +天>'[sky]'; +太>'[very]'; +夫>'[man]'; +夬>'[parted]'; +夭>'[young]'; +央>'[center]'; +失>'[lose]'; +夲>'[advance quickly]'; +夷>'[ancient barbarian tribes]'; +夸>'[extravagant]'; +夾>'[be wedged or inserted between]'; +奄>'[ere long]'; +奇>'[strange]'; +奈>'[but]'; +奉>'[offer]'; +奎>'[stride of man]'; +奏>'[memorialize emperor]'; +奐>'[be numerous]'; +契>'[deed]'; +奔>'[run fast]'; +奕>'[in sequence]'; +套>'[case]'; +奘>'[large]'; +奚>'[where? what? how? why?]'; +奠>'[pay respect]'; +奢>'[extravagant]'; +奥>'[mysterious]'; +奧>'[mysterious]'; +奨>'[prize]'; +奩>'[lady''s vanity case]'; +奪>'[take by force]'; +奬>'[prize]'; +奮>'[strive]'; +女>'[woman]'; +奴>'[slave]'; +奸>'[crafty]'; +好>'[good]'; +妁>'[act as go-between]'; +如>'[if]'; +妃>'[wife]'; +妄>'[absurd]'; +妊>'[conceive]'; +妍>'[beautiful]'; +妓>'[prostitute]'; +妖>'[strange]'; +妙>'[mysterious]'; +#"妙>'[mysterious]'", +妝>'[adorn oneself]'; +妣>'[one''s deceased mother]'; +妥>'[satisfactory]'; +妨>'[interfere with]'; +妬>'[jealous]'; +妲>'[concubine of last ruler of shang]'; +妹>'[younger sister]'; +妻>'[wife]'; +妾>'[concubine]'; +姆>'[child''s governess]'; +姉>'[elder sister]'; +始>'[begin]'; +姐>'[elder sister]'; +姑>'[father''s sister]'; +姓>'[one''s family name]'; +委>'[appoint]'; +姙>'[conceive]'; +姚>'[handsome]'; +姜>'[surname]'; +姥>'[maternal grandmother]'; +姦>'[adultery]'; +姨>'[mother/wife''s sister]'; +姪>'[niece]'; +姫>'[beauty]'; +#"姱>'[beautiful]'", +姻>'[relatives by marriage]'; +姿>'[one''s manner]'; +威>'[pomp]'; +娃>'[baby]'; +娉>'[beautiful]'; +娑>'[dance]'; +娘>'[mother]'; +#"娚>'[................................]'", +娜>'[elegant]'; +娟>'[beautiful]'; +娠>'[pregnant]'; +娥>'[be beautiful]'; +娩>'[give birth child]'; +娯>'[pleasure]'; +娵>'[star]'; +娶>'[marry]'; +娼>'[prostitute]'; +婀>'[be beautiful]'; +婁>'[surname]'; +婆>'[old woman]'; +婉>'[amiable]'; +婚>'[get 
married]'; +婢>'[servant girl]'; +婦>'[married women]'; +婪>'[covet]'; +婬>'[obscene]'; +婿>'[son-in-law]'; +媒>'[go-between]'; +媚>'[charming]'; +媛>'[beauty]'; +媼>'[old woman]'; +媽>'[mother]'; +媾>'[marry]'; +嫁>'[marry]'; +嫂>'[sister-in-law]'; +嫉>'[jealousy]'; +嫋>'[slender and delicate]'; +嫌>'[hate]'; +嫐>'[frolic]'; +嫖>'[patronize prostitutes]'; +嫗>'[old woman]'; +嫡>'[legal wife]'; +嫣>'[charming]'; +嫦>'[name of a moon goddess]'; +嫩>'[soft]'; +嫺>'[refined]'; +嫻>'[elegant]'; +嬉>'[enjoy]'; +嬋>'[beautiful]'; +嬌>'[seductive and loveable]'; +嬖>'[favorite]'; +嬢>'[troubled]'; +嬪>'[court lady]'; +嬬>'[mistress]'; +嬰>'[baby]'; +嬲>'[frolic]'; +#"嬴>'[to win]'", +嬾>'[lazy]'; +孀>'[widow]'; +孃>'[troubled]'; +孅>'[slender]'; +子>'[offspring]'; +孑>'[remaining]'; +孔>'[opening]'; +孕>'[be pregnant]'; +字>'[letter]'; +存>'[exist]'; +孚>'[brood over eggs]'; +孛>'[comet]'; +孜>'[be as diligent as possible]'; +孝>'[filial piety]'; +孟>'[first in series]'; +季>'[quarter of year]'; +孤>'[orphan]'; +孥>'[one''s children]'; +学>'[learning]'; +孩>'[baby]'; +孫>'[grandchild]'; +孰>'[who? which? what? 
which one?]'; +孱>'[weak]'; +孳>'[breed in large numbers]'; +孵>'[sit on eggs]'; +學>'[learning]'; +孺>'[child]'; +宀>'[roof]'; +它>'[it]'; +宅>'[residence]'; +宇>'[house]'; +守>'[defend]'; +安>'[peaceful]'; +宋>'[Song dynasty]'; +完>'[complete]'; +宍>'[flesh]'; +宏>'[wide]'; +宕>'[stone quarry]'; +宗>'[lineage]'; +官>'[official]'; +宙>'[time as concept]'; +定>'[decide]'; +宛>'[seem]'; +宜>'[suitable]'; +宝>'[treasure]'; +実>'[real]'; +客>'[guest]'; +宣>'[declare]'; +室>'[room]'; +宥>'[forgive]'; +宦>'[officialdom]'; +宮>'[palace]'; +宰>'[to slaughter]'; +害>'[injure]'; +宴>'[entertain]'; +宵>'[night]'; +家>'[house]'; +宸>'[imperial]'; +容>'[looks]'; +宿>'[stop]'; +寂>'[still]'; +寃>'[grievance]'; +寄>'[send]'; +寅>'[respect]'; +密>'[dense]'; +寇>'[bandits]'; +#"寉>'[................................]'", +富>'[abundant]'; +寐>'[sleep]'; +寒>'[cold]'; +寓>'[residence]'; +寔>'[real]'; +寛>'[broad]'; +寝>'[sleep]'; +寞>'[silent]'; +察>'[examine]'; +寡>'[widowed]'; +寢>'[sleep]'; +寤>'[few]'; +寥>'[few]'; +實>'[real]'; +寧>'[repose]'; +寨>'[stockade]'; +審>'[examine]'; +寫>'[write]'; +寮>'[shanty]'; +寰>'[great domain]'; +寳>'[treasure]'; +寵>'[favorite]'; +寶>'[treasure]'; +寸>'[inch]'; +寺>'[court]'; +対>'[correct]'; +寿>'[old age]'; +封>'[letter]'; +専>'[monopolize]'; +射>'[shoot]'; +尅>'[subdue]'; +将>'[will]'; +將>'[will]'; +專>'[monopolize]'; +尉>'[officer]'; +尊>'[respect]'; +尋>'[seek]'; +對>'[correct]'; +導>'[direct]'; +小>'[small]'; +少>'[few]'; +尓>'[you]'; +尖>'[sharp]'; +尚>'[still]'; +尠>'[very few]'; +尢>'[weak]'; +尤>'[especially]'; +尨>'[shaggy haired dog]'; +尭>'[a legendary ancient emperor-sage]'; +就>'[just]'; +尸>'[corpse]'; +尹>'[govern]'; +尺>'[chinese measure approx. 
\"foot\"]'; +尻>'[end of spine]'; +尼>'[buddhist nun]'; +尽>'[exhaust]'; +尾>'[tail]'; +尿>'[urine]'; +局>'[bureau]'; +屁>'[break wind]'; +居>'[live]'; +屆>'[numerary adjunct for time]'; +屈>'[bend]'; +届>'[numerary adjunct for time]'; +屋>'[house]'; +屍>'[corpse]'; +屎>'[excrement]'; +屏>'[folding screen]'; +屐>'[wooden shoes]'; +屑>'[bits]'; +屓>'[gigantic strength]'; +展>'[open]'; +属>'[class]'; +屠>'[butcher]'; +屡>'[frequently]'; +層>'[storey]'; +履>'[footwear]'; +屬>'[class]'; +屮>'[sprout]'; +屯>'[village]'; +山>'[mountain]'; +屶>'[lofty]'; +屹>'[to rise high]'; +岌>'[perilous]'; +岐>'[high]'; +岑>'[steep]'; +岔>'[diverge]'; +岡>'[ridge or crest of hill]'; +岨>'[uneven]'; +岩>'[cliff]'; +岫>'[mountain peak]'; +岬>'[cape]'; +岱>'[daishan one of five sacred mount]'; +岳>'[mountain peak]'; +#"岶>'[................................]'", +岷>'[min mountain]'; +岸>'[bank]'; +#"岺>'[mountain ridge]'", +#"岺>'[mountain ridge]'", +岾>'[mountain pass (korean)]'; +#"峄>'[range of peaks]'", +峇>'[cave]'; +峙>'[stand erect]'; +峠>'[mountain pass]'; +峡>'[gorge]'; +峨>'[lofty]'; +峩>'[lofty]'; +峪>'[valley]'; +峭>'[steep]'; +峯>'[peak]'; +峰>'[peak]'; +島>'[island]'; +#"峺>'[................................]'", +峻>'[high]'; +峽>'[gorge]'; +崇>'[esteem]'; +崋>'[flowery]'; +崎>'[rough]'; +崑>'[Kunlun mountains in Jiang Su province.]'; +崔>'[high]'; +崕>'[cliff]'; +崖>'[cliff]'; +崗>'[post]'; +崘>'[kunlun mountains in jiangsu]'; +崙>'[kunlun mountains in jiangsu]'; +崚>'[hilly]'; +崛>'[towering]'; +崟>'[cliffs]'; +崢>'[high]'; +崩>'[rupture]'; +嵋>'[omei mountain in sichuan]'; +嵌>'[inlay]'; +嵎>'[mountain recess]'; +嵐>'[mountain mist]'; +嵒>'[cliff]'; +嵜>'[rough]'; +嵩>'[high]'; +嵬>'[high]'; +嵯>'[high]'; +嵳>'[high]'; +嵶>'[low part of a mountain]'; +嶂>'[cliff]'; +嶄>'[high]'; +嶇>'[steep]'; +嶋>'[island]'; +嶌>'[island]'; +#"嶐>'[................................]'", +嶝>'[path leading up a mountain]'; +嶢>'[high or tall]'; +#"嶬>'[................................]'", +嶮>'[high]'; +嶷>'[range of mountains in hunan prov]'; +嶺>'[mountain ridge]'; +嶼>'[island]'; 
+嶽>'[mountain peak]'; +巉>'[steep]'; +巌>'[cliff]'; +巍>'[high]'; +巒>'[mountain range]'; +巓>'[summit of mountain]'; +巖>'[cliff]'; +巛>'[river]'; +川>'[stream]'; +州>'[administrative division]'; +巡>'[patrol]'; +巣>'[nest]'; +工>'[labor]'; +左>'[left]'; +巧>'[skillful]'; +巨>'[large]'; +巫>'[wizard]'; +差>'[differ]'; +己>'[self]'; +已>'[already]'; +巳>'[sixth of twelve branches]'; +巴>'[greatly desire]'; +巵>'[measuring cup]'; +巷>'[alley]'; +巻>'[scroll]'; +巽>'[5th of the 8 trigrams]'; +巾>'[kerchief]'; +市>'[market]'; +布>'[cotton cloth]'; +帆>'[sail]'; +帋>'[paper]'; +希>'[rare]'; +帑>'[a treasury]'; +帖>'[invitation card]'; +帙>'[book cover]'; +帚>'[broom]'; +帛>'[silks]'; +帝>'[supreme ruler]'; +帥>'[commander]'; +師>'[teacher]'; +席>'[seat]'; +帯>'[belt]'; +帰>'[return]'; +帳>'[tent]'; +帶>'[belt]'; +帷>'[tent]'; +常>'[common]'; +帽>'[hat]'; +幀>'[picture]'; +幃>'[curtain that forms wall]'; +幄>'[tent]'; +幅>'[piece]'; +幇>'[help]'; +幌>'[curtain]'; +幎>'[cover-cloth]'; +幔>'[curtain]'; +幕>'[curtain]'; +幗>'[women''s headgear]'; +幟>'[flag]'; +幡>'[pennant]'; +幢>'[carriage curtain]'; +幣>'[currency]'; +幤>'[evil]'; +干>'[oppose]'; +平>'[flat]'; +年>'[year]'; +幵>'[even level. to raise in both hands]'; +并>'[combine]'; +幸>'[luck(ily)]'; +幹>'[trunk of tree or of human body]'; +幺>'[one]'; +幻>'[illusion]'; +幼>'[infant]'; +幽>'[quiet]'; +幾>'[how many? how much? 
(a)few]'; +广>'[wide]'; +庁>'[hall]'; +広>'[broad]'; +庄>'[village]'; +庇>'[cover]'; +床>'[bed]'; +序>'[series]'; +底>'[bottom]'; +庖>'[kitchen]'; +店>'[shop]'; +庚>'[seventh of ten cyclical stems]'; +府>'[prefecture]'; +庠>'[village school]'; +度>'[degree]'; +座>'[seat]'; +庫>'[armory]'; +庭>'[courtyard]'; +庵>'[buddhist monastery or nunnery]'; +庶>'[numerous]'; +康>'[peaceful]'; +庸>'[usual]'; +廁>'[toilet]'; +廂>'[side-room]'; +廃>'[abrogate]'; +廈>'[big building]'; +廉>'[upright]'; +廊>'[corridor]'; +廏>'[stable]'; +廐>'[stable]'; +廓>'[broad]'; +廖>'[surname]'; +廚>'[kitchen]'; +廛>'[store]'; +廝>'[servant]'; +廟>'[temple]'; +廠>'[factory]'; +廡>'[corridor]'; +廢>'[abrogate]'; +廣>'[broad]'; +廨>'[government office]'; +廩>'[granary]'; +廬>'[hut]'; +廰>'[hall]'; +廱>'[harmonious]'; +廳>'[hall]'; +廴>'[go]'; +延>'[delay]'; +廷>'[court]'; +廸>'[enlighten]'; +建>'[build]'; +廻>'[circle around]'; +廼>'[then]'; +廾>'[two hands]'; +廿>'[twenty]'; +弁>'[conical cap worn under zhou dyna]'; +弃>'[reject]'; +弄>'[do]'; +弉>'[large]'; +弊>'[evil]'; +弋>'[catch]'; +弌>'[number one]'; +弍>'[number two]'; +式>'[style]'; +弐>'[number two]'; +弑>'[to kill one''s superior]'; +弓>'[bow]'; +弔>'[condole]'; +引>'[pull]'; +弖>'[phonetic for \"te\" (Japanese)]'; +弗>'[not]'; +弘>'[enlarge]'; +弛>'[loosen]'; +弟>'[young brother]'; +弥>'[extensive]'; +弦>'[string]'; +弧>'[wooden bow]'; +弩>'[cross-bow]'; +弭>'[stop]'; +弯>'[bend]'; +弱>'[weak]'; +張>'[stretch]'; +強>'[strong]'; +弸>'[bow stretched full]'; +弼>'[aid]'; +#"弼>'[aid]'", +#"彁>'[................................]'", +彈>'[pellet]'; +彊>'[stubborn]'; +彌>'[extensive]'; +彎>'[bend]'; +彑>'[snout]'; +当>'[bear]'; +彖>'[a hog]'; +彗>'[broomstick]'; +彙>'[collect]'; +彜>'[yi]'; +彝>'[yi]'; +彡>'[hair]'; +形>'[form]'; +彦>'[elegant]'; +彩>'[hue]'; +彪>'[tiger]'; +彫>'[carve]'; +彬>'[cultivated]'; +彭>'[name of ancient country]'; +彰>'[clear]'; +影>'[shadow]'; +彳>'[step with left foot]'; +彷>'[like]'; +役>'[service]'; +彼>'[that]'; +彿>'[resembling]'; +往>'[go]'; +征>'[invade]'; +徂>'[go]'; +徃>'[go]'; +径>'[narrow path]'; +待>'[treat]'; 
+徇>'[comply with]'; +很>'[very]'; +徊>'[linger]'; +律>'[statute]'; +後>'[behind]'; +徐>'[slowly]'; +徑>'[narrow path]'; +徒>'[disciple]'; +従>'[from]'; +得>'[obtain]'; +徘>'[walk back and forth]'; +徙>'[move one''s abode]'; +從>'[from]'; +徠>'[induce]'; +御>'[drive]'; +徨>'[doubtful]'; +復>'[return]'; +循>'[obey]'; +徭>'[conscript labor]'; +微>'[small]'; +徳>'[virtue]'; +徴>'[summon]'; +徹>'[penetrate]'; +徼>'[frontier]'; +徽>'[a badge]'; +心>'[heart]'; +必>'[surely]'; +忌>'[jealous]'; +忍>'[endure]'; +忖>'[guess]'; +志>'[purpose]'; +忘>'[forget]'; +忙>'[busy]'; +応>'[should]'; +忝>'[disgrace]'; +忠>'[loyalty]'; +忤>'[insubordinate]'; +快>'[rapid]'; +忰>'[suffer]'; +忱>'[truth]'; +念>'[think of]'; +忸>'[blush]'; +忻>'[delightful]'; +忽>'[suddenly]'; +忿>'[get angry]'; +怎>'[what? why? how?]'; +怏>'[discontented]'; +#"怐>'[................................]'", +怒>'[anger]'; +怕>'[fear]'; +怖>'[terror]'; +怙>'[rely on]'; +怛>'[grieved]'; +怜>'[pity]'; +思>'[think]'; +怠>'[idle]'; +怡>'[harmony]'; +急>'[quick]'; +怦>'[eager]'; +性>'[nature]'; +怨>'[hatred]'; +怩>'[shy]'; +怪>'[strange]'; +怫>'[sorry]'; +怯>'[lacking in courage]'; +怱>'[hastily]'; +怺>'[to endure]'; +恁>'[that]'; +恂>'[careful]'; +恃>'[rely on]'; +恆>'[constant]'; +恊>'[be united]'; +恋>'[love]'; +恍>'[seemingly]'; +恐>'[fear]'; +恒>'[constant]'; +恕>'[forgive]'; +恙>'[illness]'; +恚>'[anger]'; +恟>'[scared]'; +恠>'[strange]'; +恢>'[restore]'; +恣>'[indulge oneself]'; +恤>'[show pity]'; +恥>'[shame]'; +恨>'[hatred]'; +恩>'[kindness]'; +恪>'[respectful]'; +恫>'[in pain]'; +恬>'[quiet]'; +恭>'[respectful]'; +息>'[rest]'; +恰>'[just]'; +恵>'[favor]'; +#"恶>'[evil]'", +悁>'[irritable]'; +悃>'[sincere]'; +悄>'[silent]'; +悉>'[know]'; +悋>'[stingy]'; +悌>'[brotherly]'; +悍>'[courageous]'; +悒>'[sorrowful]'; +悔>'[repent]'; +悖>'[be contradictory to]'; +#"悖>'[be contradictory to]'", +悚>'[be afraid]'; +悛>'[repent]'; +悟>'[apprehend]'; +悠>'[long]'; +患>'[suffer]'; +悦>'[pleased]'; +悧>'[smooth]'; +悩>'[angered]'; +悪>'[evil]'; +悲>'[sorrow]'; +悳>'[ethics]'; +悴>'[suffer]'; +悵>'[disappointed]'; +悶>'[gloomy]'; 
+悸>'[fearful]'; +悼>'[grieve]'; +悽>'[sorrowful]'; +情>'[feeling]'; +惆>'[distressed]'; +惇>'[be kind]'; +惑>'[confuse]'; +惓>'[careful]'; +惘>'[disconcerted]'; +惚>'[absent-minded]'; +惜>'[pity]'; +惟>'[but]'; +惠>'[favor]'; +惡>'[evil]'; +惣>'[overall [questionable variant]]'; +惧>'[fear]'; +惨>'[sad]'; +惰>'[indolent]'; +惱>'[angered]'; +想>'[think]'; +惴>'[afraid]'; +惶>'[fearful]'; +惷>'[wriggle]'; +惹>'[irritate]'; +惺>'[intelligent]'; +惻>'[feel anguish]'; +愀>'[change one''s countenance]'; +愁>'[anxiety]'; +愃>'[relax]'; +愆>'[fault]'; +愈>'[more and more]'; +愉>'[pleasant]'; +愍>'[pity]'; +愎>'[obstinate]'; +意>'[thought]'; +愕>'[startled]'; +愚>'[stupid]'; +愛>'[love]'; +感>'[feel]'; +愡>'[absent-minded]'; +愧>'[ashamed]'; +愨>'[sincerity]'; +愬>'[accuse]'; +愴>'[sad]'; +愼>'[act with care]'; +愽>'[gamble]'; +愾>'[anger]'; +愿>'[sincere]'; +慂>'[urge]'; +慄>'[shiver]'; +慇>'[careful]'; +慈>'[kind]'; +慊>'[to resent]'; +態>'[manner]'; +慌>'[nervous]'; +慍>'[angry]'; +慎>'[act with care]'; +#"慓>'[................................]'", +慕>'[long for]'; +慘>'[sad]'; +慙>'[ashamed]'; +慚>'[ashamed]'; +慝>'[do evil in secret]'; +慟>'[sadness]'; +慢>'[slow(ly)]'; +慣>'[habit]'; +慥>'[sincere]'; +慧>'[bright]'; +慨>'[sigh]'; +慫>'[to alarm]'; +慮>'[be concerned]'; +#"慯>'[................................]'", +慰>'[comfort]'; +慱>'[sad]'; +慳>'[miserly]'; +慴>'[fear]'; +慵>'[indolent]'; +慶>'[congratulate]'; +慷>'[ardent]'; +慾>'[lust]'; +憂>'[sad]'; +憇>'[rest]'; +憊>'[tired]'; +憎>'[hate]'; +憐>'[pity]'; +憑>'[lean on]'; +憔>'[be worn-out]'; +憖>'[cautious]'; +憙>'[like]'; +憚>'[dread]'; +憤>'[resent]'; +憧>'[irresolute]'; +憩>'[rest]'; +憫>'[pity]'; +憬>'[rouse]'; +憮>'[regretful]'; +憲>'[constitution]'; +憶>'[remember]'; +憺>'[peace]'; +憾>'[regret]'; +懃>'[courteous]'; +懆>'[anxious]'; +懇>'[sincere]'; +懈>'[idle]'; +應>'[should]'; +懊>'[vexed]'; +懋>'[splendid]'; +懌>'[enjoy]'; +懍>'[be afraid of]'; +懐>'[bosom]'; +懣>'[be sick at heart]'; +懦>'[weak]'; +懲>'[punish]'; +懴>'[regret]'; +懶>'[lazy]'; +懷>'[bosom]'; +懸>'[hang]'; +懺>'[regret]'; +懼>'[fear]'; +懽>'[happy]'; 
+懾>'[afraid]'; +懿>'[virtuous]'; +戀>'[love]'; +戈>'[halberd]'; +戉>'[a battle-axe]'; +戊>'[fifth of ten celestial stems]'; +戌>'[eleventh of terrestrial branches]'; +戍>'[defend borders]'; +戎>'[arms]'; +成>'[completed]'; +我>'[our]'; +戒>'[warn]'; +戔>'[small]'; +或>'[or]'; +戚>'[relative]'; +戛>'[lance]'; +戝>'[pirate]'; +戞>'[lance]'; +戟>'[halberd with crescent blade]'; +戡>'[subjugate]'; +戦>'[war]'; +截>'[cut off]'; +戮>'[kill]'; +戯>'[theatrical play]'; +戰>'[war]'; +戲>'[theatrical play]'; +戳>'[prick]'; +戴>'[wear on top]'; +戸>'[door]'; +戻>'[perverse]'; +房>'[house]'; +所>'[place]'; +扁>'[flat]'; +扇>'[fan]'; +扈>'[escort]'; +扉>'[door panel]'; +手>'[hand]'; +才>'[talent]'; +扎>'[pull up]'; +打>'[strike]'; +払>'[shake off]'; +托>'[hold up with palm]'; +扛>'[carry on shoulders]'; +扞>'[ward off]'; +扠>'[pick up with fork or pincers]'; +扣>'[knock]'; +扨>'[pick up with fork or pincers]'; +扮>'[dress up]'; +扱>'[collect]'; +扶>'[support]'; +批>'[comment]'; +扼>'[grasp]'; +找>'[search]'; +承>'[inherit]'; +技>'[skill]'; +#"抂>'[................................]'", +抃>'[to clap hands]'; +抄>'[copy]'; +抉>'[choose]'; +把>'[hold]'; +抑>'[press down]'; +抒>'[express]'; +抓>'[scratch]'; +抔>'[take or hold up in both hands]'; +投>'[throw]'; +抖>'[tremble]'; +抗>'[resist]'; +折>'[break off]'; +抛>'[throw (away)]'; +抜>'[uproot]'; +択>'[select]'; +披>'[wear]'; +抬>'[lift]'; +抱>'[embrace]'; +抵>'[resist]'; +抹>'[smear]'; +抻>'[pull]'; +押>'[mortgage]'; +抽>'[draw out]'; +拂>'[shake off]'; +担>'[carry]'; +拆>'[break up]'; +拇>'[thumb]'; +拈>'[pick up with fingers]'; +拉>'[pull]'; +拊>'[slap]'; +拌>'[mix]'; +拍>'[clap]'; +拏>'[take]'; +拐>'[kidnap]'; +拑>'[to clamp]'; +拒>'[ward off with hand]'; +拓>'[expand]'; +拔>'[uproot]'; +拗>'[obstinate]'; +拘>'[restrain]'; +拙>'[stupid]'; +招>'[beckon]'; +拜>'[do obeisance]'; +拝>'[do obeisance]'; +拠>'[occupy]'; +拡>'[expand]'; +括>'[include]'; +拭>'[wipe away stains with cloth]'; +拮>'[laboring hard]'; +拯>'[help]'; +拱>'[fold hands on breast]'; +拳>'[fist]'; +#"拴>'[bind with rope]'", +拶>'[press]'; +拷>'[torture and interrogate]'; 
+拾>'[pick up]'; +拿>'[take]'; +持>'[sustain]'; +挂>'[hang]'; +指>'[finger]'; +挈>'[assist]'; +按>'[put hand on]'; +挌>'[fight]'; +挑>'[load carried on shoulders]'; +挙>'[raise]'; +挟>'[clasp under arm]'; +#"挥>'[direct]'", +挨>'[near]'; +挫>'[push down]'; +振>'[raise]'; +挺>'[stand upright]'; +挽>'[pull]'; +挾>'[clasp under arm]'; +挿>'[insert]'; +捉>'[grasp]'; +捌>'[break open]'; +捍>'[ward off]'; +捏>'[pick with fingers]'; +捐>'[contribute]'; +捕>'[arrest]'; +捗>'[make progress]'; +捜>'[search]'; +捧>'[hold up in two hands]'; +捨>'[discard]'; +捩>'[twist with hands]'; +捫>'[stoke]'; +据>'[occupy]'; +捲>'[curl]'; +捶>'[strike with stick]'; +捷>'[win]'; +捺>'[press down heavily with fringers]'; +捻>'[twist with fingers]'; +掀>'[lift]'; +掃>'[sweep]'; +授>'[give to]'; +掉>'[turn]'; +掌>'[palm of hand]'; +掎>'[drag aside]'; +掏>'[take out]'; +排>'[row]'; +掖>'[stick in]'; +掘>'[dig]'; +掛>'[hang]'; +#"掟>'[................................]'", +掠>'[rob]'; +採>'[gather]'; +探>'[find]'; +掣>'[drag]'; +接>'[receive]'; +控>'[accuse]'; +推>'[push]'; +掩>'[cover]'; +措>'[place]'; +掫>'[be on night watch]'; +掬>'[grasp with both hands]'; +#"掱>'[pickpocket]'", +掴>'[box one''s ears]'; +#"掴>'[box one''s ears]'", +掻>'[scratch lightly]'; +掾>'[a general designation of officials]'; +揀>'[choose]'; +揃>'[shear]'; +揄>'[lift]'; +揆>'[prime minister]'; +揉>'[rub]'; +描>'[copy]'; +提>'[hold in hand]'; +插>'[insert]'; +揖>'[salute]'; +揚>'[scatter]'; +換>'[change]'; +握>'[grasp]'; +揣>'[put things under clothes]'; +揩>'[rub and wipe]'; +揮>'[direct]'; +援>'[aid]'; +揶>'[make fun of]'; +揺>'[wag]'; +搆>'[pull]'; +損>'[diminish]'; +搏>'[seize]'; +搓>'[trample]'; +搖>'[wag]'; +搗>'[hull]'; +搜>'[search]'; +搦>'[grasp]'; +搨>'[rub]'; +搬>'[transfer]'; +搭>'[join together]'; +搴>'[extract]'; +搶>'[plunder]'; +携>'[lead by hand]'; +搾>'[press]'; +摂>'[take in]'; +摎>'[to strangle]'; +摘>'[pluck]'; +摧>'[destroy]'; +摩>'[rub]'; +摯>'[sincere]'; +摶>'[roll around with hand]'; +摸>'[gently touch with hand]'; +摺>'[fold]'; +撃>'[strike]'; +撈>'[scoop out of water]'; +撒>'[release]'; 
+撓>'[scratch]'; +撕>'[rip]'; +撚>'[twirl in fingers]'; +撞>'[knock against]'; +撤>'[omit]'; +撥>'[move]'; +撩>'[lift up]'; +撫>'[pat]'; +播>'[sow]'; +撮>'[little bit]'; +撰>'[compose]'; +撲>'[pound]'; +撹>'[disturb]'; +撻>'[flog]'; +撼>'[move]'; +擁>'[embrace]'; +擂>'[rub]'; +擅>'[monopolize]'; +擇>'[select]'; +操>'[conduct]'; +擒>'[catch]'; +擔>'[carry]'; +擘>'[thumb]'; +據>'[occupy]'; +擠>'[crowd]'; +擡>'[carry]'; +擢>'[pull up]'; +擣>'[hull]'; +擦>'[wipe]'; +擧>'[raise]'; +擬>'[draft]'; +擯>'[exclude]'; +擱>'[place]'; +擲>'[throw]'; +擴>'[expand]'; +#"擶>'[................................]'", +擺>'[put]'; +擽>'[tickle]'; +擾>'[disturb]'; +攀>'[climb]'; +攅>'[save]'; +攘>'[seize]'; +攜>'[lead by hand]'; +攝>'[take in]'; +攣>'[tangled]'; +攤>'[spread out]'; +攪>'[disturb]'; +攫>'[snatch away]'; +攬>'[grasp]'; +支>'[disperse]'; +攴>'[rap]'; +攵>'[rap]'; +收>'[gather together]'; +攷>'[examine]'; +攸>'[distant]'; +改>'[change]'; +攻>'[attack]'; +放>'[put]'; +政>'[government]'; +故>'[ancient]'; +效>'[result]'; +敍>'[express]'; +敏>'[fast]'; +救>'[save]'; +敕>'[an imperial order or decree]'; +敖>'[ramble]'; +敗>'[be defeated]'; +敘>'[express]'; +教>'[teach]'; +敝>'[break]'; +敞>'[roomy]'; +敢>'[dare]'; +散>'[scatter]'; +敦>'[esteem]'; +敬>'[respect]'; +数>'[number]'; +敲>'[strike]'; +整>'[orderly]'; +敵>'[enemy]'; +敷>'[spread]'; +數>'[number]'; +斂>'[draw back]'; +斃>'[kill]'; +文>'[literature]'; +斈>'[learning]'; +斉>'[even]'; +斌>'[refined]'; +斎>'[vegetarian diet]'; +斐>'[graceful]'; +斑>'[mottled]'; +斗>'[chinese peck]'; +料>'[consider]'; +斛>'[dry measure 10 or 5 times of dou]'; +斜>'[slanting]'; +斟>'[pour wine or tea into cup]'; +斡>'[revolve]'; +斤>'[catty]'; +斥>'[scold]'; +斧>'[axe]'; +斫>'[cut]'; +斬>'[cut]'; +断>'[sever]'; +斯>'[this]'; +新>'[new]'; +斷>'[sever]'; +方>'[square]'; +於>'[in]'; +施>'[grant]'; +旁>'[side]'; +旃>'[silk banner with bent pole]'; +旄>'[a kind of ancient flag]'; +旅>'[trip]'; +旆>'[flag ornament]'; +旋>'[revolve]'; +旌>'[banner or flag adorned with feat]'; +族>'[family clan]'; +旒>'[fringes of pearls on crowns]'; +旗>'[banner]'; +旙>'[a pennant]'; 
+旛>'[a pennant]'; +无>'[negative]'; +旡>'[choke on something eaten]'; +既>'[already]'; +日>'[sun]'; +旦>'[dawn]'; +旧>'[old]'; +旨>'[purpose]'; +早>'[early]'; +旬>'[ten-day period]'; +旭>'[rising sun]'; +旱>'[drought]'; +旺>'[prosper]'; +旻>'[heaven]'; +昂>'[rise]'; +昃>'[afternoon]'; +昆>'[elder brother]'; +昇>'[rise]'; +昊>'[summer time]'; +昌>'[light of sun]'; +明>'[bright]'; +昏>'[dusk]'; +易>'[change]'; +昔>'[formerly]'; +昜>'[to open out]'; +星>'[star]'; +映>'[project]'; +春>'[spring]'; +昧>'[obscure]'; +昨>'[yesterday]'; +昭>'[bright]'; +是>'[indeed]'; +昴>'[one of the 28 constellations]'; +昵>'[intimate]'; +昶>'[a long day. bright. extended. clear]'; +昼>'[daytime]'; +昿>'[extensive]'; +晁>'[morning]'; +時>'[time]'; +晃>'[bright]'; +晄>'[bright]'; +晉>'[advance]'; +晋>'[advance]'; +晏>'[peaceful]'; +晒>'[dry in sun]'; +晝>'[daytime]'; +晞>'[dry]'; +晟>'[clear]'; +晢>'[light of stars]'; +晤>'[have interview with]'; +晦>'[dark]'; +晧>'[daybreak]'; +晨>'[early morning]'; +晩>'[night]'; +普>'[universal]'; +景>'[scenery]'; +晰>'[clear]'; +晴>'[clear weather]'; +晶>'[crystal]'; +智>'[wisdom]'; +暁>'[dawn]'; +#"暂>'[temporary]'", +暄>'[warm]'; +暇>'[leisure]'; +暈>'[halo in sky]'; +暉>'[sunshine]'; +暎>'[sun beginning decline]'; +暑>'[hot]'; +暖>'[warm]'; +暗>'[dark]'; +暘>'[rising sun]'; +暝>'[dark]'; +暢>'[smoothly]'; +暦>'[calendar]'; +暫>'[temporary]'; +暮>'[evening]'; +暴>'[violent]'; +暸>'[bright]'; +暹>'[rise]'; +暼>'[take fleeting glance at]'; +暾>'[morning sun]'; +曁>'[and]'; +曄>'[bright]'; +曇>'[become cloudy]'; +曉>'[dawn]'; +曖>'[obscure]'; +曙>'[bright]'; +曚>'[twilight just before sun rises]'; +曜>'[glorious]'; +曝>'[sun]'; +曠>'[extensive]'; +曦>'[sunlight]'; +曩>'[in ancient times]'; +曰>'[say]'; +曲>'[crooked]'; +曳>'[trail]'; +更>'[more]'; +曵>'[trail]'; +曷>'[why? what? 
where?]'; +書>'[book]'; +曹>'[ministry officials]'; +曼>'[long]'; +曽>'[already]'; +曾>'[already]'; +替>'[change]'; +最>'[most]'; +會>'[assemble]'; +月>'[moon]'; +有>'[have]'; +朋>'[friend]'; +服>'[clothes]'; +朏>'[light of crescent moon]'; +朔>'[first day of lunar month]'; +朕>'[pronoun \"i\"]'; +朖>'[clear]'; +朗>'[clear]'; +望>'[look at or forward]'; +朝>'[dynasty]'; +朞>'[full year]'; +期>'[period of time]'; +朦>'[condition or appearance of moon]'; +朧>'[condition or appearance of moon]'; +木>'[tree]'; +未>'[not yet]'; +末>'[final]'; +本>'[root]'; +札>'[letter]'; +朮>'[skill]'; +朱>'[cinnabar]'; +朴>'[simple]'; +朶>'[cluster of flowers]'; +#"朶>'[cluster of flowers]'", +#"朶>'[cluster of flowers]'", +机>'[desk]'; +朽>'[decayed]'; +朿>'[stab]'; +#"杀>'[kill]'", +杆>'[pole]'; +杉>'[various species of pine and fir]'; +李>'[plum]'; +杏>'[apricot]'; +材>'[material]'; +村>'[village]'; +杓>'[handle of cup]'; +杖>'[cane]'; +杙>'[a tiny wooden post]'; +杜>'[stop]'; +杞>'[willow]'; +束>'[bind]'; +杠>'[lever]'; +条>'[clause]'; +杢>'[woodworker]'; +#"杢>'[woodworker]'", +杤>'[type of oak]'; +来>'[come]'; +杪>'[tip of twig]'; +杭>'[cross stream]'; +杯>'[cup]'; +杰>'[hero]'; +東>'[east]'; +杲>'[bright sun]'; +杳>'[obscure]'; +杵>'[pestle]'; +杷>'[loquat]'; +杼>'[shuttle of loom]'; +松>'[pine tree]'; +板>'[plank]'; +#"枅>'[................................]'", +枇>'[loquat]'; +枉>'[useless]'; +枋>'[sandalwood]'; +枌>'[variety of elm with small seeds]'; +析>'[split wood]'; +枕>'[pillow]'; +林>'[forest]'; +枚>'[stalk of shrub]'; +果>'[fruit]'; +枝>'[branches]'; +#"枟>'[wood streaks]'", +#"枟>'[wood streaks]'", +枢>'[door hinge]'; +#"枦>'[................................]'", +枩>'[pine tree]'; +枯>'[dried out]'; +枳>'[trifoliate orange]'; +枴>'[cane]'; +架>'[rack]'; +枷>'[cangue scaffold]'; +枸>'[kind of aspen found in sichuan]'; +枹>'[drumstick]'; +柁>'[large tie-beams]'; +柄>'[handle]'; +#"柆>'[................................]'", +#"柈>'[container]'", +柎>'[calyx of flower]'; +柏>'[cypress]'; +某>'[certain thing or person]'; +柑>'[tangerine]'; +染>'[dye]'; +柔>'[soft]'; 
+柘>'[a thorny tree]'; +柚>'[pumelo]'; +柝>'[watchman''s rattle]'; +柞>'[oak]'; +柢>'[root]'; +柤>'[hawthorn]'; +#"柧>'[................................]'", +柩>'[coffin which contains corpse]'; +柬>'[letter]'; +柮>'[flat pieces of wood]'; +柯>'[axe-handle]'; +柱>'[pillar]'; +柳>'[willow tree]'; +柴>'[firewood]'; +柵>'[fence]'; +査>'[investigate]'; +柾>'[straight grain]'; +柿>'[persimmon]'; +栂>'[a kind of evergreen tree]'; +栃>'[type of oak]'; +栄>'[glory]'; +栓>'[wooden peg]'; +栖>'[perch]'; +栗>'[chestnut tree]'; +栞>'[publication]'; +校>'[school]'; +栢>'[cypress]'; +栩>'[species of oak]'; +株>'[numerary adjunct for trees]'; +栫>'[fence]'; +栲>'[mangrove]'; +栴>'[sandalwood]'; +核>'[seed]'; +根>'[root]'; +格>'[pattern]'; +栽>'[cultivate]'; +桀>'[chicken roost]'; +桁>'[cross-beams of roof]'; +桂>'[cassia or cinnamon]'; +桃>'[peach]'; +框>'[frame]'; +案>'[table]'; +#"桍>'[................................]'", +桎>'[fetters]'; +桐>'[name applied various trees]'; +桑>'[mulberry tree]'; +桓>'[variety of tree]'; +桔>'[chinese bellflower]'; +#"桙>'[................................]'", +桜>'[cherry]'; +桝>'[................]'; +#"桝>'[................]'", +档>'[shelf]'; +桧>'[chinese cypress]'; +桴>'[raft]'; +桶>'[pail]'; +桷>'[rafter]'; +#"桾>'[................................]'", +桿>'[pole]'; +梁>'[bridge]'; +梃>'[a club]'; +梅>'[plums]'; +梍>'[tree name]'; +梏>'[handcuffs]'; +梓>'[catalpa ovata]'; +梔>'[gardenia]'; +梗>'[stem of flower]'; +#"梘>'[bamboo tube]'", +條>'[clause]'; +梟>'[owl thus]'; +梠>'[small beam supporting rafters at]'; +梢>'[pointed tip of something long like a branch]'; +梦>'[dream]'; +梧>'[sterculia platanifolia]'; +梨>'[pear]'; +梭>'[weaver''s shuttle]'; +梯>'[ladder]'; +械>'[weapons]'; +梱>'[doorsill]'; +梳>'[comb]'; +梵>'[buddhist]'; +梶>'[oar]'; +梹>'[the areca-nut]'; +#"梹>'[the areca-nut]'", +梼>'[block of wood]'; +棄>'[reject]'; +#"棆>'[................................]'", +棉>'[cotton]'; +棊>'[chess]'; +棋>'[chess]'; +棍>'[stick]'; +棒>'[stick]'; +#"棔>'[................................]'", +棕>'[hemp palm]'; +棗>'[date tree]'; 
+棘>'[jujube tree]'; +棚>'[tent]'; +棟>'[main beams supporting house]'; +棠>'[crab apple tree]'; +#"棡>'[................................]'", +棣>'[kerria japonica plant]'; +棧>'[warehouse]'; +森>'[forest]'; +棯>'[jujube tree]'; +棲>'[perch]'; +棹>'[oar]'; +棺>'[coffin]'; +椀>'[bowl]'; +椁>'[outer-coffin]'; +椄>'[to graft]'; +椅>'[chair]'; +椈>'[cedar]'; +椋>'[fruit]'; +椌>'[instrument]'; +植>'[plant]'; +椎>'[hammer]'; +椏>'[the forking branch of a tree]'; +椒>'[pepper]'; +#"椓>'[strike]'", +#"椚>'[................................]'", +椛>'[type of birch]'; +検>'[check]'; +#"椡>'[................................]'", +#"椢>'[................................]'", +#"椣>'[................................]'", +#"椥>'[................................]'", +#"椦>'[................................]'", +#"椨>'[................................]'", +椪>'[machilus nanmu]'; +椰>'[palm tree]'; +椴>'[poplar]'; +椶>'[palm tree]'; +椹>'[a chopping board]'; +椽>'[beams]'; +椿>'[father]'; +楊>'[willow]'; +楓>'[maple tree]'; +楔>'[wedge]'; +楕>'[oval-shaped]'; +楙>'[name of plant]'; +楚>'[name of feudal state]'; +#"楜>'[................................]'", +楝>'[melia japonica]'; +楞>'[used for ceylon in buddhist text]'; +楠>'[name of tree]'; +楡>'[elm tree]'; +楢>'[tinder]'; +楪>'[small dish]'; +楫>'[oar]'; +業>'[profession]'; +楮>'[mulberry]'; +楯>'[shield]'; +楳>'[plums]'; +#"楴>'[................................]'", +極>'[extreme]'; +楷>'[model style of chinese writing]'; +楸>'[mallotus japonicus]'; +楹>'[column]'; +楼>'[building of two or more stories]'; +楽>'[happy]'; +#"楽>'[happy]'", +#"榀>'[[not found in dictionary]]'", +概>'[generally]'; +#"榉>'[type of elm]'", +榎>'[small evergreen shrub]'; +#"榑>'[................................]'", +榔>'[betel-nut tree]'; +榕>'[banyan tree]'; +榛>'[hazelnut]'; +榜>'[placard]'; +#"榠>'[................................]'", +榧>'[type of yew]'; +榮>'[glory]'; +榱>'[rafter]'; +榲>'[pillar]'; +榴>'[pomegranate]'; +榻>'[cot]'; +榾>'[pieces of wood]'; +榿>'[alder]'; +槁>'[wither]'; +槃>'[tray]'; +槇>'[tip of a tree]'; 
+槊>'[spear]'; +構>'[frame]'; +槌>'[hammer]'; +槍>'[spear]'; +槎>'[raft]'; +槐>'[locust tree]'; +槓>'[lever]'; +様>'[shape]'; +#"様>'[shape]'", +#"槛>'[threshold]'", +#"槛>'[threshold]'", +槧>'[wooden tablet]'; +槨>'[outer-coffin]'; +#"槫>'[................................]'", +槭>'[maple]'; +槲>'[type of oak]'; +槹>'[spar]'; +槻>'[zelkova tree]'; +槽>'[trough]'; +槿>'[hibiscus]'; +樂>'[happy]'; +樅>'[fir tree]'; +樊>'[a railing]'; +樋>'[tree name]'; +#"樌>'[................................]'", +#"樒>'[................................]'", +樓>'[building of two or more stories]'; +#"樓>'[building of two or more stories]'", +樗>'[kind of tree with useless timber]'; +標>'[mark]'; +樛>'[bending branches]'; +樞>'[door hinge]'; +樟>'[camphor tree]'; +模>'[model]'; +#"樢>'[................................]'", +樣>'[shape]'; +権>'[power]'; +横>'[across]'; +#"横>'[across]'", +#"横>'[across]'", +樵>'[woodcutter]'; +樶>'[c]'; +樸>'[simple]'; +樹>'[tree]'; +樺>'[type of birch]'; +樽>'[goblet]'; +橄>'[olive]'; +橇>'[a sledge for transportation]'; +橈>'[bent or twisted piece of wood]'; +橋>'[bridge]'; +橘>'[orange]'; +橙>'[orange]'; +機>'[machine]'; +橡>'[chestnut oak]'; +橢>'[oval-shaped]'; +橦>'[tree]'; +#"橱>'[cabinet]'", +#"橵>'[wood placed under roof tiles]'", +#"橿>'[................................]'", +檀>'[sandalwood]'; +檄>'[call arms]'; +檍>'[ilex]'; +檎>'[small red apple]'; +檐>'[eaves of house]'; +檗>'[tree]'; +檜>'[chinese cypress]'; +檠>'[stand for lamp]'; +檢>'[check]'; +檣>'[mast]'; +檪>'[chestnut-leaved oak]'; +檬>'[type of locust oracacia]'; +檮>'[block of wood]'; +檳>'[betelnut]'; +檸>'[lemon]'; +檻>'[threshold]'; +#"櫁>'[................................]'", +櫂>'[oar]'; +櫃>'[cupboard]'; +#"櫑>'[................................]'", +櫓>'[oar]'; +櫚>'[palm]'; +櫛>'[comb out]'; +櫞>'[citrus]'; +櫟>'[chestnut-leaved oak]'; +櫨>'[supporting block]'; +櫪>'[type of oak]'; +櫺>'[carved or patterned window sills]'; +櫻>'[cherry]'; +欄>'[railing]'; +欅>'[zelkova]'; +權>'[power]'; +欒>'[name of tree]'; +欖>'[olive]'; +欝>'[luxuriant]'; +#"欞>'[the lattice of a 
window a sill]'", +欠>'[owe]'; +次>'[order]'; +欣>'[happy]'; +欧>'[translit.: europe]'; +欲>'[desire]'; +欷>'[sob]'; +欸>'[sighs]'; +欹>'[fierce dog]'; +欺>'[cheat]'; +欽>'[respect]'; +款>'[item]'; +歃>'[smear one''s mouth with blood of a victim when taking an oath]'; +歇>'[rest]'; +歉>'[deficient]'; +歌>'[song]'; +歎>'[sigh]'; +歐>'[translit.: europe]'; +歓>'[happy]'; +歔>'[blow through nose]'; +歙>'[to suck]'; +歛>'[draw back]'; +歟>'[final particle used express ques]'; +歡>'[happy]'; +止>'[stop]'; +正>'[right]'; +此>'[this]'; +武>'[military]'; +歩>'[step]'; +歪>'[slant]'; +歯>'[teeth]'; +歳>'[year]'; +歴>'[take place]'; +歸>'[return]'; +歹>'[bad]'; +死>'[die]'; +歿>'[die]'; +殀>'[die young]'; +殃>'[misfortune]'; +殄>'[to end]'; +殆>'[dangerous]'; +殉>'[die for cause]'; +殊>'[different]'; +残>'[injure]'; +殍>'[to starve to death]'; +殕>'[[not found in dictionary]]'; +殖>'[breed]'; +殘>'[injure]'; +殞>'[die]'; +殤>'[die young]'; +殪>'[die]'; +殫>'[utmost]'; +殯>'[encoffin]'; +殱>'[annihilate]'; +殲>'[annihilate]'; +殳>'[name of old weapon]'; +殴>'[beat]'; +段>'[section]'; +殷>'[many]'; +殺>'[kill]'; +殻>'[casing]'; +殼>'[casing]'; +殿>'[hall]'; +毀>'[destroy]'; +毅>'[resolute]'; +毆>'[beat]'; +毋>'[do not]'; +母>'[mother]'; +毎>'[every]'; +毒>'[poison]'; +毓>'[give birth to]'; +比>'[compare]'; +毘>'[help]'; +毛>'[hair]'; +#"毟>'[................................]'", +毫>'[fine hair]'; +毬>'[ball]'; +毯>'[rug]'; +毳>'[fine hair or fur on animals]'; +氈>'[felt]'; +氏>'[clan]'; +民>'[people]'; +氓>'[people]'; +气>'[steam]'; +気>'[air]'; +氛>'[gas]'; +氣>'[air]'; +氤>'[hanging fog]'; +水>'[water]'; +氷>'[ice]'; +永>'[long]'; +氾>'[overflow]'; +汀>'[sandbar]'; +汁>'[juice]'; +求>'[seek]'; +汎>'[float]'; +汐>'[night tides]'; +汕>'[basket for catching fish]'; +汗>'[perspiration]'; +汚>'[filthy]'; +汝>'[you]'; +汞>'[element mercury]'; +江>'[large river]'; +池>'[pool]'; +#"汢>'[................................]'", +汨>'[Mi(luo) river in hunan province where Qu Yuan drowned himself]'; +汪>'[vast]'; +汰>'[excessive]'; +汲>'[draw water from well]'; +#"汲>'[draw water from well]'", 
+決>'[decide]'; +汽>'[steam]'; +汾>'[river in shanxi province]'; +沁>'[soak into]'; +沂>'[river in southeast shandong flow]'; +沃>'[water]'; +沈>'[sink]'; +沌>'[chaotic]'; +沍>'[freezing]'; +沐>'[bathe]'; +沒>'[not]'; +沓>'[connected]'; +沖>'[pour]'; +沙>'[sand]'; +沚>'[islet in stream]'; +沛>'[abundant]'; +没>'[not]'; +沢>'[marsh]'; +沫>'[froth]'; +沮>'[stop]'; +沱>'[rivers]'; +河>'[river]'; +沸>'[boil]'; +油>'[oil]'; +沺>'[turbulent]'; +治>'[govern]'; +沼>'[lake]'; +沽>'[buy and sell]'; +沾>'[moisten]'; +沿>'[follow course]'; +況>'[condition]'; +泄>'[leak]'; +泅>'[swim]'; +泉>'[spring]'; +泊>'[anchor vessel]'; +泌>'[to seep out]'; +泓>'[clear]'; +法>'[law]'; +泗>'[mucous]'; +泙>'[roar]'; +泛>'[drift]'; +泝>'[go upstream]'; +泡>'[bubbles]'; +波>'[waves]'; +泣>'[cry]'; +泥>'[mud]'; +注>'[concentrate]'; +泪>'[tears]'; +泯>'[destroy]'; +泰>'[great]'; +泱>'[great]'; +泳>'[dive]'; +洋>'[ocean]'; +洌>'[clear]'; +洒>'[sprinkle]'; +洗>'[wash]'; +洙>'[name of a river in shandong]'; +洛>'[river in shaanxi province]'; +洞>'[cave]'; +洟>'[snivel]'; +津>'[ferry]'; +洩>'[leak]'; +洪>'[vast]'; +洫>'[to ditch]'; +洲>'[continent]'; +洳>'[damp]'; +洵>'[true]'; +洶>'[the rush of water]'; +洸>'[sparkle]'; +活>'[live]'; +洽>'[spread]'; +派>'[school of thought]'; +流>'[flow]'; +浄>'[pure]'; +浅>'[shallow]'; +浙>'[zhejiang province]'; +浚>'[dredge]'; +浜>'[creek]'; +浣>'[to wash]'; +浤>'[beating of ocean]'; +浦>'[bank of river]'; +浩>'[great]'; +浪>'[wave]'; +浬>'[nautical mile]'; +浮>'[float]'; +浴>'[bathe]'; +海>'[sea]'; +浸>'[soak]'; +浹>'[saturate]'; +涅>'[blacken]'; +消>'[vanish]'; +涌>'[surge up]'; +涎>'[saliva]'; +涓>'[brook]'; +涕>'[tear]'; +涙>'[tears]'; +涛>'[large waves]'; +涜>'[ditch]'; +涯>'[shore]'; +液>'[sap]'; +涵>'[soak]'; +涸>'[dried up]'; +涼>'[cool]'; +淀>'[shallow water]'; +淅>'[water used wash rice]'; +淆>'[confused]'; +淇>'[river in henan province]'; +淋>'[drip]'; +淌>'[trickle]'; +淑>'[good]'; +淒>'[bitter cold]'; +#"淕>'[................................]'", +淘>'[wash in sieve]'; +淙>'[gurgling sound of water]'; +淞>'[name of a river in Jiangsu]'; +淡>'[weak]'; +淤>'[mud]'; 
+淦>'[river in jiangxi province: water]'; +淨>'[pure]'; +淪>'[be lost]'; +淫>'[obscene]'; +淬>'[temper]'; +淮>'[river in anhui province]'; +深>'[deep]'; +淳>'[cyanogen]'; +淵>'[gulf]'; +混>'[mix]'; +淹>'[drown]'; +淺>'[shallow]'; +添>'[append]'; +清>'[clear]'; +渇>'[thirsty]'; +済>'[help]'; +渉>'[ford stream]'; +渊>'[surge up]'; +渋>'[astringent]'; +渓>'[mountain stream]'; +渕>'[surge up]'; +渙>'[scatter]'; +渚>'[small sand bank]'; +減>'[decrease]'; +渝>'[change]'; +渟>'[(of water) not flowing]'; +渠>'[ditch]'; +渡>'[cross]'; +渣>'[refuse]'; +渤>'[swelling]'; +渥>'[moisten]'; +渦>'[swirl]'; +温>'[lukewarm]'; +渫>'[beating of ocean]'; +測>'[measure]'; +渭>'[name of a river in shaanxi]'; +渮>'[river in shandong province]'; +港>'[port]'; +游>'[swim]'; +渺>'[endlessly long]'; +渾>'[muddy]'; +湃>'[sound of waves]'; +湊>'[piece together]'; +湍>'[rapid water current]'; +湎>'[flushed with drink]'; +湖>'[lake]'; +湘>'[hunan province]'; +湛>'[deep]'; +湟>'[river in qinghai province]'; +湧>'[well up]'; +湫>'[a small pond]'; +湮>'[bury]'; +湯>'[hot water]'; +湲>'[flow]'; +#"湳>'[[not found in dictionary]]'", +湾>'[bay]'; +湿>'[wet]'; +#"湿>'[wet]'", +#"湿>'[wet]'", +溌>'[pour]'; +溏>'[pool]'; +源>'[spring]'; +準>'[rule]'; +溘>'[abruptly]'; +溜>'[slide]'; +溝>'[ditch]'; +溟>'[drizzling rain]'; +溢>'[overflow]'; +溥>'[big]'; +溪>'[mountain stream]'; +溯>'[go upstream]'; +溲>'[urinate]'; +溶>'[melt]'; +溷>'[privy]'; +溺>'[drown]'; +溽>'[moist]'; +滂>'[torrential]'; +滄>'[blue]'; +滅>'[extinguish]'; +滉>'[deep]'; +滋>'[grow]'; +滌>'[wash]'; +滑>'[slip]'; +滓>'[sediment]'; +滔>'[overflow]'; +滕>'[ county in shandong province]'; +滝>'[raining]'; +滞>'[block up]'; +滬>'[shanghai]'; +滯>'[block up]'; +滲>'[soak through]'; +滴>'[drip]'; +滷>'[thick gravy]'; +滸>'[riverbank]'; +滾>'[turn]'; +滿>'[fill]'; +漁>'[to fish]'; +漂>'[float]'; +漆>'[varnish]'; +漉>'[filter]'; +漏>'[leak]'; +漑>'[water]'; +漓>'[river in guangxi province]'; +演>'[perform]'; +漕>'[transport by water]'; +漠>'[desert]'; +漢>'[chinese people]'; +漣>'[flowing water]'; +漫>'[overflow of water]'; +漬>'[soak]'; +漱>'[gargle]'; 
+漲>'[rise in price]'; +漸>'[gradually]'; +漾>'[overflow]'; +漿>'[any thick fluid]'; +潁>'[river in anhui]'; +潅>'[pour]'; +潔>'[clean]'; +潘>'[surname]'; +潛>'[hide]'; +潜>'[hide]'; +潟>'[land inundated with salt from ti]'; +潤>'[soft]'; +潦>'[to flood]'; +潭>'[deep pool]'; +潮>'[tide]'; +潯>'[steep bank by stream]'; +潰>'[flooding river]'; +潴>'[pond]'; +潸>'[weep]'; +潺>'[sound of flowing water]'; +潼>'[high]'; +澀>'[astringent]'; +澁>'[astringent]'; +澂>'[clear and still water]'; +澄>'[purify water by allowing sedimen]'; +澆>'[spray]'; +澎>'[splatter]'; +澑>'[slide]'; +澗>'[brook]'; +澡>'[wash]'; +澣>'[cleanse]'; +澤>'[marsh]'; +#"澪>'[................................]'", +澱>'[sediment]'; +澳>'[inlet]'; +澹>'[calm]'; +激>'[arouse]'; +濁>'[muddy]'; +濂>'[waterfall]'; +濃>'[thick]'; +濆>'[river bank]'; +濔>'[many]'; +濕>'[wet]'; +濘>'[mud]'; +濛>'[drizzling]'; +濟>'[help]'; +濠>'[moat]'; +濡>'[immerse]'; +濤>'[large waves]'; +濫>'[flood]'; +濬>'[dredge]'; +濮>'[county in Henan province]'; +濯>'[wash out]'; +濱>'[beach]'; +濳>'[hide]'; +濶>'[broad]'; +濺>'[sprinkle]'; +濾>'[strain out]'; +瀁>'[waves]'; +瀉>'[drain off]'; +瀋>'[juice]'; +瀏>'[clear]'; +瀑>'[waterfall]'; +瀕>'[approach]'; +瀘>'[river in jiangxi province]'; +瀚>'[vast]'; +瀛>'[sea]'; +瀝>'[trickle]'; +瀞>'[pool in a river]'; +瀟>'[sound of beating wind and rain]'; +瀦>'[pond]'; +瀧>'[raining]'; +瀬>'[swift current]'; +瀰>'[overflow]'; +瀲>'[waves]'; +瀾>'[overflowing]'; +灌>'[pour]'; +灑>'[sprinkle]'; +灘>'[bank]'; +灣>'[bay]'; +火>'[fire]'; +灯>'[lantern]'; +灰>'[ashes]'; +灸>'[cauterize with moxa]'; +灼>'[burn]'; +災>'[calamity]'; +炉>'[fireplace]'; +炊>'[cook]'; +炎>'[flame]'; +炒>'[fry]'; +炙>'[roast]'; +炬>'[torch]'; +炭>'[charcoal]'; +炮>'[large gun]'; +炯>'[bright]'; +炳>'[bright]'; +炸>'[fry in oil]'; +点>'[dot]'; +為>'[do]'; +烈>'[fiery]'; +烋>'[boast]'; +烏>'[crow]'; +烙>'[brand]'; +烝>'[rise]'; +烟>'[smoke]'; +烱>'[bright]'; +烹>'[boil]'; +烽>'[signal fire]'; +焉>'[thereupon]'; +焔>'[flame]'; +焙>'[dry over slow fire]'; +焚>'[burn]'; +焜>'[fire]'; +無>'[negative]'; +焦>'[burned]'; +然>'[yes]'; 
+焼>'[burn]'; +煉>'[smelt]'; +煌>'[bright]'; +煎>'[fry in fat or oil]'; +煕>'[bright]'; +煖>'[warm]'; +煙>'[smoke]'; +煢>'[alone]'; +煤>'[coal]'; +煥>'[shining]'; +煦>'[kind]'; +照>'[shine]'; +煩>'[bother]'; +煬>'[roast]'; +煮>'[cook]'; +煽>'[stir up]'; +熄>'[put out]'; +熈>'[bright]'; +熊>'[bear]'; +熏>'[smoke]'; +熔>'[melt]'; +#"熕>'[................................]'", +熙>'[bright]'; +熟>'[well cooked]'; +熨>'[iron]'; +熬>'[cook down]'; +熱>'[hot]'; +熹>'[dim light]'; +熾>'[burning hot]'; +燃>'[burn]'; +燈>'[lantern]'; +燉>'[heat with fire]'; +燎>'[burn]'; +燐>'[phosphorus]'; +燒>'[burn]'; +燔>'[to roast]'; +燕>'[swallow (bird)]'; +#"燗>'[................................]'", +營>'[encampment]'; +燠>'[warm]'; +燥>'[dry]'; +燦>'[vivid]'; +燧>'[flintstone]'; +燬>'[burn down]'; +燭>'[candle]'; +燮>'[harmonize]'; +#"燵>'[................................]'", +燹>'[fire]'; +燻>'[smoke]'; +燼>'[cinders]'; +燿>'[shine]'; +爆>'[crackle]'; +爍>'[shine]'; +爐>'[fireplace]'; +爛>'[rotten]'; +爨>'[oven]'; +爪>'[claw]'; +爬>'[crawl]'; +爭>'[dispute]'; +爰>'[lead on to]'; +爲>'[do]'; +爵>'[feudal title or rank]'; +父>'[father]'; +爺>'[father]'; +爻>'[diagrams for divination]'; +爼>'[chopping board or block]'; +爽>'[happy]'; +爾>'[you]'; +爿>'[half of tree trunk]'; +牀>'[bed]'; +牆>'[wall]'; +片>'[slice]'; +版>'[printing blocks]'; +牋>'[memorandum]'; +牌>'[signboard]'; +牒>'[documents]'; +牘>'[writing tablet]'; +牙>'[tooth]'; +牛>'[cow]'; +牝>'[female of species]'; +牟>'[make]'; +牡>'[male of animals]'; +牢>'[prison]'; +牧>'[tend cattle]'; +物>'[thing]'; +牲>'[sacrificial animal]'; +牴>'[gore]'; +特>'[special]'; +牽>'[drag]'; +牾>'[to oppose]'; +犀>'[rhinoceros]'; +犁>'[plow]'; +犂>'[plow]'; +犇>'[run fast]'; +犒>'[entertain victorious soldiers]'; +犖>'[brindled ox]'; +犠>'[sacrifice]'; +犢>'[calf]'; +犧>'[sacrifice]'; +犬>'[dog]'; +犯>'[commit crime]'; +犲>'[wolf]'; +状>'[form]'; +犹>'[like]'; +狂>'[insane]'; +狃>'[to covet]'; +狄>'[tribe from northern china]'; +狆>'[pekinese dog]'; +狎>'[be familiar with]'; +狐>'[species of fox]'; +狒>'[baboon]'; +狗>'[dog]'; +狙>'[ape]'; 
+#"狛>'[................................]'", +狠>'[vicious]'; +狡>'[cunning]'; +狢>'[animal name]'; +狩>'[winter hunting]'; +独>'[alone]'; +狭>'[narrow]'; +狷>'[rash]'; +狸>'[fox]'; +狹>'[narrow]'; +狼>'[wolf]'; +狽>'[legendary animal with short fore]'; +猊>'[lion]'; +猖>'[mad]'; +猗>'[exclamation of admiration]'; +猛>'[violent]'; +猜>'[guess]'; +猝>'[abruptly]'; +猟>'[hunt]'; +猥>'[vulgar]'; +猩>'[species of orangutan]'; +猪>'[pig]'; +猫>'[cat]'; +献>'[offer]'; +#"猯>'[................................]'", +猴>'[monkey]'; +猶>'[like]'; +猷>'[plan]'; +猾>'[crafty]'; +猿>'[ape]'; +獄>'[prison]'; +獅>'[lion]'; +獎>'[prize]'; +獏>'[the panther]'; +獗>'[unruly]'; +獣>'[beast]'; +獨>'[alone]'; +獪>'[sly]'; +獰>'[ferocious appearance]'; +獲>'[obtain]'; +獵>'[hunt]'; +獸>'[beast]'; +獺>'[otter]'; +獻>'[offer]'; +玄>'[deep]'; +率>'[to lead]'; +玉>'[jade]'; +王>'[king]'; +玖>'[black-colored jade]'; +玩>'[play with]'; +玲>'[tinkling of jade]'; +玳>'[tortoise shell]'; +玻>'[glass]'; +珀>'[amber]'; +珂>'[inferior kind of jade]'; +珈>'[ornament attached woman''s hairpi]'; +珊>'[coral]'; +珍>'[precious]'; +珎>'[precious]'; +珞>'[kind of necklace]'; +珠>'[precious stone]'; +珥>'[ear ornament]'; +珪>'[jade table conferred upon feudal]'; +班>'[class]'; +珮>'[jade ornament]'; +珱>'[necklace made of precious stones]'; +#"珸>'[................................]'", +現>'[appear]'; +球>'[ball]'; +琅>'[variety of white carnelian]'; +理>'[reason]'; +琉>'[sparkling stone]'; +琢>'[polish jade]'; +琥>'[jewel in shape of tiger]'; +琲>'[necklace]'; +琳>'[beautiful jade]'; +琴>'[chinese lute or guitar]'; +琵>'[guitar-like instrument]'; +琶>'[guitar-like instrument]'; +琺>'[enamel]'; +琿>'[bright]'; +瑁>'[fine piece of jade]'; +瑕>'[flaw in gem]'; +瑙>'[agate]'; +瑚>'[coral]'; +瑛>'[luster of gem]'; +瑜>'[flawless gem or jewel]'; +瑞>'[felicitous omen]'; +瑟>'[large stringed musical instrument]'; +瑠>'[precious stone]'; +瑣>'[fragments]'; +瑤>'[precious jade]'; +瑩>'[lustre of gems]'; +瑪>'[agate]'; +瑯>'[kind of white cornelian]'; +瑰>'[extraordinary]'; +瑳>'[luster of gem]'; +瑶>'[precious 
jade]'; +瑾>'[brilliance of gems]'; +璃>'[glass]'; +璋>'[jade plaything]'; +璞>'[unpolished gem]'; +璢>'[precious stone]'; +璧>'[piece of jade with hole in it]'; +環>'[jade ring or bracelet]'; +璽>'[imperial signet]'; +瓊>'[jade]'; +瓏>'[gem cut like dragon]'; +瓔>'[necklace made of precious stones]'; +瓜>'[melon]'; +瓠>'[bottle gourd]'; +瓢>'[ladle made from dried gourd]'; +瓣>'[petal]'; +瓦>'[tile]'; +瓧>'[decagram]'; +瓩>'[kilowatt]'; +瓮>'[earthen jar]'; +瓰>'[[not found in dictionary]]'; +瓱>'[milligram]'; +#"瓲>'[................................]'", +瓶>'[jug]'; +瓷>'[crockery]'; +瓸>'[hectogram]'; +甃>'[brick wall of a well]'; +甄>'[examine]'; +甅>'[centigram]'; +甌>'[bowl]'; +甍>'[rafters supporting roof tiles]'; +甎>'[brick]'; +甑>'[boiler for steaming rice]'; +甓>'[glazed tiles]'; +甕>'[earthen jar]'; +甘>'[sweetness]'; +甚>'[great extent]'; +甜>'[sweet]'; +甞>'[taste]'; +生>'[life]'; +産>'[give birth]'; +甥>'[sister''s child]'; +甦>'[be reborn]'; +用>'[use]'; +甫>'[begin]'; +甬>'[path]'; +田>'[field]'; +由>'[cause]'; +甲>'[armor]'; +申>'[to state to a superior]'; +男>'[male]'; +甸>'[suburbs of capital]'; +町>'[raised path between fields]'; +画>'[painting]'; +甼>'[raised path between fields]'; +畄>'[stop]'; +畆>'[chinese land measure]'; +#"畉>'[................................]'", +畊>'[plow]'; +畋>'[till land]'; +界>'[boundary]'; +#"畍>'[................................]'", +畏>'[fear]'; +畑>'[dry (as opposed to rice) field]'; +畔>'[boundary path dividing fields]'; +留>'[stop]'; +畚>'[straw basket]'; +畛>'[border]'; +畜>'[livestock]'; +畝>'[chinese land measure]'; +畠>'[garden]'; +畢>'[end]'; +畤>'[place for worshipping the haven]'; +略>'[approximately]'; +畦>'[sections in vegetable farm]'; +畧>'[approximately]'; +#"畩>'[................................]'", +番>'[take turns]'; +畫>'[delineate]'; +#"畭>'[................................]'", +異>'[different]'; +畳>'[repeat]'; +畴>'[farmland]'; +當>'[bear]'; +畷>'[raised path between fields]'; +畸>'[odd]'; +畿>'[imperial domain]'; +疂>'[repeat]'; +疆>'[boundary]'; +疇>'[farmland]'; 
+疉>'[repeat]'; +疊>'[repeat]'; +疋>'[roll]'; +疎>'[neglect]'; +疏>'[neglect]'; +疑>'[doubt]'; +疔>'[carbuncle]'; +疚>'[chronic disease]'; +疝>'[hernia]'; +疣>'[wart]'; +疥>'[scabies]'; +疫>'[epidemic]'; +疱>'[acne]'; +疲>'[feel tired]'; +疳>'[childhood diseases]'; +疵>'[flaw]'; +疸>'[jaundice]'; +疹>'[measles]'; +疼>'[aches]'; +疽>'[ulcer]'; +疾>'[illness]'; +痂>'[scab]'; +痃>'[indigestion]'; +病>'[illness]'; +症>'[disease]'; +痊>'[be healed]'; +痍>'[wound]'; +痒>'[itch]'; +痔>'[hemorrhoids]'; +痕>'[scar]'; +痘>'[smallpox]'; +痙>'[convulsions]'; +痛>'[pain]'; +痞>'[dyspepsia]'; +痢>'[dysentry]'; +痣>'[spots]'; +痩>'[thin]'; +痰>'[phlegm]'; +痲>'[pock-marked]'; +痳>'[pock-marked]'; +痴>'[foolish]'; +痺>'[paralysis]'; +痼>'[chronic disease]'; +痾>'[chronic illness]'; +痿>'[paralysis]'; +瘁>'[feel tired]'; +瘉>'[get well]'; +瘋>'[crazy]'; +瘍>'[ulcers]'; +瘟>'[epidemic]'; +瘠>'[thin]'; +瘡>'[tumor]'; +瘢>'[scar]'; +瘤>'[tumor]'; +瘧>'[intermittent fever]'; +瘰>'[scrofula]'; +瘴>'[malaria pestilential vapors]'; +瘻>'[fistula]'; +療>'[be healed]'; +癆>'[consumption]'; +癇>'[epilepsy]'; +癈>'[abrogate]'; +癌>'[cancer]'; +癒>'[get well]'; +癖>'[craving]'; +癘>'[sore]'; +癜>'[erythema]'; +癡>'[silly]'; +癢>'[itch]'; +癧>'[scrofulous lumps or swellings]'; +癨>'[quickly]'; +癩>'[leprosy]'; +癪>'[spasms]'; +癬>'[ringworms]'; +癰>'[carbuncle]'; +癲>'[crazy]'; +癶>'[legs]'; +癸>'[last of ten celestial stems]'; +発>'[issue]'; +登>'[rise]'; +發>'[issue]'; +白>'[white]'; +百>'[one hundred]'; +皀>'[kernel]'; +皃>'[countenance]'; +的>'[possessive]'; +皆>'[all]'; +皇>'[royal]'; +皈>'[follow]'; +皋>'[the high land along a river]'; +皎>'[white]'; +皐>'[the high land along a river]'; +皓>'[bright]'; +皖>'[anhui province]'; +皙>'[white]'; +皚>'[brilliant white]'; +皮>'[skin]'; +皰>'[pimples]'; +皴>'[chapped]'; +皷>'[drum]'; +皸>'[crack]'; +皹>'[crack]'; +皺>'[wrinkles]'; +皿>'[shallow container]'; +盂>'[basin]'; +盃>'[glass]'; +盆>'[basin]'; +盈>'[fill]'; +益>'[profit]'; +盍>'[what? why not? 
correspond]'; +盒>'[small box or case]'; +盖>'[cover]'; +盗>'[rob]'; +盛>'[abundant]'; +盜>'[rob]'; +盞>'[small cup or container]'; +盟>'[swear]'; +盡>'[exhaust]'; +監>'[supervise]'; +盤>'[tray]'; +盥>'[wash]'; +盧>'[cottage]'; +盪>'[to toss about]'; +目>'[eye]'; +盲>'[blind]'; +直>'[straight]'; +相>'[mutual]'; +盻>'[glare]'; +盾>'[shield]'; +省>'[province]'; +眄>'[to look askance]'; +眇>'[blind in one eye]'; +眈>'[gloat]'; +眉>'[eyebrows]'; +看>'[look]'; +県>'[county]'; +眛>'[dim]'; +#"眞>'[real]'", +真>'[real]'; +眠>'[close eyes]'; +#"眤>'[................................]'", +眥>'[eye sockets]'; +眦>'[corner of the eyes]'; +眩>'[confuse]'; +眷>'[take interest in]'; +眸>'[pupil of eye]'; +眺>'[look at]'; +眼>'[eye]'; +着>'[make move]'; +睇>'[look at]'; +睚>'[corner of eye]'; +睛>'[eyeball]'; +睡>'[sleep]'; +督>'[supervise]'; +睥>'[look askance at]'; +睦>'[friendly]'; +睨>'[look askance at]'; +睫>'[eyelashes]'; +睹>'[look at]'; +睾>'[testicle]'; +睿>'[shrewd]'; +瞋>'[glare with anger]'; +瞎>'[blind]'; +瞑>'[close eyes]'; +瞞>'[deceive]'; +瞠>'[look at]'; +瞥>'[take fleeting glance at]'; +瞬>'[wink]'; +瞭>'[bright]'; +瞰>'[watch]'; +瞳>'[pupil of eye]'; +瞶>'[dim]'; +#"瞶>'[dim]'", +瞻>'[look]'; +瞼>'[eyelid]'; +瞽>'[blind]'; +瞿>'[surname]'; +矇>'[stupid]'; +矍>'[look about in firght or alarm]'; +矗>'[straight]'; +矚>'[watch carefully]'; +矛>'[spear]'; +矜>'[pity]'; +矢>'[arrow]'; +矣>'[particle of completed action]'; +知>'[know]'; +矧>'[much more]'; +矩>'[carpenter''s square]'; +短>'[short]'; +矮>'[short]'; +矯>'[correct]'; +石>'[stone]'; +矼>'[stone bridge]'; +砂>'[sand]'; +砌>'[stone steps]'; +砒>'[arsenic]'; +研>'[grind]'; +砕>'[break]'; +砠>'[rocky]'; +砥>'[whetstone]'; +砦>'[stockade]'; +砧>'[anvil]'; +砲>'[gun]'; +破>'[break]'; +砺>'[whetstone]'; +砿>'[mine]'; +硅>'[silicon]'; +硝>'[saltpeter]'; +硫>'[sulfur]'; +硬>'[hard]'; +硯>'[inkstone]'; +#"硲>'[................................]'", +#"硴>'[................................]'", +硼>'[borax]'; +碁>'[chess]'; +碆>'[arrow-tip]'; +碇>'[anchor]'; +碌>'[rough]'; +碍>'[obstruct]'; +碎>'[break]'; +碑>'[stone tablet]'; 
+碓>'[pestle]'; +#"碕>'[................................]'", +碗>'[bowl]'; +碚>'[suburb]'; +碣>'[stone tablet]'; +碧>'[jade]'; +碩>'[great]'; +碪>'[stone slab used for washing clot]'; +碯>'[agate]'; +#"碵>'[................................]'", +確>'[sure]'; +碼>'[number]'; +碾>'[roller]'; +磁>'[magnetic]'; +磅>'[pound]'; +#"磆>'[................................]'", +磊>'[pile of rocks or stones]'; +磋>'[polish]'; +磐>'[large rock]'; +磑>'[stone mill]'; +磔>'[downward stroke slanting righ]'; +磚>'[tile]'; +磧>'[sand and gravel]'; +磨>'[grind]'; +磬>'[musical instrument]'; +磯>'[jetty]'; +磴>'[steps on ledge]'; +磽>'[barren land]'; +礁>'[reef]'; +#"礇>'[................................]'", +礎>'[foundation stone]'; +#"礎>'[foundation stone]'", +#"礒>'[................................]'", +礙>'[obstruct]'; +礦>'[mine]'; +礪>'[whetstone]'; +礫>'[gravel]'; +礬>'[alum]'; +示>'[show]'; +礼>'[social custom]'; +社>'[god of the soil and altars to him]'; +祀>'[to sacrifice]'; +祁>'[pray]'; +祇>'[only]'; +祈>'[pray]'; +祉>'[happiness]'; +祐>'[divine intervention]'; +祓>'[exorcise]'; +祕>'[mysterious]'; +祖>'[ancestor]'; +祗>'[respect]'; +祚>'[throne]'; +祝>'[pray for happiness or blessings]'; +神>'[spirit]'; +祟>'[evil spirit]'; +祠>'[ancestral temple]'; +祢>'[one''s deceased father]'; +祥>'[good luck]'; +票>'[slip of paper or bamboo]'; +祭>'[sacrifice to]'; +祷>'[pray]'; +祺>'[good luck]'; +祿>'[blessing]'; +禀>'[report to]'; +禁>'[restrict]'; +禄>'[blessing]'; +禅>'[meditation]'; +禊>'[semi-annual ceremony of purifica]'; +禍>'[misfortune]'; +禎>'[lucky]'; +福>'[happiness]'; +禝>'[[not found in dictionary]]'; +禦>'[defend]'; +禧>'[happiness]'; +禪>'[meditation]'; +禮>'[social custom]'; +禰>'[one''s deceased father]'; +禳>'[pray or sacrifice]'; +禹>'[legendary hsia dynasty founder]'; +禺>'[district]'; +禽>'[birds]'; +禾>'[grain still on stalk]'; +禿>'[bald]'; +秀>'[ear of grain]'; +私>'[private]'; +秉>'[grasp]'; +秋>'[autumn]'; +科>'[section]'; +秒>'[beard of grain or corn]'; +秕>'[empty grain or rice husk]'; +秘>'[secret]'; +租>'[rent]'; 
+#"秡>'[................................]'", +秣>'[fodder]'; +秤>'[balance]'; +秦>'[feudal state of qin]'; +秧>'[rice seedlings]'; +秩>'[order]'; +秬>'[black millet]'; +称>'[call]'; +移>'[change place]'; +稀>'[rare]'; +稈>'[stalk of grain]'; +程>'[journey]'; +稍>'[little]'; +税>'[taxes]'; +稔>'[ripe grain]'; +稗>'[darnels]'; +#"稘>'[................................]'", +稙>'[grain ready for grinding]'; +稚>'[young]'; +稜>'[corner]'; +稟>'[report to]'; +稠>'[dense]'; +種>'[seed]'; +稱>'[call]'; +稲>'[rice growing in field]'; +稷>'[god of cereals]'; +稻>'[rice growing in field]'; +稼>'[sow grain]'; +稽>'[examine]'; +稾>'[draft]'; +稿>'[draft]'; +穀>'[corn]'; +穂>'[ear of grain]'; +#"穃>'[................................]'", +穆>'[majestic]'; +穉>'[young grain]'; +積>'[accumulate]'; +穎>'[rice tassel]'; +#"穎>'[rice tassel]'", +穐>'[fall]'; +穗>'[ear of grain]'; +穡>'[farm]'; +穢>'[dirty]'; +穣>'[stalks of grain]'; +穩>'[stable]'; +穫>'[harvest]'; +穰>'[stalks of grain]'; +穴>'[cave]'; +究>'[examine]'; +穹>'[high and vast]'; +空>'[empty]'; +穽>'[hole]'; +穿>'[penetrate]'; +突>'[suddenly]'; +窃>'[secretly]'; +窄>'[narrow]'; +窈>'[obscure]'; +窒>'[stop up]'; +窓>'[window]'; +窕>'[slender]'; +窖>'[pit]'; +窗>'[window]'; +窘>'[embrassassed]'; +窟>'[hole]'; +窩>'[nest]'; +窪>'[hollow]'; +窮>'[poor]'; +窯>'[kiln]'; +窰>'[kiln]'; +窶>'[poor]'; +窺>'[peep]'; +窿>'[mine shaft]'; +竃>'[furnace]'; +竄>'[run away]'; +竅>'[hole]'; +竇>'[surname]'; +竈>'[furnace]'; +竊>'[secretly]'; +立>'[stand]'; +竍>'[decaliter]'; +竏>'[kiloliter]'; +竒>'[strange]'; +竓>'[milliliter]'; +竕>'[deciliter]'; +站>'[stand up]'; +竚>'[stand and wait for long time]'; +竜>'[dragon]'; +竝>'[combine]'; +竟>'[finally]'; +章>'[composition]'; +竡>'[hectoliter]'; +竢>'[wait for]'; +竣>'[terminate]'; +童>'[child]'; +竦>'[revere]'; +竪>'[perpendicular]'; +竭>'[put forth great effort]'; +端>'[end]'; +竰>'[centiliter]'; +競>'[contend]'; +竸>'[contend]'; +竹>'[bamboo]'; +竺>'[india]'; +竿>'[bamboo pole]'; +#"笂>'[................................]'", +笄>'[hairpin]'; +笆>'[bamboo fence]'; +笈>'[bamboo box used carry 
books]'; +笊>'[ladle]'; +笋>'[bamboo shoots]'; +笏>'[tablet held by someone having au]'; +笑>'[smile]'; +#"笘>'[................................]'", +笙>'[small gourd-shaped musical instrument]'; +笛>'[bamboo flute]'; +笞>'[bamboo rod used for beatings]'; +笠>'[bamboo hat]'; +笥>'[a hamper]'; +符>'[i.d. tag]'; +笨>'[foolish]'; +第>'[sequence]'; +笳>'[a reed leaf whistle]'; +笵>'[a bamboo form]'; +#"笶>'[................................]'", +笹>'[small bamboo]'; +筅>'[bamboo brush]'; +筆>'[writing brush]'; +筈>'[arrow end]'; +等>'[rank]'; +筋>'[muscles]'; +筌>'[bamboo fish trap]'; +筍>'[bamboo shoot]'; +筏>'[raft]'; +筐>'[bamboo basket or chest]'; +筑>'[ancient lute]'; +筒>'[thick piece of bamboo]'; +答>'[answer]'; +策>'[scheme]'; +筝>'[stringed musical instrument]'; +筥>'[round-shaped bamboo basket for]'; +筧>'[bamboo water pipe]'; +筬>'[reed of a loom]'; +筮>'[divination with stalks of plants]'; +筰>'[cable]'; +筱>'[dwarf bamboo]'; +筴>'[type of grass used in divination]'; +筵>'[bamboo mat]'; +筺>'[bamboo basket or chest]'; +箆>'[fine-toothed comb]'; +箇>'[numerary adjunct]'; +箋>'[note]'; +箍>'[hoop]'; +箏>'[stringed musical instrument]'; +箒>'[broom]'; +箔>'[reed screen]'; +箕>'[sieve]'; +算>'[count]'; +箘>'[fine bamboo]'; +箙>'[quiver]'; +箚>'[brief note]'; +箜>'[ancient string music instrument]'; +箝>'[tweezers]'; +#"箟>'[................................]'", +管>'[pipe]'; +箪>'[small bamboo basket for holding]'; +箭>'[arrow]'; +箱>'[case]'; +箴>'[needle]'; +箸>'[chopsticks]'; +節>'[knot]'; +篁>'[bamboo grove]'; +範>'[pattern]'; +篆>'[seal script]'; +篇>'[chapter]'; +築>'[build]'; +篋>'[ratton box]'; +篌>'[ancient music instrument]'; +篏>'[inlay]'; +篝>'[bamboo basket]'; +篠>'[dwarf bamboo]'; +篤>'[deep]'; +篥>'[bulgle]'; +篦>'[fine-toothed comb]'; +篩>'[sieve]'; +篭>'[cage]'; +篳>'[wicker]'; +#"篶>'[................................]'", +篷>'[awning]'; +簀>'[bed mat]'; +簇>'[swarm]'; +簍>'[bamboo basket]'; +簑>'[a coir raincoat]'; +簒>'[usurp]'; +#"簓>'[................................]'", +簔>'[a coir raincoat]'; 
+#"簗>'[................................]'", +簟>'[bamboo mat]'; +簡>'[simple]'; +簣>'[bamboo basket for carrying earth]'; +簧>'[reed of woodwind instrument]'; +簪>'[hairpin]'; +簫>'[musical instrument like pan-pipes]'; +簷>'[eaves of house]'; +簸>'[winnower]'; +簽>'[sign]'; +簾>'[a blind]'; +簿>'[register]'; +籀>'[recite]'; +籃>'[basket]'; +籌>'[chip]'; +籍>'[record]'; +籏>'[flag]'; +籐>'[climbing plants]'; +籔>'[bamboo basket]'; +籖>'[tally]'; +籘>'[climbing plants]'; +籟>'[bamboo flute]'; +籠>'[cage]'; +籤>'[tally]'; +籥>'[key]'; +籬>'[bamboo or wooden fence]'; +米>'[hulled or husked uncooked rice]'; +籵>'[dm]'; +籾>'[unhulled rice]'; +粁>'[km]'; +粂>'[surname]'; +粃>'[empty husks of grain]'; +粉>'[powder]'; +粋>'[pure]'; +粍>'[mm]'; +#"粐>'[................................]'", +粒>'[grain]'; +粕>'[lees]'; +粗>'[rough]'; +粘>'[viscous]'; +粛>'[pay respects]'; +粟>'[unhusked millet]'; +#"粡>'[................................]'", +粢>'[grain offered in ritual sacrific]'; +粤>'[Guangdong and Guangxi provinces]'; +粥>'[rice gruel]'; +粧>'[toilet]'; +粨>'[hm]'; +#"粫>'[................................]'", +#"粭>'[................................]'", +粮>'[food]'; +粱>'[better varieties of millet]'; +粲>'[polish]'; +粳>'[non-glutinous rice]'; +粹>'[pure]'; +粽>'[dumpling made of glutinous rice]'; +精>'[essence]'; +#"糀>'[................................]'", +#"糂>'[................................]'", +糅>'[blend]'; +糊>'[paste]'; +糎>'[mm]'; +糒>'[food for a journey]'; +糖>'[sugar]'; +#"糘>'[................................]'", +糜>'[rice gruel]'; +糞>'[manure]'; +糟>'[sediment]'; +糠>'[chaff]'; +糢>'[rice snacks]'; +糧>'[food]'; +糯>'[glutinous rice]'; +糲>'[unpolished rice]'; +糴>'[purchase grains]'; +糶>'[sell grains]'; +糸>'[silk]'; +糺>'[to collaborate]'; +系>'[system]'; +糾>'[investigate]'; +紀>'[record]'; +紂>'[name of an emperor]'; +約>'[treaty]'; +紅>'[red]'; +紆>'[bend]'; +紊>'[confused]'; +紋>'[line]'; +納>'[admit]'; +紐>'[knot]'; +純>'[pure]'; +紕>'[spoiled silk]'; +紗>'[gauze]'; +紘>'[string]'; +紙>'[paper]'; +級>'[level]'; +紛>'[in disorder]'; 
+紜>'[confused]'; +素>'[white (silk)]'; +紡>'[spin]'; +索>'[large rope]'; +紫>'[purple]'; +紬>'[kind of thin silk]'; +紮>'[tie]'; +累>'[tired]'; +細>'[fine]'; +紲>'[bridle]'; +紳>'[girdle]'; +紵>'[ramie]'; +紹>'[continue]'; +紺>'[dark blue color]'; +紿>'[cheat]'; +終>'[end]'; +絃>'[string on musical instrument]'; +組>'[class]'; +絅>'[unlined garment]'; +絆>'[loop]'; +#"絋>'[................................]'", +経>'[classic works]'; +絎>'[baste]'; +絏>'[rope]'; +結>'[knot]'; +絖>'[fine silks]'; +絛>'[silk braid]'; +絞>'[twist]'; +絡>'[enmesh]'; +絢>'[variegated]'; +絣>'[to baste for sewing]'; +給>'[give]'; +絨>'[silk]'; +絮>'[waste cotton]'; +統>'[govern]'; +絲>'[silk]'; +絳>'[deep red]'; +絵>'[draw]'; +絶>'[cut]'; +絹>'[kind of thick stiff silk]'; +#"絽>'[................................]'", +綉>'[embroider]'; +綏>'[soothe]'; +經>'[classic works]'; +継>'[continue]'; +続>'[continue]'; +#"綛>'[................................]'", +綜>'[arrange threads for weaving]'; +#"綟>'[................................]'", +綢>'[silk cloth]'; +綣>'[affectionate]'; +綫>'[line]'; +綬>'[silk ribbon attached as a seal]'; +維>'[maintain]'; +綮>'[embroidered banner]'; +綯>'[braid]'; +綰>'[to string together]'; +綱>'[heavy rope]'; +網>'[net]'; +綴>'[patch together]'; +綵>'[varicolored silk]'; +綸>'[green silk thread or tassel]'; +綺>'[fine thin silk]'; +綻>'[ripped seam]'; +綽>'[graceful]'; +綾>'[thin silk]'; +綿>'[cotton wad]'; +緇>'[black silk]'; +緊>'[tense]'; +緋>'[scarlet]'; +総>'[collect]'; +緑>'[green]'; +緒>'[end of thread]'; +#"緕>'[................................]'", +緘>'[seal]'; +線>'[thread]'; +緜>'[cotton wad]'; +緝>'[to sew in close stitches]'; +緞>'[satin]'; +締>'[tie]'; +緡>'[fishing-line]'; +緤>'[cord]'; +編>'[knit]'; +緩>'[slow]'; +緬>'[distant]'; +緯>'[woof]'; +緲>'[indistinct]'; +練>'[practice]'; +緻>'[delicate]'; +縁>'[hem]'; +縄>'[rope]'; +#"縄>'[rope]'", +縉>'[red silk]'; +縊>'[hang]'; +縋>'[climd down rope]'; +#"縑>'[fine silk]'", +縛>'[to tie]'; +縞>'[white raw silk]'; +縟>'[decorative]'; +縡>'[matter]'; +縢>'[bind]'; +縣>'[county]'; +縦>'[indulge in]'; 
+縫>'[sew]'; +縮>'[contract]'; +縱>'[indulge in]'; +縲>'[chain or rope used bind criminal]'; +縵>'[plain silk]'; +縷>'[thread]'; +縹>'[light blue silk]'; +#"縺>'[................................]'", +縻>'[halter for ox]'; +總>'[collect]'; +績>'[spin]'; +繁>'[complicated]'; +繃>'[bind]'; +繆>'[wind around]'; +繊>'[fine]'; +繋>'[attach]'; +繍>'[embroider]'; +織>'[weave]'; +繕>'[repair]'; +繖>'[umbrella]'; +繙>'[interpret]'; +繚>'[wind round]'; +#"繝>'[................................]'", +繞>'[entwine]'; +繦>'[string of copper coins]'; +#"繧>'[................................]'", +繩>'[rope]'; +繪>'[draw]'; +繭>'[cocoon]'; +繰>'[to reel silk from cocoons]'; +繹>'[unravel or unreel silk]'; +繻>'[fine silk guaze]'; +繼>'[continue]'; +繽>'[flourishing]'; +#"繿>'[................................]'", +纂>'[edit]'; +#"纃>'[................................]'", +纈>'[patterned silk]'; +#"纉>'[................................]'", +續>'[continue]'; +纎>'[fine]'; +纏>'[wrap]'; +#"纐>'[................................]'", +纒>'[wrap]'; +纓>'[chin strap]'; +纔>'[talent]'; +纖>'[fine]'; +纛>'[a banner]'; +纜>'[hawser]'; +缶>'[earthen crock or jar]'; +缸>'[earthen jug]'; +缺>'[be short of]'; +罅>'[crack]'; +罌>'[long necked jar or bottle]'; +罍>'[large earthenware wine jar]'; +罎>'[an earthenware jar]'; +罐>'[jar]'; +网>'[net]'; +罔>'[net]'; +罕>'[rare]'; +罘>'[screen used in ancient times]'; +罟>'[net]'; +罠>'[animal trap]'; +#"罧>'[................................]'", +罨>'[medical compress]'; +罩>'[basket for catching fish]'; +罪>'[crime]'; +罫>'[hinder]'; +置>'[place]'; +罰>'[penalty]'; +署>'[public office]'; +罵>'[accuse]'; +罷>'[cease]'; +罸>'[penalty]'; +罹>'[sorrow]'; +#"羂>'[................................]'", +羃>'[cover-cloth]'; +羅>'[net for catching birds]'; +羆>'[brown bear]'; +羇>'[inn]'; +羈>'[halter]'; +羊>'[sheep]'; +羌>'[qiang nationality]'; +美>'[beautiful]'; +羔>'[lamb]'; +羚>'[species of antelope]'; +羝>'[ram]'; +羞>'[disgrace]'; +羣>'[group]'; +群>'[group]'; +羨>'[envy]'; +義>'[right conduct]'; +羮>'[soup]'; +羯>'[wether]'; +羲>'[ancient emperor]'; 
+羶>'[rank odor]'; +羸>'[weak]'; +羹>'[soup]'; +羽>'[feather]'; +翁>'[old man]'; +翅>'[wings]'; +翆>'[color green]'; +翊>'[flying]'; +翌>'[bright]'; +習>'[practice]'; +翔>'[soar]'; +翕>'[agree]'; +翠>'[color green]'; +翡>'[kingfisher]'; +翦>'[scissors]'; +翩>'[fly]'; +翫>'[careless]'; +翰>'[writing brush]'; +翳>'[shade]'; +翹>'[turn up]'; +翻>'[flip over]'; +翼>'[wings]'; +耀>'[shine]'; +老>'[old]'; +考>'[examine]'; +耄>'[elderly person]'; +者>'[that which]'; +耆>'[man of sixty]'; +耋>'[aged]'; +而>'[and]'; +耐>'[endure]'; +耒>'[handle of plow]'; +耕>'[plow]'; +耗>'[consume]'; +耘>'[weed]'; +耙>'[rake]'; +耜>'[spade-shaped tool]'; +耡>'[hoe]'; +耨>'[hoe]'; +耳>'[ear]'; +耶>'[used in transliteration]'; +耻>'[shame]'; +耽>'[indulge in]'; +耿>'[bright]'; +聆>'[listen]'; +聊>'[somewhat]'; +聒>'[clamor]'; +聖>'[holy]'; +聘>'[engage]'; +聚>'[assemble]'; +聞>'[hear]'; +聟>'[son-in-law]'; +聡>'[intelligent]'; +#"聢>'[................................]'", +聨>'[connect]'; +聯>'[connect]'; +聰>'[intelligent]'; +聲>'[sound]'; +聳>'[urge on]'; +聴>'[hear]'; +聶>'[whisper]'; +職>'[duty]'; +聹>'[earwax]'; +聽>'[hear]'; +聾>'[deaf]'; +聿>'[writing brush]'; +肄>'[learn]'; +肅>'[pay respects]'; +肆>'[indulge]'; +肇>'[begin]'; +肉>'[flesh]'; +肋>'[ribs]'; +肌>'[muscle tissue]'; +肓>'[region between heart and diaphragm]'; +肖>'[look like]'; +肘>'[elbow]'; +肚>'[belly]'; +肛>'[anus]'; +肝>'[liver]'; +股>'[thighs]'; +肢>'[human limbs]'; +肥>'[fat]'; +肩>'[shoulders]'; +肪>'[animal fat]'; +肬>'[wart]'; +肭>'[fat]'; +肯>'[willing]'; +肱>'[forearm]'; +育>'[produce]'; +肴>'[cooked or prepared meat]'; +肺>'[lungs]'; +胃>'[stomach]'; +胄>'[helmet]'; +胆>'[gall bladder]'; +背>'[back]'; +胎>'[unborn child]'; +胖>'[fat]'; +胙>'[food offered in sacrificial serv]'; +胚>'[embryo]'; +胛>'[the shoulder]'; +胝>'[callous]'; +胞>'[womb]'; +胡>'[recklessly]'; +胤>'[heir]'; +胥>'[all]'; +胯>'[pelvis]'; +胱>'[bladder]'; +胴>'[the large intestine]'; +胸>'[breast]'; +胼>'[callus]'; +能>'[be able]'; +脂>'[fat]'; +脅>'[ribs]'; +脆>'[crisp]'; +脇>'[ribs]'; +脈>'[blood vessels]'; +脉>'[blood vessels]'; +脊>'[spine]'; 
+脚>'[leg]'; +脛>'[shinbone]'; +脣>'[lips]'; +脩>'[dried meat (used as teachers payment in ancient times)]'; +脯>'[dried meat]'; +脱>'[take off]'; +脳>'[brain]'; +脹>'[swell]'; +脾>'[spleen]'; +腆>'[prosperous]'; +腋>'[armpit]'; +腎>'[kidneys]'; +腐>'[rot]'; +腑>'[bowels]'; +腓>'[calf]'; +腔>'[chest cavity]'; +腕>'[wrist]'; +腟>'[vagina]'; +腥>'[raw meat]'; +腦>'[brain]'; +腫>'[swell]'; +腮>'[lower part of face]'; +腰>'[waist]'; +腱>'[tendons]'; +腴>'[fat]'; +腸>'[intestines]'; +腹>'[stomach]'; +腺>'[gland]'; +腿>'[legs]'; +膀>'[upper arm]'; +膂>'[backbone]'; +膃>'[fat]'; +膈>'[diaphragm]'; +膊>'[shoulders]'; +膏>'[grease]'; +膓>'[intestines]'; +膕>'[hollow]'; +膚>'[skin]'; +膜>'[membrane]'; +膝>'[knee]'; +膠>'[glue]'; +膣>'[vagina]'; +#"膤>'[................................]'", +膨>'[swell]'; +膩>'[greasy]'; +膰>'[cook meat for sacrifice or offer]'; +膳>'[meals]'; +膵>'[pancreas]'; +#"膸>'[................................]'", +膺>'[breast]'; +膽>'[gall bladder]'; +膾>'[minced meat or fish]'; +膿>'[pus]'; +臀>'[buttocks]'; +臂>'[arm]'; +臆>'[chest]'; +臈>'[year end sacrifice]'; +臉>'[face]'; +臍>'[abdominal area of crab]'; +臑>'[soft]'; +臓>'[internal organs]'; +臘>'[year end sacrifice]'; +臙>'[rouge]'; +臚>'[arrange in order]'; +臟>'[internal organs]'; +臠>'[small lump of meat]'; +臣>'[minister]'; +臥>'[lie down]'; +臧>'[good]'; +臨>'[draw near]'; +自>'[self]'; +臭>'[smell]'; +至>'[reach]'; +致>'[send]'; +臺>'[tower]'; +臻>'[reach]'; +臼>'[mortar]'; +臾>'[moment]'; +舁>'[carry on one''s shoulder]'; +舂>'[grind in mortar]'; +舅>'[mother''s brother]'; +與>'[and]'; +興>'[thrive]'; +舉>'[raise]'; +舊>'[old]'; +舌>'[tongue]'; +舍>'[house]'; +舎>'[house]'; +舐>'[lick with tongue]'; +舒>'[open up]'; +舖>'[store]'; +舗>'[store]'; +舘>'[a mansion]'; +舛>'[oppose]'; +舜>'[legendary ruler]'; +舞>'[dance]'; +舟>'[boat]'; +舩>'[boat]'; +航>'[sail]'; +舫>'[fancy boat]'; +般>'[sort]'; +舮>'[bow or prow of boat]'; +舳>'[stern of ship]'; +舵>'[rudder]'; +舶>'[large]'; +舷>'[sides of boat]'; +舸>'[large boat]'; +船>'[ship]'; +艀>'[[not found in dictionary]]'; +艇>'[small boat]'; 
+艘>'[counter for ships]'; +艙>'[hold of ship]'; +艚>'[ship]'; +#"艝>'[................................]'", +艟>'[ancient warship]'; +艢>'[a mast]'; +艤>'[to moor a boat to the bank]'; +艦>'[warship]'; +艨>'[long and narrow war-boat]'; +艪>'[oar]'; +艫>'[bow or prow of boat]'; +艮>'[seventh of eight diagrams]'; +良>'[good]'; +艱>'[difficult]'; +色>'[color]'; +艶>'[beautiful]'; +艷>'[beautiful]'; +艸>'[grass]'; +艾>'[artemisia]'; +芋>'[taro]'; +芍>'[peony]'; +芒>'[miscanthus sinensis]'; +芙>'[hibiscus]'; +芝>'[purplish mushroom thought promot]'; +芟>'[mow]'; +芥>'[mustard plant]'; +芦>'[rushes]'; +芫>'[daphne genkwa]'; +芬>'[fragrance]'; +芭>'[plantain or banana palm]'; +芯>'[pith from rush (juncus effusus)]'; +花>'[flower]'; +芳>'[fragrant]'; +芸>'[rue]'; +芹>'[celery]'; +芻>'[mow]'; +芽>'[bud]'; +苅>'[cut off]'; +苑>'[pasture]'; +苒>'[lush]'; +苓>'[fungus]'; +苔>'[moss]'; +苗>'[sprouts]'; +苙>'[pigsty]'; +苛>'[small]'; +苜>'[clover]'; +苞>'[variety of rush]'; +苟>'[careless]'; +苡>'[barley]'; +苣>'[kind of lettuce]'; +若>'[if]'; +苦>'[bitter]'; +苧>'[china grass]'; +苫>'[rush or straw matting]'; +英>'[petal]'; +#"苳>'[................................]'", +苴>'[sackcloth]'; +苹>'[artemisia]'; +苺>'[berries]'; +苻>'[kind of herb]'; +茂>'[thick]'; +范>'[surname]'; +茄>'[eggplant]'; +茅>'[reeds]'; +茆>'[species of grass]'; +茉>'[white jasmine]'; +茎>'[stem]'; +茖>'[allium victorialis]'; +茗>'[tea]'; +#"茗>'[tea]'", +茜>'[madder]'; +#"茣>'[................................]'", +茨>'[caltrop]'; +茫>'[vast]'; +茯>'[china root]'; +茱>'[dogwood]'; +茲>'[now]'; +茴>'[fennel]'; +茵>'[cushion]'; +茶>'[tea]'; +茸>'[soft]'; +茹>'[roots]'; +荀>'[surname]'; +荅>'[answer: small bean]'; +草>'[grass]'; +荊>'[thorns]'; +荏>'[beans]'; +荐>'[repeat]'; +荒>'[wasteland]'; +荘>'[village]'; +荳>'[beans]'; +#"荵>'[................................]'", +荷>'[lotus]'; +荻>'[reed]'; +荼>'[bitter vegetable]'; +莅>'[attend]'; +#"莇>'[................................]'", +莉>'[white jasmine]'; +莊>'[village]'; +莎>'[kind of sedge grass]'; +莓>'[moss]'; +莖>'[stem]'; +莚>'[bamboo mat]'; 
+莞>'[smiling]'; +#"莟>'[................................]'", +莠>'[weeds]'; +莢>'[pods of leguminous plants]'; +莨>'[herb]'; +莪>'[artemisia]'; +莫>'[do not]'; +莱>'[goosefoot]'; +莵>'[dodder]'; +莽>'[thicket]'; +菁>'[flower of leek family]'; +菅>'[coarse grass]'; +菊>'[chrysanthemum]'; +菌>'[mushroom]'; +菎>'[beautiful jade]'; +菓>'[fruits]'; +菖>'[iris]'; +菘>'[celery]'; +菜>'[vegetables]'; +菟>'[dodder]'; +菠>'[spinach and similar greens]'; +菩>'[herb]'; +菫>'[celery]'; +華>'[flowery]'; +菰>'[wild rice]'; +菱>'[water-chestnut]'; +菲>'[fragrant]'; +菴>'[small buddhist monastery]'; +#"菷>'[................................]'", +菻>'[artemisia]'; +菽>'[beans and peas]'; +萃>'[dense]'; +萄>'[grapes]'; +萇>'[averrhora carambola]'; +萋>'[luxuriant foliage]'; +萌>'[bud]'; +萍>'[duckweed]'; +萎>'[wither]'; +#"萓>'[................................]'", +萠>'[bud]'; +#"萢>'[................................]'", +萩>'[scandent hop]'; +#"萪>'[................................]'", +萬>'[ten thousand]'; +萱>'[day-lily]'; +萵>'[lettuce]'; +萸>'[dogwood]'; +萼>'[calyx of flower]'; +落>'[fall]'; +葆>'[reserve]'; +葉>'[leaf]'; +#"葎>'[................................]'", +著>'[manifest]'; +葛>'[edible bean]'; +葡>'[grapes]'; +葢>'[cover]'; +董>'[direct]'; +葦>'[reed]'; +葩>'[flowers]'; +葫>'[bottle-gourd]'; +葬>'[bury]'; +葭>'[bulrush]'; +#"葭>'[bulrush]'", +葯>'[leaf of angelica plant]'; +葱>'[scallions]'; +葵>'[sunflower]'; +葷>'[meat diet]'; +葹>'[kind of chrysanthemum]'; +葺>'[thatch]'; +蒂>'[peduncle or stem of plants]'; +#"蒄>'[................................]'", +蒋>'[surname]'; +蒐>'[collect]'; +蒔>'[transplant]'; +蒙>'[cover]'; +蒜>'[garlic]'; +蒟>'[betel pepper]'; +蒡>'[burdock]'; +蒭>'[to cutgrass]'; +蒲>'[type of rush]'; +蒸>'[steam]'; +蒹>'[reed]'; +蒻>'[rushes]'; +蒼>'[blue]'; +蒿>'[mugwort]'; +蓁>'[abundant]'; +蓄>'[store]'; +蓆>'[straw mat]'; +蓉>'[hibiscus]'; +蓊>'[luxuriant vegetation]'; +蓋>'[cover]'; +蓍>'[milfoil]'; +蓐>'[straw bed mat]'; +蓑>'[rain coat made of straw]'; +蓖>'[castor-oil plant]'; +#"蓙>'[................................]'", +蓚>'[oxalic 
(used in compounds)]'; +蓬>'[type of raspberry]'; +蓮>'[lotus]'; +蓴>'[edible water plant]'; +蓼>'[smartweed]'; +蓿>'[clover]'; +蔀>'[screen]'; +蔆>'[water-chestnut]'; +蔑>'[disdain]'; +蔓>'[creeping plants]'; +蔔>'[radish]'; +蔕>'[peduncle or stem of plants]'; +蔗>'[sugar cane]'; +蔘>'[ginsen]'; +蔚>'[luxuriant]'; +蔟>'[frame on which silkworms spin]'; +蔡>'[surname]'; +蔦>'[parasitic plants]'; +蔬>'[vegetables]'; +蔭>'[shade]'; +蔵>'[hide]'; +蔽>'[cover]'; +#"蕀>'[................................]'", +蕁>'[nettle]'; +蕃>'[foreign things]'; +蕈>'[mushrooms]'; +蕉>'[banana]'; +蕊>'[unopened flowers]'; +蕋>'[unopened flowers]'; +蕎>'[buckwheat]'; +蕕>'[caryopteris divaricata]'; +#"蕗>'[................................]'", +蕘>'[fuel]'; +蕚>'[calyx of flower]'; +蕣>'[hibiscus]'; +蕨>'[pteris aquilina]'; +蕩>'[pond]'; +蕪>'[luxurious growth of weeds]'; +蕭>'[common artemisia]'; +蕷>'[yam]'; +蕾>'[buds]'; +薀>'[the hippuris or mare''s tail plant]'; +薄>'[thin]'; +薇>'[osmunda regalis]'; +薈>'[luxuriant]'; +薊>'[circium]'; +薐>'[spinach]'; +薑>'[ginger]'; +薔>'[rose]'; +薗>'[garden]'; +薙>'[weed]'; +薛>'[kind of marsh grass]'; +薜>'[evergreen shrubs]'; +薤>'[allium bakeri]'; +薦>'[offer]'; +薨>'[death of prince]'; +薩>'[transliteration of \"sat\" of boddhisattva etc.]'; +薪>'[fuel]'; +薫>'[a medicinal herb]'; +薬>'[drugs]'; +薮>'[marsh]'; +薯>'[yam]'; +薹>'[cyperus rotundus]'; +薺>'[water-chestnuts]'; +藁>'[straw]'; +藉>'[mat]'; +藍>'[blue]'; +藏>'[hide]'; +藐>'[disregard]'; +藕>'[lotus root]'; +藜>'[chenopodium album]'; +藝>'[art]'; +藤>'[rattan]'; +藥>'[drugs]'; +藩>'[fence]'; +藪>'[marsh]'; +藷>'[yam]'; +藹>'[lush]'; +藺>'[rush used in making mats]'; +藻>'[splendid]'; +藾>'[shade]'; +蘂>'[stamen or pistil]'; +蘆>'[rushes]'; +蘇>'[thyme]'; +蘊>'[collect]'; +蘋>'[apple]'; +蘓>'[thyme]'; +蘖>'[stump]'; +蘗>'[stump]'; +蘚>'[moss]'; +蘢>'[tall grass]'; +蘭>'[orchid]'; +蘯>'[to toss about]'; +#"蘯>'[to toss about]'", +蘿>'[type of creeping plant]'; +虍>'[tiger]'; +虎>'[tiger]'; +虐>'[cruel]'; +虔>'[act with reverence]'; +處>'[place]'; +虚>'[false]'; +虜>'[capture]'; 
+虞>'[concerned about]'; +號>'[mark]'; +虧>'[lose]'; +虫>'[insects]'; +虱>'[louse]'; +虹>'[rainbow]'; +虻>'[horsefly]'; +蚊>'[mosquito]'; +蚋>'[gnat]'; +蚌>'[oysters]'; +蚓>'[earthworm]'; +蚕>'[silkworms]'; +蚣>'[centipede]'; +蚤>'[flea]'; +蚩>'[worm]'; +蚪>'[tadpole]'; +蚫>'[abalone]'; +蚯>'[earthworm]'; +蚰>'[millipede]'; +蚶>'[kind of clam]'; +蛄>'[mole cricket]'; +蛆>'[maggots]'; +蛇>'[snake]'; +蛉>'[dragonfly]'; +蛋>'[eggs]'; +蛍>'[glow-worm]'; +蛎>'[oyster]'; +蛔>'[tapeworm]'; +蛙>'[frog]'; +蛛>'[spider]'; +蛞>'[snail]'; +蛟>'[scaly dragon with four legs]'; +蛤>'[clam]'; +蛩>'[cricket]'; +#"蛩>'[cricket]'", +蛭>'[leech]'; +蛮>'[barbarians]'; +蛯>'[shrimp]'; +蛸>'[long legged spider]'; +蛹>'[chrysalis]'; +蛻>'[molt]'; +蛾>'[moth]'; +蜀>'[name of an ancient state]'; +蜂>'[bee]'; +蜃>'[marine monster which can change its shape]'; +蜆>'[a variety of bivalves]'; +蜈>'[centipede]'; +蜉>'[mayfly]'; +蜊>'[clam]'; +蜍>'[toad]'; +蜑>'[egg]'; +蜒>'[millipede]'; +蜘>'[spider]'; +蜚>'[cockroach]'; +蜜>'[honey]'; +蜥>'[lizard]'; +蜩>'[cicada]'; +蜴>'[lizard]'; +蜷>'[creep like worm]'; +蜻>'[dragonfly]'; +蜿>'[creep]'; +蝉>'[cicada]'; +蝋>'[wax]'; +蝌>'[tadpole]'; +蝎>'[scorpion]'; +蝓>'[snail]'; +蝕>'[nibble away]'; +蝗>'[kind of locust]'; +蝙>'[bat]'; +蝟>'[hedgehog]'; +蝠>'[kind of bat]'; +蝣>'[mayfly]'; +蝦>'[shrimp]'; +蝨>'[louse]'; +#"蝨>'[louse]'", +蝮>'[venomous snake]'; +蝴>'[butterfly]'; +蝶>'[butterfly]'; +蝸>'[snail]'; +蝿>'[flies]'; +螂>'[mantis]'; +融>'[melt]'; +螟>'[kind of caterpillar]'; +螢>'[glow-worm]'; +螫>'[poison]'; +螯>'[nippers]'; +螳>'[mantis]'; +螺>'[spiral shell]'; +螻>'[gryllotalpa africana]'; +螽>'[katydid]'; +蟀>'[cricket]'; +蟄>'[to hibernate]'; +蟆>'[frog]'; +蟇>'[frog]'; +蟋>'[cricket]'; +#"蟋>'[cricket]'", +蟒>'[python]'; +蟠>'[coil]'; +蟯>'[worms]'; +蟲>'[worms]'; +蟶>'[razor clam]'; +蟷>'[mantis]'; +蟹>'[crab]'; +蟻>'[ants]'; +蟾>'[toad]'; +蠅>'[flies]'; +蠍>'[scorpion]'; +蠎>'[python]'; +蠏>'[crab]'; +蠑>'[lizard]'; +蠕>'[eumenes polifomis]'; +蠖>'[measuring worm]'; +蠡>'[wood-boring insect]'; +蠢>'[wriggle]'; +蠣>'[oyster]'; +蠧>'[moth]'; 
+蠱>'[posion]'; +蠶>'[silkworms]'; +蠹>'[moth]'; +蠻>'[barbarians]'; +血>'[blood]'; +衂>'[to be defeated]'; +衄>'[epistaxis]'; +衆>'[multitude]'; +行>'[go]'; +衍>'[overflow]'; +衒>'[brag]'; +術>'[art]'; +街>'[street]'; +衙>'[public office]'; +衛>'[guard]'; +衝>'[rush against]'; +衞>'[guard]'; +衡>'[measure]'; +衢>'[highway]'; +衣>'[clothes]'; +表>'[show]'; +衫>'[shirt]'; +衰>'[decline]'; +衲>'[mend]'; +衵>'[chemise]'; +衷>'[heart]'; +衽>'[lapel]'; +衾>'[coverlet]'; +衿>'[collar or lapel of garment]'; +袁>'[robe]'; +袂>'[sleeves]'; +袈>'[buddhist cassock]'; +袋>'[pocket]'; +袍>'[long gown]'; +袒>'[strip]'; +袖>'[sleeve]'; +袗>'[unlined garments]'; +#"袙>'[................................]'", +袞>'[ceremonial dress worn by emperor]'; +袢>'[robe]'; +袤>'[longitude]'; +被>'[passive indicator \"by\"]'; +#"袮>'[................................]'", +#"袰>'[................................]'", +袱>'[piece of cloth used wrap bundles]'; +袴>'[pants]'; +袵>'[lapel]'; +袷>'[lined garment]'; +袿>'[gown]'; +裁>'[cut out]'; +裂>'[split]'; +#"裂>'[split]'", +#"裂>'[split]'", +装>'[dress]'; +裏>'[inside]'; +裔>'[progeny]'; +裕>'[abundant]'; +裘>'[fur garments]'; +裙>'[skirt]'; +補>'[mend]'; +裝>'[dress]'; +裟>'[a cassock or robe of a monk]'; +裡>'[inside]'; +裨>'[aid]'; +裲>'[waistcoat]'; +裳>'[clothes]'; +裴>'[surname]'; +裸>'[bare]'; +裹>'[wrap]'; +裼>'[to take off one''s top]'; +製>'[make]'; +裾>'[lapel]'; +褂>'[jacket]'; +#"褃>'[a seam in a garment]'", +複>'[repeat]'; +褊>'[cramped]'; +褌>'[trousers]'; +褐>'[coarse woolen cloth]'; +褒>'[praise]'; +褓>'[swaddling cloth]'; +褝>'[unlined garment]'; +#"褞>'[................................]'", +褥>'[mattress]'; +褪>'[strip]'; +褫>'[strip]'; +褶>'[pleat]'; +褸>'[lapel]'; +褻>'[dirty]'; +襁>'[swaddling clothes]'; +襃>'[commend]'; +襄>'[aid]'; +襌>'[unlined garment]'; +襍>'[mixed]'; +襖>'[outer garments]'; +襞>'[fold]'; +襟>'[lapel]'; +襠>'[crotch or seat of pants]'; +襤>'[ragged]'; +襦>'[short coat]'; +襪>'[socks]'; +襭>'[tuck up hem of garment and wrap]'; +襯>'[underwear]'; +襲>'[raid]'; +襴>'[a one piece garment]'; 
+#"襶>'[ignorant]'", +襾>'[cover]'; +西>'[west(ern)]'; +要>'[necessary]'; +覃>'[reach to]'; +覆>'[cover]'; +覇>'[rule by might rather than right]'; +覈>'[investigate]'; +覊>'[halter]'; +見>'[see]'; +規>'[rules]'; +覓>'[seek]'; +視>'[look at]'; +覗>'[peek]'; +覘>'[peek]'; +覚>'[wake up from sleep]'; +覡>'[wizard]'; +覦>'[desire strongly]'; +覧>'[look at]'; +覩>'[see]'; +親>'[relatives]'; +覬>'[covet]'; +覯>'[meet or see unexpectedly]'; +覲>'[have imperial audience]'; +観>'[see]'; +覺>'[wake up from sleep]'; +覽>'[look at]'; +覿>'[see]'; +觀>'[see]'; +角>'[horn]'; +觚>'[jug]'; +觜>'[beak]'; +觝>'[gore]'; +解>'[loosen]'; +触>'[butt]'; +觧>'[loosen]'; +觴>'[wine vessel]'; +觸>'[touch]'; +言>'[words]'; +訂>'[draw up agreement]'; +訃>'[obituary]'; +計>'[plan]'; +訊>'[inquire]'; +訌>'[confusion]'; +討>'[ask for]'; +訐>'[expose other''s secrets]'; +訓>'[teach]'; +訖>'[finish]'; +託>'[entrust]'; +記>'[record]'; +訛>'[swindle]'; +訝>'[express surprise]'; +訟>'[accuse]'; +訣>'[take leave of]'; +訥>'[slow of speech]'; +訪>'[visit]'; +設>'[build]'; +許>'[allow]'; +訳>'[translate]'; +訴>'[accuse]'; +訶>'[scold loudly]'; +診>'[examine patient]'; +註>'[explain]'; +証>'[prove]'; +詁>'[exegesis]'; +詆>'[slander]'; +詈>'[scold]'; +詐>'[cheat]'; +詑>'[cheat]'; +詒>'[bequeath]'; +詔>'[decree]'; +評>'[appraise]'; +詛>'[curse]'; +詞>'[words]'; +詠>'[sing]'; +詢>'[inquire into]'; +詣>'[reach]'; +試>'[test]'; +詩>'[poetry]'; +詫>'[be surprised]'; +詬>'[abuse]'; +詭>'[deceive]'; +詮>'[explain]'; +詰>'[question]'; +話>'[speech]'; +該>'[should]'; +詳>'[detailed]'; +詼>'[tease]'; +誂>'[tempt]'; +誄>'[eulogize]'; +誅>'[execute]'; +誇>'[exaggerate]'; +誉>'[fame]'; +誌>'[write down]'; +認>'[recognize]'; +誑>'[deceive]'; +誓>'[swear]'; +誕>'[bear children]'; +誘>'[persuade]'; +誚>'[criticize]'; +語>'[language]'; +誠>'[sincere]'; +誡>'[warn]'; +誣>'[make false accusation]'; +誤>'[err]'; +誥>'[inform]'; +誦>'[recite]'; +誨>'[teach]'; +説>'[speak]'; +読>'[read]'; +誰>'[who? whom? whose? 
anyone?]'; +課>'[lesson]'; +誹>'[slander]'; +誼>'[friendship]'; +調>'[transfer]'; +諂>'[flatter]'; +諄>'[patient]'; +談>'[talk]'; +請>'[ask]'; +諌>'[remonstrate]'; +諍>'[to expostulate]'; +諏>'[consult]'; +諒>'[excuse]'; +論>'[debate]'; +諚>'[(kokuji) command]'; +諛>'[flatter]'; +諜>'[intelligence report]'; +諞>'[brag]'; +諠>'[noisy]'; +諡>'[posthumous name]'; +諢>'[jokes]'; +諤>'[honest speech]'; +諦>'[careful]'; +諧>'[harmonize]'; +諫>'[remonstrate]'; +諭>'[proclaim]'; +諮>'[consult]'; +諱>'[conceal]'; +諳>'[versed in]'; +諷>'[recite]'; +諸>'[several]'; +諺>'[proverb]'; +諾>'[promise]'; +謀>'[plan]'; +謁>'[visit]'; +謂>'[say]'; +謄>'[copy]'; +謇>'[stutter]'; +謌>'[slander]'; +謎>'[riddle]'; +謐>'[calm]'; +謔>'[jeer]'; +謖>'[rise up]'; +謗>'[slander]'; +謙>'[humble]'; +謚>'[to confer posthumous titles]'; +講>'[explain]'; +謝>'[thank]'; +謠>'[sing]'; +謡>'[sing]'; +謦>'[t speak softly]'; +謨>'[scheme]'; +謫>'[charge]'; +謬>'[error]'; +謳>'[sing]'; +謹>'[prudent]'; +謾>'[deceive]'; +譁>'[noise]'; +證>'[proof]'; +譌>'[false]'; +譎>'[cunning]'; +譏>'[ridicule]'; +譖>'[to slander]'; +識>'[recognize]'; +譚>'[surname]'; +譛>'[to slander]'; +譜>'[register]'; +譟>'[clamor]'; +警>'[guard]'; +譫>'[talkative]'; +譬>'[metaphor]'; +譯>'[translate]'; +議>'[consult]'; +#"譱>'[................................]'", +譲>'[allow]'; +譴>'[reprimand]'; +護>'[protect]'; +譽>'[fame]'; +讀>'[read]'; +讃>'[praise]'; +變>'[change]'; +讌>'[feast]'; +讎>'[enemy]'; +讐>'[enemy]'; +讒>'[slander]'; +讓>'[allow]'; +讖>'[prophecy]'; +讙>'[cheer]'; +讚>'[praise]'; +谷>'[valley]'; +谺>'[the mouth of a valley]'; +谿>'[valley]'; +豁>'[open up]'; +豆>'[beans]'; +豈>'[how? 
what?]'; +豊>'[abundant]'; +豌>'[peas]'; +豎>'[vertical]'; +豐>'[abundant]'; +豕>'[pigs]'; +豚>'[small pig]'; +象>'[elephant]'; +豢>'[domestic animals]'; +豪>'[brave]'; +豫>'[relaxed]'; +豬>'[pig]'; +豸>'[radical 153]'; +豹>'[leopard]'; +豺>'[wolf]'; +豼>'[fox]'; +貂>'[marten]'; +貅>'[brave]'; +貉>'[badger]'; +貊>'[leopard]'; +貌>'[countenance]'; +貍>'[a fox-like animal]'; +貎>'[lion]'; +貔>'[fox]'; +貘>'[the panther]'; +貝>'[sea shell]'; +貞>'[virtuous]'; +負>'[load]'; +財>'[wealth]'; +貢>'[offer tribute]'; +貧>'[poor]'; +貨>'[goods]'; +販>'[peddler]'; +貪>'[greedy]'; +貫>'[string of 1000 coins]'; +責>'[one''s responsibility]'; +貭>'[matter]'; +貮>'[number two]'; +貯>'[store]'; +貰>'[borrow]'; +貲>'[property]'; +貳>'[number two]'; +貴>'[expensive]'; +貶>'[decrease]'; +買>'[buy]'; +貸>'[lend]'; +費>'[expenses]'; +貼>'[paste to]'; +貽>'[give to]'; +貿>'[trade]'; +賀>'[congratulate]'; +賁>'[forge ahead]'; +賂>'[bribe]'; +賃>'[rent]'; +賄>'[bribe]'; +資>'[property]'; +賈>'[surname]'; +賊>'[thief]'; +賍>'[booty]'; +賎>'[mean]'; +賑>'[relieve]'; +賓>'[guest]'; +賚>'[give]'; +賛>'[help]'; +賜>'[give]'; +賞>'[reward]'; +賠>'[indemnify]'; +賢>'[virtuous]'; +賣>'[sell]'; +賤>'[mean]'; +賦>'[tax]'; +質>'[matter]'; +賭>'[bet]'; +賺>'[make money]'; +賻>'[gift of money help pay funeral]'; +購>'[buy]'; +賽>'[compete]'; +贄>'[gift superior]'; +贅>'[unnecessary]'; +贇>'[affable]'; +贈>'[give present]'; +贊>'[help]'; +贋>'[false]'; +贍>'[support]'; +贏>'[win]'; +贐>'[farewell present]'; +贓>'[booty]'; +贔>'[strong]'; +贖>'[buy]'; +赤>'[red]'; +赦>'[forgive]'; +赧>'[blush]'; +赫>'[bright]'; +赭>'[reddish brown]'; +走>'[walk]'; +赱>'[walk]'; +赳>'[grand]'; +赴>'[go to]'; +起>'[rise]'; +趁>'[take advantage of]'; +超>'[jump over]'; +越>'[exceed]'; +趙>'[surname]'; +趣>'[what attracts one''s attention]'; +趨>'[hasten]'; +足>'[foot]'; +趺>'[sit cross-legged]'; +趾>'[toe]'; +跂>'[creeping]'; +跋>'[go by foot]'; +跌>'[stumble]'; +跏>'[sit cross-legged]'; +跖>'[sole (of the foot)]'; +跚>'[stagger]'; +跛>'[lame]'; +距>'[distance]'; +跟>'[heel]'; +跡>'[search]'; +跣>'[bare footed]'; +跨>'[straddle]'; 
+跪>'[kneel]'; +跫>'[sound of footsteps]'; +路>'[road]'; +跳>'[jump]'; +践>'[trample]'; +跼>'[bent]'; +#"跿>'[................................]'", +踈>'[neglect]'; +踉>'[hop]'; +踊>'[leap]'; +踏>'[step on]'; +踐>'[trample]'; +踝>'[ankle]'; +踞>'[crouch]'; +踟>'[hesitate]'; +踪>'[footprints]'; +踰>'[exceed]'; +踴>'[leap]'; +踵>'[heel]'; +蹂>'[trample under foot]'; +蹄>'[hoof]'; +蹇>'[lame]'; +蹈>'[stamp feet]'; +蹉>'[error]'; +蹊>'[footpath]'; +蹌>'[walk rapidly]'; +蹐>'[take short steps]'; +蹕>'[clear way]'; +蹙>'[urgent]'; +蹟>'[trace]'; +蹠>'[step on]'; +蹣>'[to jump over]'; +蹤>'[footprints]'; +蹲>'[squat]'; +蹴>'[kick]'; +蹶>'[stumble]'; +蹼>'[webbed feet of waterfowl]'; +躁>'[tense]'; +躄>'[cripple]'; +躅>'[walk carefully]'; +躇>'[hesitate]'; +躊>'[hesitate]'; +躋>'[ascend]'; +躍>'[skip]'; +躑>'[waver]'; +躓>'[stumble]'; +躔>'[follow in]'; +躙>'[trample down]'; +躡>'[tread]'; +躪>'[trample down]'; +身>'[body]'; +躬>'[body]'; +躯>'[body]'; +躰>'[body]'; +躱>'[hide]'; +#"躺>'[lie down]'", +#"軄>'[to govern]'", +軆>'[body]'; +#"軆>'[body]'", +車>'[cart]'; +軋>'[crush by weight]'; +軌>'[track]'; +軍>'[army]'; +軒>'[carriage]'; +軛>'[yoke]'; +軟>'[soft]'; +転>'[shift]'; +軣>'[rumble]'; +軫>'[cross board at rear of carriage]'; +軸>'[axle]'; +軻>'[axle]'; +軼>'[rush forth]'; +軽>'[light]'; +軾>'[horizontal wooden bar in front of a sedan chair]'; +較>'[compare]'; +輅>'[chariot]'; +載>'[load]'; +輊>'[low rear portion of cart]'; +輌>'[numerary adjunct for vehicles]'; +輒>'[sides of chariot where weapons]'; +輓>'[mourn]'; +輔>'[cheek bone]'; +輕>'[light]'; +輙>'[sides of chariot were weapons]'; +輛>'[numerary adjunct for vehicles]'; +輜>'[supply cart]'; +輝>'[brightness]'; +輟>'[stop]'; +輦>'[hand-cart]'; +輩>'[generation]'; +輪>'[wheel]'; +輯>'[gather up]'; +輳>'[hubs of wheel]'; +輸>'[transport]'; +輹>'[two pieces of wood underneath ca]'; +輻>'[spokes of wheel]'; +輾>'[turn over]'; +輿>'[cart]'; +轂>'[hub of wheel]'; +轄>'[linchpin of wheel]'; +轅>'[axle]'; +轆>'[windlass]'; +轉>'[shift]'; +轌>'[sled]'; +轍>'[wagon ruts]'; +轎>'[sedan-chair]'; +轗>'[fail]'; +轜>'[hearse]'; 
+轟>'[rumble]'; +轡>'[bridle of horse]'; +轢>'[run over something with vehicle]'; +#"轣>'[................................]'", +轤>'[pulley]'; +辛>'[bitter]'; +辜>'[crime]'; +辞>'[words]'; +辟>'[law]'; +辣>'[peppery]'; +辧>'[manage]'; +辨>'[distinguish]'; +辭>'[words]'; +辮>'[braid]'; +辯>'[dispute]'; +辰>'[early morning]'; +辱>'[humiliate]'; +農>'[agriculture]'; +辷>'[smooth]'; +辺>'[edge]'; +辻>'[crossroads]'; +込>'[crowd into]'; +辿>'[follow]'; +迂>'[doctrinaire]'; +迄>'[extend]'; +迅>'[quick]'; +迎>'[receive]'; +近>'[near]'; +返>'[return]'; +迚>'[very]'; +迢>'[far]'; +迥>'[distant]'; +迦>'[character for transliteration]'; +迩>'[be near]'; +迪>'[enlighten]'; +迫>'[coerce]'; +迭>'[repeatedly]'; +迯>'[escape]'; +述>'[narrate]'; +迴>'[revolve]'; +迷>'[bewitch]'; +迸>'[gush out]'; +迹>'[traces]'; +迺>'[then]'; +追>'[pursue]'; +退>'[step back]'; +送>'[see off]'; +逃>'[escape]'; +逅>'[meet unexpectedly]'; +逆>'[disobey]'; +逋>'[flee]'; +逍>'[ramble]'; +逎>'[strong]'; +透>'[penetrate]'; +逐>'[chase]'; +逑>'[collect]'; +逓>'[hand over]'; +途>'[way]'; +逕>'[pass by]'; +逖>'[far]'; +逗>'[tempt]'; +這>'[this]'; +通>'[pass through]'; +逝>'[pass away]'; +逞>'[indulge oneself]'; +速>'[quick]'; +造>'[construct]'; +逡>'[retreat]'; +逢>'[come upon]'; +連>'[join]'; +#"逦>'[meandering]'", +逮>'[seize]'; +週>'[week]'; +進>'[advance]'; +逵>'[thoroughfare]'; +逶>'[winding]'; +逸>'[flee]'; +逹>'[arrive at]'; +逼>'[compel]'; +逾>'[go over]'; +遁>'[hide]'; +遂>'[comply with]'; +遅>'[late]'; +遇>'[meet]'; +遉>'[spy]'; +遊>'[wander]'; +運>'[luck]'; +遍>'[everywhere]'; +過>'[pass]'; +遏>'[stop]'; +遐>'[afar]'; +遑>'[leisure]'; +遒>'[strong]'; +道>'[path]'; +達>'[arrive at]'; +違>'[disobey]'; +#"違>'[disobey]'", +遘>'[to meet]'; +遙>'[far away]'; +遜>'[humble]'; +遞>'[hand over]'; +遠>'[distant]'; +遡>'[go upstream]'; +遣>'[send]'; +遥>'[far away]'; +遨>'[ramble]'; +適>'[match]'; +遭>'[come across]'; +遮>'[cover]'; +遯>'[deceive]'; +遲>'[late]'; +遵>'[obey]'; +遶>'[entwine]'; +遷>'[move]'; +選>'[choose]'; +遺>'[lose]'; +遼>'[distant]'; +遽>'[suddenly]'; +避>'[avoid]'; +邀>'[invite]'; +邁>'[take a big 
stride]'; +邂>'[unexpected meeting]'; +邃>'[profound]'; +還>'[still]'; +邇>'[be near]'; +邉>'[edge]'; +邊>'[edge]'; +邏>'[patrol]'; +邑>'[area]'; +那>'[that]'; +邦>'[nation]'; +邨>'[village]'; +邪>'[wrong]'; +邯>'[city in hebei province]'; +邱>'[surname]'; +邵>'[surname]'; +邸>'[official residence]'; +郁>'[sweet smelling]'; +郊>'[suburbs]'; +郎>'[gentleman]'; +郛>'[outer walls of city]'; +郡>'[administrative division]'; +郢>'[state in today''s hubei province]'; +郤>'[crack]'; +部>'[part]'; +郭>'[outer part (of a city)]'; +郵>'[postal]'; +郷>'[country]'; +都>'[metropolis]'; +鄂>'[hubei province]'; +鄒>'[name of an ancient state]'; +鄙>'[mean]'; +鄭>'[state in today''s henan]'; +鄰>'[neighbor]'; +鄲>'[county in hebei proincev]'; +酉>'[tenth in series of twelve cyclic]'; +酊>'[drunk]'; +酋>'[chief of tribe]'; +酌>'[serve wine]'; +配>'[match]'; +酎>'[double-fermented wine]'; +酒>'[wine]'; +酔>'[intoxicated]'; +酖>'[wine with bird poison added]'; +#"酘>'[................................]'", +酢>'[toast one''s host with wine]'; +酣>'[enjoy intoxicants]'; +酥>'[butter]'; +酩>'[drunk]'; +酪>'[cream]'; +酬>'[toast]'; +酲>'[hangover]'; +#"酲>'[hangover]'", +酵>'[yeast]'; +酷>'[strong]'; +酸>'[tart]'; +#"醁>'[a kind of green-colored wine]'", +醇>'[rich]'; +醉>'[intoxicated]'; +醋>'[vinegar]'; +醍>'[essential oil of butter]'; +醐>'[purest cream]'; +醒>'[wake up]'; +醗>'[to brew for the second time]'; +醜>'[ugly looking]'; +醢>'[minced pickled meat]'; +醤>'[any jam-like or paste-like food]'; +醪>'[unclear wine]'; +醫>'[cure]'; +醯>'[vinegar]'; +醴>'[sweet wine]'; +醵>'[contribute for drinks]'; +醸>'[brew]'; +醺>'[get drunk]'; +釀>'[brew]'; +釁>'[smear with blood in sacrifice]'; +釆>'[distinguish]'; +采>'[collect]'; +釈>'[interprete]'; +釉>'[glaze]'; +釋>'[interprete]'; +里>'[unit of distance]'; +重>'[heavy]'; +野>'[open country]'; +量>'[measure]'; +釐>'[manage]'; +金>'[gold]'; +釖>'[knife]'; +釘>'[nail]'; +#"釛>'[................................]'", +釜>'[cauldron]'; +針>'[needle]'; +#"釟>'[................................]'", +釡>'[cauldron]'; +釣>'[fish]'; 
+釦>'[button]'; +釧>'[bracelet]'; +釵>'[ornamental hairpin]'; +#"釶>'[................................]'", +釼>'[sword]'; +#"釿>'[................................]'", +鈍>'[blunt]'; +鈎>'[hook]'; +鈑>'[plate]'; +鈔>'[paper money]'; +鈕>'[button]'; +鈞>'[unit of measure equivalent thirt]'; +鈩>'[fireplace]'; +鈬>'[bell]'; +鈴>'[bell]'; +鈷>'[cobalt]'; +鈿>'[hairpin]'; +鉄>'[iron]'; +鉅>'[steel]'; +鉈>'[thallium]'; +鉉>'[device for carrying a tripod]'; +鉋>'[carpenter''s plane]'; +#"鉐>'[................................]'", +鉗>'[pincers]'; +鉚>'[rivet]'; +鉛>'[lead plumbum]'; +鉞>'[broad-axe]'; +鉢>'[earthenware basin]'; +鉤>'[hook]'; +鉦>'[kind of gong used in ancient tim]'; +鉱>'[mine]'; +鉾>'[spear]'; +銀>'[silver]'; +銃>'[ancient weapon]'; +銅>'[copper]'; +銑>'[mill]'; +銓>'[weigh]'; +銕>'[iron]'; +銖>'[unit of weight]'; +銘>'[inscribe]'; +銚>'[large hoe]'; +銛>'[sharp]'; +銜>'[bit]'; +銭>'[money]'; +銷>'[melt]'; +銹>'[rust]'; +鋏>'[tongs]'; +鋒>'[point of spear]'; +鋤>'[hoe]'; +鋩>'[point of sword]'; +鋪>'[spread out]'; +鋭>'[sharp]'; +鋲>'[rivet]'; +鋳>'[melt]'; +鋸>'[a saw]'; +#"鋺>'[................................]'", +鋼>'[steel]'; +錆>'[the color of a mineral]'; +錏>'[soft steel]'; +錐>'[gimlet]'; +錘>'[balance weight on scale]'; +錙>'[8 oz]'; +錚>'[clanging sound]'; +錠>'[spindle]'; +錢>'[money]'; +#"錣>'[................................]'", +錦>'[brocade]'; +錨>'[anchor]'; +錫>'[tin]'; +錬>'[smelt metals]'; +錮>'[run metal into cracks]'; +錯>'[error]'; +録>'[copy]'; +#"錵>'[................................]'", +#"錺>'[................................]'", +#"錻>'[................................]'", +#"鍄>'[................................]'", +鍋>'[cooking-pot]'; +鍍>'[plate]'; +鍔>'[high]'; +#"鍖>'[................................]'", +鍛>'[forge metal]'; +#"鍜>'[................................]'", +鍠>'[weapon]'; +鍬>'[shovel]'; +鍮>'[brass]'; +鍵>'[door bolt]'; +鍼>'[needle]'; +鍾>'[cup]'; +鎌>'[sickle]'; +鎔>'[fuse]'; +鎖>'[lock]'; +鎗>'[rifle]'; +鎚>'[hammer]'; +鎧>'[armor]'; +鎬>'[stove]'; +鎭>'[town]'; +鎮>'[town]'; +鎰>'[measure of weight 
for gold]'; +#"鎹>'[................................]'", +鏃>'[arrowhead]'; +鏈>'[chain]'; +鏐>'[pure gold]'; +鏑>'[dysprosium the barb of an arrow]'; +鏖>'[fight end]'; +鏗>'[strike]'; +鏘>'[tinkle]'; +鏝>'[trowel]'; +鏡>'[mirror]'; +鏤>'[carve]'; +鏥>'[rust]'; +鏨>'[engraving tool]'; +鐃>'[cymbals]'; +鐇>'[vanadium]'; +鐐>'[fetters]'; +鐓>'[ferrule]'; +鐔>'[dagger]'; +鐘>'[clock]'; +鐙>'[lamp]'; +鐚>'[soft steel]'; +鐡>'[iron]'; +鐫>'[engraving tool]'; +鐵>'[iron]'; +鐶>'[metal ring]'; +鐸>'[bell]'; +鐺>'[frying pan]'; +#"鑁>'[................................]'", +鑄>'[melt]'; +鑑>'[mirror]'; +鑒>'[mirror]'; +鑓>'[spear]'; +鑚>'[drill]'; +鑛>'[mine]'; +鑞>'[solder]'; +鑠>'[melt]'; +鑢>'[file]'; +鑪>'[fireplace]'; +鑰>'[key]'; +鑵>'[jar]'; +鑷>'[tweezers]'; +鑼>'[gong]'; +鑽>'[drill]'; +鑾>'[bells hung on horse]'; +鑿>'[chisel]'; +钁>'[a mattock]'; +長>'[long]'; +門>'[gate]'; +閂>'[bolt]'; +閃>'[flash]'; +閇>'[shut]'; +閉>'[shut]'; +#"閉>'[shut]'", +開>'[open]'; +閏>'[intercalary]'; +閑>'[fence]'; +間>'[interval]'; +閔>'[mourn]'; +#"閔>'[mourn]'", +閘>'[sluice]'; +閙>'[quarrel]'; +閠>'[intercalary]'; +関>'[frontier pass]'; +閣>'[chamber]'; +閤>'[small side door]'; +閥>'[powerful and influential group]'; +閧>'[boisterous]'; +閨>'[small entrance]'; +閭>'[village of twenty-five families]'; +閲>'[examine]'; +閹>'[castrate]'; +閻>'[village gate]'; +閼>'[block]'; +閾>'[threshold]'; +闃>'[alone]'; +闇>'[close]'; +闊>'[broad]'; +闌>'[door screen]'; +闍>'[tower over city gate]'; +闔>'[close]'; +闕>'[watch tower]'; +闖>'[rush in]'; +闘>'[struggle]'; +關>'[frontier pass]'; +闡>'[explain]'; +闢>'[open]'; +闥>'[door]'; +阜>'[mound]'; +阡>'[footpaths between fields]'; +阨>'[in distress]'; +阪>'[hillside farmland]'; +阮>'[ancient musical instrument: surname]'; +阯>'[foundation]'; +防>'[defend]'; +阻>'[impede]'; +阿>'[prefix to name]'; +陀>'[steep bank]'; +陂>'[dam]'; +附>'[adhere to]'; +陋>'[narrow]'; +陌>'[foot path between rice fields]'; +降>'[descend]'; +#"降>'[descend]'", +限>'[boundary]'; +陛>'[steps leading throne]'; +陜>'[narrow]'; +陝>'[mountain pass]'; +陞>'[promote]'; 
+陟>'[climb]'; +院>'[courtyard]'; +陣>'[column]'; +除>'[eliminate]'; +陥>'[submerge]'; +#"陥>'[submerge]'", +陪>'[accompany]'; +陬>'[corner]'; +陰>'[\"female\" principle]'; +陲>'[frontier]'; +陳>'[exhibit]'; +陵>'[hill]'; +陶>'[pottery]'; +陷>'[submerge]'; +陸>'[land]'; +険>'[narrow pass]'; +陽>'[\"male\" principle]'; +隅>'[corner]'; +隆>'[prosperous]'; +隈>'[cove]'; +隊>'[team]'; +隋>'[Sui dynasty]'; +隍>'[dry ditch]'; +階>'[stairs]'; +随>'[follow]'; +隔>'[separate]'; +隕>'[fall]'; +隗>'[high]'; +隘>'[narrow]'; +隙>'[crack]'; +際>'[border]'; +障>'[separate]'; +#"障>'[separate]'", +隣>'[neighbor]'; +隧>'[tunnel]'; +隨>'[follow]'; +險>'[narrow pass]'; +隰>'[low]'; +隱>'[hide]'; +隲>'[stallion]'; +隴>'[mountain located between shaanxi]'; +隶>'[subservient]'; +隷>'[be subservient to]'; +隸>'[be subservient to]'; +隹>'[bird]'; +隻>'[single]'; +隼>'[aquiline (nose)]'; +雀>'[sparrow]'; +雁>'[wild goose]'; +雄>'[male of species]'; +雅>'[elegant]'; +集>'[assemble]'; +雇>'[employ]'; +雉>'[pheasant]'; +雋>'[superior]'; +雌>'[female]'; +雍>'[harmony]'; +雎>'[osprey]'; +雑>'[mixed]'; +雕>'[engrave]'; +雖>'[although]'; +雙>'[set of two]'; +雛>'[chick]'; +雜>'[mixed]'; +離>'[leave]'; +難>'[difficult]'; +雨>'[rain]'; +雪>'[snow]'; +#"雫>'[................................]'", +雰>'[atmosphere]'; +雲>'[clouds]'; +零>'[zero]'; +雷>'[thunder]'; +雹>'[hail]'; +電>'[electricity]'; +需>'[need]'; +霄>'[sky]'; +霆>'[a sudden peal of thunder]'; +震>'[shake]'; +霈>'[torrential rains]'; +霊>'[spirit]'; +霍>'[quickly]'; +霎>'[light rain]'; +霏>'[falling of snow and rain]'; +霑>'[be moistened]'; +霓>'[rainbow]'; +霖>'[long spell of rain]'; +霙>'[sleet]'; +霜>'[frost]'; +霞>'[rosy clouds]'; +霤>'[drip]'; +霧>'[fog]'; +霪>'[long and heavy rain]'; +霰>'[hail]'; +露>'[dew]'; +霸>'[rule by might rather than right]'; +霹>'[thunder]'; +霽>'[clear up after rain cease be ang]'; +霾>'[misty]'; +靂>'[thunderclap]'; +靄>'[cloudy sky]'; +靆>'[cloudy sky]'; +靈>'[spirit]'; +靉>'[cloudy sky]'; +青>'[blue]'; +靖>'[pacify]'; +静>'[quiet]'; +靜>'[quiet]'; +非>'[not]'; +靠>'[lean on]'; +靡>'[divide]'; +面>'[face]'; 
+#"靤>'[................................]'", +靦>'[timid]'; +靨>'[dimples]'; +革>'[leather]'; +靫>'[strong and pliable]'; +靭>'[strong and pliable]'; +靱>'[strong and pliable]'; +靴>'[boots]'; +#"靹>'[................................]'", +靺>'[stocking]'; +靼>'[tartars]'; +#"鞁>'[................................]'", +鞄>'[to work hides]'; +鞅>'[leather strap over horse''s neck]'; +#"鞅>'[leather strap over horse''s neck]'", +鞋>'[shoes]'; +鞍>'[saddle]'; +鞏>'[bind]'; +#"鞏>'[bind]'", +鞘>'[scabbard]'; +#"鞜>'[................................]'", +鞠>'[bow]'; +鞣>'[tan]'; +鞦>'[leather stap]'; +鞨>'[tribe]'; +鞫>'[interrogate]'; +鞭>'[whip]'; +#"鞳>'[................................]'", +鞴>'[saddle up horse]'; +韃>'[tatars]'; +韆>'[swing]'; +韈>'[socks]'; +韋>'[tanned leather]'; +韓>'[fence]'; +韜>'[sheath]'; +韭>'[scallion]'; +韮>'[scallion]'; +韲>'[break or smash into pieces]'; +音>'[sound]'; +韵>'[rhyme]'; +韶>'[music of emperor Shun]'; +韻>'[rhyme]'; +響>'[make sound]'; +頁>'[page]'; +頂>'[top]'; +頃>'[moment]'; +項>'[neck]'; +順>'[obey]'; +須>'[must]'; +頌>'[laud]'; +頏>'[fly down or downward]'; +預>'[prepare]'; +頑>'[obstinate]'; +頒>'[confer]'; +頓>'[pause]'; +頗>'[lean one side]'; +領>'[neck]'; +頚>'[neck]'; +頡>'[fly upward]'; +頤>'[cheeks]'; +頬>'[cheeks]'; +頭>'[head]'; +頴>'[rice tassel]'; +頷>'[chin]'; +頸>'[neck]'; +頻>'[frequently]'; +頼>'[rely]'; +頽>'[ruined]'; +顆>'[grain]'; +顋>'[lower part of face]'; +題>'[forehead]'; +額>'[forehead]'; +顎>'[jaw]'; +顏>'[face]'; +顔>'[face]'; +顕>'[manifest]'; +願>'[desire]'; +顛>'[top]'; +類>'[class]'; +顧>'[look back]'; +顫>'[shiver]'; +顯>'[manifest]'; +顰>'[frown]'; +顱>'[skull]'; +顳>'[the temporal bone]'; +顴>'[cheek bones]'; +風>'[wind]'; +#"風>'[wind]'", +颯>'[sound of wind]'; +颱>'[taiphoon]'; +颶>'[cyclone]'; +飃>'[whirlwind]'; +飄>'[whirlwind]'; +飆>'[whirlwind]'; +飛>'[fly]'; +飜>'[flip over]'; +食>'[eat]'; +飢>'[hunger]'; +飩>'[stuffed dumplings]'; +飫>'[surfeited]'; +飭>'[order]'; +飮>'[drink]'; +飯>'[cooked rice]'; +飲>'[drink]'; +飴>'[sweet-meats]'; +飼>'[raise animals]'; +飽>'[eat heartily]'; 
+飾>'[decorate]'; +餃>'[stuffed dumplings]'; +餅>'[rice-cakes]'; +餉>'[rations and pay for soldiers]'; +養>'[raise]'; +餌>'[bait]'; +餐>'[eat]'; +餒>'[hungry]'; +餓>'[be hungry]'; +餔>'[eat]'; +餘>'[surplus]'; +#"餝>'[................................]'", +餞>'[farewell party]'; +餠>'[rice-cakes]'; +餡>'[pastry filling]'; +餤>'[incite]'; +館>'[public building]'; +餬>'[porridge]'; +餮>'[a legendary animal]'; +餽>'[make present of food]'; +餾>'[distill]'; +#"饁>'[carry meal to workers in field]'", +饅>'[steamed bread]'; +饉>'[time of famine or crop failure]'; +饋>'[offer food superior]'; +饌>'[feed]'; +饐>'[spoiled]'; +饑>'[starve]'; +饒>'[bountiful]'; +饕>'[gluttonous]'; +饗>'[host banquet]'; +首>'[head]'; +馗>'[cheekbone]'; +馘>'[cut off left ear]'; +香>'[fragrant]'; +馥>'[fragrance]'; +馨>'[fragrant]'; +馬>'[horse]'; +馭>'[drive]'; +馮>'[surname]'; +馳>'[go quickly or swiftly]'; +馴>'[tame]'; +#"馼>'[................................]'", +駁>'[varicolored]'; +#"駃>'[gallop]'", +駅>'[relay station]'; +駆>'[spur horse on]'; +駈>'[spur horse on]'; +駐>'[to be stationed at]'; +駑>'[tired]'; +駒>'[colt]'; +駕>'[drive]'; +駘>'[tired]'; +駛>'[sail]'; +駝>'[camel]'; +駟>'[team of four horses]'; +駢>'[team of horses]'; +駭>'[terrify]'; +駮>'[a kind of fierce animal]'; +駱>'[white horse w. black mane]'; +#"駱>'[white horse w. 
black mane]'", +駸>'[galloping]'; +#"駻>'[................................]'", +駿>'[excellent horse]'; +騁>'[gallop horse]'; +騅>'[piebald horse]'; +騎>'[ride horseback]'; +騏>'[piebald horse]'; +騒>'[harass]'; +験>'[test]'; +騙>'[swindle]'; +騨>'[dappled]'; +騫>'[raise]'; +騰>'[fly]'; +騷>'[harass]'; +騾>'[mule]'; +驀>'[suddenly]'; +驂>'[two outside ones in three horse]'; +驃>'[charger]'; +驅>'[spur horse on]'; +驍>'[excellent horse]'; +驕>'[spirited horse]'; +驗>'[test]'; +驚>'[frighten]'; +驛>'[relay station]'; +驟>'[procedure]'; +驢>'[donkey]'; +驤>'[gallop about with head uplifted]'; +驥>'[thoroughbred horse]'; +驩>'[happy]'; +驪>'[pure black horse]'; +驫>'[horses]'; +骨>'[bone]'; +骭>'[shin bone]'; +骰>'[die]'; +骸>'[skelton]'; +骼>'[bone]'; +髀>'[buttocks]'; +髄>'[bone marrow]'; +髏>'[skull]'; +髑>'[skull]'; +髓>'[bone marrow]'; +體>'[body]'; +高>'[high]'; +髞>'[high]'; +髟>'[hair]'; +髢>'[wig]'; +髣>'[similar to]'; +髦>'[flowing hair of young child]'; +髪>'[hair]'; +髫>'[children''s hair style]'; +髭>'[mustache]'; +髮>'[hair]'; +髯>'[beard]'; +#"髱>'[................................]'", +髴>'[disheveled hair]'; +#"髷>'[................................]'", +髻>'[hair rolled up in a bun]'; +鬆>'[lax]'; +鬘>'[beautiful hair]'; +鬚>'[beard]'; +鬟>'[dress hair in coiled knot]'; +鬢>'[hair on temples]'; +鬣>'[horse''s mane]'; +鬥>'[struggle]'; +鬧>'[quarrel]'; +鬨>'[boisterous]'; +鬩>'[feud]'; +鬪>'[struggle]'; +鬮>'[lots (to be drawn)]'; +鬯>'[sacrificial wine]'; +鬱>'[luxuriant]'; +鬲>'[type of caldron]'; +鬻>'[sell]'; +鬼>'[ghost]'; +魁>'[chief]'; +魂>'[soul]'; +魃>'[drought demon]'; +魄>'[vigor]'; +魅>'[kind of forest demon]'; +魍>'[demons]'; +魎>'[a kind of monster]'; +魏>'[kingdom of wei]'; +魑>'[a montain demon resembling tiger]'; +魔>'[demon]'; +魘>'[nightmare]'; +魚>'[fish]'; +魯>'[foolish]'; +魴>'[bream]'; +#"鮁>'[the bonito]'", +鮎>'[sheatfish]'; +鮑>'[abalone]'; +鮒>'[carp]'; +鮓>'[minced and salted fish]'; +#"鮓>'[minced and salted fish]'", +#"鮓>'[minced and salted fish]'", +鮟>'[anglerfish]'; +鮠>'[a kind of shad with a head like a 
sturgeon]'; +#"鮨>'[................................]'", +鮪>'[kind of sturgeon]'; +鮫>'[shark]'; +鮭>'[salmon]'; +鮮>'[fresh]'; +#"鮴>'[................................]'", +#"鮹>'[................................]'", +鯀>'[giant fish]'; +鯆>'[the skate or ray]'; +鯉>'[carp]'; +鯊>'[shark]'; +鯏>'[a dialect........ name of fish]'; +#"鯑>'[................................]'", +#"鯒>'[................................]'", +鯔>'[mullet]'; +鯖>'[mackerel]'; +鯛>'[pagrosomus major]'; +鯡>'[herring]'; +鯢>'[cryptobranchus japonicus]'; +#"鯣>'[................................]'", +鯤>'[spawn]'; +鯨>'[whale]'; +鯰>'[sheat]'; +#"鯱>'[................................]'", +#"鯱>'[................................]'", +#"鯱>'[................................]'", +#"鰄>'[................................]'", +#"鰆>'[................................]'", +鰈>'[flatfish]'; +鰉>'[sturgeon]'; +#"鰊>'[................................]'", +鰌>'[loach]'; +鰍>'[loach]'; +鰐>'[alligator]'; +鰒>'[abalone]'; +鰓>'[fish gills]'; +#"鰔>'[................................]'", +鰕>'[shrimp]'; +鰛>'[sardine]'; +#"鰡>'[................................]'", +#"鰤>'[................................]'", +鰥>'[huge fish]'; +鰭>'[fin]'; +鰮>'[sardine]'; +#"鰯>'[................................]'", +#"鰯>'[................................]'", +鰲>'[huge sea turtle]'; +鰹>'[skipjack]'; +#"鰺>'[................................]'", +鰻>'[eel]'; +鰾>'[swimming bladder of fish]'; +#"鱆>'[................................]'", +鱇>'[anglerfish]'; +鱈>'[codfish]'; +鱒>'[barbel]'; +鱗>'[fish scales]'; +#"鱘>'[sturgeon]'", +鱠>'[minced fish]'; +鱧>'[snakehead]'; +#"鱶>'[................................]'", +鱸>'[sea perch]'; +鳥>'[bird]'; +鳧>'[wild duck]'; +鳩>'[pigeon]'; +鳫>'[wild goose]'; +鳬>'[wild duck]'; +#"鳰>'[................................]'", +鳳>'[male phoenix]'; +鳴>'[cry of bird or animal]'; +鳶>'[kite]'; +鴃>'[shrike]'; +鴆>'[bird resembling secretary falcon]'; +鴇>'[bustard]'; +鴈>'[wild goose]'; +鴉>'[crow]'; +鴎>'[seagull]'; +鴒>'[species of lark]'; +鴕>'[ostrich]'; 
+鴛>'[male mandarin duck]'; +鴟>'[kite]'; +鴣>'[species of taiwan pigeon]'; +鴦>'[female mandarin duck]'; +鴨>'[duck]'; +鴪>'[swoop]'; +鴫>'[a snipe]'; +鴬>'[oriole]'; +鴻>'[species of wild swan]'; +#"鴾>'[................................]'", +鴿>'[pigeon]'; +#"鵁>'[................................]'", +鵄>'[kite]'; +#"鵄>'[kite]'", +#"鵄>'[kite]'", +#"鵐>'[................................]'", +鵑>'[cuckoo]'; +鵙>'[a shrike]'; +鵜>'[pelican]'; +鵝>'[goose]'; +鵞>'[goose]'; +鵠>'[target]'; +鵡>'[species of parrot]'; +#"鵡>'[species of parrot]'", +鵬>'[fabulous bird of enormous size]'; +鵯>'[bird]'; +鵲>'[magpie]'; +#"鵺>'[................................]'", +鶇>'[thrush]'; +鶉>'[quail]'; +鶏>'[chicken]'; +鶚>'[osprey]'; +#"鶤>'[................................]'", +鶩>'[duck]'; +鶫>'[thrush]'; +鶯>'[oriole]'; +#"鶱>'[soar]'", +鶴>'[crane]'; +鶸>'[[not found in any dictionary]]'; +鶺>'[wagtail]'; +鶻>'[a kind of pigeon]'; +鷁>'[fishhawk bow or prow]'; +鷂>'[sparrow hawk]'; +鷄>'[chicken]'; +鷆>'[bird name]'; +鷏>'[bird name]'; +鷓>'[partridge]'; +鷙>'[hawk]'; +鷦>'[wren]'; +#"鷫>'[turquoise kingfisher]'", +鷯>'[wren]'; +鷲>'[condor]'; +鷸>'[snipe]'; +鷹>'[eagle]'; +鷺>'[heron]'; +鷽>'[oriental bullfinch]'; +鸚>'[species of parrot]'; +鸛>'[crane]'; +鸞>'[fabulous bird]'; +鹵>'[saline soil]'; +鹸>'[alkaline]'; +鹹>'[salty]'; +鹽>'[salt]'; +鹿>'[deer]'; +麁>'[rough]'; +麈>'[species of deer]'; +麋>'[elk]'; +麌>'[stag]'; +麑>'[fawn]'; +麒>'[legendary auspicious animal]'; +麓>'[foot of hill]'; +麕>'[muntjac deer]'; +麗>'[beautiful]'; +麝>'[musk deer]'; +麟>'[female of chinese unicorn]'; +麥>'[wheat]'; +麦>'[wheat]'; +麩>'[bran]'; +麪>'[flour]'; +#"麫>'[flour]'", +麸>'[bran]'; +麹>'[yeast]'; +麺>'[flour]'; +麻>'[hemp]'; +麼>'[interrogative final particle]'; +麾>'[pennant]'; +麿>'[I]'; +黄>'[yellow]'; +黌>'[school]'; +黍>'[glutinous millet]'; +黎>'[surname]'; +黏>'[stick to]'; +黐>'[stick]'; +黒>'[black]'; +黔>'[black]'; +默>'[silent]'; +黙>'[silent]'; +黛>'[blacken eyebrows]'; +黜>'[dismiss]'; +黝>'[black]'; +點>'[dot]'; +黠>'[sly]'; +黥>'[tattooing face]'; +黨>'[political 
party]'; +黯>'[dark]'; +黴>'[mold]'; +黶>'[mole]'; +黷>'[dishonor]'; +黹>'[embroidery]'; +黻>'[special pattern of embroidery]'; +黼>'[embroidered official or sacrific]'; +黽>'[to strive]'; +鼇>'[huge sea turtle]'; +鼈>'[fresh water turtle]'; +鼎>'[large]'; +鼓>'[drum]'; +鼕>'[rattle of drums]'; +鼠>'[rat]'; +鼡>'[rat]'; +鼬>'[weasel]'; +鼻>'[nose]'; +鼾>'[snore loudly]'; +齊>'[even]'; +齋>'[vegetarian diet]'; +齎>'[take in both hands and offer to]'; +齏>'[break or smash into pieces]'; +齒>'[teeth]'; +齔>'[lose baby teeth and get adult te]'; +齟>'[irregular teeth]'; +齠>'[lose baby teeth and get adult teeth]'; +齡>'[age]'; +齢>'[age]'; +齣>'[act]'; +齦>'[gums]'; +齧>'[bite]'; +齪>'[narrow]'; +齬>'[uneven teeth]'; +齲>'[tooth decay]'; +齶>'[palate]'; +齷>'[narrow]'; +龍>'[dragon]'; +龕>'[niche]'; +龜>'[turtle or tortoise]'; +龝>'[autumn]'; +龠>'[flute]'; + +# eof diff --git a/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt new file mode 100644 index 00000000000..7435913f8ff --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt @@ -0,0 +1,6216 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2001, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- +# Date: Tue Jan 23 12:42:03 2001 +#-------------------------------------------------------------------- + +# Kanji-OnRomaji + +丁>Tei; +七>Shichi; +万>Man; +丈>Jou; +三>San; +上>Jou; +下>Ka; +不>Fu; +与>Yo; +丐>Kai; +丑>Chuu; +且>Sha; +丕>Hi; +世>Se; +丗>Sei; +丘>Kyuu; +丙>Hei; +丞>Shou; +両>Ryou; +並>Hei; +个>Ka; +中>Chuu; +丱>Kan; +串>Sen; +丶>Chu; +丸>Gan; +丹>Tan; +主>Shu; +丼>Sei; +丿>Hetsu; +乂>Gai; +乃>Dai; +久>Kyuu; +之>Shi; +乍>Saku; +乎>Ko; +乏>Bou; +乕>Ko; +乖>Kai; +乗>Jou; +乘>Jou; +乙>Otsu; +九>Kyuu; +乞>Kotsu; +也>Ya; +乢>Gai; +乱>Ran; +乳>Nyuu; +乾>Kan; +亀>Ki; +亂>Ran; +亅>Ketsu; +了>Ryou; +予>Yo; +争>Sou; +亊>Ji; +事>Ji; +二>Ni; +于>U; +云>Un; +互>Go; +五>Go; +井>Sei; +亘>Sen; +亙>Kou; +些>Sa; +亜>A; +亞>A; +亟>Kyoku; +亠>Tou; +亡>Bou; +亢>Kou; +交>Kou; +亥>Gai; +亦>Eki; +亨>Kyou; +享>Kyou; +京>Kyou; +亭>Tei; +亮>Ryou; +亰>Kei; +亳>Haku; +亶>Tan; +人>Jin; +什>Shuu; +仁>Jin; +仂>Roku; +仄>Soku; +仆>Fu; +仇>Kyuu; +今>Kon; +介>Kai; +仍>Jou; +从>Juu; +仏>Butsu; +仔>Shi; +仕>Shi; +他>Ta; +仗>Jou; +付>Fu; +仙>Sen; +仝>Dou; +仞>Jin; +仟>Sen; +代>Dai; +令>Rei; +以>I; +仭>Jin; +仮>Ka; +仰>Gyou; +仲>Chuu; +件>Ken; +价>Kai; +任>Nin; +企>Ki; +伉>Kou; +伊>I; +伍>Go; +伎>Ki; +伏>Fuku; +伐>Batsu; +休>Kyuu; +会>Kai; +伜>Sai; +伝>Den; +伯>Haku; +估>Ko; +伴>Han; +伶>Rei; +伸>Shin; +伺>Shi; +似>Ji; +伽>Kya; +佃>Ten; +但>Tan; +佇>Cho; +位>I; +低>Tei; +住>Juu; +佐>Sa; +佑>Yuu; +体>Tei; +何>Ka; +佗>Ta; +余>Yo; +佚>Itsu; +佛>Butsu; +作>Saku; +佝>Kou; +佞>Nei; +佩>Hai; +佯>You; +佰>Haku; +佳>Ka; +併>Hei; +佶>Kitsu; +佻>Chou; +佼>Kou; +使>Shi; +侃>Kan; +來>Rai; +侈>Shi; +例>Rei; +侍>Ji; +侏>Shu; +侑>Yuu; +侖>Ron; +侘>Ta; +供>Kyou; +依>I; +侠>Kyou; +価>Ka; +侫>Nei; +侭>Jin; +侮>Bu; +侯>Kou; +侵>Shin; +侶>Ryo; +便>Ben; +係>Kei; +促>Soku; +俄>Ga; +俊>Shun; +俎>Sho; +俐>Ri; +俑>You; +俔>Ken; +俗>Zoku; +俘>Fu; +俚>Ri; +俛>Ben; +保>Ho; +俟>Shi; +信>Shin; +修>Shuu; +俯>Fu; +俳>Hai; +俵>Hyou; +俶>Shuku; +俸>Hou; +俺>En; +俾>Hi; +倅>Sai; +倆>Ryou; +倉>Sou; +個>Ko; +倍>Bai; +倏>Shuku; +們>Mon; +倒>Tou; +倔>Kutsu; +倖>Kou; +候>Kou; +倚>I; +借>Shaku; +倡>Shou; +倣>Hou; +値>Chi; +倥>Kou; +倦>Ken; +倨>Kyo; +倩>Sen; +倪>Gei; +倫>Rin; 
+倬>Taku; +倭>I; +倶>Ku; +倹>Ken; +偃>En; +假>Ka; +偈>Ketsu; +偉>I; +偏>Hen; +偐>Gan; +偕>Kai; +偖>Sha; +做>Saku; +停>Tei; +健>Ken; +偬>Sou; +偲>Shi; +側>Soku; +偵>Tei; +偶>Guu; +偸>Chuu; +偽>Gi; +傀>Kai; +傅>Fu; +傍>Bou; +傑>Ketsu; +傘>San; +備>Bi; +傚>Kou; +催>Sai; +傭>You; +傲>Gou; +傳>Den; +傴>U; +債>Sai; +傷>Shou; +傾>Kei; +僂>Rou; +僅>Kin; +僉>Sen; +僊>Sen; +働>Dou; +像>Zou; +僑>Kyou; +僕>Boku; +僖>Ki; +僚>Ryou; +僞>Gi; +僣>Sen; +僥>Kyou; +僧>Sou; +僭>Sen; +僮>Dou; +僵>Kyou; +價>Ka; +僻>Heki; +儀>Gi; +儁>Shun; +儂>Dou; +億>Oku; +儉>Ken; +儒>Ju; +儔>Chuu; +儕>Sei; +儖>Ran; +儘>Jin; +儚>Bou; +償>Shou; +儡>Rai; +優>Yuu; +儲>Cho; +儷>Rei; +儺>Da; +儻>Tou; +儼>Gen; +儿>Jin; +兀>Kotsu; +允>In; +元>Gen; +兄>Kei; +充>Juu; +兆>Chou; +兇>Kyou; +先>Sen; +光>Kou; +克>Koku; +兌>Da; +免>Men; +兎>To; +児>Ji; +兒>Ji; +兔>To; +党>Tou; +兜>Tou; +兢>Kyou; +入>Nyuu; +全>Zen; +兩>Ryou; +兪>Yu; +八>Hachi; +公>Kou; +六>Roku; +兮>Kei; +共>Kyou; +兵>Hei; +其>Ki; +具>Gu; +典>Ten; +兼>Ken; +冀>Ki; +冂>Kei; +内>Dai; +円>En; +冉>Nen; +冊>Satsu; +册>Satsu; +再>Sai; +冏>Kei; +冐>Bou; +冑>Chuu; +冒>Bou; +冓>Kou; +冕>Ben; +冖>Beki; +冗>Jou; +写>Sha; +冠>Kan; +冢>Chou; +冤>En; +冥>Mei; +冦>Kou; +冨>Fu; +冩>Sha; +冪>Beki; +冫>Hyou; +冬>Tou; +冰>Hyou; +冱>Go; +冲>Chuu; +决>Ketsu; +冴>Go; +况>Kyou; +冶>Ya; +冷>Rei; +冽>Retsu; +凄>Sei; +凅>Ko; +准>Jun; +凉>Ryou; +凋>Chou; +凌>Ryou; +凍>Tou; +凖>Jun; +凛>Rin; +凜>Rin; +凝>Gyou; +几>Ki; +凡>Bon; +処>Sho; +凭>Hyou; +凰>Kou; +凱>Gai; +凵>Kan; +凶>Kyou; +凸>Totsu; +凹>Ou; +出>Shutsu; +函>Kan; +凾>Kan; +刀>Tou; +刃>Jin; +刄>Jin; +分>Bun; +切>Setsu; +刈>Gai; +刊>Kan; +刋>Sen; +刎>Fun; +刑>Kei; +刔>Ketsu; +列>Retsu; +初>Sho; +判>Han; +別>Betsu; +刧>Gou; +利>Ri; +刪>San; +刮>Katsu; +到>Tou; +刳>Ko; +制>Sei; +刷>Satsu; +券>Ken; +刹>Satsu; +刺>Shi; +刻>Koku; +剃>Tei; +剄>Kei; +則>Soku; +削>Saku; +剋>Koku; +剌>Ratsu; +前>Zen; +剏>Sou; +剔>Teki; +剖>Bou; +剛>Gou; +剞>Ki; +剣>Ken; +剤>Zai; +剥>Haku; +剩>Jou; +剪>Sen; +副>Fuku; +剰>Jou; +剱>Ken; +割>Katsu; +剳>Tou; +剴>Gai; +創>Sou; +剽>Hyou; +剿>Shou; +劃>Kaku; +劇>Geki; +劈>Heki; +劉>Ru; +劍>Ken; +劑>Zai; +劒>Ken; +劔>Ken; +力>Ryoku; +功>Kou; +加>Ka; +劣>Retsu; +助>Jo; +努>Do; +劫>Gou; +劬>Ku; +劭>Shou; +励>Rei; +労>Rou; +劵>Ken; +効>Kou; 
+劼>Katsu; +劾>Gai; +勁>Kei; +勃>Botsu; +勅>Choku; +勇>Yuu; +勉>Ben; +勍>Kei; +勒>Roku; +動>Dou; +勗>Bou; +勘>Kan; +務>Mu; +勝>Shou; +勞>Rou; +募>Bo; +勠>Riku; +勢>Sei; +勣>Seki; +勤>Kin; +勦>Sou; +勧>Kan; +勲>Kun; +勳>Kun; +勵>Rei; +勸>Kan; +勹>Hou; +勺>Shaku; +勾>Kou; +勿>Butsu; +包>Hou; +匆>Sou; +匈>Kyou; +匍>Ho; +匏>Hou; +匐>Hoku; +匕>Hi; +化>Ka; +北>Hoku; +匙>Shi; +匚>Hou; +匝>Sou; +匠>Shou; +匡>Kyou; +匣>Kou; +匪>Hi; +匯>Wai; +匱>Ki; +匳>Ren; +匸>Kei; +匹>Hitsu; +区>Ku; +医>I; +匿>Toku; +區>Ku; +十>Juu; +千>Sen; +卅>Sou; +卆>Sotsu; +升>Shou; +午>Go; +卉>Ki; +半>Han; +卍>Ban; +卑>Hi; +卒>Sotsu; +卓>Taku; +協>Kyou; +南>Nan; +単>Tan; +博>Haku; +卜>Boku; +卞>Hen; +占>Sen; +卦>Ka; +卩>Setsu; +卮>Shi; +卯>Bou; +印>In; +危>Ki; +即>Soku; +却>Kyaku; +卵>Ran; +卷>Kan; +卸>Sha; +卻>Kyaku; +卿>Kyou; +厂>Kan; +厄>Yaku; +厖>Bou; +厘>Rin; +厚>Kou; +原>Gen; +厠>Shi; +厥>Ketsu; +厦>Ka; +厨>Zu; +厩>Kyuu; +厭>En; +厮>Shi; +厰>Shou; +厳>Gen; +厶>Shi; +去>Kyo; +参>San; +參>San; +又>Yuu; +叉>Sha; +及>Kyuu; +友>Yuu; +双>Sou; +反>Han; +収>Shuu; +叔>Shuku; +取>Shu; +受>Ju; +叙>Jo; +叛>Han; +叟>Sou; +叡>Ei; +叢>Sou; +口>Kou; +古>Ko; +句>Ku; +叨>Tou; +叩>Kou; +只>Shi; +叫>Kyou; +召>Shou; +叭>Hatsu; +叮>Tei; +可>Ka; +台>Dai; +叱>Shitsu; +史>Shi; +右>U; +叶>Kyou; +号>Gou; +司>Shi; +吁>Ku; +吃>Kitsu; +各>Kaku; +合>Gou; +吉>Kichi; +吊>Chou; +吋>Sun; +同>Dou; +名>Mei; +后>Kou; +吏>Ri; +吐>To; +向>Kou; +君>Kun; +吝>Rin; +吟>Gin; +吠>Hai; +否>Hi; +吩>Fun; +含>Gan; +听>Kin; +吭>Kou; +吮>Sen; +吶>Totsu; +吸>Kyuu; +吹>Sui; +吻>Fun; +吼>Kou; +吽>In; +吾>Go; +呀>Ga; +呂>Ryo; +呆>Bou; +呈>Tei; +呉>Go; +告>Koku; +呎>Seki; +呑>Don; +呟>Gen; +周>Shuu; +呪>Ju; +呰>Shi; +呱>Ko; +味>Mi; +呵>Ka; +呶>Do; +呷>Kou; +呻>Shin; +呼>Ko; +命>Mei; +咀>So; +咄>Totsu; +咆>Hou; +咋>Saku; +和>Wa; +咎>Kyuu; +咏>Ei; +咐>Ho; +咒>Shu; +咢>Gaku; +咤>Ta; +咥>Ki; +咨>Shi; +咫>Shi; +咬>Kou; +咯>Kaku; +咲>Shou; +咳>Gai; +咸>Kan; +咼>Ka; +咽>In; +咾>Rou; +哀>Ai; +品>Hin; +哂>Shin; +哄>Kou; +哇>Ai; +哈>Gou; +哉>Sai; +員>In; +哢>Rou; +哥>Ka; +哦>Ga; +哨>Shou; +哩>Ri; +哭>Koku; +哮>Kou; +哲>Tetsu; +哺>Ho; +哽>Kou; +唄>Bai; +唆>Sa; +唇>Shin; +唏>Ki; +唐>Tou; +唔>Go; +唖>A; +售>Shuu; +唯>Yui; +唱>Shou; +唳>Rei; +唸>Ten; +唹>Yo; +唾>Da; +啀>Gai; +啄>Taku; +啅>Taku; +商>Shou; 
+啌>Kou; +問>Mon; +啓>Kei; +啖>Tan; +啗>Tan; +啜>Setsu; +啝>Ka; +啣>Kan; +啻>Shi; +啼>Tei; +啾>Shuu; +喀>Kaku; +喃>Nan; +善>Zen; +喇>Ratsu; +喉>Kou; +喊>Kan; +喋>Chou; +喘>Zen; +喙>Kai; +喚>Kan; +喜>Ki; +喝>Katsu; +喞>Soku; +喟>Ki; +喧>Ken; +喨>Ryou; +喩>Yu; +喪>Sou; +喫>Kitsu; +喬>Kyou; +單>Tan; +営>Ei; +嗄>Sa; +嗅>Kyuu; +嗇>Shoku; +嗔>Shin; +嗚>O; +嗜>Shi; +嗟>Sa; +嗣>Shi; +嗤>Shi; +嗷>Gou; +嗹>Ren; +嗽>Soku; +嗾>Sou; +嘆>Tan; +嘉>Ka; +嘔>Ou; +嘖>Saku; +嘗>Shou; +嘘>Kyo; +嘛>Ma; +嘩>Ka; +嘯>Shou; +嘱>Shoku; +嘲>Tou; +嘴>Shi; +嘶>Sei; +嘸>Bu; +噂>Son; +噌>Sou; +噎>Itsu; +噐>Ki; +噛>Gou; +噤>Kin; +器>Ki; +噪>Sou; +噫>I; +噬>Zei; +噴>Fun; +噸>Ton; +嚀>Dei; +嚆>Kou; +嚇>Kaku; +嚊>Hi; +嚏>Tei; +嚔>Tei; +嚠>Ryuu; +嚢>Nou; +嚥>En; +嚮>Kou; +嚴>Gen; +嚶>Ou; +嚼>Shaku; +囀>Ten; +囁>Shou; +囂>Gou; +囃>Sou; +囈>Gei; +囎>So; +囑>Shoku; +囓>Ketsu; +囗>I; +囘>Kai; +囚>Shuu; +四>Shi; +回>Kai; +因>In; +団>Dan; +囮>Ka; +困>Kon; +囲>I; +図>To; +囹>Rei; +固>Ko; +国>Koku; +囿>Yuu; +圀>Koku; +圃>Ho; +圄>Gyo; +圈>Ken; +圉>Gyo; +國>Koku; +圍>I; +圏>Ken; +園>En; +圓>En; +圖>To; +團>Dan; +圜>Kan; +土>Do; +圧>Atsu; +在>Zai; +圭>Kei; +地>Chi; +圻>Ki; +址>Shi; +坂>Han; +均>Kin; +坊>Bou; +坎>Kan; +坏>Hai; +坐>Za; +坑>Kou; +坡>Ha; +坤>Kon; +坦>Tan; +坩>Kan; +坪>Hei; +坿>Fu; +垂>Sui; +垈>Tai; +垉>Hou; +型>Kei; +垓>Gai; +垠>Gin; +垢>Kou; +垣>En; +垤>Tetsu; +埀>Sui; +埃>Ai; +埆>Kaku; +埋>Mai; +城>Jou; +埒>Rachi; +埓>Rachi; +埔>Ho; +埜>Sho; +域>Iki; +埠>Fu; +埣>Sai; +埴>Shoku; +執>Shitsu; +培>Bai; +基>Ki; +埼>Ki; +堀>Kutsu; +堂>Dou; +堅>Ken; +堆>Tai; +堊>A; +堋>Hou; +堕>Da; +堙>In; +堝>Ka; +堡>Hou; +堤>Tei; +堪>Kan; +堯>Gyou; +堰>En; +報>Hou; +場>Jou; +堵>To; +堺>Kai; +堽>Kou; +塁>Rui; +塊>Kai; +塋>Ei; +塑>So; +塒>Shi; +塔>Tou; +塗>To; +塘>Tou; +塙>Kaku; +塚>Chou; +塞>Soku; +塢>O; +塩>En; +填>Ten; +塲>Jou; +塵>Jin; +塹>Zan; +塾>Juku; +境>Kyou; +墅>Sho; +墓>Bo; +増>Zou; +墜>Tsui; +墟>Kyo; +墨>Boku; +墫>Shun; +墮>Da; +墳>Fun; +墸>Sho; +墺>Ou; +墻>Shou; +墾>Kon; +壁>Heki; +壅>You; +壇>Dan; +壊>Kai; +壌>Jou; +壑>Gaku; +壓>Atsu; +壕>Gou; +壘>Rui; +壙>Kou; +壜>Tan; +壞>Kai; +壟>Ryou; +壤>Jou; +壥>Ten; +士>Shi; +壬>Jin; +壮>Sou; +壯>Sou; +声>Sei; +壱>Ichi; +売>Bai; +壷>Ko; +壹>Ichi; +壺>Ko; +壻>Sei; +壼>Kon; +壽>Ju; +夂>Chi; +変>Hen; +夊>Sui; 
+夏>Ka; +夐>Kei; +夕>Seki; +外>Gai; +夘>Bou; +夙>Shuku; +多>Ta; +夛>Ta; +夜>Ya; +夢>Mu; +夥>Ka; +大>Tai; +天>Ten; +太>Ta; +夫>Fu; +夬>Ketsu; +夭>You; +央>Ou; +失>Shitsu; +夲>Tou; +夷>I; +夸>Ko; +夾>Kou; +奄>En; +奇>Ki; +奈>Na; +奉>Hou; +奎>Kei; +奏>Sou; +奐>Kan; +契>Setsu; +奔>Hon; +奕>Eki; +套>Tou; +奘>Jou; +奚>Kei; +奠>Ten; +奢>Sha; +奥>Ou; +奧>Ou; +奨>Shou; +奩>Ren; +奪>Datsu; +奬>Shou; +奮>Fun; +女>Jo; +奴>Do; +奸>Kan; +好>Kou; +妁>Shaku; +如>Jo; +妃>Hi; +妄>Bou; +妊>Nin; +妍>Ken; +妓>Ki; +妖>You; +妙>Myou; +妛>Shi; +妝>Sou; +妣>Hi; +妥>Da; +妨>Bou; +妬>To; +妲>Datsu; +妹>Mai; +妻>Sai; +妾>Shou; +姆>Bo; +姉>Shi; +始>Shi; +姐>So; +姑>Ko; +姓>Sei; +委>I; +姙>Nin; +姚>You; +姜>Kyou; +姥>Bo; +姦>Kan; +姨>I; +姪>Tetsu; +姫>Ki; +姶>Ou; +姻>In; +姿>Shi; +威>I; +娃>A; +娉>Hei; +娑>Sha; +娘>Jou; +娚>Nan; +娜>Da; +娟>Ken; +娠>Shin; +娥>Ga; +娩>Ben; +娯>Go; +娵>Shu; +娶>Shu; +娼>Shou; +婀>A; +婁>Rou; +婆>Ba; +婉>En; +婚>Kon; +婢>Hi; +婦>Fu; +婪>Ran; +婬>In; +婿>Sei; +媒>Bai; +媚>Bi; +媛>En; +媼>Ou; +媽>Bo; +媾>Kou; +嫁>Ka; +嫂>Sou; +嫉>Shitsu; +嫋>Jou; +嫌>Ken; +嫐>Jou; +嫖>Hyou; +嫗>Ou; +嫡>Chaku; +嫣>En; +嫦>Kou; +嫩>Don; +嫺>Kan; +嫻>Kan; +嬉>Ki; +嬋>Sen; +嬌>Kyou; +嬖>Hei; +嬢>Jou; +嬪>Hin; +嬬>Shu; +嬰>Ei; +嬲>Jou; +嬾>Ran; +孀>Sou; +孃>Jou; +孅>Sen; +子>Shi; +孑>Getsu; +孔>Kou; +孕>You; +字>Ji; +存>Son; +孚>Fu; +孛>Botsu; +孜>Shi; +孝>Kou; +孟>Mou; +季>Ki; +孤>Ko; +孥>Do; +学>Gaku; +孩>Kai; +孫>Son; +孰>Juku; +孱>Sen; +孳>Shi; +孵>Fu; +學>Gaku; +孺>Ju; +宀>Ben; +它>Ta; +宅>Taku; +宇>U; +守>Shu; +安>An; +宋>Sou; +完>Kan; +宍>Niku; +宏>Kou; +宕>Tou; +宗>Shuu; +官>Kan; +宙>Chuu; +定>Tei; +宛>En; +宜>Gi; +宝>Hou; +実>Jitsu; +客>Kyaku; +宣>Sen; +室>Shitsu; +宥>Yuu; +宦>Kan; +宮>Kyuu; +宰>Sai; +害>Gai; +宴>En; +宵>Shou; +家>Ka; +宸>Shin; +容>You; +宿>Shuku; +寂>Jaku; +寃>En; +寄>Ki; +寅>In; +密>Mitsu; +寇>Kou; +寉>Kaku; +富>Fu; +寐>Bi; +寒>Kan; +寓>Guu; +寔>Shoku; +寛>Kan; +寝>Shin; +寞>Baku; +察>Satsu; +寡>Ka; +寢>Shin; +寤>Go; +寥>Ryou; +實>Jitsu; +寧>Nei; +寨>Sai; +審>Shin; +寫>Sha; +寮>Ryou; +寰>Kan; +寳>Hou; +寵>Chou; +寶>Hou; +寸>Sun; +寺>Ji; +対>Tai; +寿>Ju; +封>Fuu; +専>Sen; +射>Sha; +尅>Koku; +将>Shou; +將>Shou; +專>Sen; +尉>I; +尊>Son; +尋>Jin; +對>Tai; +導>Dou; +小>Shou; +少>Shou; +尓>Ji; +尖>Sen; +尚>Shou; 
+尠>Sen; +尢>Ou; +尤>Yuu; +尨>Bou; +尭>Gyou; +就>Shuu; +尸>Shi; +尹>In; +尺>Shaku; +尻>Kou; +尼>Ni; +尽>Jin; +尾>Bi; +尿>Nyou; +局>Kyoku; +屁>Hi; +居>Kyo; +屆>Kai; +屈>Kutsu; +届>Kai; +屋>Oku; +屍>Shi; +屎>Shi; +屏>Hei; +屐>Geki; +屑>Setsu; +屓>Ki; +展>Ten; +属>Zoku; +屠>To; +屡>Ru; +層>Sou; +履>Ri; +屬>Zoku; +屮>Sa; +屯>Ton; +山>San; +屹>Kitsu; +岌>Kyuu; +岐>Ki; +岑>Shin; +岔>Sa; +岡>Kou; +岨>So; +岩>Gan; +岫>Shuu; +岬>Kou; +岱>Tai; +岳>Gaku; +岶>Haku; +岷>Bin; +岸>Gan; +岻>Ji; +岾>Sen; +峇>Kou; +峙>Ji; +峡>Kyou; +峨>Ga; +峩>Ga; +峪>Yoku; +峭>Shou; +峯>Hou; +峰>Hou; +島>Tou; +峺>Kou; +峻>Shun; +峽>Kyou; +崇>Suu; +崋>Ka; +崎>Ki; +崑>Kon; +崔>Sai; +崕>Gai; +崖>Gai; +崗>Kou; +崘>Ron; +崙>Ron; +崚>Ryou; +崛>Kutsu; +崟>Gin; +崢>Sou; +崩>Hou; +嵋>Bi; +嵌>Kan; +嵎>Guu; +嵐>Ran; +嵒>Gan; +嵜>Ki; +嵩>Suu; +嵬>Kai; +嵯>Sa; +嵳>Sa; +嶂>Shou; +嶄>San; +嶇>Ku; +嶋>Tou; +嶌>Tou; +嶐>Ryuu; +嶝>Tou; +嶢>Gyou; +嶬>Gi; +嶮>Ken; +嶷>Gi; +嶺>Rei; +嶼>Sho; +嶽>Gaku; +巉>San; +巌>Gan; +巍>Gi; +巒>Ran; +巓>Ten; +巖>Gan; +巛>Sen; +川>Sen; +州>Shuu; +巡>Jun; +巣>Sou; +工>Kou; +左>Sa; +巧>Kou; +巨>Kyo; +巫>Fu; +差>Shi; +己>Ki; +已>I; +巳>Shi; +巴>Ha; +巵>Shi; +巷>Kou; +巻>Kan; +巽>Son; +巾>Kin; +市>Shi; +布>Fu; +帆>Han; +帋>Shi; +希>Ki; +帑>Do; +帖>Jou; +帙>Chitsu; +帚>Sou; +帛>Haku; +帝>Tei; +帥>Sotsu; +師>Shi; +席>Seki; +帯>Tai; +帰>Ki; +帳>Chou; +帶>Tai; +帷>I; +常>Jou; +帽>Bou; +幀>Tei; +幃>I; +幄>Aku; +幅>Fuku; +幇>Hou; +幌>Kou; +幎>Beki; +幔>Ban; +幕>Maku; +幗>Kaku; +幟>Shi; +幡>Han; +幢>Tou; +幣>Hei; +幤>Hei; +干>Kan; +平>Hei; +年>Nen; +幵>Ken; +并>Hei; +幸>Kou; +幹>Kan; +幺>You; +幻>Gen; +幼>You; +幽>Yuu; +幾>Ki; +广>Gen; +庁>Chou; +広>Kou; +庄>Sou; +庇>Hi; +床>Shou; +序>Jo; +底>Tei; +庖>Hou; +店>Ten; +庚>Kou; +府>Fu; +庠>Shou; +度>Taku; +座>Za; +庫>Ko; +庭>Tei; +庵>An; +庶>Sho; +康>Kou; +庸>You; +廁>Shi; +廂>Shou; +廃>Hai; +廈>Ka; +廉>Ren; +廊>Rou; +廐>Kyuu; +廓>Kaku; +廖>Ryou; +廚>Zu; +廛>Ten; +廝>Shi; +廟>Byou; +廠>Shou; +廡>Bu; +廢>Hai; +廣>Kou; +廨>Kai; +廩>Rin; +廬>Ryo; +廰>Chou; +廱>You; +廳>Chou; +廴>In; +延>En; +廷>Tei; +廸>Teki; +建>Ken; +廻>Kai; +廼>Dai; +廾>Kyou; +廿>Juu; +弁>Ben; +弃>Ki; +弄>Rou; +弉>Jou; +弊>Hei; +弋>Yoku; +弌>Itsu; +弍>Ni; +式>Shiki; +弐>Ni; +弑>Shi; +弓>Kyuu; +弔>Chou; +引>In; +弗>Futsu; 
+弘>Kou; +弛>Shi; +弟>Tei; +弥>Mi; +弦>Gen; +弧>Ko; +弩>Do; +弭>Bi; +弯>Wan; +弱>Jaku; +張>Chou; +強>Kyou; +弸>Hou; +弼>Hitsu; +弾>Dan; +彁>Sei; +彈>Dan; +彊>Kyou; +彌>Bi; +彎>Wan; +彑>Kei; +当>Tou; +彖>Tan; +彗>Sui; +彙>I; +彜>I; +彝>I; +彡>San; +形>Kei; +彦>Gen; +彩>Sai; +彪>Hyou; +彫>Chou; +彬>Hin; +彭>Hou; +彰>Shou; +影>Ei; +彳>Teki; +彷>Hou; +役>Yaku; +彼>Hi; +彿>Futsu; +往>Ou; +征>Sei; +徂>So; +徃>Ou; +径>Kei; +待>Tai; +徇>Shun; +很>Kon; +徊>Kai; +律>Ritsu; +後>Go; +徐>Jo; +徑>Kei; +徒>To; +従>Juu; +得>Toku; +徘>Hai; +徙>Shi; +從>Juu; +徠>Rai; +御>Gyo; +徨>Kou; +復>Fuku; +循>Jun; +徭>You; +微>Bi; +徳>Toku; +徴>Chou; +徹>Tetsu; +徼>Kyou; +徽>Ki; +心>Shin; +必>Hitsu; +忌>Ki; +忍>Nin; +忖>Son; +志>Shi; +忘>Bou; +忙>Bou; +応>Ou; +忝>Ten; +忠>Chuu; +忤>Go; +快>Kai; +忰>Sui; +忱>Shin; +念>Nen; +忸>Jiku; +忻>Kin; +忽>Kotsu; +忿>Fun; +怎>Shin; +怏>You; +怐>Kou; +怒>Do; +怕>Ha; +怖>Fu; +怙>Ko; +怛>Datsu; +怜>Rei; +思>Shi; +怠>Tai; +怡>I; +急>Kyuu; +怦>Hou; +性>Sei; +怨>En; +怩>Ji; +怪>Kai; +怫>Futsu; +怯>Kyou; +怱>Sou; +恁>In; +恂>Jun; +恃>Ji; +恆>Kou; +恊>Kyou; +恋>Ren; +恍>Kou; +恐>Kyou; +恒>Kou; +恕>Jo; +恙>You; +恚>I; +恟>Kyou; +恠>Kai; +恢>Kai; +恣>Shi; +恤>Jutsu; +恥>Chi; +恨>Kon; +恩>On; +恪>Kaku; +恫>Dou; +恬>Ten; +恭>Kyou; +息>Soku; +恰>Kou; +恵>Kei; +恷>Kyuu; +悁>En; +悃>Kon; +悄>Shou; +悉>Shitsu; +悋>Rin; +悌>Tei; +悍>Kan; +悒>Yuu; +悔>Kai; +悖>Hai; +悗>Ban; +悚>Shou; +悛>Sen; +悟>Go; +悠>Yuu; +患>Kan; +悦>Etsu; +悧>Ri; +悩>Nou; +悪>Aku; +悲>Hi; +悳>Toku; +悴>Sui; +悵>Chou; +悶>Mon; +悸>Ki; +悼>Tou; +悽>Sei; +情>Jou; +惆>Chuu; +惇>Ton; +惑>Waku; +惓>Ken; +惘>Bou; +惚>Kotsu; +惜>Seki; +惟>I; +惠>Kei; +惡>O; +惣>Sou; +惧>Ku; +惨>San; +惰>Da; +惱>Nou; +想>Sou; +惴>Zui; +惶>Kou; +惷>Shun; +惹>Ja; +惺>Sei; +惻>Soku; +愀>Shou; +愁>Shuu; +愃>Ken; +愆>Ken; +愈>Yu; +愉>Yu; +愍>Bin; +愎>Hyoku; +意>I; +愕>Gaku; +愚>Gu; +愛>Ai; +感>Kan; +愡>Sou; +愧>Ki; +愨>Kaku; +愬>So; +愴>Sou; +愼>Shin; +愽>Haku; +愾>Ki; +愿>Gen; +慂>You; +慄>Ritsu; +慇>In; +慈>Ji; +慊>Ken; +態>Tai; +慌>Kou; +慍>Un; +慎>Shin; +慓>Hyou; +慕>Bo; +慘>San; +慙>Zan; +慚>Zan; +慝>Toku; +慟>Dou; +慢>Man; +慣>Kan; +慥>Zou; +慧>Kei; +慨>Gai; +慫>Shou; +慮>Ryo; +慯>Shou; +慰>I; +慱>Tan; +慳>Kan; +慴>Shou; +慵>You; +慶>Kei; +慷>Kou; +慾>Yoku; +憂>Yuu; 
+憇>Kei; +憊>Hai; +憎>Zou; +憐>Ren; +憑>Hyou; +憔>Shou; +憖>Gin; +憙>Ki; +憚>Tan; +憤>Fun; +憧>Dou; +憩>Kei; +憫>Bin; +憬>Kei; +憮>Bu; +憲>Ken; +憶>Oku; +憺>Tan; +憾>Kan; +懃>Kin; +懆>Sou; +懇>Kon; +懈>Kai; +應>You; +懊>Ou; +懋>Bou; +懌>Eki; +懍>Rin; +懐>Kai; +懣>Mon; +懦>Da; +懲>Chou; +懴>Zan; +懶>Ran; +懷>Kai; +懸>Ken; +懺>Zan; +懼>Ku; +懽>Kan; +懾>Shou; +懿>I; +戀>Ren; +戈>Ka; +戉>Etsu; +戊>Bo; +戌>Jutsu; +戍>Ju; +戎>Juu; +成>Sei; +我>Ga; +戒>Kai; +戔>San; +或>Waku; +戚>Seki; +戛>Katsu; +戝>Zoku; +戞>Katsu; +戟>Geki; +戡>Kan; +戦>Sen; +截>Setsu; +戮>Riku; +戯>Gi; +戰>Sen; +戲>Gi; +戳>Taku; +戴>Tai; +戸>Ko; +戻>Rei; +房>Bou; +所>Sho; +扁>Hen; +扇>Sen; +扈>Ko; +扉>Hi; +手>Shu; +才>Sai; +扎>Satsu; +打>Da; +払>Futsu; +托>Taku; +扛>Kou; +扞>Kan; +扠>Sa; +扣>Kou; +扮>Fun; +扱>Sou; +扶>Fu; +批>Hi; +扼>Aku; +找>Ka; +承>Shou; +技>Gi; +抂>Kyou; +抃>Ben; +抄>Shou; +抉>Ketsu; +把>Ha; +抑>Yoku; +抒>Jo; +抓>Sou; +抔>Hou; +投>Tou; +抖>Tou; +抗>Kou; +折>Setsu; +抛>Hou; +抜>Batsu; +択>Taku; +披>Hi; +抬>Tai; +抱>Hou; +抵>Tei; +抹>Matsu; +抻>Shin; +押>Ou; +抽>Chuu; +拂>Futsu; +担>Tan; +拆>Taku; +拇>Bo; +拈>Den; +拉>Ratsu; +拊>Fu; +拌>Han; +拍>Haku; +拏>Da; +拐>Kai; +拑>Kan; +拒>Kyo; +拓>Taku; +拔>Batsu; +拗>Ou; +拘>Kou; +拙>Setsu; +招>Shou; +拜>Hai; +拝>Hai; +拠>Kyo; +拡>Kaku; +括>Katsu; +拭>Shiki; +拮>Kitsu; +拯>Jou; +拱>Kyou; +拳>Ken; +拵>Son; +拶>Satsu; +拷>Gou; +拾>Shuu; +拿>Da; +持>Ji; +挂>Kei; +指>Shi; +挈>Ketsu; +按>An; +挌>Kaku; +挑>Chou; +挙>Kyo; +挟>Kyou; +挧>Ku; +挨>Ai; +挫>Za; +振>Shin; +挺>Tei; +挽>Ban; +挾>Kyou; +挿>Sou; +捉>Soku; +捌>Hatsu; +捍>Kan; +捏>Detsu; +捐>En; +捕>Ho; +捗>Ho; +捜>Sou; +捧>Hou; +捨>Sha; +捩>Retsu; +捫>Mon; +据>Kyo; +捲>Ken; +捶>Sui; +捷>Shou; +捺>Natsu; +捻>Nen; +掀>Kin; +掃>Sou; +授>Ju; +掉>Tou; +掌>Shou; +掎>Ki; +掏>Tou; +排>Hai; +掖>Eki; +掘>Kutsu; +掛>Ka; +掟>Tou; +掠>Ryaku; +採>Sai; +探>Tan; +掣>Sei; +接>Setsu; +控>Kou; +推>Sui; +掩>En; +措>So; +掫>Sou; +掬>Kiku; +掲>Kei; +掴>Kaku; +掻>Sou; +掾>En; +揀>Kan; +揃>Sen; +揄>Yu; +揆>Ki; +揉>Juu; +描>Byou; +提>Tei; +插>Sou; +揖>Yuu; +揚>You; +換>Kan; +握>Aku; +揣>Shi; +揩>Kai; +揮>Ki; +援>En; +揶>Ya; +揺>You; +搆>Kou; +損>Son; +搏>Haku; +搓>Sa; +搖>You; +搗>Tou; +搜>Sou; +搦>Jaku; +搨>Tou; +搬>Han; +搭>Tou; +搴>Ken; +搶>Shou; 
+携>Kei; +搾>Saku; +摂>Setsu; +摎>Kyuu; +摘>Teki; +摧>Sai; +摩>Ma; +摯>Shi; +摶>Tan; +摸>Mo; +摺>Shou; +撃>Geki; +撈>Rou; +撒>San; +撓>Dou; +撕>Sei; +撚>Nen; +撞>Shu; +撤>Tetsu; +撥>Hatsu; +撩>Ryou; +撫>Bu; +播>Ha; +撮>Satsu; +撰>San; +撲>Boku; +撹>Kaku; +撻>Tachi; +撼>Kan; +擁>You; +擂>Rai; +擅>Sen; +擇>Taku; +操>Sou; +擒>Kin; +擔>Tan; +擘>Haku; +據>Kyo; +擠>Sei; +擡>Tai; +擢>Teki; +擣>Tou; +擦>Satsu; +擧>Kyo; +擬>Gi; +擯>Hin; +擱>Kaku; +擲>Teki; +擴>Kaku; +擶>Sen; +擺>Hai; +擽>Ryaku; +擾>Jou; +攀>Han; +攅>San; +攘>Jou; +攜>Kei; +攝>Setsu; +攣>Ren; +攤>Tan; +攪>Kaku; +攫>Kaku; +攬>Ran; +支>Shi; +攴>Hoku; +攵>Boku; +收>Shuu; +攷>Kou; +攸>Yuu; +改>Kai; +攻>Kou; +放>Hou; +政>Sei; +故>Ko; +效>Kou; +敍>Jo; +敏>Bin; +救>Kyuu; +敕>Choku; +敖>Gou; +敗>Hai; +敘>Jo; +教>Kyou; +敝>Hei; +敞>Shou; +敢>Kan; +散>San; +敦>Ton; +敬>Kei; +数>Suu; +敲>Kou; +整>Sei; +敵>Teki; +敷>Fu; +數>Suu; +斂>Ren; +斃>Hei; +文>Bun; +斈>Gaku; +斉>Sei; +斌>Hin; +斎>Sai; +斐>Hi; +斑>Han; +斗>To; +料>Ryou; +斛>Koku; +斜>Sha; +斟>Shin; +斡>Atsu; +斤>Kin; +斥>Seki; +斧>Fu; +斫>Shaku; +斬>Zan; +断>Dan; +斯>Shi; +新>Shin; +斷>Dan; +方>Hou; +於>O; +施>Shi; +旁>Hou; +旃>Sen; +旄>Bou; +旅>Ryo; +旆>Hai; +旋>Sen; +旌>Sei; +族>Zoku; +旒>Ryuu; +旗>Ki; +旙>Han; +旛>Han; +无>Bu; +旡>Ki; +既>Ki; +日>Nichi; +旦>Tan; +旧>Kyuu; +旨>Shi; +早>Sou; +旬>Jun; +旭>Kyoku; +旱>Kan; +旺>Ou; +旻>Bin; +昂>Kou; +昃>Soku; +昆>Kon; +昇>Shou; +昊>Kou; +昌>Shou; +明>Mei; +昏>Kon; +易>Eki; +昔>Seki; +昜>You; +星>Sei; +映>Ei; +春>Shun; +昧>Mai; +昨>Saku; +昭>Shou; +是>Ze; +昴>Bou; +昵>Jitsu; +昶>Chou; +昼>Chuu; +昿>Kou; +晁>Chou; +時>Ji; +晃>Kou; +晄>Kou; +晉>Shin; +晋>Shin; +晏>An; +晒>Sai; +晝>Chuu; +晞>Ki; +晟>Sei; +晢>Setsu; +晤>Go; +晦>Kai; +晧>Kou; +晨>Shin; +晩>Ban; +普>Fu; +景>Kei; +晰>Seki; +晴>Sei; +晶>Shou; +智>Chi; +暁>Gyou; +暃>Hi; +暄>Ken; +暇>Ka; +暈>Un; +暉>Ki; +暎>Ei; +暑>Sho; +暖>Dan; +暗>An; +暘>You; +暝>Mei; +暢>Chou; +暦>Reki; +暫>Zan; +暮>Bo; +暴>Bou; +暸>Ryou; +暹>Sen; +暼>Hetsu; +暾>Ton; +曁>Ki; +曄>You; +曇>Don; +曉>Gyou; +曖>Ai; +曙>Sho; +曚>Bou; +曜>You; +曝>Baku; +曠>Kou; +曦>Gi; +曩>Dou; +曰>Etsu; +曲>Kyoku; +曳>Ei; +更>Kou; +曵>Ei; +曷>Katsu; +書>Sho; +曹>Sou; +曼>Ban; +曽>Zo; +曾>So; +替>Tai; +最>Sai; +會>Kai; +月>Getsu; +有>Yuu; +朋>Hou; 
+服>Fuku; +朏>Hi; +朔>Saku; +朕>Chin; +朖>Rou; +朗>Rou; +望>Bou; +朝>Chou; +朞>Ki; +期>Ki; +朦>Bou; +朧>Rou; +木>Boku; +未>Mi; +末>Matsu; +本>Hon; +札>Satsu; +朮>Jutsu; +朱>Shu; +朴>Boku; +朶>Da; +朷>Tou; +朸>Ryoku; +机>Ki; +朽>Kyuu; +朿>Shi; +杆>Kan; +杉>San; +李>Ri; +杏>Kyou; +材>Zai; +村>Son; +杓>Hyou; +杖>Jou; +杙>Yoku; +杜>Do; +杞>Ko; +束>Soku; +杠>Kou; +条>Jou; +来>Rai; +杪>Byou; +杭>Kou; +杯>Hai; +杰>Ketsu; +東>Tou; +杲>Kou; +杳>You; +杵>Sho; +杷>Ha; +杼>Cho; +松>Shou; +板>Han; +枅>Kei; +枇>Hi; +枉>Ou; +枋>Hou; +枌>Fun; +析>Seki; +枕>Chin; +林>Rin; +枚>Mai; +果>Ka; +枝>Shi; +枢>Suu; +枦>Ro; +枩>Shou; +枯>Ko; +枳>Ki; +枴>Kai; +架>Ka; +枷>Ka; +枸>Ku; +枹>Hou; +柁>Ta; +柄>Hei; +柆>Rou; +柊>Shuu; +柎>Fu; +柏>Haku; +某>Bou; +柑>Kan; +染>Sen; +柔>Nyuu; +柘>Sha; +柚>Yuu; +柝>Taku; +柞>Saku; +柢>Tei; +柤>Sa; +柧>Ko; +柩>Kyuu; +柬>Kan; +柮>Totsu; +柯>Ka; +柱>Chuu; +柳>Ryuu; +柴>Sai; +柵>Saku; +査>Sa; +柾>Kyuu; +柿>Shi; +栄>Ei; +栓>Sen; +栖>Sei; +栗>Ritsu; +栞>Kan; +校>Kou; +栢>Haku; +栩>Ku; +株>Chu; +栫>Son; +栲>Gou; +栴>Sen; +核>Kaku; +根>Kon; +格>Kou; +栽>Sai; +桀>Ketsu; +桁>Kou; +桂>Kei; +桃>Tou; +框>Kyou; +案>An; +桍>Ko; +桎>Shitsu; +桐>Dou; +桑>Sou; +桓>Kan; +桔>Kitsu; +桙>U; +桜>Ou; +桟>San; +档>Tou; +桧>Kai; +桴>Fu; +桶>Tou; +桷>Kaku; +桾>Kun; +桿>Kan; +梁>Ryou; +梃>Tei; +梅>Bai; +梍>Sou; +梏>Koku; +梓>Shi; +梔>Shi; +梗>Kou; +梛>Da; +條>Jou; +梟>Kyou; +梠>Ryo; +梢>Shou; +梦>Mu; +梧>Go; +梨>Ri; +梭>Sa; +梯>Tei; +械>Kai; +梱>Kon; +梳>So; +梵>Fuu; +梶>Bi; +梹>Bin; +梼>Tou; +棄>Ki; +棆>Rin; +棉>Men; +棊>Ki; +棋>Ki; +棍>Kon; +棒>Bou; +棔>Kon; +棕>Shu; +棗>Sou; +棘>Kyoku; +棚>Hou; +棟>Tou; +棠>Tou; +棡>Kou; +棣>Tei; +棧>San; +森>Shin; +棯>Jin; +棲>Sei; +棹>Tou; +棺>Kan; +椀>Wan; +椁>Kaku; +椄>Setsu; +椅>I; +椈>Kiku; +椋>Ryou; +椌>Kou; +植>Shoku; +椎>Tsui; +椏>A; +椒>Shou; +検>Ken; +椢>Kai; +椦>Ken; +椰>Ya; +椴>Tan; +椶>Shu; +椹>Chin; +椽>Ten; +椿>Chin; +楊>You; +楓>Fuu; +楔>Setsu; +楕>Da; +楙>Bou; +楚>So; +楜>Ko; +楝>Ren; +楞>Rou; +楠>Nan; +楡>Yu; +楢>Shuu; +楪>You; +楫>Shuu; +業>Gyou; +楮>Cho; +楯>Jun; +楳>Bai; +楴>Tei; +極>Goku; +楷>Kai; +楸>Shuu; +楹>Ei; +楼>Rou; +楽>Gaku; +概>Gai; +榎>Ka; +榑>Fu; +榔>Rou; +榕>You; +榛>Shin; +榜>Bou; +榠>Bei; +榧>Hi; +榮>Ei; +榱>Sui; +榲>Otsu; +榴>Ryuu; +榻>Tou; 
+榾>Kotsu; +榿>Ki; +槁>Kou; +槃>Han; +槇>Ten; +槊>Saku; +構>Kou; +槌>Tsui; +槍>Sou; +槎>Sa; +槐>Kai; +槓>Kou; +様>You; +槙>Ten; +槝>Tou; +槞>Rou; +槧>San; +槨>Kaku; +槫>Tan; +槭>Shuku; +槲>Koku; +槹>Kou; +槻>Ki; +槽>Sou; +槿>Kin; +樂>Gaku; +樅>Shou; +樊>Han; +樋>Tou; +樌>Kan; +樒>Mitsu; +樓>Rou; +樔>Sou; +樗>Cho; +標>Hyou; +樛>Kyuu; +樞>Suu; +樟>Shou; +模>Mo; +樢>Boku; +樣>You; +権>Ken; +横>Ou; +樵>Shou; +樶>Sai; +樸>Boku; +樹>Ju; +樺>Ka; +樽>Son; +橄>Kan; +橇>Zei; +橈>Dou; +橋>Kyou; +橘>Kitsu; +橙>Tou; +機>Ki; +橡>Shou; +橢>Da; +橦>Tou; +橸>Shou; +橿>Kyou; +檀>Tan; +檄>Geki; +檍>Yoku; +檎>Go; +檐>En; +檗>Haku; +檜>Kai; +檠>Kei; +檢>Ken; +檣>Shou; +檪>Reki; +檬>Bou; +檮>Tou; +檳>Bin; +檸>Nei; +檻>Kan; +櫁>Mitsu; +櫂>Tou; +櫃>Ki; +櫑>Rai; +櫓>Ro; +櫚>Ryo; +櫛>Shitsu; +櫞>En; +櫟>Reki; +櫨>Ro; +櫪>Reki; +櫺>Rei; +櫻>Ou; +欄>Ran; +欅>Kyo; +權>Ken; +欒>Ran; +欖>Ran; +欝>Utsu; +欠>Ketsu; +次>Ji; +欣>Gon; +欧>Ou; +欲>Yoku; +欷>Ki; +欸>Ai; +欹>I; +欺>Gi; +欽>Kin; +款>Kan; +歃>Sou; +歇>Ketsu; +歉>Ken; +歌>Ka; +歎>Tan; +歐>Ou; +歓>Kan; +歔>Kyo; +歙>Kyuu; +歛>Kan; +歟>Yo; +歡>Kan; +止>Shi; +正>Sei; +此>Shi; +武>Bu; +歩>Ho; +歪>Wai; +歯>Shi; +歳>Sei; +歴>Reki; +歸>Ki; +歹>Gatsu; +死>Shi; +歿>Botsu; +殀>You; +殃>You; +殄>Ten; +殆>Tai; +殉>Jun; +殊>Shu; +残>Zan; +殍>Hyou; +殕>Fuu; +殖>Shoku; +殘>Zan; +殞>In; +殤>Shou; +殪>Ei; +殫>Tan; +殯>Hin; +殱>Sen; +殲>Sen; +殳>Shu; +殴>Ou; +段>Dan; +殷>In; +殺>Satsu; +殻>Kaku; +殼>Kaku; +殿>Ten; +毀>Ki; +毅>Ki; +毆>Ou; +毋>Bu; +母>Bo; +毎>Mai; +毒>Doku; +毓>Iku; +比>Hi; +毘>Hi; +毛>Mou; +毫>Kou; +毬>Kyuu; +毯>Tan; +毳>Zei; +氈>Sen; +氏>Shi; +民>Min; +氓>Bou; +气>Ki; +気>Ki; +氛>Fun; +氣>Ki; +氤>In; +水>Sui; +氷>Hyou; +永>Ei; +氾>Han; +汀>Tei; +汁>Juu; +求>Kyuu; +汎>Han; +汐>Seki; +汕>San; +汗>Kan; +汚>O; +汝>Jo; +汞>Kou; +江>Kou; +池>Chi; +汨>Beki; +汪>Ou; +汰>Ta; +汲>Kyuu; +汳>Hen; +決>Ketsu; +汽>Ki; +汾>Fun; +沁>Shin; +沂>Ki; +沃>Yoku; +沈>Chin; +沌>Ton; +沍>Go; +沐>Boku; +沒>Botsu; +沓>Tou; +沖>Chuu; +沙>Sa; +沚>Shi; +沛>Hai; +没>Botsu; +沢>Taku; +沫>Matsu; +沮>Sho; +沱>Ta; +河>Ka; +沸>Futsu; +油>Yu; +沺>Ten; +治>Ji; +沼>Shou; +沽>Ko; +沾>Ten; +沿>En; +況>Kyou; +泄>Ei; +泅>Shuu; +泉>Sen; +泊>Haku; +泌>Hitsu; +泓>Ou; +法>Hou; +泗>Shi; +泙>Hou; +泛>Han; +泝>So; +泡>Hou; +波>Ha; 
+泣>Kyuu; +泥>Dei; +注>Chuu; +泪>Rui; +泯>Bin; +泰>Tai; +泱>Ou; +泳>Ei; +洋>You; +洌>Retsu; +洒>Sei; +洗>Sen; +洙>Shu; +洛>Raku; +洞>Dou; +洟>I; +津>Shin; +洩>Ei; +洪>Kou; +洫>Kyoku; +洲>Shuu; +洳>Jo; +洵>Shun; +洶>Kyou; +洸>Kou; +活>Katsu; +洽>Kou; +派>Ha; +流>Ryuu; +浄>Jou; +浅>Sen; +浙>Setsu; +浚>Shun; +浜>Hin; +浣>Kan; +浤>Kou; +浦>Ho; +浩>Kou; +浪>Rou; +浬>Ri; +浮>Fu; +浴>Yoku; +海>Kai; +浸>Shin; +浹>Shou; +涅>Netsu; +消>Shou; +涌>Yuu; +涎>Sen; +涓>Ken; +涕>Tei; +涙>Rui; +涛>Tou; +涜>Toku; +涯>Gai; +液>Eki; +涵>Kan; +涸>Ko; +涼>Ryou; +淀>Ten; +淅>Seki; +淆>Kou; +淇>Ki; +淋>Rin; +淌>Shou; +淑>Shuku; +淒>Sei; +淕>Riku; +淘>Tou; +淙>Sou; +淞>Shou; +淡>Tan; +淤>Yo; +淦>Kan; +淨>Jou; +淪>Rin; +淫>In; +淬>Sai; +淮>Wai; +深>Shin; +淳>Shun; +淵>En; +混>Kon; +淹>En; +淺>Sen; +添>Ten; +清>Sei; +渇>Katsu; +済>Sai; +渉>Shou; +渊>En; +渋>Juu; +渓>Kei; +渕>En; +渙>Kan; +渚>Sho; +減>Gen; +渝>Yu; +渟>Tei; +渠>Kyo; +渡>To; +渣>Sa; +渤>Botsu; +渥>Aku; +渦>Ka; +温>On; +渫>Setsu; +測>Soku; +渭>I; +渮>Ka; +港>Kou; +游>Yuu; +渺>Byou; +渾>Kon; +湃>Hai; +湊>Sou; +湍>Tan; +湎>Ben; +湖>Ko; +湘>Shou; +湛>Tan; +湟>Kou; +湧>Yuu; +湫>Shou; +湮>In; +湯>Tou; +湲>Kan; +湶>Sen; +湾>Wan; +湿>Shitsu; +満>Man; +溂>Ratsu; +溌>Hatsu; +溏>Tou; +源>Gen; +準>Jun; +溘>Kou; +溜>Ryuu; +溝>Kou; +溟>Mei; +溢>Itsu; +溥>Ho; +溪>Kei; +溯>So; +溲>Sou; +溶>You; +溷>Kon; +溺>Deki; +溽>Joku; +滂>Bou; +滄>Sou; +滅>Metsu; +滉>Kou; +滋>Ji; +滌>Deki; +滑>Katsu; +滓>Sai; +滔>Tou; +滕>Tou; +滝>Sou; +滞>Tai; +滬>Ko; +滯>Tai; +滲>Shin; +滴>Teki; +滷>Ro; +滸>Ko; +滾>Kon; +滿>Man; +漁>Ryou; +漂>Hyou; +漆>Shitsu; +漉>Roku; +漏>Rou; +漑>Gai; +漓>Ri; +演>En; +漕>Sou; +漠>Baku; +漢>Kan; +漣>Ran; +漫>Man; +漬>Shi; +漱>Sou; +漲>Chou; +漸>Zen; +漾>You; +漿>Shou; +潁>Ei; +潅>Kan; +潔>Ketsu; +潘>Han; +潛>Sen; +潜>Sen; +潟>Seki; +潤>Jun; +潦>Rou; +潭>Tan; +潮>Chou; +潯>Jin; +潰>Kai; +潴>Cho; +潸>San; +潺>San; +潼>Dou; +澀>Juu; +澁>Juu; +澂>Chou; +澄>Chou; +澆>Gyou; +澎>Hou; +澑>Ryuu; +澗>Kan; +澡>Sou; +澣>Kan; +澤>Taku; +澪>Rei; +澱>Ten; +澳>Iku; +澹>Tan; +激>Geki; +濁>Daku; +濂>Ren; +濃>Nou; +濆>Fun; +濔>Dei; +濕>Shitsu; +濘>Nei; +濟>Sai; +濠>Gou; +濡>Ju; +濤>Tou; +濫>Ran; +濬>Shun; +濮>Hoku; +濯>Taku; +濱>Hin; +濳>Sen; +濶>Katsu; +濺>Sen; +濾>Ryo; +瀁>You; +瀉>Sha; 
+瀋>Shin; +瀏>Ryuu; +瀑>Baku; +瀕>Hin; +瀘>Ro; +瀚>Kan; +瀛>Ei; +瀝>Reki; +瀞>Jou; +瀟>Shou; +瀦>Cho; +瀧>Sou; +瀬>Rai; +瀰>Bi; +瀲>Ren; +瀾>Ran; +灌>Kan; +灑>Sai; +灘>Dan; +灣>Wan; +火>Ka; +灯>Tou; +灰>Kai; +灸>Kyuu; +灼>Shaku; +災>Sai; +炉>Ro; +炊>Sui; +炎>En; +炒>Sou; +炙>Sha; +炬>Ko; +炭>Tan; +炮>Hou; +炯>Kei; +炳>Hei; +炸>Saku; +点>Ten; +為>I; +烈>Retsu; +烋>Kou; +烏>U; +烙>Raku; +烝>Jou; +烟>En; +烱>Kei; +烹>Hou; +烽>Hou; +焉>En; +焔>En; +焙>Hou; +焚>Fun; +焜>Kon; +無>Mu; +焦>Shou; +然>Zen; +焼>Shou; +煉>Ren; +煌>Kou; +煎>Sen; +煕>Ki; +煖>Dan; +煙>En; +煢>Kei; +煤>Bai; +煥>Kan; +煦>Ku; +照>Shou; +煩>Han; +煬>You; +煮>Sha; +煽>Sen; +熄>Soku; +熈>Ki; +熊>Yuu; +熏>Kun; +熔>You; +熕>Kou; +熙>Ki; +熟>Juku; +熨>I; +熬>Gou; +熱>Netsu; +熹>Ki; +熾>Shi; +燃>Nen; +燈>Tou; +燉>Ton; +燎>Ryou; +燐>Rin; +燒>Shou; +燔>Han; +燕>En; +燗>Ran; +營>Ei; +燠>Iku; +燥>Sou; +燦>San; +燧>Sui; +燬>Ki; +燭>Shoku; +燮>Shou; +燵>Tatsu; +燹>Sen; +燻>Kun; +燼>Jin; +燿>You; +爆>Baku; +爍>Shaku; +爐>Ro; +爛>Ran; +爨>San; +爪>Sou; +爬>Ha; +爭>Sou; +爰>En; +爲>I; +爵>Shaku; +父>Fu; +爺>Ya; +爻>Kou; +爼>Sho; +爽>Sou; +爾>Ji; +爿>Shou; +牀>Sou; +牆>Shou; +片>Hen; +版>Han; +牋>Sen; +牌>Hai; +牒>Chou; +牘>Toku; +牙>Ga; +牛>Gyuu; +牝>Hin; +牟>Bou; +牡>Bo; +牢>Rou; +牧>Boku; +物>Motsu; +牲>Sei; +牴>Tei; +特>Toku; +牽>Ken; +牾>Go; +犀>Sei; +犁>Ri; +犂>Ri; +犇>Hon; +犒>Kou; +犖>Raku; +犠>Gi; +犢>Toku; +犧>Gi; +犬>Ken; +犯>Han; +犲>Sai; +状>Jou; +犹>Yuu; +狂>Kyou; +狃>Juu; +狄>Teki; +狆>Chuu; +狎>Kou; +狐>Ko; +狒>Hi; +狗>Kou; +狙>Sho; +狛>Haku; +狠>Gan; +狡>Kou; +狢>Kaku; +狩>Shu; +独>Doku; +狭>Kyou; +狷>Ken; +狸>Ri; +狹>Kyou; +狼>Rou; +狽>Hai; +猊>Gei; +猖>Shou; +猗>I; +猛>Mou; +猜>Sai; +猝>Sotsu; +猟>Ryou; +猥>Wai; +猩>Sei; +猪>Cho; +猫>Byou; +献>Ken; +猯>Tan; +猴>Kou; +猶>Yuu; +猷>Yuu; +猾>Katsu; +猿>En; +獄>Goku; +獅>Shi; +獎>Shou; +獏>Baku; +獗>Ketsu; +獣>Juu; +獨>Doku; +獪>Kai; +獰>Dou; +獲>Kaku; +獵>Ryou; +獸>Juu; +獺>Datsu; +獻>Ken; +玄>Gen; +率>Ritsu; +玉>Gyoku; +王>Ou; +玖>Kyuu; +玩>Gan; +玲>Rei; +玳>Tai; +玻>Ha; +珀>Haku; +珂>Ka; +珈>Ka; +珊>San; +珍>Chin; +珎>Chin; +珞>Raku; +珠>Shu; +珥>Ji; +珪>Kei; +班>Han; +珮>Hai; +珱>Ei; +珸>Go; +現>Gen; +球>Kyuu; +琅>Rou; +理>Ri; +琉>Ryuu; +琢>Taku; +琥>Ko; +琲>Hai; +琳>Rin; +琴>Kin; +琵>Bi; 
+琶>Ha; +琺>Hou; +琿>Kon; +瑁>Bou; +瑕>Ka; +瑙>Dou; +瑚>Ko; +瑛>Ei; +瑜>Yu; +瑞>Zui; +瑟>Shitsu; +瑠>Ryuu; +瑣>Sa; +瑤>You; +瑩>Ei; +瑪>Ba; +瑯>Rou; +瑰>Kai; +瑳>Sa; +瑶>You; +瑾>Kin; +璃>Ri; +璋>Shou; +璞>Haku; +璢>Ryuu; +璧>Heki; +環>Kan; +璽>Ji; +瓊>Kei; +瓏>Rou; +瓔>Ei; +瓜>Ka; +瓠>Ko; +瓢>Hyou; +瓣>Ben; +瓦>Ga; +瓮>Ou; +瓶>Hei; +瓷>Shi; +甃>Shuu; +甄>Ken; +甌>Ou; +甍>Bou; +甎>Sen; +甑>Sou; +甓>Heki; +甕>Ou; +甘>Kan; +甚>Shin; +甜>Ten; +甞>Shou; +生>Sei; +産>San; +甥>Sei; +甦>So; +用>You; +甫>Ho; +甬>You; +田>Den; +由>Yuu; +甲>Kou; +申>Shin; +男>Dan; +甸>Ten; +町>Chou; +画>Ga; +甼>Chou; +畄>Ryuu; +畆>Ho; +畉>Fu; +畊>Kou; +畋>Ten; +界>Kai; +畍>Kai; +畏>I; +畔>Han; +留>Ryuu; +畚>Hon; +畛>Shin; +畜>Chiku; +畝>Ho; +畢>Hitsu; +畤>Shi; +略>Ryaku; +畦>Kei; +畧>Ryaku; +番>Ban; +畫>Kaku; +畭>Yo; +異>I; +畳>Jou; +畴>Chuu; +當>Tou; +畷>Tetsu; +畸>Ki; +畿>Ki; +疂>Jou; +疆>Kyou; +疇>Chuu; +疉>Jou; +疊>Jou; +疋>So; +疎>So; +疏>So; +疑>Gi; +疔>Chou; +疚>Kyuu; +疝>San; +疣>Yuu; +疥>Kai; +疫>Eki; +疱>Hou; +疲>Hi; +疳>Kan; +疵>Shi; +疸>Tan; +疹>Shin; +疼>Tou; +疽>Sho; +疾>Shitsu; +痂>Ka; +痃>Ken; +病>Byou; +症>Shou; +痊>Sen; +痍>I; +痒>You; +痔>Ji; +痕>Kon; +痘>Tou; +痙>Kei; +痛>Ts; +痞>Hi; +痢>Ri; +痣>Shi; +痩>Sou; +痰>Tan; +痲>Ma; +痳>Rin; +痴>Chi; +痺>Hi; +痼>Ko; +痾>A; +痿>I; +瘁>Sui; +瘉>Yu; +瘋>Fuu; +瘍>You; +瘟>On; +瘠>Seki; +瘡>Sou; +瘢>Han; +瘤>Ryuu; +瘧>Gyaku; +瘰>Rui; +瘴>Shou; +瘻>Rou; +療>Ryou; +癆>Rou; +癇>Kan; +癈>Hai; +癌>Gan; +癒>Yu; +癖>Heki; +癘>Rei; +癜>Den; +癡>Chi; +癢>You; +癧>Reki; +癨>Kaku; +癩>Rai; +癪>Shaku; +癬>Sen; +癰>You; +癲>Ten; +癶>Hatsu; +癸>Ki; +発>Hotsu; +登>Tou; +發>Hotsu; +白>Haku; +百>Hyaku; +皀>Hyuu; +皃>Bou; +的>Teki; +皆>Kai; +皇>Kou; +皈>Ki; +皋>Kou; +皎>Kou; +皐>Kou; +皓>Kou; +皖>Kan; +皙>Seki; +皚>Gai; +皮>Hi; +皰>Hou; +皴>Shun; +皷>Ko; +皸>Kun; +皹>Kun; +皺>Suu; +皿>Bai; +盂>U; +盃>Hai; +盆>Bon; +盈>Ei; +益>Eki; +盍>Kou; +盒>Kou; +盖>Gai; +盗>Tou; +盛>Sei; +盜>Tou; +盞>San; +盟>Mei; +盡>Jin; +監>Kan; +盤>Ban; +盥>Kan; +盧>Ro; +盪>Tou; +目>Moku; +盲>Mou; +直>Choku; +相>Shou; +盻>Kei; +盾>Jun; +省>Sei; +眄>Ben; +眇>Byou; +眈>Tan; +眉>Bi; +看>Kan; +県>Ken; +眛>Mai; +眞>Shin; +真>Shin; +眠>Min; +眤>Tei; +眥>Sei; +眦>Sei; +眩>Gen; +眷>Ken; +眸>Bou; +眺>Chou; +眼>Gan; +着>Chaku; 
+睇>Tei; +睚>Gai; +睛>Sei; +睡>Sui; +督>Toku; +睥>Hei; +睦>Boku; +睨>Gei; +睫>Shou; +睹>To; +睾>Kou; +睿>Ei; +瞋>Shin; +瞎>Katsu; +瞑>Mei; +瞞>Ban; +瞠>Dou; +瞥>Betsu; +瞬>Shun; +瞭>Ryou; +瞰>Kan; +瞳>Dou; +瞶>Ki; +瞹>Ai; +瞻>Sen; +瞼>Ken; +瞽>Ko; +瞿>Ku; +矇>Mou; +矍>Kaku; +矗>Chiku; +矚>Shoku; +矛>Mu; +矜>Kin; +矢>Shi; +矣>I; +知>Chi; +矧>Shin; +矩>Ku; +短>Tan; +矮>Wai; +矯>Kyou; +石>Shaku; +矼>Kou; +砂>Sa; +砌>Sei; +砒>Hi; +研>Ken; +砕>Sai; +砠>Sho; +砥>Shi; +砦>Sai; +砧>Chin; +砲>Hou; +破>Ha; +砺>Rei; +砿>Kou; +硅>Kei; +硝>Shou; +硫>Ryuu; +硬>Kou; +硯>Ken; +硼>Hou; +碁>Go; +碆>Ha; +碇>Tei; +碌>Roku; +碍>Gai; +碎>Sai; +碑>Hi; +碓>Tai; +碕>Ki; +碗>Wan; +碚>Hai; +碣>Ketsu; +碧>Heki; +碩>Seki; +碪>Chin; +碯>Dou; +碵>Seki; +確>Kaku; +碼>Ba; +碾>Ten; +磁>Ji; +磅>Hou; +磆>Katsu; +磊>Rai; +磋>Sa; +磐>Han; +磑>Gai; +磔>Taku; +磚>Sen; +磧>Seki; +磨>Ma; +磬>Kei; +磯>Ki; +磴>Tou; +磽>Kou; +礁>Shou; +礇>Iku; +礎>So; +礑>Tou; +礒>Gi; +礙>Gai; +礦>Kou; +礪>Rei; +礫>Reki; +礬>Ban; +示>Shi; +礼>Rei; +社>Sha; +祀>Shi; +祁>Ki; +祇>Ki; +祈>Ki; +祉>Shi; +祐>Yuu; +祓>Futsu; +祕>Hi; +祖>So; +祗>Shi; +祚>So; +祝>Shuku; +神>Shin; +祟>Sui; +祠>Shi; +祢>Dei; +祥>Shou; +票>Hyou; +祭>Sai; +祷>Tou; +祺>Ki; +祿>Roku; +禀>Rin; +禁>Kin; +禄>Roku; +禅>Zen; +禊>Kei; +禍>Ka; +禎>Tei; +福>Fuku; +禝>Shoku; +禦>Gyo; +禧>Ki; +禪>Zen; +禮>Rei; +禰>Dei; +禳>Jou; +禹>U; +禺>Guu; +禽>Kin; +禾>Ka; +禿>Toku; +秀>Shuu; +私>Shi; +秉>Hei; +秋>Shuu; +科>Ka; +秒>Byou; +秕>Hi; +秘>Hi; +租>So; +秡>Hatsu; +秣>Matsu; +秤>Shou; +秦>Shin; +秧>Ou; +秩>Chitsu; +秬>Kyo; +称>Shou; +移>I; +稀>Ki; +稈>Kan; +程>Tei; +稍>Sou; +税>Zei; +稔>Jin; +稗>Hai; +稘>Ki; +稙>Choku; +稚>Chi; +稜>Ryou; +稟>Rin; +稠>Chuu; +種>Shu; +稱>Shou; +稲>Tou; +稷>Shoku; +稻>Tou; +稼>Ka; +稽>Kei; +稾>Kou; +稿>Kou; +穀>Koku; +穂>Sui; +穃>You; +穆>Boku; +穉>Chi; +積>Seki; +穎>Ei; +穏>On; +穐>Shuu; +穗>Sui; +穡>Shoku; +穢>Ai; +穣>Jou; +穩>On; +穫>Kaku; +穰>Jou; +穴>Ketsu; +究>Kyuu; +穹>Kyuu; +空>Kuu; +穽>Sei; +穿>Sen; +突>Totsu; +窃>Setsu; +窄>Saku; +窈>You; +窒>Chitsu; +窓>Sou; +窕>Chou; +窖>Kou; +窗>Sou; +窘>Kin; +窟>Kutsu; +窩>Ka; +窪>Wa; +窮>Kyuu; +窯>You; +窰>You; +窶>Ku; +窺>Ki; +窿>Ryuu; +竃>Sou; +竄>Zan; +竅>Kyou; +竇>Tou; +竈>Sou; +竊>Setsu; +立>Ritsu; +竒>Ki; +站>Tan; +竚>Cho; 
+竜>Ryuu; +竝>Hei; +竟>Kei; +章>Shou; +竢>Shi; +竣>Shun; +童>Dou; +竦>Shou; +竪>Ju; +竭>Ketsu; +端>Tan; +競>Kyou; +竸>Kei; +竹>Chiku; +竺>Toku; +竿>Kan; +笄>Kei; +笆>Ha; +笈>Kyuu; +笊>Sou; +笋>Jun; +笏>Kotsu; +笑>Shou; +笘>Sen; +笙>Sou; +笛>Teki; +笞>Chi; +笠>Ryuu; +笥>Shi; +符>Fu; +笨>Hon; +第>Dai; +笳>Ka; +笵>Han; +笶>Shi; +筅>Sen; +筆>Hitsu; +筈>Katsu; +等>Tou; +筋>Kin; +筌>Sen; +筍>Jun; +筏>Batsu; +筐>Kyou; +筑>Chiku; +筒>Tou; +答>Tou; +策>Saku; +筝>Sou; +筥>Kyo; +筧>Ken; +筬>Sei; +筮>Sei; +筰>Saku; +筱>Shou; +筴>Kyou; +筵>En; +筺>Kyou; +箆>Hei; +箇>Ka; +箋>Sen; +箍>Ko; +箏>Sou; +箒>Sou; +箔>Haku; +箕>Ki; +算>San; +箘>Kin; +箙>Fuku; +箚>Satsu; +箜>Kou; +箝>Kan; +箟>Kin; +管>Kan; +箪>Tan; +箭>Sen; +箱>Shou; +箴>Shin; +箸>Cho; +節>Setsu; +篁>Kou; +範>Han; +篆>Ten; +篇>Hen; +築>Chiku; +篋>Kyou; +篌>Kou; +篏>Kan; +篝>Kou; +篠>Shou; +篤>Toku; +篥>Ritsu; +篦>Hei; +篩>Shi; +篭>Rou; +篳>Hitsu; +篶>En; +篷>Hou; +簀>Saku; +簇>Sou; +簍>Rou; +簑>Sa; +簒>San; +簓>Sen; +簔>Sa; +簟>Ten; +簡>Kan; +簣>Ki; +簧>Kou; +簪>Shin; +簫>Shou; +簷>En; +簸>Ha; +簽>Sen; +簾>Ren; +簿>Bo; +籀>Chuu; +籃>Ran; +籌>Chuu; +籍>Seki; +籐>Tou; +籔>Su; +籖>Sen; +籘>Tou; +籟>Rai; +籠>Rou; +籤>Sen; +籥>Yaku; +籬>Ri; +米>Bei; +粃>Hi; +粉>Fun; +粋>Sui; +粐>Ro; +粒>Ryuu; +粕>Haku; +粗>So; +粘>Nen; +粛>Shuku; +粟>Zoku; +粡>Tou; +粢>Shi; +粤>Etsu; +粥>Shuku; +粧>Shou; +粫>Ji; +粮>Ryou; +粱>Ryou; +粲>San; +粳>Kou; +粹>Sui; +粽>Sou; +精>Sei; +糂>San; +糅>Juu; +糊>Ko; +糒>Hi; +糖>Tou; +糜>Bi; +糞>Fun; +糟>Sou; +糠>Kou; +糢>Bo; +糧>Ryou; +糯>Da; +糲>Rei; +糴>Teki; +糶>Chou; +糸>Shi; +糺>Kyuu; +系>Kei; +糾>Kyuu; +紀>Ki; +紂>Chuu; +約>Yaku; +紅>Ku; +紆>U; +紊>Bin; +紋>Mon; +納>Tou; +紐>Chuu; +純>Shun; +紕>Hi; +紗>Sa; +紘>Kou; +紙>Shi; +級>Kyuu; +紛>Fun; +紜>Un; +素>So; +紡>Bou; +索>Saku; +紫>Shi; +紬>Chuu; +紮>Satsu; +累>Rui; +細>Sei; +紲>Setsu; +紳>Shin; +紵>Cho; +紹>Shou; +紺>Kon; +紿>Tai; +終>Shuu; +絃>Gen; +組>So; +絅>Kei; +絆>Ban; +絋>Kou; +経>Kei; +絎>Kou; +絏>Setsu; +結>Ketsu; +絖>Kou; +絛>Jou; +絞>Kou; +絡>Raku; +絢>Ken; +絣>Hou; +給>Kyuu; +絨>Juu; +絮>Jo; +統>Tou; +絲>Shi; +絳>Kou; +絵>Kai; +絶>Zetsu; +絹>Ken; +絽>Ryo; +綉>Tou; +綏>Sui; +經>Kei; +継>Kei; +続>Zoku; +綜>Sou; +綟>Rei; +綢>Chuu; +綣>Ken; +綫>Sen; +綬>Ju; +維>I; +綮>Kei; +綯>Tou; 
+綰>Wan; +綱>Kou; +網>Mou; +綴>Tei; +綵>Sai; +綸>Rin; +綺>Ki; +綻>Tan; +綽>Shaku; +綾>Ryou; +綿>Men; +緇>Shi; +緊>Kin; +緋>Hi; +総>Sou; +緑>Ryoku; +緒>Sho; +緘>Kan; +線>Sen; +緜>Ben; +緝>Shuu; +緞>Tan; +締>Tei; +緡>Bin; +緤>Setsu; +編>Hen; +緩>Kan; +緬>Men; +緯>I; +緲>Byou; +練>Ren; +緻>Chi; +縁>En; +縄>Jou; +縉>Shin; +縊>Ei; +縋>Tsui; +縒>Shi; +縛>Baku; +縞>Kou; +縟>Joku; +縡>Sai; +縢>Tou; +縣>Ken; +縦>Juu; +縫>Hou; +縮>Shuku; +縱>Juu; +縲>Rui; +縵>Ban; +縷>Ru; +縹>Hyou; +縺>Ren; +縻>Bi; +總>Sou; +績>Seki; +繁>Han; +繃>Hou; +繆>Kyuu; +繊>Sen; +繋>Kei; +繍>Shuu; +織>Shoku; +繕>Zen; +繖>San; +繙>Han; +繚>Ryou; +繝>Kan; +繞>Jou; +繦>Kyou; +繧>Un; +繩>Jou; +繪>Kai; +繭>Ken; +繰>Sou; +繹>Eki; +繻>Ju; +繼>Kei; +繽>Hin; +繿>Ran; +纂>San; +纈>Ketsu; +纉>San; +續>Zoku; +纎>Sen; +纏>Ten; +纒>Ten; +纓>Ei; +纔>San; +纖>Sen; +纛>Tou; +纜>Ran; +缶>Kan; +缸>Kou; +缺>Ketsu; +罅>Ka; +罌>Ou; +罍>Rai; +罎>Tan; +罐>Kan; +网>Bou; +罔>Bou; +罕>Kan; +罘>Fu; +罟>Ko; +罠>Bin; +罧>Shin; +罨>An; +罩>Tou; +罪>Zai; +罫>Kei; +置>Chi; +罰>Batsu; +署>Sho; +罵>Ba; +罷>Hi; +罸>Batsu; +罹>Ri; +羂>Ken; +羃>Beki; +羅>Ra; +羆>Hi; +羇>Ki; +羈>Ki; +羊>You; +羌>Kyou; +美>Bi; +羔>Kou; +羚>Rei; +羝>Tei; +羞>Shuu; +羣>Gun; +群>Gun; +羨>Sen; +義>Gi; +羮>Kou; +羯>Katsu; +羲>Gi; +羶>Sen; +羸>Rui; +羹>Kou; +羽>U; +翁>Ou; +翅>Shi; +翆>Sui; +翊>Yoku; +翌>Yoku; +習>Shuu; +翔>Shou; +翕>Kyuu; +翠>Sui; +翡>Hi; +翦>Sen; +翩>Hen; +翫>Gan; +翰>Kan; +翳>Ei; +翹>Gyou; +翻>Hon; +翼>Yoku; +耀>You; +老>Rou; +考>Kou; +耄>Mou; +者>Sha; +耆>Ki; +耋>Tetsu; +而>Ji; +耐>Tai; +耒>Rai; +耕>Kou; +耗>Mou; +耘>Un; +耙>Ha; +耜>Shi; +耡>Jo; +耨>Dou; +耳>Ji; +耶>Ya; +耻>Chi; +耽>Tan; +耿>Kou; +聆>Rei; +聊>Ryou; +聒>Katsu; +聖>Sei; +聘>Hei; +聚>Shuu; +聞>Bun; +聟>Sei; +聡>Sou; +聨>Ren; +聯>Ren; +聰>Sou; +聲>Sei; +聳>Shou; +聴>Chou; +聶>Jou; +職>Shoku; +聹>Dei; +聽>Chou; +聾>Rou; +聿>Itsu; +肄>I; +肅>Shuku; +肆>Shi; +肇>Chou; +肉>Niku; +肋>Roku; +肌>Ki; +肓>Kou; +肖>Shou; +肘>Chuu; +肚>To; +肛>Kou; +肝>Kan; +股>Ko; +肢>Shi; +肥>Hi; +肩>Ken; +肪>Bou; +肬>Yuu; +肭>Dotsu; +肯>Kou; +肱>Kou; +育>Iku; +肴>Kou; +肺>Hai; +胃>I; +胄>Chuu; +胆>Tan; +背>Hai; +胎>Tai; +胖>Han; +胙>So; +胚>Hai; +胛>Kou; +胝>Chi; +胞>Hou; +胡>Ko; +胤>In; +胥>Sho; +胯>Ko; +胱>Kou; +胴>Dou; +胸>Kyou; +胼>Hen; 
+能>Nou; +脂>Shi; +脅>Kyou; +脆>Zei; +脇>Kyou; +脈>Myaku; +脉>Myaku; +脊>Seki; +脚>Kyaku; +脛>Kei; +脣>Shin; +脩>Shuu; +脯>Ho; +脱>Datsu; +脳>Nou; +脹>Chou; +脾>Hi; +腆>Ten; +腋>Eki; +腎>Jin; +腐>Fu; +腑>Fu; +腓>Hi; +腔>Kou; +腕>Wan; +腟>Chitsu; +腥>Sei; +腦>Nou; +腫>Shou; +腮>Sai; +腰>You; +腱>Ken; +腴>Yu; +腸>Chou; +腹>Fuku; +腺>Sen; +腿>Tai; +膀>Hou; +膂>Ryo; +膃>Otsu; +膈>Kaku; +膊>Haku; +膏>Kou; +膓>Chou; +膕>Kaku; +膚>Fu; +膜>Maku; +膝>Shitsu; +膠>Kou; +膣>Chitsu; +膨>Bou; +膩>Ji; +膰>Han; +膳>Sen; +膵>Sui; +膸>Zui; +膺>You; +膽>Tan; +膾>Kai; +膿>Dou; +臀>Den; +臂>Hi; +臆>Oku; +臈>Rou; +臉>Ren; +臍>Sei; +臑>Dau; +臓>Zou; +臘>Rou; +臙>En; +臚>Ryo; +臟>Zou; +臠>Ren; +臣>Shin; +臥>Ga; +臧>Zou; +臨>Rin; +自>Shi; +臭>Shuu; +至>Shi; +致>Chi; +臺>Tai; +臻>Shin; +臼>Kyuu; +臾>Yu; +舁>Yo; +舂>Shou; +舅>Kyuu; +與>Yo; +興>Kyou; +舉>Kyo; +舊>Kyuu; +舌>Zetsu; +舍>Sha; +舎>Sha; +舐>Shi; +舒>Jo; +舖>Ho; +舗>Ho; +舘>Kan; +舛>Sen; +舜>Shun; +舞>Bu; +舟>Shuu; +舩>Sen; +航>Kou; +舫>Hou; +般>Han; +舮>Ro; +舳>Chiku; +舵>Ta; +舶>Haku; +舷>Ken; +舸>Ka; +船>Sen; +艀>Fu; +艇>Tei; +艘>Sou; +艙>Sou; +艚>Sou; +艟>Dou; +艢>Shou; +艤>Gi; +艦>Kan; +艨>Mou; +艪>Ro; +艫>Ro; +艮>Kon; +良>Ryou; +艱>Kan; +色>Shoku; +艶>En; +艷>En; +艸>Sou; +艾>Gai; +芋>U; +芍>Shaku; +芒>Bou; +芙>Fu; +芝>Shi; +芟>San; +芥>Kai; +芦>Ro; +芫>Gen; +芬>Fun; +芭>Ba; +芯>Shin; +花>Ka; +芳>Hou; +芸>Gei; +芹>Kin; +芻>Suu; +芽>Ga; +苅>Gai; +苑>En; +苒>Zen; +苓>Rei; +苔>Tai; +苗>Byou; +苙>Ryuu; +苛>Ka; +苜>Boku; +苞>Hou; +苟>Kou; +苡>I; +苣>Kyo; +若>Jaku; +苦>Ku; +苧>Cho; +苫>Sen; +英>Ei; +苳>Tou; +苴>So; +苹>Hei; +苺>Mai; +苻>Fu; +茂>Mo; +范>Han; +茄>Ka; +茅>Bou; +茆>Bou; +茉>Matsu; +茎>Kei; +茖>Kaku; +茗>Mei; +茘>Ri; +茜>Sen; +茣>Go; +茨>Shi; +茫>Bou; +茯>Fuku; +茱>Shu; +茲>Ji; +茴>Kai; +茵>In; +茶>Cha; +茸>Jou; +茹>Jo; +荀>Jun; +荅>Tou; +草>Sou; +荊>Kei; +荏>Jin; +荐>Sen; +荒>Kou; +荘>Sou; +荳>Tou; +荵>Jin; +荷>Ka; +荻>Teki; +荼>To; +莅>Ri; +莇>Cho; +莉>Chi; +莊>Sou; +莎>Sa; +莓>Mai; +莖>Kei; +莚>En; +莞>Kan; +莟>Kan; +莠>Yuu; +莢>Kyou; +莨>Rou; +莪>Ga; +莫>Bo; +莱>Rai; +莵>To; +莽>Bou; +菁>Sei; +菅>Kan; +菊>Kiku; +菌>Kin; +菎>Kon; +菓>Ka; +菖>Shou; +菘>Suu; +菜>Sai; +菟>To; +菠>Ha; +菩>Hai; +菫>Kin; +華>Ka; +菰>Ko; +菱>Ryou; +菲>Hi; +菴>An; +菷>Sou; +菻>Rin; 
+菽>Shuku; +萃>Sui; +萄>Tou; +萇>Chou; +萋>Sei; +萌>Hou; +萍>Hei; +萎>I; +萓>Gi; +萠>Hou; +萩>Shuu; +萪>Kuwa; +萬>Man; +萱>Ken; +萵>Wa; +萸>Yu; +萼>Gaku; +落>Raku; +葆>Ho; +葉>You; +葎>Ritsu; +著>Cho; +葛>Katsu; +葡>Ho; +葢>Gai; +董>Tou; +葦>I; +葩>Ha; +葫>Ko; +葬>Sou; +葭>Ka; +葮>Tan; +葯>Yaku; +葱>Sou; +葵>Ki; +葷>Gun; +葹>Shi; +葺>Shuu; +蒂>Tei; +蒄>Kan; +蒋>Shou; +蒐>Shuu; +蒔>Shi; +蒙>Bou; +蒜>San; +蒟>Kon; +蒡>Hou; +蒭>Suu; +蒲>Ho; +蒸>Jou; +蒹>Ken; +蒻>Jaku; +蒼>Sou; +蒿>Kou; +蓁>Shin; +蓄>Chiku; +蓆>Seki; +蓉>You; +蓊>Ou; +蓋>Gai; +蓍>Shi; +蓐>Joku; +蓑>Sa; +蓖>Hi; +蓚>Chou; +蓬>Hou; +蓮>Ren; +蓴>Shun; +蓼>Ryou; +蓿>Shuku; +蔀>Hou; +蔆>Ryou; +蔑>Betsu; +蔓>Ban; +蔔>Fuku; +蔕>Tei; +蔗>Sho; +蔘>Shin; +蔚>Utsu; +蔟>Zoku; +蔡>Sai; +蔦>Chou; +蔬>So; +蔭>In; +蔵>Zou; +蔽>Hei; +蕀>Kyoku; +蕁>Jin; +蕃>Ban; +蕈>Shin; +蕉>Shou; +蕊>Zui; +蕋>Zui; +蕎>Kyou; +蕕>Yuu; +蕗>Ro; +蕘>Jou; +蕚>Gaku; +蕣>Shun; +蕨>Ketsu; +蕩>Tou; +蕪>Bu; +蕭>Shou; +蕷>Yo; +蕾>Rai; +薀>Un; +薄>Haku; +薇>Bi; +薈>Kai; +薊>Kei; +薐>Rou; +薑>Kyou; +薔>Shoku; +薗>En; +薙>Tei; +薛>Setsu; +薜>Heki; +薤>Kai; +薦>Sen; +薨>Kou; +薩>Satsu; +薪>Shin; +薫>Kun; +薬>Yaku; +薮>Sou; +薯>Sho; +薹>Tai; +薺>Sei; +藁>Kou; +藉>Sha; +藍>Ran; +藏>Zou; +藐>Baku; +藕>Guu; +藜>Rei; +藝>Gei; +藤>Tou; +藥>Yaku; +藩>Han; +藪>Sou; +藷>Sho; +藹>Ai; +藺>Rin; +藻>Sou; +藾>Rai; +蘂>Zui; +蘆>Ro; +蘇>So; +蘊>Un; +蘋>Hin; +蘓>So; +蘖>Getsu; +蘗>Haku; +蘚>Sen; +蘢>Rou; +蘭>Ran; +蘯>Tou; +蘿>Ra; +虍>Ko; +虎>Ko; +虐>Gyaku; +虔>Ken; +處>Sho; +虚>Kyo; +虜>Ryo; +虞>Gu; +號>Gou; +虧>Ki; +虫>Chuu; +虱>Shitsu; +虹>Kou; +虻>Bou; +蚊>Bun; +蚋>Zei; +蚌>Hou; +蚓>In; +蚕>San; +蚣>Kou; +蚤>Sou; +蚩>Shi; +蚪>Tou; +蚫>Hou; +蚯>Kyuu; +蚰>Yuu; +蚶>Kan; +蛄>Ko; +蛆>Sho; +蛇>Ja; +蛉>Rei; +蛋>Tan; +蛍>Kei; +蛎>Rei; +蛔>Kai; +蛙>A; +蛛>Shu; +蛞>Katsu; +蛟>Kou; +蛤>Kou; +蛩>Kyou; +蛬>Kyou; +蛭>Shitsu; +蛮>Ban; +蛸>Sou; +蛹>You; +蛻>Zei; +蛾>Ga; +蜀>Shoku; +蜂>Hou; +蜃>Shin; +蜆>Ken; +蜈>Go; +蜉>Fu; +蜊>Ri; +蜍>Sho; +蜑>Tan; +蜒>En; +蜘>Chi; +蜚>Hi; +蜜>Mitsu; +蜥>Seki; +蜩>Chou; +蜴>Eki; +蜷>Ken; +蜻>Sei; +蜿>En; +蝉>Sen; +蝋>Rou; +蝌>Ka; +蝎>Katsu; +蝓>Yu; +蝕>Shoku; +蝗>Kou; +蝙>Hen; +蝟>I; +蝠>Fuku; +蝣>Yuu; +蝦>Ka; +蝨>Shitsu; +蝪>Tou; +蝮>Fuku; +蝴>Ko; +蝶>Chou; +蝸>Ka; +蝿>You; +螂>Rou; 
+融>Yuu; +螟>Mei; +螢>Kei; +螫>Seki; +螯>Gou; +螳>Tou; +螺>Ra; +螻>Rou; +螽>Shuu; +蟀>Shutsu; +蟄>Chitsu; +蟆>Ba; +蟇>Ba; +蟋>Shitsu; +蟐>Tou; +蟒>Bou; +蟠>Han; +蟯>Gyou; +蟲>Ki; +蟶>Tei; +蟷>Tou; +蟹>Kai; +蟻>Gi; +蟾>Sen; +蠅>You; +蠍>Katsu; +蠎>Bou; +蠏>Kai; +蠑>Ei; +蠕>Da; +蠖>Kaku; +蠡>Rei; +蠢>Shun; +蠣>Rei; +蠧>To; +蠱>Ko; +蠶>San; +蠹>To; +蠻>Ban; +血>Ketsu; +衂>Jiku; +衄>Jiku; +衆>Shuu; +行>Kou; +衍>En; +衒>Ken; +術>Jutsu; +街>Gai; +衙>Gyo; +衛>Ei; +衝>Shou; +衞>Ei; +衡>Kou; +衢>Ku; +衣>I; +表>Hyou; +衫>San; +衰>Sa; +衲>Dou; +衵>Jitsu; +衷>Chuu; +衽>Jin; +衾>Kin; +衿>Kin; +袁>En; +袂>Bei; +袈>Ka; +袋>Tai; +袍>Hou; +袒>Tan; +袖>Shuu; +袗>Shin; +袙>Ha; +袞>Kon; +袢>Han; +袤>Bou; +被>Hi; +袮>Ne; +袱>Fuku; +袴>Ko; +袵>Jin; +袷>Kou; +袿>Kei; +裁>Sai; +裂>Retsu; +装>Sou; +裏>Ri; +裔>Ei; +裕>Yuu; +裘>Kyuu; +裙>Kun; +補>Ho; +裝>Sou; +裟>Sa; +裡>Ri; +裨>Hi; +裲>Ryou; +裳>Shou; +裴>Hai; +裸>Ra; +裹>Ka; +裼>Seki; +製>Sei; +裾>Kyo; +褂>Kai; +複>Fuku; +褊>Hen; +褌>Kon; +褐>Katsu; +褒>Hou; +褓>Ho; +褝>Tan; +褞>On; +褥>Joku; +褪>Tai; +褫>Chi; +褶>Chou; +褸>Rou; +褻>Setsu; +襁>Kyou; +襃>Hou; +襄>Jou; +襌>Tan; +襍>Zatsu; +襖>Ou; +襞>Heki; +襟>Kin; +襠>Tou; +襤>Ran; +襦>Ju; +襪>Betsu; +襭>Ketsu; +襯>Shin; +襲>Shuu; +襴>Ran; +襾>Aka; +西>Sei; +要>You; +覃>Tan; +覆>Fuku; +覇>Ha; +覈>Kaku; +覊>Ki; +見>Ken; +規>Ki; +覓>Beki; +視>Shi; +覗>Shi; +覘>Ten; +覚>Kaku; +覡>Geki; +覦>Yu; +覧>Ran; +覩>To; +親>Shin; +覬>Ki; +覯>Kou; +覲>Kin; +観>Kan; +覺>Kaku; +覽>Ran; +覿>Teki; +觀>Kan; +角>Kaku; +觚>Ko; +觜>Shi; +觝>Tei; +解>Kai; +触>Shoku; +觧>Kai; +觴>Shou; +觸>Shoku; +言>Gen; +訂>Tei; +訃>Fu; +計>Kei; +訊>Jin; +訌>Kou; +討>Tou; +訐>Ketsu; +訓>Kun; +訖>Kitsu; +託>Taku; +記>Ki; +訛>Ka; +訝>Ga; +訟>Shou; +訣>Ketsu; +訥>Totsu; +訪>Hou; +設>Setsu; +許>Kyo; +訳>Yaku; +訴>So; +訶>Ka; +診>Shin; +註>Chuu; +証>Shou; +詁>Ko; +詆>Tei; +詈>Ri; +詐>Sa; +詑>Ta; +詒>Tai; +詔>Shou; +評>Hyou; +詛>So; +詞>Shi; +詠>Ei; +詢>Jun; +詣>Kei; +試>Shi; +詩>Shi; +詫>Ta; +詬>Kou; +詭>Ki; +詮>Sen; +詰>Kitsu; +話>Wa; +該>Gai; +詳>Shou; +詼>Kai; +誂>Chou; +誄>Rui; +誅>Chuu; +誇>Ko; +誉>Yo; +誌>Shi; +認>Nin; +誑>Kyou; +誓>Sei; +誕>Tan; +誘>Yuu; +誚>Shou; +語>Go; +誠>Sei; +誡>Kai; +誣>Fu; +誤>Go; +誥>Kou; +誦>Shou; +誨>Kai; +説>Setsu; +読>Toku; +誰>Sui; 
+課>Ka; +誹>Hi; +誼>Gi; +調>Chou; +諂>Ten; +諄>Jun; +談>Dan; +請>Sei; +諌>Kan; +諍>Sou; +諏>Shu; +諒>Ryou; +論>Ron; +諚>Jou; +諛>Yu; +諜>Chou; +諞>Hen; +諠>Ken; +諡>Shi; +諢>Kon; +諤>Gaku; +諦>Tei; +諧>Kai; +諫>Kan; +諭>Yu; +諮>Shi; +諱>Ki; +諳>An; +諷>Fuu; +諸>Sho; +諺>Gen; +諾>Daku; +謀>Bou; +謁>Etsu; +謂>I; +謄>Tou; +謇>Ken; +謌>Ka; +謎>Mei; +謐>Hitsu; +謔>Gyaku; +謖>Shoku; +謗>Bou; +謙>Ken; +謚>Shi; +講>Kou; +謝>Sha; +謠>You; +謡>You; +謦>Kei; +謨>Bo; +謫>Taku; +謬>Byou; +謳>Ou; +謹>Kin; +謾>Ban; +譁>Ka; +證>Shou; +譌>Ka; +譎>Kitsu; +譏>Ki; +譖>Shin; +識>Shiki; +譚>Tan; +譛>Shin; +譜>Fu; +譟>Sou; +警>Kei; +譫>Sen; +譬>Hi; +譯>Yaku; +議>Gi; +譱>Zen; +譲>Jou; +譴>Ken; +護>Go; +譽>Yo; +讀>Toku; +讃>San; +變>Hen; +讌>En; +讎>Shuu; +讐>Shuu; +讒>San; +讓>Jou; +讖>Shin; +讙>Kan; +讚>San; +谷>Koku; +谺>Ka; +谿>Kei; +豁>Katsu; +豆>Tou; +豈>Ki; +豊>Hou; +豌>En; +豎>Ju; +豐>Hou; +豕>Shi; +豚>Ton; +象>Shou; +豢>Ken; +豪>Gou; +豫>Yo; +豬>Cho; +豸>Chi; +豹>Hou; +豺>Sai; +豼>Hi; +貂>Chou; +貅>Kyuu; +貉>Kaku; +貊>Haku; +貌>Bou; +貍>Ri; +貎>Gei; +貔>Hi; +貘>Baku; +貝>Bai; +貞>Tei; +負>Fu; +財>Zai; +貢>Kou; +貧>Hin; +貨>Ka; +販>Han; +貪>Don; +貫>Kan; +責>Seki; +貭>Shitsu; +貮>Ni; +貯>Cho; +貰>Sei; +貲>Shi; +貳>Ni; +貴>Ki; +貶>Hen; +買>Bai; +貸>Tai; +費>Hi; +貼>Ten; +貽>I; +貿>Bou; +賀>Ga; +賁>Hi; +賂>Ro; +賃>Chin; +賄>Wai; +資>Shi; +賈>Ko; +賊>Zoku; +賍>Sou; +賎>Sen; +賑>Shin; +賓>Hin; +賚>Rai; +賛>San; +賜>Shi; +賞>Shou; +賠>Bai; +賢>Ken; +賣>Bai; +賤>Sen; +賦>Fu; +質>Shitsu; +賭>To; +賺>Tan; +賻>Fu; +購>Kou; +賽>Sai; +贄>Shi; +贅>Zei; +贇>In; +贈>Zou; +贊>San; +贋>Gan; +贍>Sen; +贏>Ei; +贐>Shin; +贓>Zou; +贔>Hi; +贖>Shoku; +赤>Seki; +赦>Sha; +赧>Tan; +赫>Kaku; +赭>Sha; +走>Sou; +赱>Sou; +赳>Kyuu; +赴>Fu; +起>Ki; +趁>Chin; +超>Chou; +越>Etsu; +趙>Chou; +趣>Shu; +趨>Suu; +足>Soku; +趺>Fu; +趾>Shi; +跂>Ki; +跋>Batsu; +跌>Tetsu; +跏>Ka; +跖>Seki; +跚>San; +跛>Ha; +距>Kyo; +跟>Kon; +跡>Seki; +跣>Sen; +跨>Ko; +跪>Ki; +跫>Kyou; +路>Ro; +跳>Chou; +践>Sen; +跼>Kyoku; +跿>To; +踈>Sho; +踉>Ryou; +踊>You; +踏>Tou; +踐>Sen; +踝>Ka; +踞>Kyo; +踟>Chi; +踪>Shou; +踰>Yu; +踴>You; +踵>Shou; +蹂>Juu; +蹄>Tei; +蹇>Ken; +蹈>Tou; +蹉>Sa; +蹊>Kei; +蹌>Shou; +蹐>Seki; +蹕>Hitsu; +蹙>Shuku; +蹟>Seki; +蹠>Seki; +蹣>Man; +蹤>Shou; +蹲>Son; 
+蹴>Shuu; +蹶>Ketsu; +蹼>Boku; +躁>Sou; +躄>Heki; +躅>Choku; +躇>Cho; +躊>Chuu; +躋>Sei; +躍>Yaku; +躑>Teki; +躓>Chi; +躔>Ten; +躙>Rin; +躡>Jou; +躪>Rin; +身>Shin; +躬>Kyuu; +躯>Ku; +躰>Tei; +躱>Ta; +軆>Tei; +車>Sha; +軋>Atsu; +軌>Ki; +軍>Gun; +軒>Ken; +軛>Aku; +軟>Nan; +転>Ten; +軣>Gou; +軫>Shin; +軸>Jiku; +軻>Ka; +軼>Itsu; +軽>Kei; +軾>Shoku; +較>Kaku; +輅>Ro; +載>Sai; +輊>Chi; +輌>Ryou; +輒>Chou; +輓>Ban; +輔>Fu; +輕>Kei; +輙>Chou; +輛>Ryou; +輜>Shi; +輝>Ki; +輟>Tetsu; +輦>Ren; +輩>Hai; +輪>Rin; +輯>Shuu; +輳>Sou; +輸>Yu; +輹>Fuku; +輻>Fuku; +輾>Ten; +輿>Yo; +轂>Koku; +轄>Katsu; +轅>En; +轆>Roku; +轉>Ten; +轍>Tetsu; +轎>Kyou; +轗>Kan; +轜>Ji; +轟>Gou; +轡>Hi; +轢>Reki; +轣>Reki; +轤>Ro; +辛>Shin; +辜>Ko; +辞>Ji; +辟>Heki; +辣>Ratsu; +辧>Ben; +辨>Ben; +辭>Ji; +辮>Hen; +辯>Ben; +辰>Shin; +辱>Joku; +農>Nou; +辺>Hen; +辿>Ten; +迂>U; +迄>Kitsu; +迅>Jin; +迎>Gei; +近>Kin; +返>Hen; +迢>Chou; +迥>Kei; +迦>Ka; +迩>Ji; +迪>Teki; +迫>Haku; +迭>Tetsu; +迯>Tou; +述>Jutsu; +迴>Kai; +迷>Mei; +迸>Hou; +迹>Seki; +迺>Dai; +追>Tsui; +退>Tai; +送>Sou; +逃>Tou; +逅>Kou; +逆>Gyaku; +逋>Ho; +逍>Shou; +逎>Shuu; +透>Tou; +逐>Chiku; +逑>Kyuu; +逓>Tei; +途>To; +逕>Kei; +逖>Teki; +逗>Tou; +這>Gen; +通>Ts; +逝>Sei; +逞>Tei; +速>Soku; +造>Zou; +逡>Shun; +逢>Hou; +連>Ren; +逮>Tai; +週>Shuu; +進>Shin; +逵>Ki; +逶>I; +逸>Itsu; +逹>Tatsu; +逼>Hitsu; +逾>Yu; +遁>Ton; +遂>Sui; +遅>Chi; +遇>Guu; +遉>Tei; +遊>Yuu; +運>Un; +遍>Hen; +過>Ka; +遏>Atsu; +遐>Ka; +遑>Kou; +遒>Shuu; +道>Dou; +達>Tatsu; +違>I; +遘>Kou; +遙>You; +遜>Son; +遞>Tei; +遠>En; +遡>So; +遣>Ken; +遥>You; +遨>Gou; +適>Teki; +遭>Sou; +遮>Sha; +遯>Ton; +遲>Chi; +遵>Jun; +遶>Jou; +遷>Sen; +選>Sen; +遺>I; +遼>Ryou; +遽>Kyo; +避>Hi; +邀>You; +邁>Bai; +邂>Kai; +邃>Sui; +還>Kan; +邇>Ji; +邉>Hen; +邊>Hen; +邏>Ra; +邑>Yuu; +那>Da; +邦>Hou; +邨>Son; +邪>Ya; +邯>Kan; +邱>Kyuu; +邵>Shou; +邸>Tei; +郁>Iku; +郊>Kou; +郎>Rou; +郛>Fu; +郡>Gun; +郢>Ei; +郤>Geki; +部>Bu; +郭>Kaku; +郵>Yuu; +郷>Kyou; +都>To; +鄂>Gaku; +鄒>Suu; +鄙>Hi; +鄭>Tei; +鄰>Rin; +鄲>Tan; +酉>Yuu; +酊>Tei; +酋>Shuu; +酌>Shaku; +配>Hai; +酎>Chuu; +酒>Shu; +酔>Sui; +酖>Tan; +酘>Tou; +酢>Saku; +酣>Kan; +酥>So; +酩>Mei; +酪>Raku; +酬>Shuu; +酲>Tei; +酳>In; +酵>Kou; +酷>Koku; +酸>San; +醂>Rin; +醇>Shun; +醉>Sui; +醋>Saku; 
+醍>Tei; +醐>Ko; +醒>Sei; +醗>Hatsu; +醜>Shuu; +醢>Kai; +醤>Shou; +醪>Rou; +醫>I; +醯>Kei; +醴>Rei; +醵>Kyo; +醸>Jou; +醺>Kun; +釀>Jou; +釁>Kin; +釆>Han; +采>Sai; +釈>Shaku; +釉>Yuu; +釋>Shaku; +里>Ri; +重>Chou; +野>Ya; +量>Ryou; +釐>Ri; +金>Kin; +釖>Tou; +釘>Tei; +釛>Koku; +釜>Fu; +針>Shin; +釟>Hatsu; +釡>Fu; +釣>Chou; +釦>Kou; +釧>Sen; +釵>Sa; +釶>Shi; +釼>Ken; +釿>Kin; +鈍>Don; +鈎>Kou; +鈑>Han; +鈔>Shou; +鈕>Chuu; +鈞>Kin; +鈩>Ro; +鈬>Taku; +鈴>Rei; +鈷>Ko; +鈿>Ten; +鉄>Tetsu; +鉅>Kyo; +鉈>Sha; +鉉>Ken; +鉋>Hou; +鉐>Seki; +鉗>Kan; +鉚>Ryuu; +鉛>En; +鉞>Etsu; +鉢>Hachi; +鉤>Kou; +鉦>Sei; +鉱>Kou; +鉾>Bou; +銀>Gin; +銃>Juu; +銅>Dou; +銑>Sen; +銓>Sen; +銕>Tetsu; +銖>Shu; +銘>Mei; +銚>You; +銛>Sen; +銜>Kan; +銭>Sen; +銷>Shou; +銹>Shuu; +鋏>Kyou; +鋒>Hou; +鋤>Jo; +鋩>Bou; +鋪>Ho; +鋭>Ei; +鋳>Chuu; +鋸>Kyo; +鋺>En; +鋼>Kou; +錆>Sei; +錏>A; +錐>Sui; +錘>Tsui; +錙>Shi; +錚>Sou; +錠>Jou; +錢>Sen; +錣>Tei; +錦>Kin; +錨>Byou; +錫>Seki; +錬>Ren; +錮>Ko; +錯>Saku; +録>Roku; +錻>Bu; +鍄>Kei; +鍋>Ka; +鍍>To; +鍔>Gaku; +鍖>Chin; +鍛>Tan; +鍜>Ka; +鍠>Kou; +鍬>Shuu; +鍮>Chuu; +鍵>Ken; +鍼>Shin; +鍾>Shou; +鎌>Ren; +鎔>You; +鎖>Sa; +鎗>Sou; +鎚>Tsui; +鎧>Gai; +鎬>Kou; +鎭>Chin; +鎮>Chin; +鎰>Itsu; +鏃>Zoku; +鏈>Ren; +鏐>Ryuu; +鏑>Teki; +鏖>Ou; +鏗>Kou; +鏘>Shou; +鏝>Man; +鏡>Kyou; +鏤>Rou; +鏥>Shuu; +鏨>San; +鐃>Dou; +鐇>Han; +鐐>Ryou; +鐓>Tai; +鐔>Shin; +鐘>Shou; +鐙>Tou; +鐚>A; +鐡>Tetsu; +鐫>Sen; +鐵>Tetsu; +鐶>Kan; +鐸>Taku; +鐺>Tou; +鑁>Ban; +鑄>Chuu; +鑑>Kan; +鑒>Kan; +鑚>San; +鑛>Kou; +鑞>Rou; +鑠>Shaku; +鑢>Ryo; +鑪>Ro; +鑰>Yaku; +鑵>Kan; +鑷>Jou; +鑼>Ra; +鑽>San; +鑾>Ran; +鑿>Saku; +钁>Kaku; +長>Chou; +門>Mon; +閂>San; +閃>Sen; +閇>Hei; +閉>Hei; +開>Kai; +閏>Jun; +閑>Kan; +間>Kan; +閔>Bin; +閘>Kou; +閙>Tou; +関>Kan; +閣>Kaku; +閤>Kou; +閥>Batsu; +閧>Kou; +閨>Kei; +閭>Ro; +閲>Etsu; +閹>En; +閻>En; +閼>A; +閾>Yoku; +闃>Geki; +闇>An; +闊>Katsu; +闌>Ran; +闍>To; +闔>Kou; +闕>Ketsu; +闖>Chin; +闘>Tou; +關>Kan; +闡>Sen; +闢>Heki; +闥>Tatsu; +阜>Fu; +阡>Sen; +阨>Aku; +阪>Han; +阮>Gen; +阯>Shi; +防>Bou; +阻>So; +阿>A; +陀>Da; +陂>Ha; +附>Fu; +陋>Rou; +陌>Haku; +降>Kou; +陏>Ta; +限>Gen; +陛>Hei; +陜>Kou; +陝>Sen; +陞>Shou; +陟>Choku; +院>In; +陣>Jin; +除>Jo; +陥>Kan; +陦>Tou; +陪>Bai; +陬>Suu; +陰>In; +陲>Sui; +陳>Chin; 
+陵>Ryou; +陶>Tou; +陷>Kan; +陸>Riku; +険>Ken; +陽>You; +隅>Guu; +隆>Ryuu; +隈>Wai; +隊>Tai; +隋>Ta; +隍>Kou; +階>Kai; +随>Zui; +隔>Kaku; +隕>In; +隗>Kai; +隘>Ai; +隙>Geki; +際>Sai; +障>Shou; +隠>In; +隣>Rin; +隧>Sui; +隨>Zui; +險>Ken; +隰>Shitsu; +隱>In; +隲>Shitsu; +隴>Rou; +隶>Tai; +隷>Rei; +隸>Rei; +隹>Sui; +隻>Seki; +隼>Jun; +雀>Jaku; +雁>Gan; +雄>Yuu; +雅>Ga; +集>Shuu; +雇>Ko; +雉>Chi; +雋>Sen; +雌>Shi; +雍>You; +雎>Sho; +雑>Zatsu; +雕>Chou; +雖>Sui; +雙>Sou; +雛>Suu; +雜>Zatsu; +離>Ri; +難>Nan; +雨>U; +雪>Setsu; +雫>Da; +雰>Fun; +雲>Un; +零>Rei; +雷>Rai; +雹>Haku; +電>Den; +需>Ju; +霄>Shou; +霆>Tei; +震>Shin; +霈>Hai; +霊>Rei; +霍>Kaku; +霎>Sou; +霏>Hi; +霑>Ten; +霓>Gei; +霖>Rin; +霙>Ei; +霜>Sou; +霞>Ka; +霤>Ryuu; +霧>Mu; +霪>In; +霰>San; +露>Ro; +霸>Haku; +霹>Heki; +霽>Sei; +霾>Bai; +靂>Reki; +靄>Ai; +靆>Tai; +靈>Rei; +靉>Ai; +青>Sei; +靖>Sei; +静>Sei; +靜>Sei; +非>Hi; +靠>Kou; +靡>Hi; +面>Men; +靤>Hou; +靦>Ten; +靨>You; +革>Kaku; +靫>Sai; +靭>Jin; +靱>Jin; +靴>Ka; +靹>Ketsu; +靺>Matsu; +靼>Tan; +鞁>Hi; +鞄>Hou; +鞅>Ou; +鞋>Ai; +鞍>An; +鞏>Kyou; +鞘>Sou; +鞜>Tou; +鞠>Kiku; +鞣>Juu; +鞦>Shuu; +鞨>Katsu; +鞫>Kiku; +鞭>Hen; +鞳>Tou; +鞴>Fuku; +韃>Datsu; +韆>Sen; +韈>Betsu; +韋>I; +韓>Kan; +韜>Tou; +韭>Kyuu; +韮>Kyou; +韲>Sei; +音>On; +韵>In; +韶>Shou; +韻>In; +響>Kyou; +頁>Ketsu; +頂>Chou; +頃>Kei; +項>Kou; +順>Jun; +須>Shu; +頌>Shou; +頏>Kou; +預>Yo; +頑>Gan; +頒>Han; +頓>Ton; +頗>Ha; +領>Ryou; +頚>Kei; +頡>Kitsu; +頤>I; +頬>Kyou; +頭>Tou; +頴>Ei; +頷>Kan; +頸>Kei; +頻>Hin; +頼>Rai; +頽>Tai; +顆>Ka; +顋>Sai; +題>Dai; +額>Gaku; +顎>Gaku; +顏>Gan; +顔>Gan; +顕>Ken; +願>Gan; +顛>Ten; +類>Rui; +顧>Ko; +顫>Sen; +顯>Ken; +顰>Hin; +顱>Ro; +顳>Shou; +顴>Kan; +風>Fuu; +颯>Satsu; +颱>Tai; +颶>Ku; +飃>Hyou; +飄>Hyou; +飆>Hyou; +飛>Hi; +飜>Hon; +食>Shoku; +飢>Ki; +飩>Ton; +飫>Yo; +飭>Choku; +飮>In; +飯>Han; +飲>In; +飴>I; +飼>Shi; +飽>Hou; +飾>Shoku; +餃>Kou; +餅>Hei; +餉>Shou; +養>You; +餌>Ji; +餐>San; +餒>Dai; +餓>Ga; +餔>Ho; +餘>Yo; +餝>Shoku; +餞>Sen; +餠>Hei; +餡>Kan; +餤>Tan; +館>Kan; +餬>Ko; +餮>Tetsu; +餽>Ki; +餾>Ryuu; +饂>Un; +饅>Man; +饉>Kin; +饋>Ki; +饌>Sen; +饐>I; +饑>Ki; +饒>Jou; +饕>Tou; +饗>Kyou; +首>Shu; +馗>Ki; +馘>Kaku; +香>Kou; +馥>Fuku; +馨>Kei; +馬>Ba; +馭>Gyo; +馮>Hyou; +馳>Chi; +馴>Shun; 
+馼>Bun; +駁>Baku; +駄>Ta; +駅>Eki; +駆>Ku; +駈>Ku; +駐>Chuu; +駑>Do; +駒>Ku; +駕>Ga; +駘>Tai; +駛>Shi; +駝>Ta; +駟>Shi; +駢>Hen; +駭>Kai; +駮>Haku; +駱>Raku; +駸>Shin; +駻>Kan; +駿>Shun; +騁>Tei; +騅>Sui; +騎>Ki; +騏>Ki; +騒>Sou; +験>Ken; +騙>Hen; +騨>Tan; +騫>Ken; +騰>Tou; +騷>Sou; +騾>Ra; +驀>Baku; +驂>San; +驃>Hyou; +驅>Ku; +驍>Gyou; +驕>Kyou; +驗>Ken; +驚>Kyou; +驛>Eki; +驟>Shuu; +驢>Ryo; +驤>Jou; +驥>Ki; +驩>Kan; +驪>Ri; +驫>Hyuu; +骨>Kotsu; +骭>Kan; +骰>Tou; +骸>Kai; +骼>Kaku; +髀>Hi; +髄>Zui; +髏>Rou; +髑>Toku; +髓>Zui; +體>Tei; +高>Kou; +髞>Sou; +髟>Hyou; +髢>Tei; +髣>Hou; +髦>Bou; +髪>Hatsu; +髫>Chou; +髭>Shi; +髮>Hatsu; +髯>Zen; +髱>Hou; +髴>Futsu; +髷>Kyoku; +髻>Kei; +鬆>Shou; +鬘>Ban; +鬚>Shu; +鬟>Kan; +鬢>Hin; +鬣>Ryou; +鬥>Tou; +鬧>Tou; +鬨>Kou; +鬩>Geki; +鬪>Tou; +鬮>Kyuu; +鬯>Chou; +鬱>Utsu; +鬲>Reki; +鬻>Shuku; +鬼>Ki; +魁>Kai; +魂>Kon; +魃>Batsu; +魄>Haku; +魅>Mi; +魍>Bou; +魎>Ryou; +魏>Gi; +魑>Chi; +魔>Ma; +魘>En; +魚>Gyo; +魯>Ro; +魴>Hou; +鮃>Hyou; +鮎>Nen; +鮑>Hou; +鮒>Fu; +鮓>Sa; +鮟>An; +鮠>Gai; +鮨>Shi; +鮪>I; +鮫>Kou; +鮭>Kei; +鮮>Sen; +鮹>Sou; +鯀>Kon; +鯆>Ho; +鯉>Ri; +鯊>Sa; +鯔>Shi; +鯖>Sei; +鯛>Chou; +鯡>Hi; +鯢>Gei; +鯣>Eki; +鯤>Kon; +鯨>Gei; +鯰>Nen; +鯵>Sou; +鰄>I; +鰆>Shun; +鰈>Chou; +鰉>Kou; +鰊>Ren; +鰌>Shuu; +鰍>Shuu; +鰐>Gaku; +鰒>Fuku; +鰓>Sai; +鰔>Kan; +鰕>Ka; +鰛>On; +鰡>Ryuu; +鰤>Shi; +鰥>Kan; +鰭>Ki; +鰮>On; +鰲>Gou; +鰹>Ken; +鰺>Sou; +鰻>Ban; +鰾>Hyou; +鱆>Shou; +鱇>Kou; +鱈>Setsu; +鱒>Son; +鱗>Rin; +鱠>Kai; +鱧>Rei; +鱶>Shou; +鱸>Ro; +鳥>Chou; +鳧>Fu; +鳩>Kyuu; +鳫>Gan; +鳬>Fu; +鳳>Hou; +鳴>Mei; +鳶>En; +鴃>Ketsu; +鴆>Chin; +鴇>Hou; +鴈>Gan; +鴉>A; +鴎>Ou; +鴒>Rei; +鴕>Ta; +鴛>En; +鴟>Shi; +鴣>Ko; +鴦>You; +鴨>Ou; +鴪>Itsu; +鴬>Ou; +鴻>Kou; +鴾>Bou; +鴿>Kou; +鵁>Kou; +鵄>Shi; +鵐>Bu; +鵑>Ken; +鵙>Geki; +鵜>Tei; +鵝>Ga; +鵞>Ga; +鵠>Koku; +鵡>Bu; +鵬>Hou; +鵯>Hi; +鵲>Jaku; +鵺>Ya; +鶇>Tou; +鶉>Shun; +鶏>Kei; +鶚>Gaku; +鶤>Kon; +鶩>Boku; +鶯>Ou; +鶲>Ou; +鶴>Kaku; +鶸>Jaku; +鶺>Seki; +鶻>Kotsu; +鷁>Geki; +鷂>You; +鷄>Kei; +鷆>Ten; +鷏>Ten; +鷓>Sha; +鷙>Shi; +鷦>Shou; +鷭>Ban; +鷯>Ryou; +鷲>Shuu; +鷸>Itsu; +鷹>You; +鷺>Ro; +鷽>Kaku; +鸚>Ou; +鸛>Kan; +鸞>Ran; +鹵>Ro; +鹸>Ken; +鹹>Kan; +鹽>En; +鹿>Roku; +麁>So; +麈>Shu; +麋>Bi; +麌>Gu; +麑>Gei; +麒>Ki; +麓>Roku; +麕>Kin; 
+麗>Rei; +麝>Sha; +麟>Rin; +麥>Baku; +麦>Baku; +麩>Fu; +麪>Men; +麭>Hou; +麸>Fu; +麹>Kiku; +麺>Men; +麻>Ma; +麼>Ma; +麾>Ki; +麿>Ro; +黄>Kou; +黌>Kou; +黍>Sho; +黎>Rei; +黏>Nen; +黐>Chi; +黒>Koku; +黔>Ken; +默>Moku; +黙>Moku; +黛>Tai; +黜>Chutsu; +黝>Yuu; +點>Ten; +黠>Katsu; +黥>Gei; +黨>Tou; +黯>An; +黴>Bai; +黶>En; +黷>Toku; +黹>Chi; +黻>Futsu; +黼>Ho; +黽>Bou; +鼇>Gou; +鼈>Betsu; +鼎>Tei; +鼓>Ko; +鼕>Tou; +鼠>So; +鼡>Sho; +鼬>Yuu; +鼻>Bi; +鼾>Kan; +齊>Sei; +齋>Sai; +齎>Sei; +齏>Sei; +齒>Shi; +齔>Shin; +齟>So; +齠>Chou; +齡>Rei; +齢>Rei; +齣>Shutsu; +齦>Gin; +齧>Ketsu; +齪>Soku; +齬>Gyo; +齲>Ku; +齶>Gaku; +齷>Aku; +龍>Ryuu; +龕>Gan; +龜>Ki; +龝>Shuu; +龠>Yaku; + +# eof diff --git a/demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt b/demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt new file mode 100644 index 00000000000..ef5f90c6840 --- /dev/null +++ b/demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt @@ -0,0 +1,55 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +@UPPERFILTER@ +Unicode คืออะไร? +Unicode กำหนดหมายเลขเฉพาะสำหรับทุกอักขระ +โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด +ไม่ขึ้นกับว่าจะเป็นโปรแกรมใด +และไม่ว่าจะเป็นภาษาใด + +โดยพื้นฐานแล้ว, คอมพิวเตอร์จะเกี่ยวข้องกับเรื่องของตัวเลข. คอมพิวเตอร์จัดเก็บตัวอักษรและอักขระอื่นๆ โดยการกำหนดหมายเลขให้สำหรับแต่ละตัว. ก่อนหน้าที่๊ Unicode จะถูกสร้างขึ้น, ได้มีระบบ encoding อยู่หลายร้อยระบบสำหรับการกำหนดหมายเลขเหล่านี้. ไม่มี encoding ใดที่มีจำนวนตัวอักขระมากเพียงพอ: ยกตัวอย่างเช่น, เฉพาะในกลุ่มสหภาพยุโรปเพียงแห่งเดียว ก็ต้องการหลาย encoding ในการครอบคลุมทุกภาษาในกลุ่ม. หรือแม้แต่ในภาษาเดี่ยว เช่น ภาษาอังกฤษ ก็ไม่มี encoding ใดที่เพียงพอสำหรับทุกตัวอักษร, เครื่องหมายวรรคตอน และสัญลักษณ์ทางเทคนิคที่ใช้กันอยู่ทั่วไป. + +ระบบ encoding เหล่านี้ยังขัดแย้งซึ่งกันและกัน. 
นั่นก็คือ, ในสอง encoding สามารถใช้หมายเลขเดียวกันสำหรับตัวอักขระสองตัวที่แตกต่างกัน,หรือใช้หมายเลขต่างกันสำหรับอักขระตัวเดียวกัน. ในระบบคอมพิวเตอร์ (โดยเฉพาะเซิร์ฟเวอร์) ต้องมีการสนับสนุนหลาย encoding; และเมื่อข้อมูลที่ผ่านไปมาระหว่างการเข้ารหัสหรือแพล็ตฟอร์มที่ต่างกัน, ข้อมูลนั้นจะเสี่ยงต่อการผิดพลาดเสียหาย. + +Unicode จะเปลี่ยนแปลงสิ่งเหล่านั้นทั้งหมด! +Unicode กำหนดหมายเลขเฉพาะสำหรับแต่ละอักขระ, โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด, ไม่ขึ้นกับว่าจะเป็นโปรแกรมใดและไม่ว่าจะเป็นภาษาใด. มาตรฐาน Unicode ได้ถูกนำไปใช้โดยผู้นำในอุตสาหกรรม เช่น Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys และอื่นๆ อีกมาก. Unicode เป็นสิ่งที่จำเป็นสำหรับมาตรฐานใหม่ๆ เช่น XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML ฯลฯ., และเป็นแนวทางอย่างเป็นทางการในการทำ ISO/IEC 10646. Unicode ได้รับการสนับสนุนในระบบปฏิบัติการจำนวนมาก, บราวเซอร์ใหม่ๆ ทกตัว, และผลิตภัณฑ์อื่นๆ อีกมาก. การเกิดขึ้นของ Unicode Standard และทูลส์ต่างๆ ที่มีในการสนับสนุน Unicode, เป็นหนึ่งในแนวโน้มทางเทคโนโลยีซอฟต์แวร์ระดับโลกที่มีความสำคัญที่สุด. + +การรวม Unicode เข้าไปในระบบไคลเอ็นต์-เซิร์ฟเวอร์ หรือแอ็พพลิเคชันแบบ multi-tiered และเว็บไซต์ จะทำให้เกิดการประหยัดค่าใช้จ่ายมากกว่าการใช้ชุดอักขระแบบเดิม. Unicode ทำให้ผลิตภัณฑ์ซอฟต์แวร์หนึ่งเดียว หรือเว็บไซต์แห่งเดียว รองรับได้หลายแพล็ตฟอร์ม, หลายภาษาและหลายประเทศโดยไม่ต้องทำการรื้อปรับระบบ. Unicode ยังทำให้ข้อมูลสามารถเคลื่อนย้ายไปมาในหลายๆ ระบบโดยไม่เกิดความผิดพลาดเสียหาย. + +เกี่ยวกับ Unicode Consortium +Unicode Consortium เป็นองค์กรไม่แสวงหากำไรที่ก่อตั้งขึ้นเพื่อพัฒนา, ขยายและส่งเสริมการใช้ Unicode Standard, ซึ่งกำหนดรูปแบบการแทนค่าของข้อความในผลิตภัณฑ์ซอฟต์แวร์และมาตรฐานใหม่ๆ. สมาชิกของสมาคมเป็นตัวแทนจากบริษัทและองค์กรในอุตสาหกรรมคอมพิวเตอร์และการประมวลผลสารสนเทศ. สมาคมได้รับการสนับสนุนทางการเงินผ่านทางค่าธรรมเนียมของการเป็นสมาชิกเท่านั้น. สมาชิกภาพของ Unicode Consortium เปิดกว้างสำหรับองค์กรหรือบุคคลใดๆ ในโลกที่ต้องการสนับสนุน Unicode Standard และช่วยเหลือการขยายตัวและการนำ Unicode ไปใช้งาน. 
+ +สำหรับข้อมูลเพิ่มเติม, ให้ดูที่ Glossary, Sample Unicode-Enabled Products, Technical Introduction และ Useful Resources. +@TITLECASE@ +ก๊กเฮง แซ่แต้ +กชกร ศราทธทัต +กติกา อังคสุภณ +กนก ธรรมประทีป +กนก วงศ์ทองศรี +กนกกร ช้างเย็นฉ่ำ +กนกฉัตร์ ถาวรนันท์ +กนกนวล โปษยะนันทน์ +กนกพร คมคาย +กนกพร ตีรเลิศพานิช +กนกพร พันทร +กนกพร ศรีบัณฑิต +กนกพร อติวรรณาพัฒน์ +กนกพรรณ ศรีวนาภิรมย์ +กนกรัตน์ เกียรติยิ่งอังศุลี +กนกรัตน์ สุธรรมพิทักษ์ +กนกวรรณ คงคาประเสริฐ +กนกวรรณ แซ่เตียว +กนกวรรณ บุญประเสริฐ +กนกวรรณ รักทรัพย์ +กนกวรรณ สัจจพงษ์ +กนกวรรณ อุ้ยวงศ์ไพศาล +กนกศักดิ์ ยิ่งยง +กนกแก้ว กรสมิต +กนิษฐา ทนุถนอมราษฎร์ +กนิษฐา หวังวิบูลย์กิจ +กมล กาญจนโรจน์ +กมล คัมภีร์ +กมล เจตน์มงคลรัตน์ +กมล ชูตระกูลธรรม \ No newline at end of file diff --git a/main/classes/charset/.classpath b/main/classes/charset/.classpath new file mode 100644 index 00000000000..3529965b155 --- /dev/null +++ b/main/classes/charset/.classpath @@ -0,0 +1,7 @@ + + + + + + + diff --git a/main/classes/charset/.externalToolBuilders/copy-data-charset.launch b/main/classes/charset/.externalToolBuilders/copy-data-charset.launch new file mode 100644 index 00000000000..dd1c877c964 --- /dev/null +++ b/main/classes/charset/.externalToolBuilders/copy-data-charset.launch @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/charset/.project b/main/classes/charset/.project new file mode 100644 index 00000000000..a4fb3b89418 --- /dev/null +++ b/main/classes/charset/.project @@ -0,0 +1,29 @@ + + + icu4j-charset + + + icu4j-core + icu4j-shared + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.ui.externaltools.ExternalToolBuilder + full,incremental, + + + LaunchConfigHandle + <project>/.externalToolBuilders/copy-data-charset.launch + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/main/classes/charset/.settings/org.eclipse.jdt.core.prefs b/main/classes/charset/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000000..ce15e6f7d61 --- /dev/null +++ 
b/main/classes/charset/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,343 @@ +#Thu Aug 27 17:46:17 EDT 2009 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.doc.comment.support=enabled +org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.autoboxing=ignore +org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning +org.eclipse.jdt.core.compiler.problem.deadCode=warning +org.eclipse.jdt.core.compiler.problem.deprecation=ignore +org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled +org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled +org.eclipse.jdt.core.compiler.problem.discouragedReference=warning +org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning +org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled +org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore +org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning +org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning +org.eclipse.jdt.core.compiler.problem.forbiddenReference=error +org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning +org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning +org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore 
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore +org.eclipse.jdt.core.compiler.problem.invalidJavadoc=warning +org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=enabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=public +org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore +org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning +org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled +org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public +org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=all_standard_tags +org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled +org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public +org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning +org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore +org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning +org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning +org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore +org.eclipse.jdt.core.compiler.problem.nullReference=warning +org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning +org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore +org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore 
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore +org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning +org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore +org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore +org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled +org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning +org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled +org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore +org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning +org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning +org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore +org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning +org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore +org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore +org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled +org.eclipse.jdt.core.compiler.problem.unusedImport=warning +org.eclipse.jdt.core.compiler.problem.unusedLabel=warning +org.eclipse.jdt.core.compiler.problem.unusedLocal=warning +org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore +org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled +org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning 
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning +org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning +org.eclipse.jdt.core.compiler.source=1.5 +org.eclipse.jdt.core.formatter.align_type_members_on_columns=false +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_assignment=0 +org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 +org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 +org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 +org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 +org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 +org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_after_package=1 +org.eclipse.jdt.core.formatter.blank_lines_before_field=0 
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 +org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 +org.eclipse.jdt.core.formatter.blank_lines_before_method=1 +org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 +org.eclipse.jdt.core.formatter.blank_lines_before_package=0 +org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 +org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 +org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false +org.eclipse.jdt.core.formatter.comment.format_block_comments=true +org.eclipse.jdt.core.formatter.comment.format_header=false +org.eclipse.jdt.core.formatter.comment.format_html=true +org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true +org.eclipse.jdt.core.formatter.comment.format_line_comments=true +org.eclipse.jdt.core.formatter.comment.format_source_code=true 
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true +org.eclipse.jdt.core.formatter.comment.indent_root_tags=true +org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert +org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert +org.eclipse.jdt.core.formatter.comment.line_length=120 +org.eclipse.jdt.core.formatter.compact_else_if=true +org.eclipse.jdt.core.formatter.continuation_indentation=2 +org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 +org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true +org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_empty_lines=false +org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true +org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false +org.eclipse.jdt.core.formatter.indentation.size=4 +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert 
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not 
insert +org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert 
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert 
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert +org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert +org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false +org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false +org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false +org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false +org.eclipse.jdt.core.formatter.lineSplit=120 
+org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false +org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false +org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 +org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 +org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true +org.eclipse.jdt.core.formatter.tabulation.char=space +org.eclipse.jdt.core.formatter.tabulation.size=4 +org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false +org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true diff --git a/main/classes/charset/.settings/org.eclipse.jdt.ui.prefs b/main/classes/charset/.settings/org.eclipse.jdt.ui.prefs new file mode 100644 index 00000000000..2f2671e6ee4 --- /dev/null +++ b/main/classes/charset/.settings/org.eclipse.jdt.ui.prefs @@ -0,0 +1,10 @@ +#Wed Jun 17 11:09:55 EDT 2009 +eclipse.preferences.version=1 +formatter_profile=_ICU4J Standard +formatter_settings_version=11 +org.eclipse.jdt.ui.ignorelowercasenames=true +org.eclipse.jdt.ui.importorder=java;javax;org;com; +org.eclipse.jdt.ui.javadoc=true +org.eclipse.jdt.ui.ondemandthreshold=99 +org.eclipse.jdt.ui.staticondemandthreshold=99 +org.eclipse.jdt.ui.text.custom_code_templates= diff --git a/main/classes/charset/build.properties b/main/classes/charset/build.properties new file mode 100644 index 00000000000..a21fb196196 --- /dev/null +++ b/main/classes/charset/build.properties @@ -0,0 +1,6 @@ +#******************************************************************************* +#* Copyright (C) 2009, International Business Machines Corporation and * +#* others. All Rights Reserved. 
* +#******************************************************************************* +shared.dir = ../../shared +javac.compilerarg = -Xlint:all,-deprecation,-dep-ann diff --git a/main/classes/charset/build.xml b/main/classes/charset/build.xml new file mode 100644 index 00000000000..b9c0493f2ef --- /dev/null +++ b/main/classes/charset/build.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/main/classes/charset/charset-build.launch b/main/classes/charset/charset-build.launch new file mode 100644 index 00000000000..5afd6a3ee4b --- /dev/null +++ b/main/classes/charset/charset-build.launch @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/charset/manifest.stub b/main/classes/charset/manifest.stub new file mode 100644 index 00000000000..a117a360bc0 --- /dev/null +++ b/main/classes/charset/manifest.stub @@ -0,0 +1,11 @@ +Manifest-Version: 1.0 + +Name: com/ibm/icu/charset +Specification-Title: ICU4J Charset +Specification-Version: @SPECVERSION@ +Specification-Vendor: ICU +Implementation-Title: ICU for Java Charset +Implementation-Version: @IMPLVERSION@ +Implementation-Vendor: IBM Corporation +Implementation-Vendor-Id: com.ibm +Copyright-Info: @COPYRIGHT@ \ No newline at end of file diff --git a/main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider b/main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider new file mode 100644 index 00000000000..ca798e7dd4a --- /dev/null +++ b/main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider @@ -0,0 +1,3 @@ +# Copyright (C) 2006, International Business Machines Corporation and others. All Rights Reserved. 
+# icu4j converters +com.ibm.icu.charset.CharsetProviderICU diff --git a/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java b/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java new file mode 100644 index 00000000000..a74a1f75e73 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java @@ -0,0 +1,128 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.charset; + +import java.nio.BufferOverflowException; +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UnicodeSet; + +class Charset88591 extends CharsetASCII { + public Charset88591(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + } + + class CharsetDecoder88591 extends CharsetDecoderASCII { + public CharsetDecoder88591(CharsetICU cs) { + super(cs); + } + + protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target, + byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) { + + /* + * perform 88591 conversion from the source array to the target array. no range check is + * necessary. + */ + for (int i = oldSource; i < limit; i++) + targetArray[i + offset] = (char) (sourceArray[i] & 0xff); + + return null; + } + + protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) + throws BufferUnderflowException, BufferOverflowException { + + /* + * perform 88591 conversion from the source buffer to the target buffer.
no range check + * is necessary (an exception will be generated to end the loop). + */ + while (true) + target.put((char) (source.get() & 0xff)); + } + } + + class CharsetEncoder88591 extends CharsetEncoderASCII { + public CharsetEncoder88591(CharsetICU cs) { + super(cs); + } + + protected final CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target, + char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit, + boolean flush) { + int i, ch = 0; + + /* + * perform 88591 conversion from the source array to the target array, making sure each + * char in the source is within the correct range + */ + for (i = oldSource; i < limit; i++) { + ch = (int) sourceArray[i]; + if ((ch & 0xff00) == 0) { + targetArray[i + offset] = (byte) ch; + } else { + break; + } + } + + /* + * if some char was not in the correct range, we need to deal with this char by calling + * encodeMalformedOrUnmappable and move the source and target positions to reflect the + * early termination of the loop. NOTE(review): i indexes the backing array (buffer position + arrayOffset), so the position() calls below look wrong for a non-zero arrayOffset -- confirm + */ + if ((ch & 0xff00) != 0) { + source.position(i + 1); + target.position(i + offset); + return encodeMalformedOrUnmappable(source, ch, flush); + } else + return null; + } + + protected final CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, + boolean flush) throws BufferUnderflowException, BufferOverflowException { + int ch; + + /* + * perform 88591 conversion from the source buffer to the target buffer, making sure + * each char in the source is within the correct range + */ + + while (true) { + ch = (int) source.get(); + if ((ch & 0xff00) == 0) { + target.put((byte) ch); + } else { + break; + } + } + /* + * if we reach here, it's because a character was not in the correct range, and we need + * to deal with this by calling encodeMalformedOrUnmappable.
+ */ + return encodeMalformedOrUnmappable(source, ch, flush); + } + + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoder88591(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoder88591(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + setFillIn.add(0,0xff); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java new file mode 100644 index 00000000000..419a4601aeb --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java @@ -0,0 +1,357 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.BufferOverflowException; +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +class CharsetASCII extends CharsetICU { + protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a }; + + public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + maxBytesPerChar = 1; + minBytesPerChar = 1; + maxCharsPerByte = 1; + } + + class CharsetDecoderASCII extends CharsetDecoderICU { + + public CharsetDecoderASCII(CharsetICU cs) { + super(cs); + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, + boolean flush) { + if (!source.hasRemaining()) {
+ /* no input, nothing to do */ + return CoderResult.UNDERFLOW; + } + if (!target.hasRemaining()) { + /* no output available, can't do anything */ + return CoderResult.OVERFLOW; + } + + CoderResult cr; + int oldSource = source.position(); + int oldTarget = target.position(); + + if (source.hasArray() && target.hasArray()) { + /* optimized loop */ + + /* + * extract arrays from the buffers and obtain various constant values that will be + * necessary in the core loop + */ + byte[] sourceArray = source.array(); + int sourceOffset = source.arrayOffset(); + int sourceIndex = oldSource + sourceOffset; + int sourceLength = source.limit() - oldSource; + + char[] targetArray = target.array(); + int targetOffset = target.arrayOffset(); + int targetIndex = oldTarget + targetOffset; + int targetLength = target.limit() - oldTarget; + + int limit = ((sourceLength < targetLength) ? sourceLength : targetLength) + + sourceIndex; + int offset = targetIndex - sourceIndex; + + /* + * perform the core loop... if it returns null, it must be due to an overflow or + * underflow + */ + cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit); + if (cr == null) { + if (sourceLength <= targetLength) { + source.position(oldSource + sourceLength); + target.position(oldTarget + sourceLength); + cr = CoderResult.UNDERFLOW; + } else { + source.position(oldSource + targetLength); + target.position(oldTarget + targetLength); + cr = CoderResult.OVERFLOW; + } + } + } else { + /* unoptimized loop */ + + try { + /* + * perform the core loop...
if it throws an exception, it must be due to an + * overflow or underflow + */ + cr = decodeLoopCoreUnoptimized(source, target); + + } catch (BufferUnderflowException ex) { + /* all of the source has been read */ + cr = CoderResult.UNDERFLOW; + } catch (BufferOverflowException ex) { + /* the target is full */ + source.position(source.position() - 1); /* rewind by 1 */ + cr = CoderResult.OVERFLOW; + } + } + + /* set offsets since the start */ + if (offsets != null) { + int count = target.position() - oldTarget; + int sourceIndex = -1; + while (--count >= 0) offsets.put(++sourceIndex); + } + + return cr; + } + + protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target, + byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) { + int i, ch = 0; + + /* + * perform ascii conversion from the source array to the target array, making sure each + * byte in the source is within the correct range + */ + for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++) + targetArray[i + offset] = (char) ch; + + /* + * if some byte was not in the correct range, we need to deal with this byte by calling + * decodeMalformedOrUnmappable and move the source and target positions to reflect the + * early termination of the loop. NOTE(review): i indexes the backing array (buffer position + arrayOffset), so the position() calls below look wrong for a non-zero arrayOffset -- confirm + */ + if ((ch & 0x80) != 0) { + source.position(i + 1); + target.position(i + offset); + return decodeMalformedOrUnmappable(ch); + } else + return null; + } + + protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) + throws BufferUnderflowException, BufferOverflowException { + int ch = 0; + + /* + * perform ascii conversion from the source buffer to the target buffer, making sure + * each byte in the source is within the correct range + */ + while (((ch = (source.get() & 0xff)) & 0x80) == 0) + target.put((char) ch); + + /* + * if we reach here, it's because a byte was not in the correct range, and we need + * to deal with this by calling
decodeMalformedOrUnmappable + */ + return decodeMalformedOrUnmappable(ch); + } + + protected CoderResult decodeMalformedOrUnmappable(int ch) { + /* + * put the guilty character into toUBytesArray and return a message saying that the + * character was malformed and of length 1. + */ + toUBytesArray[0] = (byte) ch; + toULength = 1; + return CoderResult.malformedForLength(1); + } + } + + class CharsetEncoderASCII extends CharsetEncoderICU { + + public CharsetEncoderASCII(CharsetICU cs) { + super(cs, fromUSubstitution); + implReset(); + } + + private final static int NEED_TO_WRITE_BOM = 1; + + protected void implReset() { + super.implReset(); + fromUnicodeStatus = NEED_TO_WRITE_BOM; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, + boolean flush) { + if (!source.hasRemaining()) { + /* no input, nothing to do */ + return CoderResult.UNDERFLOW; + } + if (!target.hasRemaining()) { + /* no output available, can't do anything */ + return CoderResult.OVERFLOW; + } + + CoderResult cr; + int oldSource = source.position(); + int oldTarget = target.position(); + + if (fromUChar32 != 0) { + /* + * if we have a leading character in fromUChar32 that needs to be dealt with, we + * need to check for a matching trail character and take the appropriate action as + * dictated by encodeTrail.
+ */ + cr = encodeTrail(source, (char) fromUChar32, flush); + } else { + if (source.hasArray() && target.hasArray()) { + /* optimized loop */ + + /* + * extract arrays from the buffers and obtain various constant values that will + * be necessary in the core loop + */ + char[] sourceArray = source.array(); + int sourceOffset = source.arrayOffset(); + int sourceIndex = oldSource + sourceOffset; + int sourceLength = source.limit() - oldSource; + + byte[] targetArray = target.array(); + int targetOffset = target.arrayOffset(); + int targetIndex = oldTarget + targetOffset; + int targetLength = target.limit() - oldTarget; + + int limit = ((sourceLength < targetLength) ? sourceLength : targetLength) + + sourceIndex; + int offset = targetIndex - sourceIndex; + + /* + * perform the core loop... if it returns null, it must be due to an overflow or + * underflow + */ + cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush); + if (cr == null) { + if (sourceLength <= targetLength) { + source.position(oldSource + sourceLength); + target.position(oldTarget + sourceLength); + cr = CoderResult.UNDERFLOW; + } else { + source.position(oldSource + targetLength); + target.position(oldTarget + targetLength); + cr = CoderResult.OVERFLOW; + } + } + } else { + /* unoptimized loop */ + + try { + /* + * perform the core loop...
if it throws an exception, it must be due to an + * overflow or underflow + */ + cr = encodeLoopCoreUnoptimized(source, target, flush); + + } catch (BufferUnderflowException ex) { + cr = CoderResult.UNDERFLOW; + } catch (BufferOverflowException ex) { + source.position(source.position() - 1); /* rewind by 1 */ + cr = CoderResult.OVERFLOW; + } + } + } + + /* set offsets since the start */ + if (offsets != null) { + int count = target.position() - oldTarget; + int sourceIndex = -1; + while (--count >= 0) offsets.put(++sourceIndex); + } + + return cr; + } + + protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target, + char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit, + boolean flush) { + int i, ch = 0; + + /* + * perform ascii conversion from the source array to the target array, making sure each + * char in the source is within the correct range + */ + for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++) + targetArray[i + offset] = (byte) ch; + + /* + * if some char was not in the correct range, we need to deal with this char by calling + * encodeMalformedOrUnmappable and move the source and target positions to reflect the + * early termination of the loop. NOTE(review): i indexes the backing array (buffer position + arrayOffset), so the position() calls below look wrong for a non-zero arrayOffset -- confirm + */ + if ((ch & 0xff80) != 0) { + source.position(i + 1); + target.position(i + offset); + return encodeMalformedOrUnmappable(source, ch, flush); + } else + return null; + } + + protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, + boolean flush) throws BufferUnderflowException, BufferOverflowException { + int ch; + + /* + * perform ascii conversion from the source buffer to the target buffer, making sure + * each char in the source is within the correct range + */ + while (((ch = (int) source.get()) & 0xff80) == 0) + target.put((byte) ch); + + /* + * if we reach here, it's because a character was not in the correct range, and we need + * to deal with this by calling encodeMalformedOrUnmappable.
+ */ + return encodeMalformedOrUnmappable(source, ch, flush); + } + + protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) { + /* + * if the character is a surrogate (the check is UTF16.isSurrogate, not a lead-only test), we call encodeTrail to attempt to match + * it up with a trail surrogate. if not, the character is unmappable. + */ + return (UTF16.isSurrogate((char) ch)) + ? encodeTrail(source, (char) ch, flush) + : CoderResult.unmappableForLength(1); + } + + private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) { + /* + * ASCII doesn't support characters outside the BMP, so if handleSurrogates returns null, + * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable. + */ + CoderResult cr = handleSurrogates(source, lead); + if (cr != null) { + return cr; + } else { + //source.position(source.position() - 2); + return CoderResult.unmappableForLength(2); + } + } + + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderASCII(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderASCII(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + setFillIn.add(0,0x7f); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java new file mode 100644 index 00000000000..884f21a0223 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java @@ -0,0 +1,1063 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2010, International Business Machines Corporation and * + * others. All Rights Reserved.
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author krajwade + * + */ +class CharsetBOCU1 extends CharsetICU { + /* BOCU constants and macros */ + + /* initial value for "prev": middle of the ASCII range */ + private static final byte BOCU1_ASCII_PREV = 0x40; + + /* bounding byte values for differences */ + private static final int BOCU1_MIN = 0x21; + private static final int BOCU1_MIDDLE = 0x90; + //private static final int BOCU1_MAX_LEAD = 0xfe; + private static final int BOCU1_MAX_TRAIL = 0xff; + private static final int BOCU1_RESET = 0xff; + + /* number of lead bytes */ + //private static final int BOCU1_COUNT = (BOCU1_MAX_LEAD-BOCU1_MIN+1); + + /* adjust trail byte counts for the use of some C0 control byte values */ + private static final int BOCU1_TRAIL_CONTROLS_COUNT = 20; + private static final int BOCU1_TRAIL_BYTE_OFFSET = (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT); + + /* number of trail bytes */ + private static final int BOCU1_TRAIL_COUNT =((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT); + + /* + * number of positive and negative single-byte codes + * (counting 0==BOCU1_MIDDLE among the positive ones) + */ + private static final int BOCU1_SINGLE = 64; + + /* number of lead bytes for positive and negative 2/3/4-byte sequences */ + private static final int BOCU1_LEAD_2 = 43; + private static final int BOCU1_LEAD_3 = 3; + //private static final int BOCU1_LEAD_4 = 1; + + /* The difference value range for single-byters. 
*/ + private static final int BOCU1_REACH_POS_1 = (BOCU1_SINGLE-1); + private static final int BOCU1_REACH_NEG_1 = (-BOCU1_SINGLE); + + /* The difference value range for double-byters. */ + private static final int BOCU1_REACH_POS_2 = (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT); + private static final int BOCU1_REACH_NEG_2 = (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT); + + /* The difference value range for 3-byters. */ + private static final int BOCU1_REACH_POS_3 = + (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); + + private static final int BOCU1_REACH_NEG_3 = (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); + + /* The lead byte start values. */ + private static final int BOCU1_START_POS_2 = (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1); + private static final int BOCU1_START_POS_3 = (BOCU1_START_POS_2+BOCU1_LEAD_2); + private static final int BOCU1_START_POS_4 = (BOCU1_START_POS_3+BOCU1_LEAD_3); + /* ==BOCU1_MAX_LEAD */ + + private static final int BOCU1_START_NEG_2 = (BOCU1_MIDDLE+BOCU1_REACH_NEG_1); + private static final int BOCU1_START_NEG_3 = (BOCU1_START_NEG_2-BOCU1_LEAD_2); + //private static final int BOCU1_START_NEG_4 = (BOCU1_START_NEG_3-BOCU1_LEAD_3); + /* ==BOCU1_MIN+1 */ + + /* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */ + /* private static int BOCU1_LENGTH_FROM_LEAD(int lead) { + return ((BOCU1_START_NEG_2<=(lead) && (lead)>24 : 4); + } + + /* + * Byte value map for control codes, + * from external byte values 0x00..0x20 + * to trail byte values 0..19 (0..0x13) as used in the difference calculation. + * External byte values that are illegal as trail bytes are mapped to -1. 
+ */ + private static final int[] + bocu1ByteToTrail={ + /* 0 1 2 3 4 5 6 7 */ + -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, + + /* 8 9 a b c d e f */ + -1, -1, -1, -1, -1, -1, -1, -1, + + /* 10 11 12 13 14 15 16 17 */ + 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, + + /* 18 19 1a 1b 1c 1d 1e 1f */ + 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13, + + /* 20 */ + -1 + }; + + /* + * Byte value map for control codes, + * from trail byte values 0..19 (0..0x13) as used in the difference calculation + * to external byte values 0x00..0x20. + */ + private static final int[] + bocu1TrailToByte = { + /* 0 1 2 3 4 5 6 7 */ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, + + /* 8 9 a b c d e f */ + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + + /* 10 11 12 13 */ + 0x1c, 0x1d, 0x1e, 0x1f + }; + + + /* + * 12 commonly used C0 control codes (and space) are only used to encode + * themselves directly, + * which makes BOCU-1 MIME-usable and reasonably safe for + * ASCII-oriented software. + * + * These controls are + * 0 NUL + * + * 7 BEL + * 8 BS + * + * 9 TAB + * a LF + * b VT + * c FF + * d CR + * + * e SO + * f SI + * + * 1a SUB + * 1b ESC + * + * The other 20 C0 controls are also encoded directly (to preserve order) + * but are also used as trail bytes in difference encoding + * (for better compression). + */ + private static int BOCU1_TRAIL_TO_BYTE(int trail) { + return ((trail)>=BOCU1_TRAIL_CONTROLS_COUNT ? (trail)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[trail]); + } + + /* BOCU-1 implementation functions ------------------------------------------ */ + private static int BOCU1_SIMPLE_PREV(int c){ + return (((c)&~0x7f)+BOCU1_ASCII_PREV); + } + + /** + * Compute the next "previous" value for differencing + * from the current code point. 
+     *
+     * Three ranges get a fixed state value instead of the generic one:
+     * up to U+309F (Hiragana) the state is 0x3070; for U+4E00..U+9FA5 (CJK Unihan) it is
+     * 0x4e00-BOCU1_REACH_NEG_2, which puts U+4E00 exactly at the most negative
+     * two-byte difference; from U+AC00 (Hangul) it is the midpoint of U+AC00..U+D7A3.
+     * Every other code point falls back to BOCU1_SIMPLE_PREV(c).
+     *
+     * @param c current code point, 0x3040..0xd7a3 (values outside this range are handled
+     *          by BOCU1_PREV() below, which is why the outer bounds are commented out here)
+     * @return "previous code point" state value
+     */
+    private static int bocu1Prev(int c) {
+        /* compute new prev */
+        if(/* 0x3040<=c && */ c<=0x309f) {
+            /* Hiragana is not 128-aligned */
+            return 0x3070;
+        } else if(0x4e00<=c && c<=0x9fa5) {
+            /* CJK Unihan */
+            return 0x4e00-BOCU1_REACH_NEG_2;
+        } else if(0xac00<=c /* && c<=0xd7a3 */) {
+            /* Korean Hangul */
+            return (0xd7a3+0xac00)/2;
+        } else {
+            /* mostly small scripts */
+            return BOCU1_SIMPLE_PREV(c);
+        }
+    }
+
+    /**
+     * Fast version of bocu1Prev() for most scripts.
+     * Code points below 0x3040 or above 0xd7a3 take the BOCU1_SIMPLE_PREV() shortcut;
+     * only the range in between is passed on to bocu1Prev().
+     */
+    private static int BOCU1_PREV(int c) {
+        return ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c));
+    }
+
+    protected byte[] fromUSubstitution = new byte[]{(byte)0x1A}; // single substitution byte 0x1A
+
+    /* Faster versions of packDiff() for single-byte-encoded diff values. */
+
+    /** Is a diff value encodable in a single byte, i.e. within BOCU1_REACH_NEG_1..BOCU1_REACH_POS_1? */
+    private static boolean DIFF_IS_SINGLE(int diff){
+        return (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1);
+    }
+
+    /** Encode a diff value in a single byte: the byte value is BOCU1_MIDDLE plus the difference. */
+    private static int PACK_SINGLE_DIFF(int diff){
+        return (BOCU1_MIDDLE+(diff));
+    }
+
+    /** Is a diff value encodable in two bytes?
*/ + private static boolean DIFF_IS_DOUBLE(int diff){ + return (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2); + } + + public CharsetBOCU1(String icuCanonicalName, String javaCanonicalName, String[] aliases){ + super(icuCanonicalName, javaCanonicalName, aliases); + maxBytesPerChar = 4; + minBytesPerChar = 1; + maxCharsPerByte = 1; + } + + class CharsetEncoderBOCU extends CharsetEncoderICU { + public CharsetEncoderBOCU(CharsetICU cs) { + super(cs,fromUSubstitution); + } + + int sourceIndex, nextSourceIndex; + int prev, c , diff; + boolean checkNegative; + boolean LoopAfterTrail; + int targetCapacity; + CoderResult cr; + + /* label values for supporting behavior similar to goto in C */ + private static final int fastSingle=0; + private static final int getTrail=1; + private static final int regularLoop=2; + + private boolean LabelLoop; //used to break the while loop + private int labelType = fastSingle; //labeType is set to fastSingle to start the code from fastSingle: + + /** + * Integer division and modulo with negative numerators + * yields negative modulo results and quotients that are one more than + * what we need here. + * This macro adjust the results so that the modulo-value m is always >=0. + * + * For positive n, the if() condition is always FALSE. + * + * @param n Number to be split into quotient and rest. + * Will be modified to contain the quotient. + * @param d Divisor. + * @param m Output variable for the rest (modulo result). + */ + private int NEGDIVMOD(int n, int d, int m) { + diff = n; + (m)=(diff)%(d); + (diff)/=(d); + if((m)<0) { + --(diff); + (m)+=(d); + } + return m; + } + + /** + * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes + * and return a packed integer with them. + * + * The encoding favors small absolute differences with short encodings + * to compress runs of same-script characters. + * + * Optimized version with unrolled loops and fewer floating-point operations + * than the standard packDiff(). 
+ * + * @param diff difference value -0x10ffff..0x10ffff + * @return + * 0x010000zz for 1-byte sequence zz + * 0x0200yyzz for 2-byte sequence yy zz + * 0x03xxyyzz for 3-byte sequence xx yy zz + * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) + */ + private int packDiff(int n) { + int result, m = 0; + diff = n; + + if(diff>=BOCU1_REACH_NEG_1) { + /* mostly positive differences, and single-byte negative ones */ + if(diff<=BOCU1_REACH_POS_2) { + /* two bytes */ + diff-=BOCU1_REACH_POS_1+1; + result=0x02000000; + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m); + + result|=(BOCU1_START_POS_2+diff)<<8; + } else if(diff<=BOCU1_REACH_POS_3) { + /* three bytes */ + diff-=BOCU1_REACH_POS_2+1; + result=0x03000000; + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m); + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + result|=(BOCU1_START_POS_3+diff)<<16; + } else { + /* four bytes */ + diff-=BOCU1_REACH_POS_3+1; + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result=BOCU1_TRAIL_TO_BYTE(m); + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + /* + * We know that / and % would deliver quotient 0 and rest=diff. + * Avoid division and modulo for performance. 
+ */ + result|=BOCU1_TRAIL_TO_BYTE(diff)<<16; + + result|=((BOCU1_START_POS_4&UConverterConstants.UNSIGNED_INT_MASK))<<24; + } + } else { + /* two- to four-byte negative differences */ + if(diff>=BOCU1_REACH_NEG_2) { + /* two bytes */ + diff-=BOCU1_REACH_NEG_1; + result=0x02000000; + + m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m); + + result|=(BOCU1_START_NEG_2+diff)<<8; + } else if(diff>=BOCU1_REACH_NEG_3) { + /* three bytes */ + diff-=BOCU1_REACH_NEG_2; + result=0x03000000; + + m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m); + + m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + result|=(BOCU1_START_NEG_3+diff)<<16; + } else { + /* four bytes */ + diff-=BOCU1_REACH_NEG_3; + + m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result=BOCU1_TRAIL_TO_BYTE(m); + + m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + /* + * We know that NEGDIVMOD would deliver + * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT. + * Avoid division and modulo for performance. + */ + m=diff+BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m)<<16; + + result|=BOCU1_MIN<<24; + } + } + return result; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){ + cr = CoderResult.UNDERFLOW; + + LabelLoop = true; //used to break the while loop + checkNegative = false; // its value is set to true to get out of while loop when c = -c + LoopAfterTrail = false; // its value is set to true to ignore code before getTrail: + + /*set up the local pointers*/ + targetCapacity = target.limit() - target.position(); + c = fromUChar32; + prev = fromUnicodeStatus; + + if(prev==0){ + prev = BOCU1_ASCII_PREV; + } + + /*sourceIndex ==-1 if the current characte began in the previous buffer*/ + sourceIndex = c == 0 ? 
0: -1;
+            nextSourceIndex = 0;
+
+            /*conversion loop*/
+            if(c!=0 && targetCapacity>0){
+                /* a lead surrogate is pending from the previous call: look for its trail first */
+                labelType = getTrail;
+            }
+
+            /* state machine standing in for the C gotos; regularLoop() clears LabelLoop when done */
+            while(LabelLoop){
+                switch(labelType){
+                case fastSingle:
+                    labelType = fastSingle(source, target, offsets);
+                    break;
+                case getTrail:
+                    labelType = getTrail(source, target, offsets);
+                    break;
+                case regularLoop:
+                    labelType = regularLoop(source, target, offsets);
+                    break;
+                }
+            }
+
+            return cr;
+        }
+
+        /**
+         * Fast path for runs of code units below U+3000 that need exactly one output byte:
+         * C0 controls and space are written as themselves (controls also reset prev),
+         * other units are written as a single-byte difference from prev.
+         * Stops at the first unit that does not fit this pattern, without consuming it.
+         *
+         * @return always regularLoop, which handles whatever this loop left over
+         */
+        private int fastSingle(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+//fastSingle:
+            /*fast loop for single-byte differences*/
+            /*use only one loop counter variable , targetCapacity, not also source*/
+            diff = source.limit() - source.position();
+            if(targetCapacity>diff){
+                targetCapacity = diff;
+            }
+            while(targetCapacity>0 && (c=source.get(source.position()))<0x3000){
+                if(c<=0x20){
+                    if(c!=0x20){
+                        prev = BOCU1_ASCII_PREV;
+                    }
+                    target.put((byte)c);
+                    if(offsets!=null){
+                        offsets.put(nextSourceIndex++);
+                    }
+                    source.position(source.position()+1);
+                    --targetCapacity;
+                }else {
+                    diff = c-prev;
+                    if(DIFF_IS_SINGLE(diff)){
+                        prev = BOCU1_SIMPLE_PREV(c);
+                        target.put((byte)PACK_SINGLE_DIFF(diff));
+                        if(offsets!=null){
+                            offsets.put(nextSourceIndex++);
+                        }
+                        source.position(source.position()+1);
+                        --targetCapacity;
+                    }else {
+                        break;
+                    }
+                }
+            }
+            return regularLoop;
+        }
+
+        /**
+         * Tries to complete the lead surrogate held in c with the next code unit.
+         *
+         * If the next unit is a trail surrogate it is consumed and c becomes the
+         * supplementary code point; if it is anything else, c stays the unpaired lead.
+         * In both cases LoopAfterTrail is set so that regularLoop() encodes c without
+         * reading another unit.
+         *
+         * If there is no more input, c is negated as the "incomplete" marker and
+         * checkNegative is set. LoopAfterTrail is deliberately NOT set in that case:
+         * when this method is entered directly from encodeLoop() (lead surrogate carried
+         * over from the previous call, empty source), setting it would make regularLoop()
+         * encode the negative marker as if it were a code point. Leaving it clear lets
+         * regularLoop() skip its loop and just store the pending lead back into fromUChar32.
+         *
+         * @return always regularLoop
+         */
+        private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            if(source.hasRemaining()){
+                /*test the following code unit*/
+                char trail = source.get(source.position());
+                if(UTF16.isTrailSurrogate(trail)){
+                    source.position(source.position()+1);
+                    ++nextSourceIndex;
+                    c=UCharacter.getCodePoint((char)c, trail);
+                }
+            } else {
+                /*no more input*/
+                c = -c; /*negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else*/
+                checkNegative = true;
+                return regularLoop;
+            }
+            LoopAfterTrail = true;
+            return regularLoop;
+        }
+
+        @SuppressWarnings("fallthrough")
+        private int regularLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+ 
if(!LoopAfterTrail){ + /*restore real values*/ + targetCapacity = target.limit()-target.position(); + sourceIndex = nextSourceIndex; /*wrong if offsets==null but does not matter*/ + } + /*regular loop for all classes*/ + while(LoopAfterTrail || source.hasRemaining()){ + if(LoopAfterTrail || targetCapacity>0){ + + if(!LoopAfterTrail){ + c = source.get(); + ++nextSourceIndex; + + if(c<=0x20){ + /* + * ISO C0 control & space: + * Encode directly for MIME compatibility, + * and reset state except for space, to not disrupt compression. + */ + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + target.put((byte)c); + if(offsets != null){ + offsets.put(sourceIndex++); + } + --targetCapacity; + + sourceIndex=nextSourceIndex; + continue; + } + + if(UTF16.isLeadSurrogate((char)c)){ + getTrail(source, target, offsets); + if(checkNegative){ + break; + } + } + } + + if(LoopAfterTrail){ + LoopAfterTrail = false; + } + + /* + * all other Unicode code points c==U+0021..U+10ffff + * are encoded with the difference c-prev + * + * a new prev is computed from c, + * placed in the middle of a 0x80-block (for most small scripts) or + * in the middle of the Unihan and Hangul blocks + * to statistically minimize the following difference + */ + diff = c- prev; + prev = BOCU1_PREV(c); + if(DIFF_IS_SINGLE(diff)){ + target.put((byte)PACK_SINGLE_DIFF(diff)); + if(offsets!=null){ + offsets.put(sourceIndex++); + } + --targetCapacity; + sourceIndex=nextSourceIndex; + if(c<0x3000){ + labelType = fastSingle; + return labelType; + } + } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity){ + /*optimize 2 byte case*/ + int m = 0; + if(diff>=0){ + diff -= BOCU1_REACH_POS_1 +1; + m = diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + diff+=BOCU1_START_POS_2; + } else { + diff -= BOCU1_REACH_NEG_1; + m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + diff+=BOCU1_START_NEG_2; + } + target.put((byte)diff); + target.put((byte)BOCU1_TRAIL_TO_BYTE(m)); + if(offsets!=null){ + offsets.put(sourceIndex); + 
offsets.put(sourceIndex); + } + targetCapacity -= 2; + sourceIndex = nextSourceIndex; + } else { + int length; /*will be 2..4*/ + diff = packDiff(diff); + length = BOCU1_LENGTH_FROM_PACKED(diff); + + /*write the output character bytes from diff and length*/ + /*from the first if in the loop we know that targetCapacity>0*/ + if(length<=targetCapacity){ + switch(length){ + /*each branch falls through the next one*/ + case 4: + target.put((byte)(diff>>24)); + if(offsets!= null){ + offsets.put(sourceIndex); + } + case 3: + target.put((byte)(diff>>16)); + if(offsets!= null){ + offsets.put(sourceIndex); + } + case 2: + target.put((byte)(diff>>8)); + if(offsets!= null){ + offsets.put(sourceIndex); + } + /*case 1 handled above*/ + target.put((byte)diff); + if(offsets!= null){ + offsets.put(sourceIndex); + } + default: + /*will never occur*/ + break; + } + targetCapacity -= length; + sourceIndex = nextSourceIndex; + } else { + ByteBuffer error = ByteBuffer.wrap(errorBuffer); + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. 
+ */ + /* we know that 1<=targetCapacity>16)); + case 2: + error.put((byte)(diff>>8)); + case 1: + error.put((byte)diff); + default: + /* will never occur */ + break; + } + errorBufferLength = length; + + /* now output what fits into the regular target */ + diff>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + target.put((byte)(diff>>16)); + if(offsets!= null){ + offsets.put(sourceIndex); + } + case 2: + target.put((byte)(diff>>8)); + if(offsets!= null){ + offsets.put(sourceIndex); + } + case 1: + target.put((byte)diff); + if(offsets!= null){ + offsets.put(sourceIndex); + } + default: + /* will never occur */ + break; + } + + /* target overflow */ + targetCapacity=0; + cr = CoderResult.OVERFLOW; + break; + } + } + } else{ + /*target is full*/ + cr = CoderResult.OVERFLOW; + break; + } + + } + /*set the converter state back into UConverter*/ + fromUChar32 = c<0 ? -c :0; + fromUnicodeStatus = prev; + LabelLoop = false; + labelType = fastSingle; + return labelType; + } + + } + + class CharsetDecoderBOCU extends CharsetDecoderICU{ + public CharsetDecoderBOCU(CharsetICU cs) { + super(cs); + } + + int byteIndex; + int sourceIndex, nextSourceIndex; + int prev, c , diff, count; + byte[] bytes; + int targetCapacity; + CoderResult cr; + + /* label values for supporting behavior similar to goto in C */ + private static final int fastSingle=0; + private static final int getTrail=1; + private static final int regularLoop=2; + private static final int endLoop=3; + + private boolean LabelLoop;//used to break the while loop + private boolean afterTrail; // its value is set to true to ignore code after getTrail: + private int labelType; + /* + * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c. 
+ * The UConverter fields are used as follows: + * + * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) + * + * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) + * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0) + */ + + /* BOCU-1-from-Unicode conversion functions --------------------------------- */ + + + + /** + * Function for BOCU-1 decoder; handles multi-byte lead bytes. + * + * @param b lead byte; + * BOCU1_MIN<=b= BOCU1_START_NEG_2) { + /* positive difference */ + if(b < BOCU1_START_POS_3) { + /* two bytes */ + diffValue = (b - BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_POS_1+1; + countValue = 1; + } else if(b < BOCU1_START_POS_4) { + /* three bytes */ + diffValue = (b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1; + countValue = 2; + } else { + /* four bytes */ + diffValue = BOCU1_REACH_POS_3+1; + countValue = 3; + } + } else { + /* negative difference */ + if(b >= BOCU1_START_NEG_3) { + /* two bytes */ + diffValue=(b -BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_NEG_1; + countValue=1; + } else if(b>BOCU1_MIN) { + /* three bytes */ + diffValue=(b - BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT + BOCU1_REACH_NEG_2; + countValue = 2; + } else { + /* four bytes */ + diffValue=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; + countValue=3; + } + } + + /* return the state for decoding the trail byte(s) */ + return (diffValue<<2)|countValue; + } + + /** + * Function for BOCU-1 decoder; handles multi-byte trail bytes. 
+         *
+         * Maps the external byte value to its trail value and scales it by the weight of
+         * its position. The caller adds the result to diff and decrements its own count.
+         *
+         * @param countValue number of remaining trail bytes including this one (1..3)
+         * @param b trail byte (only the low 8 bits are used)
+         * @return new delta for diff including b - <0 indicates an error
+         *
+         * @see #decodeBocu1LeadByte(int)
+         */
+        private int decodeBocu1TrailByte(int countValue, int b) {
+            b = b&UConverterConstants.UNSIGNED_BYTE_MASK;
+            if((b)<=0x20) {
+                /* skip some C0 controls and make the trail byte range contiguous */
+                b = bocu1ByteToTrail[b];
+                /* b<0 for an illegal trail byte value will result in return<0 below */
+            } else {
+                //b-= BOCU1_TRAIL_BYTE_OFFSET;
+                b = b - BOCU1_TRAIL_BYTE_OFFSET;
+            }
+
+            /* weight the trail value by its position; adding it to diff is left to the caller */
+            if(countValue==1) {
+                return b;
+            } else if(countValue==2) {
+                return b*BOCU1_TRAIL_COUNT;
+            } else /* count==3 */ {
+                return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+            }
+        }
+
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                boolean flush){
+            cr = CoderResult.UNDERFLOW;
+
+            LabelLoop = true;
+            afterTrail = false;
+            labelType = fastSingle; // start in the fast single-byte state
+
+            /*get the converter state*/
+            prev = toUnicodeStatus;
+
+            if(prev==0){
+                prev = BOCU1_ASCII_PREV;
+            }
+            diff = mode; // mode packs (diff<<2)|count; unpacked on the next two lines
+            count = diff&3;
+            diff>>=2;
+
+            byteIndex = toULength;
+            bytes = toUBytesArray;
+
+            /* sourceIndex=-1 if the current character began in the previous buffer */
+            sourceIndex=byteIndex==0 ? 
0 : -1;
+            nextSourceIndex=0;
+
+            /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+            if(count>0 && byteIndex>0 && target.position()<target.limit()) {
+                /* a multi-byte sequence was left incomplete by the previous call: resume with its trail bytes */
+                labelType = getTrail;
+            }
+
+            /* state machine standing in for the C gotos; endLoop() clears LabelLoop */
+            while(LabelLoop){
+                switch(labelType){
+                case fastSingle:
+                    labelType = fastSingle(source, target, offsets);
+                    break;
+                case getTrail:
+                    labelType = getTrail(source, target, offsets);
+                    break;
+                case regularLoop:
+                    labelType = afterGetTrail(source, target, offsets);
+                    break;
+                case endLoop:
+                    endLoop(source, target, offsets);
+                    break;
+                }
+            }
+
+            return cr;
+        }
+
+        /**
+         * Fast path for runs of bytes that each decode to exactly one char below U+3000:
+         * single-byte differences and directly encoded C0 controls / space.
+         * Stops, without consuming it, at the first byte that needs the general loop.
+         *
+         * @return regularLoop, so that afterGetTrail() handles whatever is left
+         */
+        private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            labelType = regularLoop;
+            /* fast loop for single-byte differences */
+            /* use count as the only loop counter variable */
+            diff = source.limit() - source.position();
+            count = target.limit()-target.position();
+            if(count>diff) {
+                count = diff;
+            }
+            while(count>0) {
+                if(BOCU1_START_NEG_2 <=(c=source.get(source.position())&UConverterConstants.UNSIGNED_BYTE_MASK) && c< BOCU1_START_POS_2) {
+                    c = prev + (c-BOCU1_MIDDLE);
+                    if(c<0x3000) {
+                        target.put((char)c);
+                        if(offsets!=null){
+                            offsets.put(nextSourceIndex++);
+                        }
+                        prev = BOCU1_SIMPLE_PREV(c);
+                    } else {
+                        break;
+                    }
+                } else if((c&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0x20) {
+                    if((c&UConverterConstants.UNSIGNED_BYTE_MASK) != 0x20) {
+                        prev = BOCU1_ASCII_PREV;
+                    }
+                    target.put((char)c);
+                    if(offsets!=null){
+                        offsets.put(nextSourceIndex++);
+                    }
+                } else {
+                    break;
+                }
+                source.position(source.position()+1);
+                --count;
+            }
+            sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+            return labelType;
+        }
+
+        /**
+         * Consumes the trail bytes of a multi-byte sequence whose lead byte has already
+         * been decoded into diff/count, recording each byte in bytes[].
+         *
+         * On success c holds the decoded code point, afterTrail is set so that
+         * afterGetTrail() outputs it, and regularLoop is returned. If the input ends
+         * first, endLoop is returned with diff/count/byteIndex left for the next call.
+         * An illegal trail byte, or a resulting code point outside 0..0x10ffff, sets a
+         * malformed result and returns endLoop. The lower bound matters because a
+         * negative difference can take prev+diff below zero; the two-byte shortcut in
+         * afterGetTrail() already rejects that case through its unsigned comparison.
+         *
+         * @return the next state, regularLoop or endLoop (also stored in labelType)
+         */
+        private int getTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            labelType = regularLoop;
+            for(;;) {
+                if(source.position() >= source.limit()) {
+                    labelType = endLoop;
+                    return labelType;
+                }
+                ++nextSourceIndex;
+                c = bytes[byteIndex++] = source.get();
+
+                /* trail byte in any position */
+                c = decodeBocu1TrailByte(count, c);
+                if(c<0) {
+                    cr = CoderResult.malformedForLength(1);
+                    labelType = endLoop;
+                    return labelType;
+                }
+
+                diff+=c;
+                if(--count==0) {
+                    /* final trail byte, deliver a code point */
+                    byteIndex=0;
+                    c = prev + diff;
+                    if(c < 0 || c > 0x10ffff) {
+                        cr = CoderResult.malformedForLength(1);
+                        labelType = endLoop;
+                        return labelType;
+                    }
+                    break;
+                }
+            }
+            afterTrail = true;
+            return labelType;
+
+        }
+
+        private int afterGetTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            /* decode a sequence of single and lead bytes */
+            while(afterTrail || source.hasRemaining()) {
+                if(!afterTrail){
+                    if(target.position() >= target.limit()) {
+                        /* target is full */
+                        cr = CoderResult.OVERFLOW;
+                        break;
+                    }
+
+                    ++nextSourceIndex;
+ 
c = source.get()&UConverterConstants.UNSIGNED_BYTE_MASK; + if(BOCU1_START_NEG_2 <= c && c < BOCU1_START_POS_2) { + /* Write a code point directly from a single-byte difference. */ + c = prev + (c-BOCU1_MIDDLE); + if(c<0x3000) { + target.put((char)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + prev = BOCU1_SIMPLE_PREV(c); + sourceIndex = nextSourceIndex; + labelType = fastSingle; + return labelType; + } + } else if(c <= 0x20) { + /* + * Direct-encoded C0 control code or space. + * Reset prev for C0 control codes but not for space. + */ + if(c != 0x20) { + prev=BOCU1_ASCII_PREV; + } + target.put((char)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + sourceIndex=nextSourceIndex; + continue; + } else if(BOCU1_START_NEG_3 <= c && c < BOCU1_START_POS_3 && source.hasRemaining()) { + /* Optimize two-byte case. */ + if(c >= BOCU1_MIDDLE) { + diff=(c - BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_POS_1 + 1; + } else { + diff=(c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_NEG_1; + } + + /* trail byte */ + ++nextSourceIndex; + c = decodeBocu1TrailByte(1, source.get()); + if(c<0 || ((c = prev + diff + c)&UConverterConstants.UNSIGNED_INT_MASK)>0x10ffff) { + bytes[0]= source.get(source.position()-2); + bytes[1]= source.get(source.position()-1); + byteIndex = 2; + cr = CoderResult.malformedForLength(2); + break; + } + } else if(c == BOCU1_RESET) { + /* only reset the state, no code point */ + prev=BOCU1_ASCII_PREV; + sourceIndex=nextSourceIndex; + continue; + } else { + /* + * For multi-byte difference lead bytes, set the decoder state + * with the partial difference value from the lead byte and + * with the number of trail bytes. 
+ */ + bytes[0]= (byte)c; + byteIndex = 1; + + diff = decodeBocu1LeadByte(c); + count = diff&3; + diff>>=2; + getTrail(source, target, offsets); + if(labelType != regularLoop){ + return labelType; + } + } + } + + if(afterTrail){ + afterTrail = false; + } + + /* calculate the next prev and output c */ + prev = BOCU1_PREV(c); + if(c<=0xffff) { + target.put((char)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + } else { + /* output surrogate pair */ + target.put(UTF16.getLeadSurrogate(c)); + if(target.hasRemaining()) { + target.put(UTF16.getTrailSurrogate(c)); + if(offsets!=null){ + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + } else { + /* target overflow */ + if(offsets!=null){ + offsets.put(sourceIndex); + } + charErrorBufferArray[0] = UTF16.getTrailSurrogate(c); + charErrorBufferLength = 1; + cr = CoderResult.OVERFLOW; + break; + } + } + sourceIndex=nextSourceIndex; + } + labelType = endLoop; + return labelType; + } + + private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){ + if(cr.isMalformed()) { + /* set the converter state in UConverter to deal with the next character */ + toUnicodeStatus = BOCU1_ASCII_PREV; + mode = 0; + } else { + /* set the converter state back into UConverter */ + toUnicodeStatus=prev; + mode=(diff<<2)|count; + } + toULength=byteIndex; + LabelLoop = false; + } + + } + + + public CharsetDecoder newDecoder() { + return new CharsetDecoderBOCU(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderBOCU(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + CharsetICU.getCompleteUnicodeSet(setFillIn); + } + +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java new file mode 100644 index 00000000000..bf2c204f5be --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java @@ -0,0 +1,26 @@ +/** + 
******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import com.ibm.icu.text.UnicodeSet; + +/** + * The purpose of this class is to set isCESU8 to true in the super class, and to allow the Charset framework to open + * the variant UTF-8 converter without extra setup work. CESU-8 encodes/decodes supplementary characters as 6 bytes + * instead of the proper 4 bytes. + */ +class CharsetCESU8 extends CharsetUTF8 { + public CharsetCESU8(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + } + + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + getCompleteUnicodeSet(setFillIn); + + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java new file mode 100644 index 00000000000..d9cc0c2e673 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java @@ -0,0 +1,408 @@ +/** +******************************************************************************* +* Copyright (C) 2006-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +******************************************************************************* +*/ + +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CoderResult; + +/** + *

    <h2> Callback API for CharsetICU API </h2>

    + * + * CharsetCallback class defines some error behaviour functions called + * by CharsetDecoderICU and CharsetEncoderICU. The class also provides + * the facility by which clients can write their own callbacks. + * + * These functions, although public, should NEVER be called directly. + * They should be used as parameters to the onUmappableCharacter() and + * onMalformedInput() methods, to set the behaviour of a converter + * when it encounters UNMAPPED/INVALID sequences. + * Currently the only way to set callbacks is by using CodingErrorAction. + * In the future we will provide set methods on CharsetEncoder and CharsetDecoder + * that will accept CharsetCallback fields. + * + * @stable ICU 3.6 + */ + +public class CharsetCallback { + /* + * FROM_U, TO_U context options for sub callback + */ + private static final String SUB_STOP_ON_ILLEGAL = "i"; + +// /* +// * FROM_U, TO_U context options for skip callback +// */ +// private static final String SKIP_STOP_ON_ILLEGAL = "i"; + +// /* +// * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) +// */ +// private static final String ESCAPE_ICU = null; + + /* + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) + */ + private static final String ESCAPE_JAVA = "J"; + + /* + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) + */ + private static final String ESCAPE_C = "C"; + + /* + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + */ + private static final String ESCAPE_XML_DEC = "D"; + + /* + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape 
\htmlonly(&#xXXXX;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + */ + private static final String ESCAPE_XML_HEX = "X"; + + /* + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) + */ + private static final String ESCAPE_UNICODE = "U"; + + /* + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) + */ + private static final String ESCAPE_CSS2 = "S"; + + /** + * Decoder Callback interface + * @stable ICU 3.6 + */ + public interface Decoder { + /** + * This function is called when the bytes in the source cannot be handled, + * and this function is meant to handle or fix the error if possible. + * + * @return Result of decoding action. This returned object is set to an error + * if this function could not handle the conversion. + * @stable ICU 3.6 + */ + public CoderResult call(CharsetDecoderICU decoder, Object context, + ByteBuffer source, CharBuffer target, IntBuffer offsets, + char[] buffer, int length, CoderResult cr); + } + /** + * Encoder Callback interface + * @stable ICU 3.6 + */ + public interface Encoder { + /** + * This function is called when the Unicode characters in the source cannot be handled, + * and this function is meant to handle or fix the error if possible. + * @return Result of decoding action. This returned object is set to an error + * if this function could not handle the conversion. 
+ * @stable ICU 3.6 + */ + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, + char[] buffer, int length, int cp, CoderResult cr); + } + /** + * Skip callback + * @stable ICU 3.6 + */ + public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() { + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, + char[] buffer, int length, int cp, CoderResult cr){ + if(context==null){ + return CoderResult.UNDERFLOW; + }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){ + if(!cr.isUnmappable()){ + return cr; + }else{ + return CoderResult.UNDERFLOW; + } + } + return cr; + } + }; + /** + * Skip callback + * @stable ICU 3.6 + */ + public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() { + public CoderResult call(CharsetDecoderICU decoder, Object context, + ByteBuffer source, CharBuffer target, IntBuffer offsets, + char[] buffer, int length, CoderResult cr){ + if(context==null){ + return CoderResult.UNDERFLOW; + }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){ + if(!cr.isUnmappable()){ + return cr; + }else{ + return CoderResult.UNDERFLOW; + } + } + return cr; + } + }; + /** + * Write substitute callback + * @stable ICU 3.6 + */ + public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){ + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, + char[] buffer, int length, int cp, CoderResult cr){ + if(context==null){ + return encoder.cbFromUWriteSub(encoder, source, target, offsets); + }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){ + if(!cr.isUnmappable()){ + return cr; + }else{ + return encoder.cbFromUWriteSub(encoder, source, target, offsets); + } + } + return cr; + } + }; + private static final char[] kSubstituteChar1 = new char[]{0x1A}; + private static final char[] kSubstituteChar = new char[] {0xFFFD}; + /** + 
* Write substitute callback (to Unicode).
+     * Writes a single substitution char to the target through
+     * {@code CharsetDecoderICU.toUWriteUChars} and returns that call's result:
+     * U+001A when {@code decoder.invalidCharLength} is 1 and the charset's
+     * {@code subChar1} is non-zero, U+FFFD otherwise.
+     * The {@code context}, {@code buffer}, {@code length} and {@code cr}
+     * arguments are not consulted.
+     * @stable ICU 3.6
+     */
+    public static final Decoder TO_U_CALLBACK_SUBSTITUTE  = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context,
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr) {
+            final CharsetICU charset = (CharsetICU) decoder.charset();
+            /* could optimize this case, just one uchar */
+            final boolean useSubChar1 = decoder.invalidCharLength == 1 && charset.subChar1 != 0;
+            final char[] substitute = useSubChar1 ? kSubstituteChar1 : kSubstituteChar;
+            return CharsetDecoderICU.toUWriteUChars(decoder, substitute, 0, 1, target, offsets, source.position());
+        }
+    };
+    /**
+     * Stop callback (from Unicode): returns the incoming {@code cr} untouched,
+     * so the error is reported as-is.
+     * @stable ICU 3.6
+     */
+    public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
+        public CoderResult call(CharsetEncoderICU encoder, Object context,
+                CharBuffer source, ByteBuffer target, IntBuffer offsets,
+                char[] buffer, int length, int cp, CoderResult cr) {
+            return cr;
+        }
+    };
+    /**
+     * Stop callback (to Unicode): returns the incoming {@code cr} untouched,
+     * so the error is reported as-is.
+     * @stable ICU 3.6
+     */
+    public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context,
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr) {
+            return cr;
+        }
+    };
+    /* size of the scratch array the escape callbacks build their output in */
+    private static final int VALUE_STRING_LENGTH = 32;
+    /* characters used to assemble the escape sequences */
+    private static final char UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025;
+    private static final char UNICODE_U_CODEPOINT = 0x0055;
+    private static final char UNICODE_X_CODEPOINT = 0x0058;
+    private static final char UNICODE_RS_CODEPOINT = 0x005C;
+    private static final char UNICODE_U_LOW_CODEPOINT = 0x0075;
+    private static final char UNICODE_X_LOW_CODEPOINT = 0x0078;
+    private static final char UNICODE_AMP_CODEPOINT = 0x0026;
+    private static final char UNICODE_HASH_CODEPOINT = 0x0023;
+    private static final char UNICODE_SEMICOLON_CODEPOINT = 0x003B;
+    private static final char UNICODE_PLUS_CODEPOINT
= 0x002B;
+    private static final char UNICODE_LEFT_CURLY_CODEPOINT = 0x007B;
+    private static final char UNICODE_RIGHT_CURLY_CODEPOINT = 0x007D;
+    private static final char UNICODE_SPACE_CODEPOINT = 0x0020;
+    /**
+     * Write escape callback (from Unicode).
+     * Builds a textual escape for the offending input in a scratch array and
+     * writes it through {@code encoder.cbFromUWriteUChars}, whose result is
+     * returned. The incoming {@code cr} is not consulted.
+     *
+     * The escape format is chosen by {@code context}:
+     *   - null, not a String, or an unrecognized String:
+     *         percent sign, 'U', 4 hex digits - once per char in {@code buffer};
+     *   - ESCAPE_JAVA: backslash, 'u', 4 hex digits - once per char in {@code buffer};
+     *   - ESCAPE_C: backslash, 'U', 8 hex digits of {@code cp} when {@code length} is 2,
+     *         otherwise backslash, 'u', 4 hex digits of {@code buffer[0]};
+     *   - ESCAPE_XML_DEC: ampersand, '#', decimal digits, ';';
+     *   - ESCAPE_XML_HEX: ampersand, '#', 'x', hex digits, ';';
+     *   - ESCAPE_UNICODE: '{', 'U', '+', at least 4 hex digits, '}';
+     *   - ESCAPE_CSS2: backslash, hex digits of {@code cp}, then a space.
+     * For the XML and ESCAPE_UNICODE forms the number is {@code cp} when
+     * {@code length} is 2 and {@code buffer[0]} otherwise.
+     *
+     * @param buffer the chars that could not be converted; the first
+     *               {@code length} entries are read
+     * @param length number of chars of {@code buffer} to use
+     * @param cp     the value escaped in place of the individual chars by the
+     *               branches noted above
+     * @return the result of {@code encoder.cbFromUWriteUChars}
+     * @stable ICU 4.0
+     */
+    public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() {
+        public CoderResult call(CharsetEncoderICU encoder, Object context,
+                CharBuffer source, ByteBuffer target, IntBuffer offsets,
+                char[] buffer, int length, int cp, CoderResult cr){
+            char[] valueString = new char[VALUE_STRING_LENGTH];
+            int valueStringLength = 0;
+            int i = 0;
+
+            if (context == null || !(context instanceof String)) {
+                while (i < length) {
+                    valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                    valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+                    valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                }
+            } else {
+                if (((String)context).equals(ESCAPE_JAVA)) {
+                    while (i < length) {
+                        valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
+                        valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
+                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                    }
+                } else if (((String)context).equals(ESCAPE_C)) {
+                    valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
+
+                    if (length == 2) {
+                        valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+                        /*
+                         * itou() returns only the number of digits it wrote, so it must be
+                         * added to the running length like in every other branch. Assigning
+                         * it dropped the two prefix chars from the count and cut the last
+                         * two hex digits off the output.
+                         */
+                        valueStringLength += itou(valueString, valueStringLength, cp, 16, 8);
+                    } else {
+                        valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
+                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                    }
+                } else if (((String)context).equals(ESCAPE_XML_DEC)) {
+                    valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;   /* adding & */
+                    valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT;  /* adding # */
+                    if (length == 2) {
+                        valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
+                    } else {
+                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
+                    }
+                    valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+                } else if (((String)context).equals(ESCAPE_XML_HEX)) {
+                    valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;   /* adding & */
+                    valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT;  /* adding # */
+                    valueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
+                    if (length == 2) {
+                        valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
+                    } else {
+                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
+                    }
+                    valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+                } else if (((String)context).equals(ESCAPE_UNICODE)) {
+                    valueString[valueStringLength++] = UNICODE_LEFT_CURLY_CODEPOINT;  /* adding { */
+                    valueString[valueStringLength++] = UNICODE_U_CODEPOINT;           /* adding U */
+                    valueString[valueStringLength++] = UNICODE_PLUS_CODEPOINT;        /* adding + */
+                    if (length == 2) {
+                        valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
+                    } else {
+                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                    }
+                    valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
+                } else if (((String)context).equals(ESCAPE_CSS2)) {
+                    valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
+                    valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
+                    /* Always add space character, because the next character might be whitespace,
+                       which would erroneously be considered the termination of the escape sequence. */
+                    valueString[valueStringLength++] = UNICODE_SPACE_CODEPOINT;
+                } else {
+                    /* unrecognized escape name: same output as with no context at all */
+                    while (i < length) {
+                        valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                        valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                    }
+                }
+            }
+
+            return encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
+        }
+    };
+    /**
+     * Write escape callback
+     * @stable ICU 4.0
+     */
+    public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context,
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr){
+            char[] uniValueString = new char[VALUE_STRING_LENGTH];
+            int valueStringLength = 0;
+            int i = 0;
+
+            if (context == null || !(context instanceof String)) {
+                while (i < length) {
+                    uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                    uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
+                    valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                }
+            } else {
+                if (((String)context).equals(ESCAPE_XML_DEC)) {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;   /* adding & */
+                        uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT;  /* adding # */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 10, 0);
+                        uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+                    }
+                } else if (((String)context).equals(ESCAPE_XML_HEX)) {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;   /* adding & */
+                        uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT;  /* adding # */
+
uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 0);
+                        uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+                    }
+                } else if (((String)context).equals(ESCAPE_C)) {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
+                        uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                    }
+                } else {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                        uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT;    /* adding X */
+                        itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                        valueStringLength += 2;
+                    }
+                }
+            }
+
+            cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
+
+            return cr;
+        }
+    };
+    /**
+     * Java port of uprv_itou() in ICU4C used by TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE.
+     * Writes the representation of {@code i} in the given radix into {@code buffer},
+     * starting at {@code sourceIndex}, most significant digit first and left-padded
+     * with '0' up to {@code minwidth} chars. Digits above 9 are written as 'A', 'B', ...
+     *
+     * Digit generation stops early when the end of {@code buffer} is reached; the
+     * zero padding has no such guard.
+     * NOTE(review): every visible caller passes a non-negative value; a negative
+     * {@code i} is not handled specially.
+     *
+     * @param buffer      destination array
+     * @param sourceIndex index in {@code buffer} of the first char to write
+     * @param i           the number to format
+     * @param radix       the base to format in
+     * @param minwidth    minimum number of chars to produce
+     * @return the number of chars written
+     */
+    private static final int itou(char[] buffer, int sourceIndex, int i, int radix, int minwidth) {
+        int written = 0;
+
+        /* digits come out least significant first ... */
+        do {
+            final int digit = i % radix;
+            buffer[sourceIndex + written++] = (char)(digit <= 9 ? ('0' + digit) : ('A' + digit - 10));
+            i /= radix;
+        } while (i != 0 && (sourceIndex + written) < buffer.length);
+
+        /* ... so the zero padding is appended behind them ... */
+        while (written < minwidth) {
+            buffer[sourceIndex + written++] = '0';
+        }
+
+        /* ... and the whole run is reversed in place */
+        for (int lo = sourceIndex, hi = sourceIndex + written - 1; lo < hi; lo++, hi--) {
+            final char swap = buffer[hi];
+            buffer[hi] = buffer[lo];
+            buffer[lo] = swap;
+        }
+
+        return written;
+    }
+
+    /*
+     * No need to create an instance
+     */
+    private CharsetCallback() {
+    }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java
new file mode 100644
index 00000000000..3479a20d221
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java
@@ -0,0 +1,725 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import com.ibm.icu.impl.Assert;
+
+/**
+ * An abstract class that provides framework methods of decoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter semantics of ICU4C.
+ * @stable ICU 3.6
+ */
+public abstract class CharsetDecoderICU extends CharsetDecoder{ 
+
+    int    toUnicodeStatus;
+    /* input bytes of the sequence currently being converted; toULength of them are
+       reported as pending by toUCountPending() and copied into invalidCharBuffer
+       before an error callback is called */
+    byte[] toUBytesArray = new byte[128];
+    int    toUBytesBegin = 0;
+    int    toULength;
+    /* target overflow buffer: output that did not fit into the target; decode()
+       drains it before converting anything new */
+    char[] charErrorBufferArray = new char[128];
+    int    charErrorBufferLength;
+    int    charErrorBufferBegin;
+    /* the offending input, one byte per char, as handed to the error callback */
+    char[] invalidCharBuffer = new char[128];
+    int    invalidCharLength;
+    
+    /**
+     * Maximum number of indexed bytes
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    protected static final int EXT_MAX_BYTES = 0x1f;
+
+    /* store previous UChars/chars to continue partial matches */
+    byte[] preToUArray = new byte[EXT_MAX_BYTES];
+    int    preToUBegin;
+    int    preToULength;       /* negative: replay */
+    int    preToUFirstLength;  /* length of first character */
+    int mode;
+    
+    /* context object passed to the error callbacks; replaced by setToUCallback() */
+    Object toUContext = null;
+    private CharsetCallback.Decoder onUnmappableCharacter = CharsetCallback.TO_U_CALLBACK_STOP;
+    private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
+    /* single entry point for error handling: forwards to onUnmappableCharacter when
+       the result is an unmappable error and to onMalformedInput for everything else */
+    CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
+                CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
+            if (cr.isUnmappable()) {
+                return onUnmappableCharacter.call(decoder, context, source, target, offsets, buffer,
+                        length, cr);
+            } else /* if (cr.isMalformed()) */ {
+                return onMalformedInput.call(decoder, context, source, target, offsets, buffer,
+                        length, cr);
+            }
+            // return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target, offsets, buffer, length, cr);
+        }
+    };
+
+    // exist to keep implOnMalformedInput and implOnUnmappableInput from being too recursive
+    private boolean malformedInputCalled = false;
+    private boolean unmappableCharacterCalled = false;
+    
+    /*
+     * Construct a CharsetDecoderICU based on the information provided from a CharsetICU object.
+     *
+     * The superclass is given 1/maxCharsPerByte as its average and maxCharsPerByte
+     * as its maximum number of chars per byte.
+     *
+     * @param cs The CharsetICU object containing information about the charset to decode.
+     */
+    CharsetDecoderICU(CharsetICU cs) {
+        super(cs, (1/cs.maxCharsPerByte), cs.maxCharsPerByte);
+    }
+
+    /*
+     * Is this Decoder allowed to use fallbacks? A fallback mapping is a mapping
+     * that will convert a byte sequence to a Unicode codepoint sequence, but
+     * the encoded Unicode codepoint sequence will round trip convert to a different
+     * byte sequence. In ICU, this can be called a reverse fallback.
+     * @return always true
+     */
+    final boolean isFallbackUsed() {
+        return true;
+    }
+
+    /**
+     * Fallback is currently always used by icu4j decoders.
+     * @return always true
+     */
+    static final boolean isToUUseFallback() {
+        return isToUUseFallback(true);
+    }
+
+    /**
+     * Fallback is currently always used by icu4j decoders.
+     * @param iUseFallback ignored
+     * @return always true
+     */
+    static final boolean isToUUseFallback(boolean iUseFallback) {
+        return true;
+    }
+
+    /**
+     * Sets the action to be taken if a malformed (illegal) input sequence is encountered.
+     * The action is translated into the matching ICU callback. For REPLACE the
+     * superclass is additionally switched to IGNORE so that java.nio does not
+     * perform a replacement of its own.
+     *
+     * @param newAction action to be taken
+     * @exception IllegalArgumentException
+     * @stable ICU 3.6
+     */
+    protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        // nested invocation caused by the super.onMalformedInput() call below: stop here
+        if (malformedInputCalled) {
+            return;
+        }
+
+        if (CodingErrorAction.REPLACE == newAction) {
+            // the replacement is written by the ICU callback, not by nio
+            malformedInputCalled = true;
+            super.onMalformedInput(CodingErrorAction.IGNORE);
+            malformedInputCalled = false;
+        }
+
+        onMalformedInput = getCallback(newAction);
+    }
+
+    /**
+     * Sets the action to be taken if an unmappable character is encountered.
+     * The action is translated into the matching ICU callback. For REPLACE the
+     * superclass is additionally switched to IGNORE so that java.nio does not
+     * perform a replacement of its own.
+     *
+     * @param newAction action to be taken
+     * @exception IllegalArgumentException
+     * @stable ICU 3.6
+     */
+    protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {
+        // nested invocation caused by the super.onUnmappableCharacter() call below: stop here
+        if (unmappableCharacterCalled) {
+            return;
+        }
+
+        if (CodingErrorAction.REPLACE == newAction) {
+            // the replacement is written by the ICU callback, not by nio
+            unmappableCharacterCalled = true;
+            super.onUnmappableCharacter(CodingErrorAction.IGNORE);
+            unmappableCharacterCalled = false;
+        }
+
+        onUnmappableCharacter = getCallback(newAction);
+    }
+
+    /**
+     * Sets the decoder callback and the context to be used if an illegal sequence is encountered.
+     * You would normally call this twice to set both the malformed and the unmappable error. In this
+     * case, newContext should remain the same since using a different newContext each time will
+     * negate the last one used.
+     * A result that is neither malformed nor unmappable leaves both callbacks unchanged.
+     * @param err CoderResult selecting which callback to replace
+     * @param newCallback CharsetCallback.Decoder
+     * @param newContext Object
+     * @stable ICU 4.0
+     */
+    public final void setToUCallback(CoderResult err, CharsetCallback.Decoder newCallback, Object newContext) {
+        if (err.isMalformed()) {
+            onMalformedInput = newCallback;
+        } else if (err.isUnmappable()) {
+            onUnmappableCharacter = newCallback;
+        }
+        /* Only malformed and unmappable are handled; anything else is ignored. */
+
+        /* the stored context is kept when it already equals the new one */
+        final boolean sameContext = toUContext != null && toUContext.equals(newContext);
+        if (!sameContext) {
+            toUContext = newContext;
+        }
+    }
+
+    /* Maps a java.nio action to its ICU callback: REPLACE to substitute, IGNORE to
+       skip, anything else (REPORT) to stop. */
+    private static CharsetCallback.Decoder getCallback(CodingErrorAction action){
+        if (action == CodingErrorAction.REPLACE) {
+            return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE;
+        }
+        if (action == CodingErrorAction.IGNORE) {
+            return CharsetCallback.TO_U_CALLBACK_SKIP;
+        }
+        return CharsetCallback.TO_U_CALLBACK_STOP;
+    }
+    private final ByteBuffer EMPTY = ByteBuffer.allocate(0);
+    /**
+     * Flushes any characters saved in the converter's internal buffer and
+     * resets the converter.
+     * @param out action to be taken
+     * @return result of flushing action and completes the decoding all input.
+     *         Returns CoderResult.UNDERFLOW if the action succeeds.
+     * @stable ICU 3.6
+     */
+    protected final CoderResult implFlush(CharBuffer out) {
+        /* an empty source with flush=true lets decode() finish whatever is pending */
+        return decode(EMPTY, out, null, true);
+    }
+    
+    /**
+     * Resets the to Unicode mode of converter
+     * @stable ICU 3.6
+     */
+    protected void implReset() {
+        toUnicodeStatus = 0 ;
+        toULength = 0;
+        charErrorBufferLength = 0;
+        charErrorBufferBegin = 0;
+        
+        /* store previous UChars/chars to continue partial matches */
+        preToUBegin = 0;
+        preToULength = 0;       /* negative: replay */
+        preToUFirstLength = 0; 
+
+        mode = 0;
+    }
+      
+    /**
+     * Decodes one or more bytes. The default behaviour of the converter
+     * is stop and report if an error in input stream is encountered. 
+     * To set different behaviour use @see CharsetDecoder.onMalformedInput()
+     * This  method allows a buffer by buffer conversion of a data stream.  
+     * The state of the conversion is saved between calls to convert.  
+     * Among other things, this means multibyte input sequences can be 
+     * split between calls. If a call to convert results in an Error, the 
+     * conversion may be continued by calling convert again with suitably 
+     * modified parameters.All conversions should be finished with a call to 
+     * the flush method.
+     * @param in buffer to decode
+     * @param out buffer to populate with decoded result
+     * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+     *         action succeeds or more input is needed for completing the decoding action.
+     * @stable ICU 3.6
+     */
+    protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
+        /* fewer bytes offered than the converter already holds as pending: nothing to do */
+        if(in.remaining() < toUCountPending()){
+            return CoderResult.UNDERFLOW;
+        }
+//        if (!in.hasRemaining()) {
+//            toULength = 0;
+//            return CoderResult.UNDERFLOW;
+//        }
+        
+        /* step over the bytes that are already held as pending input */
+        in.position(in.position() + toUCountPending());
+        
+        /* do the conversion */
+        CoderResult ret = decode(in, out, null, false);
+
+        // ok was there input held in the previous invocation of decodeLoop 
+        // that resulted in output in this invocation?
+        /* move the position back by whatever is pending now (re-read after decode) */
+        in.position(in.position() - toUCountPending());
+        
+        return ret;
+    }
+
+    /*
+     * Implements the ICU semantic for decode operation
+     * @param in The input byte buffer
+     * @param out The output character buffer
+     * @param offsets NOTE(review): may be null - decode() passes null from decodeLoop() and implFlush()
+     * @param flush forwarded from decode()
+     * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+     *         action succeeds or more input is needed for completing the decoding action.
+     */
+    abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush);
+    
+    /*
+     * Implements the ICU semantic for decode operation.
+     * First drains the target overflow buffer left over from an earlier call, then
+     * hands over to toUnicodeWithCallback() unless there is nothing left to do.
+     * @param source The input byte buffer
+     * @param target The output character buffer
+     * @param offsets may be null; receives -1 for each char drained from the overflow buffer
+     * @param flush true if, and only if, the invoker can provide no
+     *  additional input bytes beyond those in the given buffer.
+     * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+     *         action succeeds or more input is needed for completing the decoding action.
+     *         Returns CoderResult.OVERFLOW if the target fills up while the overflow
+     *         buffer is being drained.
+     * @throws IllegalArgumentException if source or target is null
+     */
+    final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+    
+        /* check parameters */
+        if (target == null || source == null) {
+            throw new IllegalArgumentException();
+        }
+        
+        /*
+         * Make sure that the buffer sizes do not exceed the number range for
+         * int32_t because some functions use the size (in units or bytes)
+         * rather than comparing pointers, and because offsets are int32_t values.
+         *
+         * size_t is guaranteed to be unsigned and large enough for the job.
+         *
+         * Return with an error instead of adjusting the limits because we would
+         * not be able to maintain the semantics that either the source must be
+         * consumed or the target filled (unless an error occurs).
+         * An adjustment would be sourceLimit=t+0x7fffffff; for example.
+         */
+        /*agljport:fix
+        if(
+            ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
+            ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)
+        ) {
+            *err=U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        */
+        
+        /* flush the target overflow buffer */
+        if (charErrorBufferLength > 0) {
+            int i = 0;
+            do {
+                if (!target.hasRemaining()) {
+                    /* the overflow buffer contains too much, keep the rest */
+                    int j = 0;
+
+                    /* shift the chars that were not written to the front of the array */
+                    do {
+                        charErrorBufferArray[j++] = charErrorBufferArray[i++];
+                    } while (i < charErrorBufferLength);
+
+                    /* NOTE(review): the byte cast is lossless only while j stays below 128,
+                       the size charErrorBufferArray is allocated with */
+                    charErrorBufferLength = (byte) j;
+                    return CoderResult.OVERFLOW;
+                }
+
+                /* copy the overflow contents to the target */
+                target.put(charErrorBufferArray[i++]);
+                if (offsets != null) {
+                    offsets.put(-1); /* no source index available for old output */
+                }
+            } while (i < charErrorBufferLength);
+
+            /* the overflow buffer is completely copied to the target */
+            charErrorBufferLength = 0;
+        }
+
+        /* a negative preToULength means replay data is waiting, so conversion must still run */
+        if (!flush && !source.hasRemaining() && preToULength >= 0) {
+            /* the overflow buffer is emptied and there is no new input: we are done */
+            return CoderResult.UNDERFLOW;
+        }
+
+        /*
+         * Do not simply return with a buffer overflow error if
+         * !flush && t==targetLimit
+         * because it is possible that the source will not generate any output.
+         * For example, the skip callback may be called;
+         * it does not output anything.
+         */
+        
+        return toUnicodeWithCallback(source, target, offsets, flush);
+    }
+
+    /* Currently, we are not using offsets in ICU4J.
*/ + /* private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) { + int limit; + int delta, offset; + + if(sourceIndex>=0) { + /* + * adjust each offset by adding the previous sourceIndex + * minus the length of the input sequence that caused an + * error, if any + */ + /* delta=sourceIndex-errorInputLength; + } else { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + */ + /* delta=-1; + } + limit=offsets.position()+length; + if(delta==0) { + /* most common case, nothing to do */ + /* } else if(delta>0) { + /* add the delta to each offset (but not if the offset is <0) */ + /* while(offsets.position()=0) { + offsets.put(offset+delta); + } + //FIXME: ++offsets; + } + } else /* delta<0 */ /* { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + * or the error input sequence started in a previous buffer + */ + /* while(offsets.position()=0) { + /* normal mode */ + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. 
+ */ + realSource=source; + realFlush=flush; + realSourceIndex=sourceIndex; + //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength); + replayArray.put(preToUArray,0, -preToULength); + source=replayArray; + source.position(0); + source.limit(replayArrayIndex-preToULength); + flush=false; + sourceIndex=-1; + preToULength=0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + + /* convert */ + cr = decodeLoop(source, target, offsets, flush); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preToULength==0 because a replay (<0) will cause + * s0) { + updateOffsets(offsets, length, sourceIndex, errorInputLength); + + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + //TODO: pArgs->offsets=offsets+=length; + /* } + + if(sourceIndex>=0) { + sourceIndex+=(source.position()-s); + } + + } */ + + if(preToULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==null) + { + realSource=source; + realFlush=flush; + realSourceIndex=sourceIndex; + + //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength); + replayArray.put(preToUArray,0, -preToULength); + // reset position + replayArray.position(0); + + source=replayArray; + source.limit(replayArrayIndex-preToULength); + flush=false; + if((sourceIndex+=preToULength)<0) { + sourceIndex=-1; + } + + preToULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + 
//agljport:todo U_ASSERT(realSource==NULL); + Assert.assrt(realSource==null); + } + } + + /* update pointers */ + s=source.position(); + //t=target.position(); + + if(cr.isUnderflow()) { + if(s0) { + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + cr = CoderResult.malformedForLength(toULength); + calledCallback=false; /* new error condition */ + } else { + /* input consumed */ + if(flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + implReset(); + } + + /* done successfully */ + return cr; + } + } + + /* U_FAILURE(*err) */ + { + + if( calledCallback || cr.isOverflow() || + (cr.isMalformed() && cr.isUnmappable()) + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=null) { + int length; + Assert.assrt(preToULength==0); + length = source.limit() - source.position(); + if(length>0) { + //UConverterUtility.uprv_memcpy(preToUArray, preToUBegin, pArgs.sourceArray, pArgs.sourceBegin, length); + source.get(preToUArray, preToUBegin, length); + preToULength=(byte)-length; + } + + source=realSource; + flush=realFlush; + } + return cr; + } + } + + /* copy toUBytes[] to invalidCharBuffer[] */ + errorInputLength=invalidCharLength=toULength; + if(errorInputLength>0) { + copy(toUBytesArray, 0, invalidCharBuffer, 0, 
errorInputLength); + } + + /* set the converter state to deal with the next character */ + toULength=0; + + /* call the callback function */ + cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr); + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=true; + } + } + } + + /* + * Returns the number of chars held in the converter's internal state + * because more input is needed for completing the conversion. This function is + * useful for mapping semantics of ICU's converter interface to those of iconv, + * and this information is not needed for normal conversion. + * @return The number of chars in the state. -1 if an error is encountered. + */ + /*public*/ int toUCountPending() { + if(preToULength > 0){ + return preToULength ; + } else if(preToULength < 0){ + return -preToULength; + } else if(toULength > 0){ + return toULength; + } else { + return 0; + } + } + + + private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) { + for(int i=srcOffset; i0 && target.hasRemaining()) { + target.put(ucharsArray[ucharsBegin++]); + --length; + } + + } else { + /* output with offsets */ + while(length>0 && target.hasRemaining()) { + target.put(ucharsArray[ucharsBegin++]); + offsets.put(sourceIndex); + --length; + } + } + /* write overflow */ + if(length>0) { + cnv.charErrorBufferLength= 0; + cr = CoderResult.OVERFLOW; + do { + cnv.charErrorBufferArray[cnv.charErrorBufferLength++]=ucharsArray[ucharsBegin++]; + } while(--length>0); + } + return cr; + } + /* + * This function will write out the Unicode substitution character to the + * target character buffer. 
+ * Sub classes to override this method if required + * @param decoder + * @param source + * @param target + * @param offsets + * @return A CoderResult object that contains the error result when an error occurs. + */ + /* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with + * the substitution characters. Will leave in here for the time being. To be removed later. (4.0) + */ + /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder, + ByteBuffer source, CharBuffer target, + IntBuffer offsets){ + String sub = decoder.replacement(); + CharsetICU cs = (CharsetICU) decoder.charset(); + if (decoder.invalidCharLength==1 && cs.subChar1 != 0x00) { + char[] subArr = new char[] { 0x1a }; + return CharsetDecoderICU.toUWriteUChars(decoder, subArr, 0, sub + .length(), target, offsets, source.position()); + } else { + return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(), + 0, sub.length(), target, offsets, source.position()); + + } + }*/ +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java new file mode 100644 index 00000000000..6a26f0ab53f --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java @@ -0,0 +1,920 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + * + ******************************************************************************* + */ + +package com.ibm.icu.charset; + +import java.nio.BufferOverflowException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +import com.ibm.icu.impl.Assert; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; + +/** + * An abstract class that provides framework methods of decoding operations for concrete + * subclasses. + * In the future this class will contain API that will implement converter semantics of ICU4C. + * @stable ICU 3.6 + */ +public abstract class CharsetEncoderICU extends CharsetEncoder { + + /* this is used in fromUnicode DBCS tables as an "unassigned" marker */ + static final char MISSING_CHAR_MARKER = '\uFFFF'; + + byte[] errorBuffer = new byte[30]; + + int errorBufferLength = 0; + + /** these are for encodeLoopICU */ + int fromUnicodeStatus; + + int fromUChar32; + + boolean useSubChar1; + + boolean useFallback; + + /* maximum number of indexed UChars */ + static final int EXT_MAX_UCHARS = 19; + + /* store previous UChars/chars to continue partial matches */ + int preFromUFirstCP; /* >=0: partial match */ + + char[] preFromUArray = new char[EXT_MAX_UCHARS]; + + int preFromUBegin; + + int preFromULength; /* negative: replay */ + + char[] invalidUCharBuffer = new char[2]; + + int invalidUCharLength; + + Object fromUContext; + + private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP; + + private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP; + + CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() { + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, 
IntBuffer offsets, + char[] buffer, int length, int cp, CoderResult cr) { + if (cr.isUnmappable()) { + return onUnmappableInput.call(encoder, context, source, target, + offsets, buffer, length, cp, cr); + } else /* if (cr.isMalformed()) */ { + return onMalformedInput.call(encoder, context, source, target, + offsets, buffer, length, cp, cr); + } + // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr); + + } + }; + + /* + * Construcs a new encoder for the given charset + * + * @param cs + * for which the decoder is created + * @param replacement + * the substitution bytes + */ + CharsetEncoderICU(CharsetICU cs, byte[] replacement) { + super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2, + cs.maxBytesPerChar, replacement); + } + + /** + * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping + * that will convert a Unicode codepoint sequence to a byte sequence, but + * the encoded byte sequence will round trip convert to a different + * Unicode codepoint sequence. + * @return true if the converter uses fallback, false otherwise. + * @stable ICU 3.8 + */ + public boolean isFallbackUsed() { + return useFallback; + } + + /** + * Sets whether this Encoder can use fallbacks? + * @param usesFallback true if the user wants the converter to take + * advantage of the fallback mapping, false otherwise. 
+ * @stable ICU 3.8 + */ + public void setFallbackUsed(boolean usesFallback) { + useFallback = usesFallback; + } + + /* + * Use fallbacks from Unicode to codepage when useFallback or for private-use code points + * @param c A codepoint + */ + final boolean isFromUUseFallback(int c) { + return (useFallback) + || (UCharacter.getType(c) == UCharacter.PRIVATE_USE); + } + + /** + * Use fallbacks from Unicode to codepage when useFallback or for private-use code points + */ + static final boolean isFromUUseFallback(boolean iUseFallback, int c) { + return (iUseFallback) + || (UCharacter.getType(c) == UCharacter.PRIVATE_USE); + } + + /** + * Sets the action to be taken if an illegal sequence is encountered + * + * @param newAction + * action to be taken + * @exception IllegalArgumentException + * @stable ICU 3.6 + */ + protected void implOnMalformedInput(CodingErrorAction newAction) { + onMalformedInput = getCallback(newAction); + } + + /** + * Sets the action to be taken if an illegal sequence is encountered + * + * @param newAction + * action to be taken + * @exception IllegalArgumentException + * @stable ICU 3.6 + */ + protected void implOnUnmappableCharacter(CodingErrorAction newAction) { + onUnmappableInput = getCallback(newAction); + } + + /** + * Sets the callback encoder method and context to be used if an illegal sequence is encountered. + * You would normally call this twice to set both the malform and unmappable error. In this case, + * newContext should remain the same since using a different newContext each time will negate the last + * one used. 
+ * @param err CoderResult + * @param newCallback CharsetCallback.Encoder + * @param newContext Object + * @stable ICU 4.0 + */ + public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) { + if (err.isMalformed()) { + onMalformedInput = newCallback; + } else if (err.isUnmappable()) { + onUnmappableInput = newCallback; + } else { + /* Error: Only malformed and unmappable are handled. */ + } + + if (fromUContext == null || !fromUContext.equals(newContext)) { + setFromUContext(newContext); + } + } + + /** + * Sets fromUContext used in callbacks. + * + * @param newContext Object + * @exception IllegalArgumentException The object is an illegal argument for UContext. + * @stable ICU 4.0 + */ + public final void setFromUContext(Object newContext) { + fromUContext = newContext; + } + + private static CharsetCallback.Encoder getCallback(CodingErrorAction action) { + if (action == CodingErrorAction.REPLACE) { + return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE; + } else if (action == CodingErrorAction.IGNORE) { + return CharsetCallback.FROM_U_CALLBACK_SKIP; + } else /* if (action == CodingErrorAction.REPORT) */ { + return CharsetCallback.FROM_U_CALLBACK_STOP; + } + } + + private static final CharBuffer EMPTY = CharBuffer.allocate(0); + + /** + * Flushes any characters saved in the converter's internal buffer and + * resets the converter. + * @param out action to be taken + * @return result of flushing action and completes the decoding all input. + * Returns CoderResult.UNDERFLOW if the action succeeds. 
+ * @stable ICU 3.6 + */ + protected CoderResult implFlush(ByteBuffer out) { + return encode(EMPTY, out, null, true); + } + + /** + * Resets the from Unicode mode of converter + * @stable ICU 3.6 + */ + protected void implReset() { + errorBufferLength = 0; + fromUnicodeStatus = 0; + fromUChar32 = 0; + fromUnicodeReset(); + } + + private void fromUnicodeReset() { + preFromUBegin = 0; + preFromUFirstCP = UConverterConstants.U_SENTINEL; + preFromULength = 0; + } + + /** + * Encodes one or more chars. The default behaviour of the + * converter is stop and report if an error in input stream is encountered. + * To set different behaviour use @see CharsetEncoder.onMalformedInput() + * @param in buffer to decode + * @param out buffer to populate with decoded result + * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding + * action succeeds or more input is needed for completing the decoding action. + * @stable ICU 3.6 + */ + protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty + // The Java framework should have already substituted what was left. + fromUChar32 = 0; + //fromUnicodeReset(); + return CoderResult.UNDERFLOW; + } + in.position(in.position() + fromUCountPending()); + /* do the conversion */ + CoderResult ret = encode(in, out, null, false); + setSourcePosition(in); + /* No need to reset to keep the proper state of the encoder. + if (ret.isUnderflow() && in.hasRemaining()) { + // The Java framework is going to substitute what is left. + //fromUnicodeReset(); + } */ + return ret; + } + + /* + * Implements ICU semantics of buffer management + * @param source + * @param target + * @param offsets + * @return A CoderResult object that contains the error result when an error occurs. 
+ */ + abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target, + IntBuffer offsets, boolean flush); + + /* + * Implements ICU semantics for encoding the buffer + * @param source The input character buffer + * @param target The output byte buffer + * @param offsets + * @param flush true if, and only if, the invoker can provide no + * additional input bytes beyond those in the given buffer. + * @return A CoderResult object that contains the error result when an error occurs. + */ + final CoderResult encode(CharBuffer source, ByteBuffer target, + IntBuffer offsets, boolean flush) { + + /* check parameters */ + if (target == null || source == null) { + throw new IllegalArgumentException(); + } + + /* + * Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be targetLimit=t+0x7fffffff; for example. 
+ */ + + /* flush the target overflow buffer */ + if (errorBufferLength > 0) { + byte[] overflowArray; + int i, length; + + overflowArray = errorBuffer; + length = errorBufferLength; + i = 0; + do { + if (target.remaining() == 0) { + /* the overflow buffer contains too much, keep the rest */ + int j = 0; + + do { + overflowArray[j++] = overflowArray[i++]; + } while (i < length); + + errorBufferLength = (byte) j; + return CoderResult.OVERFLOW; + } + + /* copy the overflow contents to the target */ + target.put(overflowArray[i++]); + if (offsets != null) { + offsets.put(-1); /* no source index available for old output */ + } + } while (i < length); + + /* the overflow buffer is completely copied to the target */ + errorBufferLength = 0; + } + + if (!flush && source.remaining() == 0 && preFromULength >= 0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return CoderResult.UNDERFLOW; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + return fromUnicodeWithCallback(source, target, offsets, flush); + + } + + /* + * Implementation note for m:n conversions + * + * While collecting source units to find the longest match for m:n conversion, + * some source units may need to be stored for a partial match. + * When a second buffer does not yield a match on all of the previously stored + * source units, then they must be "replayed", i.e., fed back into the converter. + * + * The code relies on the fact that replaying will not nest - + * converting a replay buffer will not result in a replay. + * This is because a replay is necessary only after the _continuation_ of a + * partial match failed, but a replay buffer is converted as a whole. 
+ * It may result in some of its units being stored again for a partial match, + * but there will not be a continuation _during_ the replay which could fail. + * + * It is conceivable that a callback function could call the converter + * recursively in a way that causes another replay to be stored, but that + * would be an error in the callback function. + * Such violations will cause assertion failures in a debug build, + * and wrong output, but they will not cause a crash. + */ + final CoderResult fromUnicodeWithCallback(CharBuffer source, + ByteBuffer target, IntBuffer offsets, boolean flush) { + int sBufferIndex; + int sourceIndex; + int errorInputLength; + boolean converterSawEndOfInput, calledCallback; + + /* variables for m:n conversion */ + CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS); + int replayArrayIndex = 0; + CharBuffer realSource; + boolean realFlush; + + CoderResult cr = CoderResult.UNDERFLOW; + + /* get the converter implementation function */ + sourceIndex = 0; + + if (preFromULength >= 0) { + /* normal mode */ + realSource = null; + realFlush = false; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. 
+ */ + realSource = source; + realFlush = flush; + + //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR); + replayArray.put(preFromUArray, 0, -preFromULength); + source = replayArray; + source.position(replayArrayIndex); + source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration + flush = false; + + preFromULength = 0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for (;;) { + /* convert */ + cr = encodeLoop(source, target, offsets, flush); + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv.preFromULength==0 because a replay (<0) will cause + * s 0) { + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + /* offsets.position(offsets.position() + length); + } + + if (sourceIndex >= 0) { + sourceIndex += (int) (source.position()); + } + } */ + + if (preFromULength < 0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if (realSource == null) { + realSource = source; + realFlush = flush; + + //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR); + replayArray.put(preFromUArray, 0, -preFromULength); + + source = replayArray; + source.position(replayArrayIndex); + source.limit(replayArrayIndex - preFromULength); + flush = false; + if ((sourceIndex += preFromULength) < 0) { + sourceIndex = -1; + } + + preFromULength = 0; + } else { + /* see implementation note before 
_fromUnicodeWithCallback() */ + //agljport:todo U_ASSERT(realSource==NULL); + Assert.assrt(realSource == null); + } + } + + /* update pointers */ + sBufferIndex = source.position(); + if (cr.isUnderflow()) { + if (sBufferIndex < source.limit()) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if (realSource != null) { + /* switch back from replaying to the real source and continue */ + source = realSource; + flush = realFlush; + sourceIndex = source.position(); + realSource = null; + break; + } else if (flush && fromUChar32 != 0) { + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND; + cr = CoderResult.malformedForLength(1); + calledCallback = false; /* new error condition */ + } else { + /* input consumed */ + if (flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if (!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + implReset(); + } + + /* done successfully */ + return cr; + } + } + + /*U_FAILURE(*err) */ + { + + if (calledCallback || cr.isOverflow() + || (!cr.isMalformed() && !cr.isUnmappable())) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if (realSource != null) { + int length; + + //agljport:todo 
U_ASSERT(cnv.preFromULength==0); + + length = source.remaining(); + if (length > 0) { + //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR); + source.get(preFromUArray, 0, length); + preFromULength = (byte) -length; + } + source = realSource; + flush = realFlush; + } + return cr; + } + } + + /* callback handling */ + { + int codePoint; + + /* get and write the code point */ + codePoint = fromUChar32; + errorInputLength = UTF16.append(invalidUCharBuffer, 0, + fromUChar32); + invalidUCharLength = errorInputLength; + + /* set the converter state to deal with the next character */ + fromUChar32 = 0; + + /* call the callback function */ + cr = fromCharErrorBehaviour.call(this, fromUContext, + source, target, offsets, invalidUCharBuffer, + invalidUCharLength, codePoint, cr); + } + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback = true; + } + } + } + + /* + * Ascertains if a given Unicode code point (32bit value for handling surrogates) + * can be converted to the target encoding. If the caller wants to test if a + * surrogate pair can be converted to target encoding then the + * responsibility of assembling the int value lies with the caller. + * For assembling a code point the caller can use UTF16 class of ICU4J and do something like: + *
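    +     *  while(i<mySource.length){
    +     *      int cp = mySource[i++];
    +     *      if(UTF16.isLeadSurrogate((char)cp) && i<mySource.length
    +     *              && UTF16.isTrailSurrogate(mySource[i])){
    +     *          cp = UCharacter.getCodePoint((char)cp, mySource[i++]);
    +     *      }
    +     *      if(!((CharsetEncoderICU) myConv).canEncode(cp)){
    +     *          passed = false;
    +     *      }
    +     *  }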
    +     * or
    +     * 
    +     *  String src = new String(mySource);
    +     *  int i,codepoint;
    +     *  boolean passed = false;
    +     *  while(i<src.length()){
    +     *      codepoint = UTF16.charAt(src,i);
    +     *      i+= (codepoint>0xffff)? 2:1;
    +     *      if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
    +     *          passed = false;
    +     *      }
    +     *  }
    +     * 
    + * + * @param codepoint Unicode code point as int value + * @return true if a character can be converted + */ + /* TODO This is different from Java's canEncode(char) API. + * ICU's API should implement getUnicodeSet, + * and override canEncode(char) which queries getUnicodeSet. + * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C. + */ + /*public boolean canEncode(int codepoint) { + return true; + }*/ + /** + * Overrides super class method + * @stable ICU 3.6 + */ + public boolean isLegalReplacement(byte[] repl) { + return true; + } + + /* + * Writes out the specified output bytes to the target byte buffer or to converter internal buffers. + * @param cnv + * @param bytesArray + * @param bytesBegin + * @param bytesLength + * @param out + * @param offsets + * @param sourceIndex + * @return A CoderResult object that contains the error result when an error occurs. + */ + static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv, + byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out, + IntBuffer offsets, int sourceIndex) { + + //write bytes + int obl = bytesLength; + CoderResult cr = CoderResult.UNDERFLOW; + int bytesLimit = bytesBegin + bytesLength; + try { + for (; bytesBegin < bytesLimit;) { + out.put(bytesArray[bytesBegin]); + bytesBegin++; + } + // success + bytesLength = 0; + } catch (BufferOverflowException ex) { + cr = CoderResult.OVERFLOW; + } + + if (offsets != null) { + while (obl > bytesLength) { + offsets.put(sourceIndex); + --obl; + } + } + //write overflow + cnv.errorBufferLength = bytesLimit - bytesBegin; + if (cnv.errorBufferLength > 0) { + int index = 0; + while (bytesBegin < bytesLimit) { + cnv.errorBuffer[index++] = bytesArray[bytesBegin++]; + } + cr = CoderResult.OVERFLOW; + } + return cr; + } + + /* + * Returns the number of chars held in the converter's internal state + * because more input is needed for completing the conversion. 
This function is + * useful for mapping semantics of ICU's converter interface to those of iconv, + * and this information is not needed for normal conversion. + * @return The number of chars in the state. -1 if an error is encountered. + */ + /*public*/int fromUCountPending() { + if (preFromULength > 0) { + return UTF16.getCharCount(preFromUFirstCP) + preFromULength; + } else if (preFromULength < 0) { + return -preFromULength; + } else if (fromUChar32 > 0) { + return 1; + } else if (preFromUFirstCP > 0) { + return UTF16.getCharCount(preFromUFirstCP); + } + return 0; + } + + /** + * + * @param source + */ + private final void setSourcePosition(CharBuffer source) { + + // ok was there input held in the previous invocation of encodeLoop + // that resulted in output in this invocation? + source.position(source.position() - fromUCountPending()); + } + + /* + * Write the codepage substitution character. + * Subclasses to override this method. + * For stateful converters, it is typically necessary to handle this + * specificially for the converter in order to properly maintain the state. + * @param source The input character buffer + * @param target The output byte buffer + * @param offsets + * @return A CoderResult object that contains the error result when an error occurs. + */ + CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, + ByteBuffer target, IntBuffer offsets) { + CharsetICU cs = (CharsetICU) encoder.charset(); + byte[] sub = encoder.replacement(); + if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) { + return CharsetEncoderICU.fromUWriteBytes(encoder, + new byte[] { cs.subChar1 }, 0, 1, target, offsets, source + .position()); + } else { + return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0, + sub.length, target, offsets, source.position()); + } + } + + /* + * Write the characters to target. 
+     * @param source The input character buffer
+     * @param target The output byte buffer
+     * @param offsets Optional offsets buffer; when non-null it receives the source position
+     *                sampled before encoding, once per byte that the first encode() call
+     *                added to target
+     * @return A CoderResult object that contains the error result when an error occurs.
+     *         This is the result of the first encode() call; the result of the
+     *         follow-up encode() into the error buffer is not returned.
+     */
+    CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
+            CharBuffer source, ByteBuffer target, IntBuffer offsets) {
+        CoderResult cr = CoderResult.UNDERFLOW;
+
+        /* This is a fun one. Recursion can occur - we're basically going to
+         * just retry shoving data through the same converter. Note, if you got
+         * here through some kind of invalid sequence, you maybe should emit a
+         * reset sequence of some kind. Since this IS an actual conversion,
+         * take care that you've changed the callback or the data, or you'll
+         * get an infinite loop.
+         */
+
+        /* remember where target and source stood before converting */
+        int oldTargetPosition = target.position();
+        int offsetIndex = source.position();
+
+        cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
+
+        /* record one offset (the starting source position) per byte just written */
+        if (offsets != null) {
+            while (target.position() != oldTargetPosition) {
+                offsets.put(offsetIndex);
+                oldTargetPosition++;
+            }
+        }
+
+        /* Note, if you did something like used a stop subcallback, things would get interesting.
+         * In fact, here's where we want to return the partially consumed in-source!
+         */
+        if (cr.isOverflow()) {
+            /* Overflowed target. Now, we'll write into the charErrorBuffer.
+             * It's a fixed size. If we overflow it...Hm
+             */
+
+            /* start the new target at the first free slot in the error buffer */
+            int errBuffLen = encoder.errorBufferLength;
+            ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
+            newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
+            /* zeroed so that encode() does not first try to flush the error buffer */
+            encoder.errorBufferLength = 0;
+
+            /* the result of this second conversion is ignored */
+            encoder.encode(source, newTarget, null, false);
+
+            /* newTarget wraps errorBuffer, so array() is that same array */
+            encoder.errorBuffer = newTarget.array();
+            encoder.errorBufferLength = newTarget.position();
+        }
+
+        return cr;
+    }
+
+    /**
+     *

    + * Handles a common situation where a character has been read and it may be + * a lead surrogate followed by a trail surrogate. This method can change + * the source position and will modify fromUChar32. + *

    + * + *

    + * If null is returned, then there was success in reading a + * surrogate pair, the codepoint is stored in fromUChar32 and + * fromUChar32 should be reset (to 0) after being read. + *

    + * + * @param source + * The encoding source. + * @param lead + * A character that may be the first in a surrogate pair. + * @return CoderResult.malformedForLength(1) or + * CoderResult.UNDERFLOW if there is a problem, or + * null if there isn't. + * @see #handleSurrogates(CharBuffer, char) + * @see #handleSurrogates(CharBuffer, int, char) + * @see #handleSurrogates(char[], int, int, char) + */ + final CoderResult handleSurrogates(CharBuffer source, char lead) { + if (!UTF16.isLeadSurrogate(lead)) { + fromUChar32 = lead; + return CoderResult.malformedForLength(1); + } + + if (!source.hasRemaining()) { + fromUChar32 = lead; + return CoderResult.UNDERFLOW; + } + + char trail = source.get(); + + if (!UTF16.isTrailSurrogate(trail)) { + fromUChar32 = lead; + source.position(source.position() - 1); + return CoderResult.malformedForLength(1); + } + + fromUChar32 = UCharacter.getCodePoint(lead, trail); + return null; + } + + /** + *

    + * Same as handleSurrogates(CharBuffer, char), but with arrays. As an added + * requirement, the calling method must also increment the index if this method returns + * null. + *

    + * + * + * @param source + * The encoding source. + * @param lead + * A character that may be the first in a surrogate pair. + * @return CoderResult.malformedForLength(1) or + * CoderResult.UNDERFLOW if there is a problem, or null if + * there isn't. + * @see #handleSurrogates(CharBuffer, char) + * @see #handleSurrogates(CharBuffer, int, char) + * @see #handleSurrogates(char[], int, int, char) + */ + final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex, + int sourceLimit, char lead) { + if (!UTF16.isLeadSurrogate(lead)) { + fromUChar32 = lead; + return CoderResult.malformedForLength(1); + } + + if (sourceIndex >= sourceLimit) { + fromUChar32 = lead; + return CoderResult.UNDERFLOW; + } + + char trail = sourceArray[sourceIndex]; + + if (!UTF16.isTrailSurrogate(trail)) { + fromUChar32 = lead; + return CoderResult.malformedForLength(1); + } + + fromUChar32 = UCharacter.getCodePoint(lead, trail); + return null; + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java new file mode 100644 index 00000000000..4fc11086ac0 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java @@ -0,0 +1,385 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +class CharsetHZ extends CharsetICU { + + private static final int UCNV_TILDE = 0x7E; /* ~ */ + private static final int UCNV_OPEN_BRACE = 0x7B; /* { */ + private static final int UCNV_CLOSE_BRACE = 0x7D; /* } */ + private static final byte[] SB_ESCAPE = new byte[] { 0x7E, 0x7D }; + private static final byte[] DB_ESCAPE = new byte[] { 0x7E, 0x7B }; + private static final byte[] TILDE_ESCAPE = new byte[] { 0x7E, 0x7E }; + private static final byte[] fromUSubstitution = new byte[] { (byte) 0x1A }; + + private CharsetMBCS gbCharset; + private boolean isEmptySegment; + + public CharsetHZ(String icuCanonicalName, String canonicalName, String[] aliases) { + super(icuCanonicalName, canonicalName, aliases); + gbCharset = (CharsetMBCS) new CharsetProviderICU().charsetForName("GBK"); + + maxBytesPerChar = 4; + minBytesPerChar = 1; + maxCharsPerByte = 1; + + isEmptySegment = false; + } + + class CharsetDecoderHZ extends CharsetDecoderICU { + CharsetMBCS.CharsetDecoderMBCS gbDecoder; + boolean isStateDBCS = false; + + public CharsetDecoderHZ(CharsetICU cs) { + super(cs); + gbDecoder = (CharsetMBCS.CharsetDecoderMBCS) gbCharset.newDecoder(); + } + + protected void implReset() { + super.implReset(); + gbDecoder.implReset(); + + isStateDBCS = false; + isEmptySegment = false; + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + byte[] tempBuf = new byte[2]; + int targetUniChar = 0; + int mySourceChar = 0; + + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + else if 
(!target.hasRemaining()) + return CoderResult.OVERFLOW; + + while (source.hasRemaining()) { + + if (target.hasRemaining()) { + + // get the byte as unsigned + mySourceChar = source.get() & 0xff; + + if (mode == UCNV_TILDE) { + /* second byte after ~ */ + mode = 0; + switch (mySourceChar) { + case 0x0A: + /* no output for ~\n (line-continuation marker) */ + continue; + case UCNV_TILDE: + if (offsets != null) { + offsets.put(source.position() - 2); + } + target.put((char) mySourceChar); + continue; + case UCNV_OPEN_BRACE: + case UCNV_CLOSE_BRACE: + isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); + if (isEmptySegment) { + isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */ + this.toUBytesArray[0] = UCNV_TILDE; + this.toUBytesArray[1] = (byte)mySourceChar; + this.toULength = 2; + return CoderResult.malformedForLength(1); + } + isEmptySegment = true; + continue; + default: + /* + * if the first byte is equal to TILDE and the trail byte is not a valid byte then it is an + * error condition + */ + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + isEmptySegment = false; /* different error here, reset this to avoid spurious furture error */ + err = CoderResult.malformedForLength(1); + toUBytesArray[0] = UCNV_TILDE; + if (isStateDBCS ? (0x21 <= mySourceChar && mySourceChar <= 0x7e) : mySourceChar <= 0x7f) { + /* The current byte could be the start of a character: Back it out. */ + toULength = 1; + source.position(source.position() - 1); + } else { + /* Include the current byte in the illegal sequence. 
*/ + toUBytesArray[1] = (byte)mySourceChar; + toULength = 2; + } + return err; + } + } else if (isStateDBCS) { + if (toUnicodeStatus == 0) { + /* lead byte */ + if (mySourceChar == UCNV_TILDE) { + mode = UCNV_TILDE; + } else { + /* + * add another bit to distinguish a 0 byte from not having seen a lead byte + */ + toUnicodeStatus = mySourceChar | 0x100; + isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */ + } + continue; + } else { + /* trail byte */ + boolean leadIsOk, trailIsOk; + int leadByte = toUnicodeStatus & 0xff; + targetUniChar = 0xffff; + /* + * Ticket 5691: consistent illegal sequence + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those + * + * In HZ DBCS, if the second byte is in the 21..7e range, + * we report ony the first byte as the illegal sequence. + * Otherwise we convert of report the pair of bytes. 
+ */ + leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (leadByte - 0x21)) <= (0x7d - 0x21); + trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + tempBuf[0] = (byte)(leadByte + 0x80); + tempBuf[1] = (byte)(mySourceChar + 0x80); + targetUniChar = gbDecoder.simpleGetNextUChar(ByteBuffer.wrap(tempBuf), super.isFallbackUsed()); + mySourceChar = (leadByte << 8) | mySourceChar; + } else if (trailIsOk) { + /* report a single illegal byte and continue with the following DBCS starter byte */ + source.position(source.position() - 1); + mySourceChar = leadByte; + } else { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (leadByte << 8) | mySourceChar; + } + toUnicodeStatus = 0x00; + } + } else { + if (mySourceChar == UCNV_TILDE) { + mode = UCNV_TILDE; + continue; + } else if (mySourceChar <= 0x7f) { + targetUniChar = mySourceChar; /* ASCII */ + isEmptySegment = false; /* the segment has something valid */ + } else { + targetUniChar = 0xffff; + isEmptySegment = false; /* different error here, reset this to avoid spurious future error */ + } + } + + if (targetUniChar < 0xfffe) { + if (offsets != null) { + offsets.put(source.position() - 1 - (isStateDBCS ? 
1 : 0)); + } + + target.put((char) targetUniChar); + } else /* targetUniChar >= 0xfffe */{ + if (mySourceChar > 0xff) { + toUBytesArray[toUBytesBegin + 0] = (byte) (mySourceChar >> 8); + toUBytesArray[toUBytesBegin + 1] = (byte) mySourceChar; + toULength = 2; + } else { + toUBytesArray[toUBytesBegin + 0] = (byte) mySourceChar; + toULength = 1; + } + if (targetUniChar == 0xfffe) { + return CoderResult.unmappableForLength(toULength); + } else { + return CoderResult.malformedForLength(toULength); + } + } + } else { + return CoderResult.OVERFLOW; + } + } + + return err; + } + } + + class CharsetEncoderHZ extends CharsetEncoderICU { + CharsetMBCS.CharsetEncoderMBCS gbEncoder; + boolean isEscapeAppended = false; + boolean isTargetUCharDBCS = false; + + public CharsetEncoderHZ(CharsetICU cs) { + super(cs, fromUSubstitution); + gbEncoder = (CharsetMBCS.CharsetEncoderMBCS) gbCharset.newEncoder(); + } + + protected void implReset() { + super.implReset(); + gbEncoder.implReset(); + + isEscapeAppended = false; + isTargetUCharDBCS = false; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + int length = 0; + int[] targetUniChar = new int[] { 0 }; + int mySourceChar = 0; + boolean oldIsTargetUCharDBCS = isTargetUCharDBCS; + + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + else if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + if (fromUChar32 != 0 && target.hasRemaining()) { + CoderResult cr = handleSurrogates(source, (char) fromUChar32); + return (cr != null) ? 
cr : CoderResult.unmappableForLength(2); + } + /* writing the char to the output stream */ + while (source.hasRemaining()) { + targetUniChar[0] = MISSING_CHAR_MARKER; + if (target.hasRemaining()) { + + mySourceChar = source.get(); + + oldIsTargetUCharDBCS = isTargetUCharDBCS; + if (mySourceChar == UCNV_TILDE) { + /* + * concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex); + */ + concatEscape(source, target, offsets, TILDE_ESCAPE); + continue; + } else if (mySourceChar <= 0x7f) { + length = 1; + targetUniChar[0] = mySourceChar; + } else { + length = gbEncoder.fromUChar32(mySourceChar, targetUniChar, super.isFallbackUsed()); + + /* + * we can only use lead bytes 21..7D and trail bytes 21..7E + */ + if (length == 2 && 0xa1a1 <= targetUniChar[0] && targetUniChar[0] <= 0xfdfe + && 0xa1 <= (targetUniChar[0] & 0xff) && (targetUniChar[0] & 0xff) <= 0xfe) { + targetUniChar[0] -= 0x8080; + } else { + targetUniChar[0] = MISSING_CHAR_MARKER; + } + } + if (targetUniChar[0] != MISSING_CHAR_MARKER) { + isTargetUCharDBCS = (targetUniChar[0] > 0x00FF); + if (oldIsTargetUCharDBCS != isTargetUCharDBCS || !isEscapeAppended) { + /* Shifting from a double byte to single byte mode */ + if (!isTargetUCharDBCS) { + concatEscape(source, target, offsets, SB_ESCAPE); + isEscapeAppended = true; + } else { /* + * Shifting from a single byte to double byte mode + */ + concatEscape(source, target, offsets, DB_ESCAPE); + isEscapeAppended = true; + + } + } + + if (isTargetUCharDBCS) { + if (target.hasRemaining()) { + target.put((byte) (targetUniChar[0] >> 8)); + if (offsets != null) { + offsets.put(source.position() - 1); + } + if (target.hasRemaining()) { + target.put((byte) targetUniChar[0]); + if (offsets != null) { + offsets.put(source.position() - 1); + } + } else { + errorBuffer[errorBufferLength++] = (byte) targetUniChar[0]; + // *err = U_BUFFER_OVERFLOW_ERROR; + } + } else { + errorBuffer[errorBufferLength++] = (byte) (targetUniChar[0] >> 8); + 
errorBuffer[errorBufferLength++] = (byte) targetUniChar[0]; + // *err = U_BUFFER_OVERFLOW_ERROR; + } + + } else { + if (target.hasRemaining()) { + target.put((byte) targetUniChar[0]); + if (offsets != null) { + offsets.put(source.position() - 1); + } + + } else { + errorBuffer[errorBufferLength++] = (byte) targetUniChar[0]; + // *err = U_BUFFER_OVERFLOW_ERROR; + } + } + + } else { + /* oops.. the code point is unassigned */ + /* Handle surrogates */ + /* check if the char is a First surrogate */ + + if (UTF16.isSurrogate((char) mySourceChar)) { + // use that handy handleSurrogates method everyone's been talking about! + CoderResult cr = handleSurrogates(source, (char) mySourceChar); + return (cr != null) ? cr : CoderResult.unmappableForLength(2); + } else { + /* callback(unassigned) for a BMP code point */ + // *err = U_INVALID_CHAR_FOUND; + fromUChar32 = mySourceChar; + return CoderResult.unmappableForLength(1); + } + } + } else { + // *err = U_BUFFER_OVERFLOW_ERROR; + return CoderResult.OVERFLOW; + } + } + + return CoderResult.UNDERFLOW; + } + + private CoderResult concatEscape(CharBuffer source, ByteBuffer target, IntBuffer offsets, byte[] strToAppend) { + CoderResult cr = null; + for (int i=0; iA subclass of java.nio.Charset for providing implementation of ICU's charset converters. + * This API is used to convert codepage or character encoded data to and + * from UTF-16. You can open a converter with {@link Charset#forName } and {@link #forNameICU }. With that + * converter, you can get its properties, set options, convert your data.

    + * + *

    Since many software programs recogize different converter names for + * different types of converters, there are other functions in this API to + * iterate over the converter aliases. + * + * @stable ICU 3.6 + */ +public abstract class CharsetICU extends Charset{ + + String icuCanonicalName; + String javaCanonicalName; + int options; + + float maxCharsPerByte; + + String name; /* +4: 60 internal name of the converter- invariant chars */ + + int codepage; /* +64: 4 codepage # (now IBM-$codepage) */ + + byte platform; /* +68: 1 platform of the converter (only IBM now) */ + byte conversionType; /* +69: 1 conversion type */ + + int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ + int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ + + byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */ + byte subCharLen; /* +76: 1 */ + + byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */ + byte hasFromUnicodeFallback; /* +78: 1 */ + short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */ + byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ + //byte reserved[/*19*/]; /* +81: 19 to round out the structure */ + + + // typedef enum UConverterUnicodeSet { + /** + * Parameter that select the set of roundtrippable Unicode code points. + * @stable ICU 4.0 + */ + public static final int ROUNDTRIP_SET=0; + /** + * Select the set of Unicode code points with roundtrip or fallback mappings. + * Not supported at this point. + * @internal + * @deprecated This API is ICU internal only. 
+ */ + public static final int ROUNDTRIP_AND_FALLBACK_SET =1; + + //} UConverterUnicodeSet; + + /** + * + * @param icuCanonicalName + * @param canonicalName + * @param aliases + * @stable ICU 3.6 + */ + protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) { + super(canonicalName,aliases); + if(canonicalName.length() == 0){ + throw new IllegalCharsetNameException(canonicalName); + } + this.javaCanonicalName = canonicalName; + this.icuCanonicalName = icuCanonicalName; + } + + /** + * Ascertains if a charset is a sub set of this charset + * Implements the abstract method of super class. + * @param cs charset to test + * @return true if the given charset is a subset of this charset + * @stable ICU 3.6 + */ + public boolean contains(Charset cs){ + if (null == cs) { + return false; + } else if (this.equals(cs)) { + return true; + } + return false; + } + private static final HashMap algorithmicCharsets = new HashMap(); + static{ + algorithmicCharsets.put("LMBCS-1", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-2", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-3", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-4", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-5", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-6", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-8", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-11", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-16", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-17", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-18", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("LMBCS-19", "com.ibm.icu.charset.CharsetLMBCS"); + algorithmicCharsets.put("BOCU-1", "com.ibm.icu.charset.CharsetBOCU1" ); + algorithmicCharsets.put("SCSU", 
"com.ibm.icu.charset.CharsetSCSU" ); + algorithmicCharsets.put("US-ASCII", "com.ibm.icu.charset.CharsetASCII" ); + algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" ); + algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" ); + algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.charset.CharsetUTF16BE" ); + algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.charset.CharsetUTF16LE" ); + algorithmicCharsets.put("UTF16_OppositeEndian", "com.ibm.icu.charset.CharsetUTF16LE" ); + algorithmicCharsets.put("UTF16_PlatformEndian", "com.ibm.icu.charset.CharsetUTF16" ); + algorithmicCharsets.put("UTF-32", "com.ibm.icu.charset.CharsetUTF32" ); + algorithmicCharsets.put("UTF-32BE", "com.ibm.icu.charset.CharsetUTF32BE" ); + algorithmicCharsets.put("UTF-32LE", "com.ibm.icu.charset.CharsetUTF32LE" ); + algorithmicCharsets.put("UTF32_OppositeEndian", "com.ibm.icu.charset.CharsetUTF32LE" ); + algorithmicCharsets.put("UTF32_PlatformEndian", "com.ibm.icu.charset.CharsetUTF32" ); + algorithmicCharsets.put("UTF-8", "com.ibm.icu.charset.CharsetUTF8" ); + algorithmicCharsets.put("CESU-8", "com.ibm.icu.charset.CharsetCESU8" ); + algorithmicCharsets.put("UTF-7", "com.ibm.icu.charset.CharsetUTF7" ); + algorithmicCharsets.put("ISCII,version=0", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=1", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=2", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=3", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=4", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=5", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=6", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=7", "com.ibm.icu.charset.CharsetISCII" ); + algorithmicCharsets.put("ISCII,version=8", "com.ibm.icu.charset.CharsetISCII" ); + 
algorithmicCharsets.put("IMAP-mailbox-name", "com.ibm.icu.charset.CharsetUTF7" ); + algorithmicCharsets.put("HZ", "com.ibm.icu.charset.CharsetHZ" ); + algorithmicCharsets.put("ISO_2022,locale=ja,version=0", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=ja,version=1", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=ja,version=2", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=ja,version=3", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=ja,version=4", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=zh,version=0", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=zh,version=1", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=ko,version=0", "com.ibm.icu.charset.CharsetISO2022" ); + algorithmicCharsets.put("ISO_2022,locale=ko,version=1", "com.ibm.icu.charset.CharsetISO2022" ); + } + + /*public*/ static final Charset getCharset(String icuCanonicalName, String javaCanonicalName, String[] aliases){ + String className = algorithmicCharsets.get(icuCanonicalName); + if(className==null){ + //all the cnv files are loaded as MBCS + className = "com.ibm.icu.charset.CharsetMBCS"; + } + try{ + CharsetICU conv = null; + Class cs = Class.forName(className).asSubclass(CharsetICU.class); + Class[] paramTypes = new Class[]{ String.class, String.class, String[].class}; + final Constructor c = cs.getConstructor(paramTypes); + Object[] params = new Object[]{ icuCanonicalName, javaCanonicalName, aliases}; + + // Run constructor + try { + conv = c.newInstance(params); + if (conv != null) { + return conv; + } + }catch (InvocationTargetException e) { + throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className+ ". 
Exception:" + e.getTargetException()); + } + }catch(ClassNotFoundException ex){ + }catch(NoSuchMethodException ex){ + }catch (IllegalAccessException ex){ + }catch (InstantiationException ex){ + } + throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className); + } + + static final boolean isSurrogate(int c){ + return (((c)&0xfffff800)==0xd800); + } + + /* + * Returns the default charset name + */ +// static final String getDefaultCharsetName(){ +// String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding(); +// return defaultEncoding; +// } + + /** + * Returns a charset object for the named charset. + * This method gurantee that ICU charset is returned when + * available. If the ICU charset provider does not support + * the specified charset, then try other charset providers + * including the standard Java charset provider. + * + * @param charsetName The name of the requested charset, + * may be either a canonical name or an alias + * @return A charset object for the named charset + * @throws IllegalCharsetNameException If the given charset name + * is illegal + * @throws UnsupportedCharsetException If no support for the + * named charset is available in this instance of th Java + * virtual machine + * @stable ICU 3.6 + */ + public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException { + CharsetProviderICU icuProvider = new CharsetProviderICU(); + CharsetICU cs = (CharsetICU) icuProvider.charsetForName(charsetName); + if (cs != null) { + return cs; + } + return Charset.forName(charsetName); + } + +// /** +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// * @stable 3.8 +// */ +// public int compareTo(Object otherObj) { +// if (!(otherObj instanceof CharsetICU)) { +// return -1; +// } +// return icuCanonicalName.compareTo(((CharsetICU)otherObj).icuCanonicalName); +// } + + /** + * This follows ucnv.c method 
ucnv_detectUnicodeSignature() to detect the + * start of the stream for example U+FEFF (the Unicode BOM/signature + * character) that can be ignored. + * + * Detects Unicode signature byte sequences at the start of the byte stream + * and returns number of bytes of the BOM of the indicated Unicode charset. + * 0 is returned when no Unicode signature is recognized. + * + */ + // TODO This should be proposed as CharsetDecoderICU API. +// static String detectUnicodeSignature(ByteBuffer source) { +// int signatureLength = 0; // number of bytes of the signature +// final int SIG_MAX_LEN = 5; +// String sigUniCharset = null; // states what unicode charset is the BOM +// int i = 0; +// +// /* +// * initial 0xa5 bytes: make sure that if we read Returns the set of Unicode code points that can be converted by an ICU Converter. + *

    + * The current implementation returns only one kind of set (ROUNDTRIP_SET): The set of all Unicode code points that can be + * roundtrip-converted (converted without any data loss) with the converter. This set will not include code points that have fallback + * mappings or are only the result of reverse fallback mappings. See UTR #22 "Character Mapping Markup Language" at http://www.unicode.org/reports/tr22/ + *

    * In the future, there may be more UConverterUnicodeSet choices to select sets with different properties. + *

    + *

    This is useful for example for + *

    • checking that a string or document can be roundtrip-converted with a converter, + * without/before actually performing the conversion
    • + *
    • testing if a converter can be used for typical text for a certain locale, + * by comparing its roundtrip set with the set of ExemplarCharacters from + * ICU's locale data or other sources
    + * + * @param setFillIn A valid UnicodeSet. It will be cleared by this function before + * the converter's specific set is filled in. + * @param which A selector; currently ROUNDTRIP_SET is the only supported value. + * @throws IllegalArgumentException if the parameters does not match. + * @stable ICU 4.0 + */ + public void getUnicodeSet(UnicodeSet setFillIn, int which){ + if( setFillIn == null || which != ROUNDTRIP_SET ){ + throw new IllegalArgumentException(); + } + setFillIn.clear(); + getUnicodeSetImpl(setFillIn, which); + } + + static void getNonSurrogateUnicodeSet(UnicodeSet setFillIn){ + setFillIn.add(0, 0xd7ff); + setFillIn.add(0xe000, 0x10ffff); + } + + static void getCompleteUnicodeSet(UnicodeSet setFillIn){ + setFillIn.add(0, 0x10ffff); + } + +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java new file mode 100644 index 00000000000..d9c82d5ed3e --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java @@ -0,0 +1,1458 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author Michael Ow + * + */ +class CharsetISCII extends CharsetICU { + private static final short UCNV_OPTIONS_VERSION_MASK = 0X0f; + //private static final short NUKTA = 0x093c; + //private static final short HALANT = 0x094d; + private static final short ZWNJ = 0x200c; /* Zero Width Non Joiner */ + private static final short ZWJ = 0x200d; /* Zero Width Joiner */ + //private static final int INVALID_CHAR = 0xffff; + private static final short ATR = 0xef; /* Attribute code */ + private static final short EXT = 0xf0; /* Extension code */ + private static final short DANDA = 0x0964; + private static final short DOUBLE_DANDA = 0x0965; + private static final short ISCII_NUKTA = 0xe9; + private static final short ISCII_HALANT = 0xe8; + private static final short ISCII_DANDA = 0xea; + private static final short ISCII_VOWEL_SIGN_E = 0xe0; + private static final short ISCII_INV = 0xd9; + private static final short INDIC_BLOCK_BEGIN = 0x0900; + private static final short INDIC_BLOCK_END = 0x0d7f; + private static final short INDIC_RANGE = (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN); + private static final short VOCALLIC_RR = 0x0931; + private static final short LF = 0x0a; + private static final short ASCII_END = 0xa0; + private static final short TELUGU_DELTA = (UniLang.DELTA * UniLang.TELUGU); + private static final short DEV_ABBR_SIGN = 0x0970; + private static final short DEV_ANUDATTA = 0x0952; + private static final short EXT_RANGE_BEGIN = 0xa1; + private static final short EXT_RANGE_END = 0xee; + private static final short PNJ_DELTA = 0x100; + private static final int 
NO_CHAR_MARKER = 0xfffe; + + /* Used for proper conversion to and from Gurmukhi */ + private static UnicodeSet PNJ_BINDI_TIPPI_SET; + private static UnicodeSet PNJ_CONSONANT_SET; + private static final short PNJ_BINDI = 0x0a02; + private static final short PNJ_TIPPI = 0x0a70; + private static final short PNJ_SIGN_VIRAMA = 0x0a4d; + private static final short PNJ_ADHAK = 0x0a71; + private static final short PNJ_HA = 0x0a39; + private static final short PNJ_RRA = 0x0a5c; + + private static final class UniLang { + static final short DEVALANGARI = 0; + static final short BENGALI = DEVALANGARI + 1; + static final short GURMUKHI = BENGALI + 1; + static final short GUJARATI = GURMUKHI + 1; + static final short ORIYA = GUJARATI + 1; + static final short TAMIL = ORIYA + 1; + static final short TELUGU = TAMIL + 1; + static final short KANNADA = TELUGU + 1; + static final short MALAYALAM = KANNADA + 1; + static final short DELTA = 0x80; + } + @SuppressWarnings("unused") + private static final class ISCIILang { + static final short DEF = 0x40; + static final short RMN = 0x41; + static final short DEV = 0x42; + static final short BNG = 0x43; + static final short TML = 0x44; + static final short TLG = 0x45; + static final short ASM = 0x46; + static final short ORI = 0x47; + static final short KND = 0x48; + static final short MLM = 0x49; + static final short GJR = 0x4a; + static final short PNJ = 0x4b; + static final short ARB = 0x71; + static final short PES = 0x72; + static final short URD = 0x73; + static final short SND = 0x74; + static final short KSM = 0x75; + static final short PST = 0x76; + } + + private static final class MaskEnum { + static final short DEV_MASK = 0x80; + static final short PNJ_MASK = 0x40; + static final short GJR_MASK = 0x20; + static final short ORI_MASK = 0x10; + static final short BNG_MASK = 0x08; + static final short KND_MASK = 0x04; + static final short MLM_MASK = 0x02; + static final short TML_MASK = 0x01; + static final short ZERO = 0x00; + } + 
+ private final String ISCII_CNV_PREFIX = "ISCII,version="; + + @SuppressWarnings("unused") + private final class UConverterDataISCII { + int option; + int contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ + int contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ + short defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ + short currentDeltaFromUnicode; /* current delta in Indic block */ + short currentDeltaToUnicode; /* current delta in Indic block */ + short currentMaskFromUnicode; /* mask for current state in fromUnicode */ + short currentMaskToUnicode; /* mask for current state in toUnicode */ + short defMaskToUnicode; /* mask for default state in toUnicode */ + boolean isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ + boolean resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered */ + String name; + int prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. 
*/ + + UConverterDataISCII(int option, String name) { + this.option = option; + this.name = name; + + initialize(); + } + + void initialize() { + this.contextCharToUnicode = NO_CHAR_MARKER; /* contextCharToUnicode */ + this.currentDeltaFromUnicode = 0x0000; /* contextCharFromUnicode */ + this.defDeltaToUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* defDeltaToUnicode */ + this.currentDeltaFromUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* currentDeltaFromUnicode */ + this.currentDeltaToUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* currentDeltaToUnicode */ + this.currentMaskToUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* currentMaskToUnicode */ + this.currentMaskFromUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* currentMaskFromUnicode */ + this.defMaskToUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* defMaskToUnicode */ + this.isFirstBuffer = true; /* isFirstBuffer */ + this.resetToDefaultToUnicode = false; /* resetToDefaultToUnicode */ + this.prevToUnicodeStatus = 0x0000; + } + } + + private static final class LookupDataStruct { + short uniLang; + short maskEnum; + short isciiLang; + + LookupDataStruct(short uniLang, short maskEnum, short isciiLang) { + this.uniLang = uniLang; + this.maskEnum = maskEnum; + this.isciiLang = isciiLang; + } + } + + private static final LookupDataStruct [] lookupInitialData = { + new LookupDataStruct(UniLang.DEVALANGARI, MaskEnum.DEV_MASK, ISCIILang.DEV), + new LookupDataStruct(UniLang.BENGALI, MaskEnum.BNG_MASK, ISCIILang.BNG), + new LookupDataStruct(UniLang.GURMUKHI, MaskEnum.PNJ_MASK, ISCIILang.PNJ), + new LookupDataStruct(UniLang.GUJARATI, MaskEnum.GJR_MASK, ISCIILang.GJR), + new LookupDataStruct(UniLang.ORIYA, MaskEnum.ORI_MASK, ISCIILang.ORI), + new LookupDataStruct(UniLang.TAMIL, 
MaskEnum.TML_MASK, ISCIILang.TML), + new LookupDataStruct(UniLang.TELUGU, MaskEnum.KND_MASK, ISCIILang.TLG), + new LookupDataStruct(UniLang.KANNADA, MaskEnum.KND_MASK, ISCIILang.KND), + new LookupDataStruct(UniLang.MALAYALAM, MaskEnum.MLM_MASK, ISCIILang.MLM) + }; + + /* + * The values in validity table are indexed by the lower bits of Unicode + * range 0x0900 - 0x09ff. The values have a structure like: + * ----------------------------------------------------------------- + * |DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | + * | | | | | ASM | KND | | | + * ----------------------------------------------------------------- + * If a code point is valid in a particular script + * then that bit is turned on + * + * Unicode does not distinguish between Bengali and Assamese aso we use 1 bit for + * to represent these languages + * + * Telugu and Kannda have same codepoints except for Vocallic_RR which we special case + * and combine and use 1 bit to represent these languages + */ + private static final short validityTable[] = { + /* This state table is tool generated so please do not edit unless you know exactly what you are doing */ + /* Note: This table was edited to mirror the Windows XP implementation */ + /* ISCII: Valid: Unicode */ + /* 0xa0: 0x00: 0x900 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xa1: 0xb8: 0x901 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xa2: 0xfe: 0x902 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xa3: 0xbf: 0x903 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0x00: 0x00: 0x904 */ MaskEnum.DEV_MASK + 
MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xa4: 0xff: 0x905 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xa5: 0xff: 0x906 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xa6: 0xff: 0x907 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xa7: 0xff: 0x908 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xa8: 0xff: 0x909 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xa9: 0xff: 0x90a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xaa: 0xfe: 0x90b */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0x00: 0x00: 0x90c */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xae: 0x80: 0x90d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xab: 0x87: 0x90e */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xac: 0xff: 0x90f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + 
MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xad: 0xff: 0x910 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb2: 0x80: 0x911 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xaf: 0x87: 0x912 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb0: 0xff: 0x913 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb1: 0xff: 0x914 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb3: 0xff: 0x915 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb4: 0xfe: 0x916 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xb5: 0xfe: 0x917 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xb6: 0xfe: 0x918 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xb7: 0xff: 0x919 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb8: 0xff: 0x91a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + 
MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xb9: 0xfe: 0x91b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xba: 0xff: 0x91c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xbb: 0xfe: 0x91d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xbc: 0xff: 0x91e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xbd: 0xff: 0x91f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xbe: 0xfe: 0x920 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xbf: 0xfe: 0x921 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xc0: 0xfe: 0x922 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xc1: 0xff: 0x923 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xc2: 0xff: 0x924 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xc3: 0xfe: 0x925 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + 
MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xc4: 0xfe: 0x926 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xc5: 0xfe: 0x927 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xc6: 0xff: 0x928 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xc7: 0x81: 0x929 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.TML_MASK, + /* 0xc8: 0xff: 0x92a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xc9: 0xfe: 0x92b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xca: 0xfe: 0x92c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xcb: 0xfe: 0x92d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xcc: 0xfe: 0x92e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xcd: 0xff: 0x92f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xcf: 0xff: 0x930 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + 
MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd0: 0x87: 0x931 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd1: 0xff: 0x932 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd2: 0xb7: 0x933 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd3: 0x83: 0x934 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd4: 0xff: 0x935 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd5: 0xfe: 0x936 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0xd6: 0xbf: 0x937 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd7: 0xff: 0x938 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xd8: 0xff: 0x939 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0x00: 0x00: 0x93a */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x93b */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + 
MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xe9: 0xda: 0x93c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x93d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xda: 0xff: 0x93e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xdb: 0xff: 0x93f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xdc: 0xff: 0x940 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xdd: 0xff: 0x941 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xde: 0xff: 0x942 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xdf: 0xbe: 0x943 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0x00: 0x00: 0x944 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xe3: 0x80: 0x945 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xe0: 0x87: 0x946 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xe1: 0xff: 0x947 */ 
MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xe2: 0xff: 0x948 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xe7: 0x80: 0x949 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xe4: 0x87: 0x94a */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xe5: 0xff: 0x94b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xe6: 0xff: 0x94c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xe8: 0xff: 0x94d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xec: 0x00: 0x94e */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xed: 0x00: 0x94f */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x950 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x951 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x952 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + 
MaskEnum.ZERO, + /* 0x00: 0x00: 0x953 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x954 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x955 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x956 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x957 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0x00: 0x00: 0x958 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x959 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x95a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x95b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x95c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x95d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x95e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xce: 0x98: 0x95f */ MaskEnum.DEV_MASK + 
MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x960 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0x00: 0x00: 0x961 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, + /* 0x00: 0x00: 0x962 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0x00: 0x00: 0x963 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xea: 0xf8: 0x964 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xeaea: 0x00: 0x965 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + /* 0xf1: 0xff: 0x966 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf2: 0xff: 0x967 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf3: 0xff: 0x968 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf4: 0xff: 0x969 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf5: 0xff: 0x96a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + 
MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf6: 0xff: 0x96b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf7: 0xff: 0x96c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf8: 0xff: 0x96d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xf9: 0xff: 0x96e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0xfa: 0xff: 0x96f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, + /* 0x00: 0x80: 0x970 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, + + /* + * The length of the array is 128 to provide values for 0x900..0x97f. + * The last 15 entries for 0x971..0x97f of the table are all zero + * because no Indic script uses such Unicode code points. 
+ */ + + /* 0x00: 0x00: 0x971 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x972 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x973 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x974 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x975 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x976 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x977 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x978 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x979 */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x97A */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x97B */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x97C */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x97D */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x97E */ MaskEnum.ZERO, + /* 0x00: 0x00: 0x97F */ MaskEnum.ZERO, + }; + + private static final char fromUnicodeTable[] = { + 0x00a0, /* 0x0900 */ + 0x00a1, /* 0x0901 */ + 0x00a2, /* 0x0902 */ + 0x00a3, /* 0x0903 */ + 0xa4e0, /* 0x0904 */ + 0x00a4, /* 0x0905 */ + 0x00a5, /* 0x0906 */ + 0x00a6, /* 0x0907 */ + 0x00a7, /* 0x0908 */ + 0x00a8, /* 0x0909 */ + 0x00a9, /* 0x090a */ + 0x00aa, /* 0x090b */ + 0xA6E9, /* 0x090c */ + 0x00ae, /* 0x090d */ + 0x00ab, /* 0x090e */ + 0x00ac, /* 0x090f */ + 0x00ad, /* 0x0910 */ + 0x00b2, /* 0x0911 */ + 0x00af, /* 0x0912 */ + 0x00b0, /* 0x0913 */ + 0x00b1, /* 0x0914 */ + 0x00b3, /* 0x0915 */ + 0x00b4, /* 0x0916 */ + 0x00b5, /* 0x0917 */ + 0x00b6, /* 0x0918 */ + 0x00b7, /* 0x0919 */ + 0x00b8, /* 0x091a */ + 0x00b9, /* 0x091b */ + 0x00ba, /* 0x091c */ + 0x00bb, /* 0x091d */ + 0x00bc, /* 0x091e */ + 0x00bd, /* 0x091f */ + 0x00be, /* 0x0920 */ + 0x00bf, /* 0x0921 */ + 0x00c0, /* 0x0922 */ + 0x00c1, /* 0x0923 */ + 0x00c2, /* 0x0924 */ + 0x00c3, /* 0x0925 */ + 0x00c4, /* 0x0926 */ + 0x00c5, /* 0x0927 */ + 0x00c6, /* 0x0928 */ + 0x00c7, /* 0x0929 */ + 0x00c8, /* 0x092a */ + 0x00c9, /* 0x092b */ + 0x00ca, /* 0x092c */ + 0x00cb, /* 0x092d */ + 0x00cc, /* 0x092e */ + 0x00cd, /* 0x092f */ + 0x00cf, /* 0x0930 */ + 0x00d0, /* 0x0931 */ + 0x00d1, /* 0x0932 */ + 0x00d2, /* 0x0933 */ + 0x00d3, /* 0x0934 */ + 0x00d4, /* 0x0935 */ + 0x00d5, /* 0x0936 */ + 0x00d6, /* 0x0937 */ + 0x00d7, /* 0x0938 */ + 
0x00d8, /* 0x0939 */ + 0xFFFF, /* 0x093a */ + 0xFFFF, /* 0x093b */ + 0x00e9, /* 0x093c */ + 0xEAE9, /* 0x093d */ + 0x00da, /* 0x093e */ + 0x00db, /* 0x093f */ + 0x00dc, /* 0x0940 */ + 0x00dd, /* 0x0941 */ + 0x00de, /* 0x0942 */ + 0x00df, /* 0x0943 */ + 0xDFE9, /* 0x0944 */ + 0x00e3, /* 0x0945 */ + 0x00e0, /* 0x0946 */ + 0x00e1, /* 0x0947 */ + 0x00e2, /* 0x0948 */ + 0x00e7, /* 0x0949 */ + 0x00e4, /* 0x094a */ + 0x00e5, /* 0x094b */ + 0x00e6, /* 0x094c */ + 0x00e8, /* 0x094d */ + 0x00ec, /* 0x094e */ + 0x00ed, /* 0x094f */ + 0xA1E9, /* 0x0950 */ /* OM Symbol */ + 0xFFFF, /* 0x0951 */ + 0xF0B8, /* 0x0952 */ + 0xFFFF, /* 0x0953 */ + 0xFFFF, /* 0x0954 */ + 0xFFFF, /* 0x0955 */ + 0xFFFF, /* 0x0956 */ + 0xFFFF, /* 0x0957 */ + 0xb3e9, /* 0x0958 */ + 0xb4e9, /* 0x0959 */ + 0xb5e9, /* 0x095a */ + 0xbae9, /* 0x095b */ + 0xbfe9, /* 0x095c */ + 0xC0E9, /* 0x095d */ + 0xc9e9, /* 0x095e */ + 0x00ce, /* 0x095f */ + 0xAAe9, /* 0x0960 */ + 0xA7E9, /* 0x0961 */ + 0xDBE9, /* 0x0962 */ + 0xDCE9, /* 0x0963 */ + 0x00ea, /* 0x0964 */ + 0xeaea, /* 0x0965 */ + 0x00f1, /* 0x0966 */ + 0x00f2, /* 0x0967 */ + 0x00f3, /* 0x0968 */ + 0x00f4, /* 0x0969 */ + 0x00f5, /* 0x096a */ + 0x00f6, /* 0x096b */ + 0x00f7, /* 0x096c */ + 0x00f8, /* 0x096d */ + 0x00f9, /* 0x096e */ + 0x00fa, /* 0x096f */ + 0xF0BF, /* 0x0970 */ + 0xFFFF, /* 0x0971 */ + 0xFFFF, /* 0x0972 */ + 0xFFFF, /* 0x0973 */ + 0xFFFF, /* 0x0974 */ + 0xFFFF, /* 0x0975 */ + 0xFFFF, /* 0x0976 */ + 0xFFFF, /* 0x0977 */ + 0xFFFF, /* 0x0978 */ + 0xFFFF, /* 0x0979 */ + 0xFFFF, /* 0x097a */ + 0xFFFF, /* 0x097b */ + 0xFFFF, /* 0x097c */ + 0xFFFF, /* 0x097d */ + 0xFFFF, /* 0x097e */ + 0xFFFF, /* 0x097f */ + }; + private static final char toUnicodeTable[] = { + 0x0000, /* 0x00 */ + 0x0001, /* 0x01 */ + 0x0002, /* 0x02 */ + 0x0003, /* 0x03 */ + 0x0004, /* 0x04 */ + 0x0005, /* 0x05 */ + 0x0006, /* 0x06 */ + 0x0007, /* 0x07 */ + 0x0008, /* 0x08 */ + 0x0009, /* 0x09 */ + 0x000a, /* 0x0a */ + 0x000b, /* 0x0b */ + 0x000c, /* 0x0c */ + 0x000d, /* 0x0d */ + 
0x000e, /* 0x0e */ + 0x000f, /* 0x0f */ + 0x0010, /* 0x10 */ + 0x0011, /* 0x11 */ + 0x0012, /* 0x12 */ + 0x0013, /* 0x13 */ + 0x0014, /* 0x14 */ + 0x0015, /* 0x15 */ + 0x0016, /* 0x16 */ + 0x0017, /* 0x17 */ + 0x0018, /* 0x18 */ + 0x0019, /* 0x19 */ + 0x001a, /* 0x1a */ + 0x001b, /* 0x1b */ + 0x001c, /* 0x1c */ + 0x001d, /* 0x1d */ + 0x001e, /* 0x1e */ + 0x001f, /* 0x1f */ + 0x0020, /* 0x20 */ + 0x0021, /* 0x21 */ + 0x0022, /* 0x22 */ + 0x0023, /* 0x23 */ + 0x0024, /* 0x24 */ + 0x0025, /* 0x25 */ + 0x0026, /* 0x26 */ + 0x0027, /* 0x27 */ + 0x0028, /* 0x28 */ + 0x0029, /* 0x29 */ + 0x002a, /* 0x2a */ + 0x002b, /* 0x2b */ + 0x002c, /* 0x2c */ + 0x002d, /* 0x2d */ + 0x002e, /* 0x2e */ + 0x002f, /* 0x2f */ + 0x0030, /* 0x30 */ + 0x0031, /* 0x31 */ + 0x0032, /* 0x32 */ + 0x0033, /* 0x33 */ + 0x0034, /* 0x34 */ + 0x0035, /* 0x35 */ + 0x0036, /* 0x36 */ + 0x0037, /* 0x37 */ + 0x0038, /* 0x38 */ + 0x0039, /* 0x39 */ + 0x003A, /* 0x3A */ + 0x003B, /* 0x3B */ + 0x003c, /* 0x3c */ + 0x003d, /* 0x3d */ + 0x003e, /* 0x3e */ + 0x003f, /* 0x3f */ + 0x0040, /* 0x40 */ + 0x0041, /* 0x41 */ + 0x0042, /* 0x42 */ + 0x0043, /* 0x43 */ + 0x0044, /* 0x44 */ + 0x0045, /* 0x45 */ + 0x0046, /* 0x46 */ + 0x0047, /* 0x47 */ + 0x0048, /* 0x48 */ + 0x0049, /* 0x49 */ + 0x004a, /* 0x4a */ + 0x004b, /* 0x4b */ + 0x004c, /* 0x4c */ + 0x004d, /* 0x4d */ + 0x004e, /* 0x4e */ + 0x004f, /* 0x4f */ + 0x0050, /* 0x50 */ + 0x0051, /* 0x51 */ + 0x0052, /* 0x52 */ + 0x0053, /* 0x53 */ + 0x0054, /* 0x54 */ + 0x0055, /* 0x55 */ + 0x0056, /* 0x56 */ + 0x0057, /* 0x57 */ + 0x0058, /* 0x58 */ + 0x0059, /* 0x59 */ + 0x005a, /* 0x5a */ + 0x005b, /* 0x5b */ + 0x005c, /* 0x5c */ + 0x005d, /* 0x5d */ + 0x005e, /* 0x5e */ + 0x005f, /* 0x5f */ + 0x0060, /* 0x60 */ + 0x0061, /* 0x61 */ + 0x0062, /* 0x62 */ + 0x0063, /* 0x63 */ + 0x0064, /* 0x64 */ + 0x0065, /* 0x65 */ + 0x0066, /* 0x66 */ + 0x0067, /* 0x67 */ + 0x0068, /* 0x68 */ + 0x0069, /* 0x69 */ + 0x006a, /* 0x6a */ + 0x006b, /* 0x6b */ + 0x006c, /* 0x6c */ + 
0x006d, /* 0x6d */ + 0x006e, /* 0x6e */ + 0x006f, /* 0x6f */ + 0x0070, /* 0x70 */ + 0x0071, /* 0x71 */ + 0x0072, /* 0x72 */ + 0x0073, /* 0x73 */ + 0x0074, /* 0x74 */ + 0x0075, /* 0x75 */ + 0x0076, /* 0x76 */ + 0x0077, /* 0x77 */ + 0x0078, /* 0x78 */ + 0x0079, /* 0x79 */ + 0x007a, /* 0x7a */ + 0x007b, /* 0x7b */ + 0x007c, /* 0x7c */ + 0x007d, /* 0x7d */ + 0x007e, /* 0x7e */ + 0x007f, /* 0x7f */ + 0x0080, /* 0x80 */ + 0x0081, /* 0x81 */ + 0x0082, /* 0x82 */ + 0x0083, /* 0x83 */ + 0x0084, /* 0x84 */ + 0x0085, /* 0x85 */ + 0x0086, /* 0x86 */ + 0x0087, /* 0x87 */ + 0x0088, /* 0x88 */ + 0x0089, /* 0x89 */ + 0x008a, /* 0x8a */ + 0x008b, /* 0x8b */ + 0x008c, /* 0x8c */ + 0x008d, /* 0x8d */ + 0x008e, /* 0x8e */ + 0x008f, /* 0x8f */ + 0x0090, /* 0x90 */ + 0x0091, /* 0x91 */ + 0x0092, /* 0x92 */ + 0x0093, /* 0x93 */ + 0x0094, /* 0x94 */ + 0x0095, /* 0x95 */ + 0x0096, /* 0x96 */ + 0x0097, /* 0x97 */ + 0x0098, /* 0x98 */ + 0x0099, /* 0x99 */ + 0x009a, /* 0x9a */ + 0x009b, /* 0x9b */ + 0x009c, /* 0x9c */ + 0x009d, /* 0x9d */ + 0x009e, /* 0x9e */ + 0x009f, /* 0x9f */ + 0x00A0, /* 0xa0 */ + 0x0901, /* 0xa1 */ + 0x0902, /* 0xa2 */ + 0x0903, /* 0xa3 */ + 0x0905, /* 0xa4 */ + 0x0906, /* 0xa5 */ + 0x0907, /* 0xa6 */ + 0x0908, /* 0xa7 */ + 0x0909, /* 0xa8 */ + 0x090a, /* 0xa9 */ + 0x090b, /* 0xaa */ + 0x090e, /* 0xab */ + 0x090f, /* 0xac */ + 0x0910, /* 0xad */ + 0x090d, /* 0xae */ + 0x0912, /* 0xaf */ + 0x0913, /* 0xb0 */ + 0x0914, /* 0xb1 */ + 0x0911, /* 0xb2 */ + 0x0915, /* 0xb3 */ + 0x0916, /* 0xb4 */ + 0x0917, /* 0xb5 */ + 0x0918, /* 0xb6 */ + 0x0919, /* 0xb7 */ + 0x091a, /* 0xb8 */ + 0x091b, /* 0xb9 */ + 0x091c, /* 0xba */ + 0x091d, /* 0xbb */ + 0x091e, /* 0xbc */ + 0x091f, /* 0xbd */ + 0x0920, /* 0xbe */ + 0x0921, /* 0xbf */ + 0x0922, /* 0xc0 */ + 0x0923, /* 0xc1 */ + 0x0924, /* 0xc2 */ + 0x0925, /* 0xc3 */ + 0x0926, /* 0xc4 */ + 0x0927, /* 0xc5 */ + 0x0928, /* 0xc6 */ + 0x0929, /* 0xc7 */ + 0x092a, /* 0xc8 */ + 0x092b, /* 0xc9 */ + 0x092c, /* 0xca */ + 0x092d, /* 0xcb */ + 
0x092e, /* 0xcc */ + 0x092f, /* 0xcd */ + 0x095f, /* 0xce */ + 0x0930, /* 0xcf */ + 0x0931, /* 0xd0 */ + 0x0932, /* 0xd1 */ + 0x0933, /* 0xd2 */ + 0x0934, /* 0xd3 */ + 0x0935, /* 0xd4 */ + 0x0936, /* 0xd5 */ + 0x0937, /* 0xd6 */ + 0x0938, /* 0xd7 */ + 0x0939, /* 0xd8 */ + 0x200D, /* 0xd9 */ + 0x093e, /* 0xda */ + 0x093f, /* 0xdb */ + 0x0940, /* 0xdc */ + 0x0941, /* 0xdd */ + 0x0942, /* 0xde */ + 0x0943, /* 0xdf */ + 0x0946, /* 0xe0 */ + 0x0947, /* 0xe1 */ + 0x0948, /* 0xe2 */ + 0x0945, /* 0xe3 */ + 0x094a, /* 0xe4 */ + 0x094b, /* 0xe5 */ + 0x094c, /* 0xe6 */ + 0x0949, /* 0xe7 */ + 0x094d, /* 0xe8 */ + 0x093c, /* 0xe9 */ + 0x0964, /* 0xea */ + 0xFFFF, /* 0xeb */ + 0xFFFF, /* 0xec */ + 0xFFFF, /* 0xed */ + 0xFFFF, /* 0xee */ + 0xFFFF, /* 0xef */ + 0xFFFF, /* 0xf0 */ + 0x0966, /* 0xf1 */ + 0x0967, /* 0xf2 */ + 0x0968, /* 0xf3 */ + 0x0969, /* 0xf4 */ + 0x096a, /* 0xf5 */ + 0x096b, /* 0xf6 */ + 0x096c, /* 0xf7 */ + 0x096d, /* 0xf8 */ + 0x096e, /* 0xf9 */ + 0x096f, /* 0xfa */ + 0xFFFF, /* 0xfb */ + 0xFFFF, /* 0xfc */ + 0xFFFF, /* 0xfd */ + 0xFFFF, /* 0xfe */ + 0xFFFF, /* 0xff */ + }; + private static final char nuktaSpecialCases[][] = { + { 16 /* length of array */ , 0 }, + { 0xa6, 0x090c }, + { 0xea, 0x093d }, + { 0xdf, 0x0944 }, + { 0xa1, 0x0950 }, + { 0xb3, 0x0958 }, + { 0xb4, 0x0959 }, + { 0xb5, 0x095a }, + { 0xba, 0x095b }, + { 0xbf, 0x095c }, + { 0xc0, 0x095d }, + { 0xc9, 0x095e }, + { 0xaa, 0x0960 }, + { 0xa7, 0x0961 }, + { 0xdb, 0x0962 }, + { 0xdc, 0x0963 } + }; + private static final char vowelSignESpecialCases[][] = { + { 2 /* length of array */ , 0 }, + { 0xA4, 0x0904 } + }; + + private static final short lookupTable[][] = { + { MaskEnum.ZERO, MaskEnum.ZERO }, /* DEFAULT */ + { MaskEnum.ZERO, MaskEnum.ZERO }, /* ROMAN */ + { UniLang.DEVALANGARI, MaskEnum.DEV_MASK }, + { UniLang.BENGALI, MaskEnum.BNG_MASK }, + { UniLang.TAMIL, MaskEnum.TML_MASK }, + { UniLang.TELUGU, MaskEnum.KND_MASK }, + { UniLang.BENGALI, MaskEnum.BNG_MASK }, + { UniLang.ORIYA, 
MaskEnum.ORI_MASK }, + { UniLang.KANNADA, MaskEnum.KND_MASK }, + { UniLang.MALAYALAM, MaskEnum.MLM_MASK }, + { UniLang.GUJARATI, MaskEnum.GJR_MASK }, + { UniLang.GURMUKHI, MaskEnum.PNJ_MASK } + }; + + private UConverterDataISCII extraInfo = null; + protected byte[] fromUSubstitution = new byte[]{(byte)0x1A}; + + public CharsetISCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + maxBytesPerChar = 4; + minBytesPerChar = 1; + maxCharsPerByte = 1; + //get the version number of the ISCII converter + int option = Integer.parseInt(icuCanonicalName.substring(14)); + + extraInfo = new UConverterDataISCII( + option, + new String(ISCII_CNV_PREFIX + (option & UCNV_OPTIONS_VERSION_MASK)) /* name */ + ); + + initializePNJSets(); + } + + /* Initialize the two UnicodeSets use for proper Gurmukhi conversion if they have not already been created. */ + private void initializePNJSets() { + if (PNJ_BINDI_TIPPI_SET != null && PNJ_CONSONANT_SET != null) { + return; + } + PNJ_BINDI_TIPPI_SET = new UnicodeSet(); + PNJ_CONSONANT_SET = new UnicodeSet(); + + PNJ_CONSONANT_SET.add(0x0a15, 0x0a28); + PNJ_CONSONANT_SET.add(0x0a2a, 0x0a30); + PNJ_CONSONANT_SET.add(0x0a35, 0x0a36); + PNJ_CONSONANT_SET.add(0x0a38, 0x0a39); + + PNJ_BINDI_TIPPI_SET.addAll(PNJ_CONSONANT_SET); + PNJ_BINDI_TIPPI_SET.add(0x0a05); + PNJ_BINDI_TIPPI_SET.add(0x0a07); + + PNJ_BINDI_TIPPI_SET.add(0x0a41, 0x0a42); + PNJ_BINDI_TIPPI_SET.add(0x0a3f); + + PNJ_CONSONANT_SET.compact(); + PNJ_BINDI_TIPPI_SET.compact(); + } + + /* + * Rules for ISCII to Unicode converter + * ISCII is a stateful encoding. To convert ISCII bytes to Unicode, + * which is both precomposed and decomposed from characters + * pre-context and post-context need to be considered. + * + * Post context + * i) ATR : Attribute code is used to declare the font and script switching. 
+ * Currently we only switch scripts and font codes consumed without generating an error + * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, + * obsolete characters + * Pre context + * i) Halant: if preceeded by a halant then it is a explicit halant + * ii) Nukta: + * a) if preceeded by a halant then it is a soft halant + * b) if preceeded by specific consonants and the ligatures have pre-composed + * characters in Unicode then convert to pre-composed characters + * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda + */ + class CharsetDecoderISCII extends CharsetDecoderICU { + public CharsetDecoderISCII(CharsetICU cs) { + super(cs); + implReset(); + } + + protected void implReset() { + super.implReset(); + this.toUnicodeStatus = 0xFFFF; + extraInfo.initialize(); + } + + @SuppressWarnings("fallthrough") + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult cr = CoderResult.UNDERFLOW; + int targetUniChar = 0x0000; + short sourceChar = 0x0000; + UConverterDataISCII data; + boolean gotoCallBack = false; + int offset = 0; + + data = extraInfo; + //data.contextCharToUnicode; /* contains previous ISCII codepoint visited */ + //this.toUnicodeStatus; /* contains the mapping to Unicode of the above codepoint */ + + while (source.hasRemaining()) { + targetUniChar = UConverterConstants.missingCharMarker; + + if (target.hasRemaining()) { + sourceChar = (short)((short)source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + + /* look at the post-context perform special processing */ + if (data.contextCharToUnicode == ATR) { + /* If we have ATR in data.contextCharToUnicode then we need to change our + * state to Indic Script specified by sourceChar + */ + /* check if the sourceChar is supported script range */ + if (((short)(ISCIILang.PNJ - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (ISCIILang.PNJ - ISCIILang.DEV)) { + 
data.currentDeltaToUnicode = (short)(lookupTable[sourceChar & 0x0F][0] * UniLang.DELTA); + data.currentMaskToUnicode = lookupTable[sourceChar & 0x0F][1]; + } else if (sourceChar == ISCIILang.DEF) { + /* switch back to default */ + data.currentDeltaToUnicode = data.defDeltaToUnicode; + data.currentMaskToUnicode = data.defMaskToUnicode; + } else { + if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { + /* these are display codes consume and continue */ + } else { + cr = CoderResult.malformedForLength(1); + /* reset */ + data.contextCharToUnicode = NO_CHAR_MARKER; + gotoCallBack = true; + } + } + /* reset */ + if (!gotoCallBack) { + data.contextCharToUnicode = NO_CHAR_MARKER; + continue; + } + } else if (data.contextCharToUnicode == EXT) { + /* check if sourceChar is in 0xA1 - 0xEE range */ + if (((short)(EXT_RANGE_END - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { + /* We currently support only Anudatta and Devanagari abbreviation sign */ + if (sourceChar == 0xBF || sourceChar == 0xB8) { + targetUniChar = (sourceChar == 0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; + + /* find out if the mappling is valid in this state */ + if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) { + data.contextCharToUnicode = NO_CHAR_MARKER; + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ + if (data.prevToUnicodeStatus != 0) { + cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); + data.prevToUnicodeStatus = 0x0000; + } + /* write to target */ + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode); + + continue; + } + } + /* byte unit is unassigned */ + targetUniChar = UConverterConstants.missingCharMarker; + cr = CoderResult.unmappableForLength(1); + } else { + /* only 0xA1 - 0xEE are legal after EXT char */ + data.contextCharToUnicode = NO_CHAR_MARKER; + cr = CoderResult.malformedForLength(1); + } + gotoCallBack = true; + } else if (data.contextCharToUnicode == ISCII_INV) { + if (sourceChar == ISCII_HALANT) { + targetUniChar = 0x0020; /* replace with space according to Indic FAQ */ + } else { + targetUniChar = ZWJ; + } + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data.prevToUnicodeStatus != 0) { + cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); + data.prevToUnicodeStatus = 0x0000; + } + + /* write to target */ + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode); + /* reset */ + data.contextCharToUnicode = NO_CHAR_MARKER; + } + + /* look at the pre-context and perform special processing */ + if (!gotoCallBack) { + switch (sourceChar) { + case ISCII_INV: + case EXT: /* falls through */ + case ATR: + data.contextCharToUnicode = (char)sourceChar; + + if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ + if (data.prevToUnicodeStatus != 0) { + cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); + data.prevToUnicodeStatus = 0x0000; + } + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode); + this.toUnicodeStatus = UConverterConstants.missingCharMarker; + } + continue; + case ISCII_DANDA: + /* handle double danda */ + if (data.contextCharToUnicode == ISCII_DANDA) { + targetUniChar = DOUBLE_DANDA; + /* clear the context */ + data.contextCharToUnicode = NO_CHAR_MARKER; + this.toUnicodeStatus = UConverterConstants.missingCharMarker; + } else { + targetUniChar = GetMapping(sourceChar, targetUniChar, data); + data.contextCharToUnicode = (char)sourceChar; + } + break; + case ISCII_HALANT: + /* handle explicit halant */ + if (data.contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWNJ; + /* clear context */ + data.contextCharToUnicode = NO_CHAR_MARKER; + } else { + targetUniChar = GetMapping(sourceChar, targetUniChar, data); + data.contextCharToUnicode = (char)sourceChar; + } + break; + case 0x0A: + /* fall through */ + case 0x0D: + data.resetToDefaultToUnicode = true; + targetUniChar = GetMapping(sourceChar, targetUniChar, data); + data.contextCharToUnicode = (char)sourceChar; + break; + case ISCII_VOWEL_SIGN_E: + /* find + SIGN_VOWEL_E special mapping */ + int n = 1; + boolean find = false; + for (; n < vowelSignESpecialCases[0][0]; n++) { + if (vowelSignESpecialCases[n][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) { + targetUniChar = vowelSignESpecialCases[n][1]; + find = true; + break; + } + } + if (find) { + /* find out if the mapping is valid in this state */ + if ((validityTable[(byte)targetUniChar] & data.currentMaskFromUnicode) > 0) { + data.contextCharToUnicode = NO_CHAR_MARKER; + this.toUnicodeStatus = UConverterConstants.missingCharMarker; + break; + } + } + targetUniChar = 
GetMapping(sourceChar, targetUniChar, data); + data.contextCharToUnicode = (char)sourceChar; + break; + case ISCII_NUKTA: + /* handle soft halant */ + if (data.contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWJ; + /* clear the context */ + data.contextCharToUnicode = NO_CHAR_MARKER; + break; + } else if (data.currentDeltaToUnicode == PNJ_DELTA && data.contextCharToUnicode == 0xc0) { + /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. + * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). + * WriteToTargetToU is given 0x095c instead of 0xa5c because that method will automatically + * convert the code point given based on the delta provided. + */ + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_RRA, (short)0); + if (!cr.isOverflow()) { + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_SIGN_VIRAMA, (short)0); + if (!cr.isOverflow()) { + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_HA, (short)0); + } else { + this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA; + } + } else { + this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_SIGN_VIRAMA; + this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA; + } + this.toUnicodeStatus = UConverterConstants.missingCharMarker; + data.contextCharToUnicode = NO_CHAR_MARKER; + if (!cr.isError()) { + continue; + } + break; + } else { + /* try to handle + ISCII_NUKTA special mappings */ + int i = 1; + boolean found = false; + for (; i < nuktaSpecialCases[0][0]; i++) { + if (nuktaSpecialCases[i][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) { + targetUniChar = nuktaSpecialCases[i][1]; + found = true; + break; + } + } + if (found) { + /* find out if the mapping is valid in this state */ + if ((validityTable[(byte)targetUniChar] & data.currentMaskToUnicode) > 0) { + data.contextCharToUnicode = NO_CHAR_MARKER; + 
this.toUnicodeStatus = UConverterConstants.missingCharMarker; + if (data.currentDeltaToUnicode == PNJ_DELTA) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data.prevToUnicodeStatus != 0) { + cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); + data.prevToUnicodeStatus = 0x0000; + } + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode); + continue; + } + break; + } + /* else fall through to default */ + } + /* else fall through to default */ + } + + default: + targetUniChar = GetMapping(sourceChar, targetUniChar, data); + data.contextCharToUnicode = (char)sourceChar; + break; + } //end of switch + }//end of CallBack if statement + + if (!gotoCallBack && this.toUnicodeStatus != UConverterConstants.missingCharMarker) { + /* Check to make sure that consonant clusters are handled correctly for Gurmukhi script. */ + if (data.currentDeltaToUnicode == PNJ_DELTA && data.prevToUnicodeStatus != 0 && PNJ_CONSONANT_SET.contains(data.prevToUnicodeStatus) && + (this.toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data.prevToUnicodeStatus) { + if (offsets != null) { + offset = source.position() - 3; + } + cr = WriteToTargetToU(offsets, offset, source, target, PNJ_ADHAK, (short)0); + cr = WriteToTargetToU(offsets, offset, source, target, data.prevToUnicodeStatus, (short)0); + data.prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ + toUnicodeStatus = UConverterConstants.missingCharMarker; + continue; + } else { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ + if (data.prevToUnicodeStatus != 0) { + cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); + data.prevToUnicodeStatus = 0x0000; + } + /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. + * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. + */ + if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && PNJ_BINDI_TIPPI_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) { + targetUniChar = PNJ_TIPPI - PNJ_DELTA; + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, PNJ_DELTA); + } else if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && PNJ_CONSONANT_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) { + /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ + data.prevToUnicodeStatus = this.toUnicodeStatus + PNJ_DELTA; + } else { + /* write the previously mapped codepoint */ + cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode); + } + } + this.toUnicodeStatus = UConverterConstants.missingCharMarker; + } + + if (!gotoCallBack && targetUniChar != UConverterConstants.missingCharMarker) { + /* now save the targetUniChar for delayed write */ + this.toUnicodeStatus = (char)targetUniChar; + if (data.resetToDefaultToUnicode) { + data.currentDeltaToUnicode = data.defDeltaToUnicode; + data.currentMaskToUnicode = data.defMaskToUnicode; + data.resetToDefaultToUnicode = false; + } + } else { + /* we reach here only if targetUniChar == missingCharMarker + * so assign codes to reason and err + */ + if (!gotoCallBack) { + cr = CoderResult.unmappableForLength(1); + } +//CallBack : + toUBytesArray[0] = (byte)sourceChar; + toULength = 1; + gotoCallBack = false; + break; + } + } else { + cr 
= CoderResult.OVERFLOW; + break; + } + + } //end of while + + if (cr.isUnderflow() && flush && !source.hasRemaining()) { + /*end of the input stream */ + if (data.contextCharToUnicode == ATR || data.contextCharToUnicode == EXT || data.contextCharToUnicode == ISCII_INV) { + /* set toUBytes[] */ + toUBytesArray[0] = (byte)data.contextCharToUnicode; + toULength = 1; + + /* avoid looping on truncated sequences */ + data.contextCharToUnicode = NO_CHAR_MARKER; + } else { + toULength = 0; + } + + if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) { + /* output a remaining target character */ + WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode); + this.toUnicodeStatus = UConverterConstants.missingCharMarker; + } + } + return cr; + } + + private CoderResult WriteToTargetToU(IntBuffer offsets, int offset, ByteBuffer source, CharBuffer target, int targetUniChar, short delta) { + CoderResult cr = CoderResult.UNDERFLOW; + /* add offset to current Indic Block */ + if (targetUniChar > ASCII_END && + targetUniChar != ZWJ && + targetUniChar != ZWNJ && + targetUniChar != DANDA && + targetUniChar != DOUBLE_DANDA) { + targetUniChar += delta; + } + + /* now write the targetUniChar */ + if (target.hasRemaining()) { + target.put((char)targetUniChar); + if (offsets != null) { + offsets.put(offset); + } + } else { + charErrorBufferArray[charErrorBufferLength++] = (char)targetUniChar; + cr = CoderResult.OVERFLOW; + } + return cr; + } + + private int GetMapping(short sourceChar, int targetUniChar, UConverterDataISCII data) { + targetUniChar = toUnicodeTable[sourceChar]; + /* is the code point valid in current script? 
*/ + if (sourceChar > ASCII_END && + (validityTable[(short)targetUniChar & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) == 0) { + /* Vocallic RR is assigne in ISCII Telugu and Unicode */ + if (data.currentDeltaToUnicode != (TELUGU_DELTA) || targetUniChar != VOCALLIC_RR) { + targetUniChar = UConverterConstants.missingCharMarker; + } + } + return targetUniChar; + } + } + + /* + * Rules: + * Explicit Halant : + * + + * Soft Halant : + * + + */ + class CharsetEncoderISCII extends CharsetEncoderICU { + public CharsetEncoderISCII(CharsetICU cs) { + super(cs, fromUSubstitution); + implReset(); + } + + protected void implReset() { + super.implReset(); + extraInfo.initialize(); + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + int targetByteUnit = 0x0000; + int sourceChar = 0x0000; + UConverterDataISCII converterData; + short newDelta = 0; + short range = 0; + boolean deltaChanged = false; + int tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ + CoderResult cr = CoderResult.UNDERFLOW; + + /* initialize data */ + converterData = extraInfo; + newDelta = converterData.currentDeltaFromUnicode; + range = (short)(newDelta / UniLang.DELTA); + + if ((sourceChar = fromUChar32) != 0) { + cr = handleSurrogates(source, (char) sourceChar); + return (cr != null) ? cr : CoderResult.unmappableForLength(2); + } + + /* writing the char to the output stream */ + while (source.hasRemaining()) { + if (!target.hasRemaining()) { + return CoderResult.OVERFLOW; + } + + /* Write the language code following LF only if LF is not the last character. 
*/ + if (fromUnicodeStatus == LF) { + targetByteUnit = ATR << 8; + targetByteUnit += (byte)lookupInitialData[range].isciiLang; + fromUnicodeStatus = 0x0000; + /* now append ATR and language code */ + cr = WriteToTargetFromU(offsets, source, target, targetByteUnit); + if (cr.isOverflow()) { + break; + } + } + + sourceChar = source.get(); + tempContextFromUnicode = converterData.contextCharFromUnicode; + + targetByteUnit = UConverterConstants.missingCharMarker; + + /* check if input is in ASCII and C0 control codes range */ + if (sourceChar <= ASCII_END) { + fromUnicodeStatus = sourceChar; + cr = WriteToTargetFromU(offsets, source, target, sourceChar); + if (cr.isOverflow()) { + break; + } + continue; + } + + switch (sourceChar) { + case ZWNJ: + /* contextChar has HALANT */ + if (converterData.contextCharFromUnicode != 0) { + converterData.contextCharFromUnicode = 0x00; + targetByteUnit = ISCII_HALANT; + } else { + /* consume ZWNJ and continue */ + converterData.contextCharFromUnicode = 0x00; + continue; + } + break; + case ZWJ: + /* contextChar has HALANT */ + if (converterData.contextCharFromUnicode != 0) { + targetByteUnit = ISCII_NUKTA; + } else { + targetByteUnit = ISCII_INV; + } + converterData.contextCharFromUnicode = 0x00; + break; + default: + /* is the sourceChar in the INDIC_RANGE? */ + if((char)(INDIC_BLOCK_END - sourceChar) <= INDIC_RANGE) { + /* Danda and Doube Danda are valid in Northern scripts.. since Unicode + * does not include these codepoints in all Northern scripts we need to + * filter them out + */ + if (sourceChar != DANDA && sourceChar != DOUBLE_DANDA) { + /* find out to which block the sourceChar belongs */ + range = (short)((sourceChar - INDIC_BLOCK_BEGIN) / UniLang.DELTA); + newDelta = (short)(range * UniLang.DELTA); + + /* Now are we in the same block as previous? 
*/ + if (newDelta != converterData.currentDeltaFromUnicode || converterData.isFirstBuffer) { + converterData.currentDeltaFromUnicode = newDelta; + converterData.currentMaskFromUnicode = lookupInitialData[range].maskEnum; + deltaChanged = true; + converterData.isFirstBuffer = false; + } + if (converterData.currentDeltaFromUnicode == PNJ_DELTA) { + if (sourceChar == PNJ_TIPPI) { + /* Make sure Tippi is converterd to Bindi. */ + sourceChar = PNJ_BINDI; + } else if (sourceChar == PNJ_ADHAK) { + /* This is for consonant cluster handling. */ + converterData.contextCharFromUnicode = PNJ_ADHAK; + } + } + /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ + /* now subtract the new delta from sourceChar */ + sourceChar -= converterData.currentDeltaFromUnicode; + } + /* get the target byte unit */ + targetByteUnit = fromUnicodeTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK]; + + /* is the code point valid in current script? */ + if ((validityTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK] & converterData.currentMaskFromUnicode) == 0) { + /* Vocallic RR is assigned in ISCII Telugu and Unicode */ + if (converterData.currentDeltaFromUnicode != (TELUGU_DELTA) || sourceChar != VOCALLIC_RR) { + targetByteUnit = UConverterConstants.missingCharMarker; + } + } + + if (deltaChanged) { + /* we are in a script block which is different than + * previous sourceChar's script block write ATR and language codes + */ + char temp = 0; + temp = (char)(ATR << 8); + temp += (char)(lookupInitialData[range].isciiLang & UConverterConstants.UNSIGNED_BYTE_MASK); + /* reset */ + deltaChanged = false; + /* now append ATR and language code */ + cr = WriteToTargetFromU(offsets, source, target, temp); + if (cr.isOverflow()) { + break; + } + } + if (converterData.currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { + continue; + } + } + /* reset context char */ + converterData.contextCharFromUnicode = 0x00; + break; + } 
//end of switch + if (converterData.currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && PNJ_CONSONANT_SET.contains(sourceChar + PNJ_DELTA)) { + /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ + /* reset context char */ + converterData.contextCharFromUnicode = 0x0000; + targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; + /*write targetByteUnit to target */ + cr = WriteToTargetFromU(offsets, source, target, targetByteUnit); + if (cr.isOverflow()) { + break; + } + } else if (targetByteUnit != UConverterConstants.missingCharMarker) { + if (targetByteUnit == ISCII_HALANT) { + converterData.contextCharFromUnicode = (char)targetByteUnit; + } + /*write targetByteUnit to target */ + cr = WriteToTargetFromU(offsets, source, target, targetByteUnit); + if (cr.isOverflow()) { + break; + } + } else if (UTF16.isSurrogate((char)sourceChar)) { + cr = handleSurrogates(source, (char) sourceChar); + return (cr != null) ? 
cr : CoderResult.unmappableForLength(2); + } else { + return CoderResult.unmappableForLength(1); + } + } /* end of while */ + + /* save the state and return */ + return cr; + } + + private CoderResult WriteToTargetFromU(IntBuffer offsets, CharBuffer source, ByteBuffer target, int targetByteUnit) { + CoderResult cr = CoderResult.UNDERFLOW; + int offset = source.position() - 1; + /* write the targetUniChar to target */ + if (target.hasRemaining()) { + if (targetByteUnit <= 0xFF) { + target.put((byte)targetByteUnit); + if (offsets != null) { + offsets.put(offset); + } + } else { + if (targetByteUnit > 0xFFFF) { + target.put((byte)(targetByteUnit >> 16)); + if (offsets != null) { + --offset; + offsets.put(offset); + } + } + if (!target.hasRemaining()) { + errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8); + errorBuffer[errorBufferLength++] = (byte)targetByteUnit; + cr = CoderResult.OVERFLOW; + return cr; + } + target.put((byte)(targetByteUnit >> 8)); + if (offsets != null) { + offsets.put(offset); + } + if (target.hasRemaining()) { + target.put((byte)targetByteUnit); + if (offsets != null) { + offsets.put(offset); + } + } else { + errorBuffer[errorBufferLength++] = (byte)targetByteUnit; + cr = CoderResult.OVERFLOW; + } + } + } else { + if ((targetByteUnit > 0xFFFF)) { + errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 16); + } else if ((targetByteUnit & 0xFF00) > 0) { + errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8); + } + errorBuffer[errorBufferLength++] = (byte)(targetByteUnit); + cr = CoderResult.OVERFLOW; + } + return cr; + } + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderISCII(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderISCII(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + int idx,script; + char mask; + + setFillIn.add(0,ASCII_END ); + for(script = UniLang.DEVALANGARI ; script<= UniLang.MALAYALAM ;script++){ + mask = 
(char)lookupInitialData[script].maskEnum; + for(idx=0; idx < UniLang.DELTA ; idx++){ + // Special check for telugu character + if((validityTable[idx] & mask)!=0 || (script == UniLang.TELUGU && idx==0x31)){ + setFillIn.add(idx+(script*UniLang.DELTA)+INDIC_BLOCK_BEGIN ); + } + } + } + setFillIn.add(DANDA); + setFillIn.add(DOUBLE_DANDA); + setFillIn.add(ZWNJ); + setFillIn.add(ZWJ); + + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java new file mode 100644 index 00000000000..28db507b4fc --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java @@ -0,0 +1,2992 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.util.Arrays; + +import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS; +import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +class CharsetISO2022 extends CharsetICU { + private UConverterDataISO2022 myConverterData; + private int variant; // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN} + + private static final byte[] SHIFT_IN_STR = { 0x0f }; +// private static final byte[] SHIFT_OUT_STR = { 0x0e }; + + private static final byte CR = 0x0D; + private static final byte LF = 0x0A; +/* + private static final byte H_TAB = 0x09; + private static final byte SPACE = 0x20; +*/ + private static final char HWKANA_START = 0xff61; + private 
static final char HWKANA_END = 0xff9f; + + /* + * 94-character sets with native byte values A1..FE are encoded in ISO 2022 + * as bytes 21..7E. (Subtract 0x80.) + * 96-character sets with native bit values A0..FF are encoded in ISO 2022 + * as bytes 20..7F. (Subtract 0x80.) + * Do not encode C1 control codes with native bytes 80..9F + * as bytes 00..1F (C0 control codes). + */ +/* + private static final char GR94_START = 0xa1; + private static final char GR94_END = 0xfe; +*/ + private static final char GR96_START = 0xa0; + private static final char GR96_END = 0xff; + + /* for ISO-2022-JP and -CN implementations */ + // typedef enum { + /* shared values */ + private static final byte INVALID_STATE = -1; + private static final byte ASCII = 0; + + private static final byte SS2_STATE = 0x10; + private static final byte SS3_STATE = 0x11; + + /* JP */ + private static final byte ISO8859_1 = 1; + private static final byte ISO8859_7 = 2; + private static final byte JISX201 = 3; + private static final byte JISX208 = 4; + private static final byte JISX212 = 5; + private static final byte GB2312 = 6; + private static final byte KSC5601 = 7; + private static final byte HWKANA_7BIT = 8; /* Halfwidth Katakana 7 bit */ + + /* CN */ + /* the first few enum constants must keep their values because they corresponds to myConverterArray[] */ + private static final byte GB2312_1 = 1; + private static final byte ISO_IR_165= 2; + private static final byte CNS_11643 = 3; + + /* + * these are used in StateEnum and ISO2022State variables, + * but CNS_11643 must be used to index into myConverterArray[] + */ + private static final byte CNS_11643_0 = 0x20; + private static final byte CNS_11643_1 = 0x21; + private static final byte CNS_11643_2 = 0x22; + private static final byte CNS_11643_3 = 0x23; + private static final byte CNS_11643_4 = 0x24; + private static final byte CNS_11643_5 = 0x25; + private static final byte CNS_11643_6 = 0x26; + private static final byte CNS_11643_7 = 0x27; + // } 
StateEnum; + + + public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + + myConverterData = new UConverterDataISO2022(); + + int versionIndex = icuCanonicalName.indexOf("version="); + int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue(); + + myConverterData.version = version; + + if (icuCanonicalName.indexOf("locale=ja") > 0) { + ISO2022InitJP(version); + } else if (icuCanonicalName.indexOf("locale=zh") > 0) { + ISO2022InitCN(version); + } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ { + ISO2022InitKR(version); + } + + myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder(); + myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder(); + } + + private void ISO2022InitJP(int version) { + variant = ISO_2022_JP; + + maxBytesPerChar = 6; + minBytesPerChar = 1; + maxCharsPerByte = 1; + // open the required converters and cache them + if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) { + myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData; + } + // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData; + myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData; + if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) { + myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData; + } + if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) { + myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData; + } + if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) { + myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData; + } + + // create a generic CharsetMBCS object + 
myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); + } + + private void ISO2022InitCN(int version) { + variant = ISO_2022_CN; + + maxBytesPerChar = 8; + minBytesPerChar = 1; + maxCharsPerByte = 1; + // open the required coverters and cache them. + myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData; + if (version == 1) { + myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData; + } + myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData; + + // create a generic CharsetMBCS object + myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); + } + + private void ISO2022InitKR(int version) { + variant = ISO_2022_KR; + + maxBytesPerChar = 3; + minBytesPerChar = 1; + maxCharsPerByte = 1; + + if (version == 1) { + myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); + myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0]; + } else { + myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949"); + } + + myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder(); + myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder(); + } + + /* + * ISO 2022 control codes must not be converted from Unicode + * because they would mess up the byte stream. + * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b + * corresponding to SO, SI, and ESC. 
+ */ + private static boolean IS_2022_CONTROL(int c) { + return (c<0x20) && (((1<= 0xa1a1) && + ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) { + return (value - 0x8080); /* shift down to 21..7e byte range */ + } else { + return 0; /* not valid for ISO 2022 */ + } + } + + /* + * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. + * + * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the + * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point + * unchanged. + * + private static int _2022ToGR94DBCS(int value) { + int returnValue = value + 0x8080; + + if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) && + ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) { + return returnValue; + } else { + return value; + } + }*/ + + /* is the StateEnum charset value for a DBCS charset? */ + private static boolean IS_JP_DBCS(byte cs) { + return ((JISX208 <= cs) && (cs <= KSC5601)); + } + + private static short CSM(short cs) { + return (short)(1<= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + return 0; + } + /* convert the Unicode code point in c into codepage bytes */ + table = sharedData.mbcs.fromUnicodeTable; + /* get the byte for the output */ + value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c); + /* get the byte for the output */ + retval[0] = value & 0xff; + if (value >= 0xf00) { + return 1; /* roundtrip */ + } else if (useFallback ? 
value>=0x800 : value>=0xc00) { + return -1; /* fallback taken */ + } else { + return 0; /* no mapping */ + } + } + + /* + * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence + * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x + * + * Note: The converter uses some leniency: + * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in + * all versions, not just JIS7 and JIS8. + * - ICU does not distinguish between different version so of JIS X 0208. + */ + private static final short jpCharsetMasks[] = { + (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)), + (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)), + (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)), + (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)), + (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)) + }; + +/* + // typedef enum { + private static final byte ASCII1 = 0; + private static final byte LATIN1 = 1; + private static final byte SBCS = 2; + private static final byte DBCS = 3; + private static final byte MBCS = 4; + private static final byte HWKANA = 5; + // } Cnv2002Type; +*/ + + private class ISO2022State { + private byte []cs; /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */ + private byte g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */ + private byte prevG; /* g before single shift (SS2 or SS3) */ + + ISO2022State() { + cs = new byte[4]; + } + + void reset() { + Arrays.fill(cs, (byte)0); + g = 0; + prevG = 0; + } + } + +// private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf; + private static final byte UCNV_2022_MAX_CONVERTERS = 10; + + @SuppressWarnings("unused") + private class 
UConverterDataISO2022 { + UConverterSharedData []myConverterArray; + CharsetEncoderMBCS currentEncoder; + CharsetDecoderMBCS currentDecoder; + CharsetMBCS currentConverter; + int currentType; // Cnv2022Type; + ISO2022State toU2022State; + ISO2022State fromU2022State; + int key; + int version; + boolean isEmptySegment; + + UConverterDataISO2022() { + myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS]; + toU2022State = new ISO2022State(); + fromU2022State = new ISO2022State(); + currentType = 0; + key = 0; + version = 0; + isEmptySegment = false; + } + + void reset() { + toU2022State.reset(); + fromU2022State.reset(); + isEmptySegment = false; + } + } + + private static final byte ESC_2022 = 0x1B; /* ESC */ + + // typedef enum { + private static final byte INVALID_2022 = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */ + private static final byte VALID_NON_TERMINAL_2022 = 0; /* so far corresponds to a valid iso 2022 escape sequence */ + private static final byte VALID_TERMINAL_2022 = 1; /* corresponds to a valid iso 2022 escape sequence */ + private static final byte VALID_MAYBE_TERMINAL_2022 = 2; /* so far matches one iso 2022 escape sequence, but by adding + more characters might match another escape sequence */ + // } UCNV_TableStates_2022; + + /* + * The way these state transition arrays work is: + * ex : ESC$B is the sequence for JISX208 + * a) First Iteration: char is ESC + * i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index + * int x = normalize_esq_chars_2022[27] which is equal to 1 + * ii) Search for this value in escSeqStateTable_Key_2022[] + * value of x is stored at escSeqStateTable_Key_2022[0] + * iii) Save this index as offset + * iv) Get state of this sequence from escSeqStateTable_Value_2022[] + * escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022 + * b) Switch on this state and continue to next char + * i) Get the value of $ from normalize_esq_chars_2022[] with 
int value of $ as index + * which is normalize_esq_chars_2022[36] == 4 + * ii) x is currently 1(from above) + * x<<=5 -- x is now 32 + * x+=normalize_esq_chars_2022[36] + * now x is 36 + * iii) Search for this value in escSeqStateTable_Key_2022[] + * value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2 + * iv) Get state of this sequence from escSeqStateTable_Value_2022[] + * escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022 + * c) Switch on this state and continue to next char + * i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index + * ii) x is currently 36 (from above) + * x<<=5 -- x is now 1152 + * x+= normalize_esq_chars_2022[66] + * now x is 1161 + * iii) Search for this value in escSeqStateTable_Key_2022[] + * value of x is stored at escSeqStateTable_Key_2022[21], so offset is 21 + * iv) Get state of this sequence from escSeqStateTable_Value_2022[1] + * escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022 + * v) Get the converter name from escSeqStateTable_Result_2022[21] which is JISX208 + */ + /* Below are the 3 arrays depicting a state transition table */ + private static final byte normalize_esq_chars_2022[] = { + /* 0 1 2 3 4 5 6 7 8 9 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 7, 29, 0, + 2, 24, 26, 27, 0, 3, 23, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 25, 28, + 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, + 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }; + + private static final short MAX_STATES_2022 = 74; + private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = { + /* 0 1 2 3 4 5 6 7 8 9 */ + 1, 34, 36, 39, 55, 57, 60, 61, 1093, 1096, + 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, + 1109, 1154, 1157, 1160, 1161, 1176, 1178, 1179, 1254, 1257, + 1768, 1773, 1957, 35105, 36933, 36936, 36937, 36938, 36939, 36940, + 36942, 36943, 36944, 36945, 36946, 36947, 36948, 37640, 37642, 37644, + 37646, 37711, 37744, 37745, 37746, 37747, 37748, 40133, 40136, 40138, + 40139, 40140, 40141, 1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630, + 35947631, 35947635, 35947636, 35947638 + }; + + private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = { + /* 0 1 2 3 4 */ + VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_MAYBE_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, 
VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, + VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022 + }; + + /* Type def for refactoring changeState_2022 code */ + // typedef enum { + private static final byte ISO_2022_JP = 1; + private static final byte ISO_2022_KR = 2; + private static final byte ISO_2022_CN = 3; + // } Variant2022; + + /* const UConverterSharedData _ISO2022Data; */ + //private UConverterSharedData _ISO2022JPData; + //private UConverterSharedData _ISO2022KRData; + //private UConverterSharedData _ISO2022CNData; + + /******************** to unicode ********************/ + /**************************************************** + * Recognized escape sequenes are + * (B ASCII + * .A ISO-8859-1 + * .F ISO-8859-7 + * (J JISX-201 + * (I JISX-201 + * $B JISX-208 + * $@ JISX-208 + * $(D JISX-212 + * $A GB2312 + * $(C KSC5601 + */ + private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = { + /* 0 1 2 3 4 5 6 7 8 9 */ + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + ASCII, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, JISX201, HWKANA_7BIT, JISX201, INVALID_STATE, + INVALID_STATE, INVALID_STATE, JISX208, GB2312, JISX208, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + ISO8859_1, ISO8859_7, JISX208, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, KSC5601, JISX212, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, 
INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE + }; + + private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = { + /* 0 1 2 3 4 5 6 7 8 9 */ + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, SS3_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, GB2312_1, INVALID_STATE, ISO_IR_165, + CNS_11643_1, CNS_11643_2, CNS_11643_3, CNS_11643_4, CNS_11643_5, CNS_11643_6, CNS_11643_7, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, + INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE + }; + + /* runs through a state machine to determine the escape sequence - codepage correspondence */ + @SuppressWarnings("fallthrough") + private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) { + CoderResult err = CoderResult.UNDERFLOW; + boolean DONE = false; + byte value; + int 
key[] = {myConverterData.key}; + int offset[] = {0}; + int initialToULength = decoder.toULength; + byte c; + int malformLength = 0; + + value = VALID_NON_TERMINAL_2022; + while (source.hasRemaining()) { + c = source.get(); + malformLength++; + decoder.toUBytesArray[decoder.toULength++] = c; + value = getKey_2022(c, key, offset); + + switch(value) { + + case VALID_NON_TERMINAL_2022: + /* continue with the loop */ + break; + + case VALID_TERMINAL_2022: + key[0] = 0; + DONE = true; + break; + + case INVALID_2022: + DONE = true; + break; + + case VALID_MAYBE_TERMINAL_2022: + /* not ISO_2022 itself, finish here */ + value = VALID_TERMINAL_2022; + key[0] = 0; + DONE = true; + break; + } + if (DONE) { + break; + } + } +// DONE: + myConverterData.key = key[0]; + + if (value == VALID_NON_TERMINAL_2022) { + /* indicate that the escape sequence is incomplete: key !=0 */ + return err; + } else if (value == INVALID_2022) { + err = CoderResult.malformedForLength(malformLength); + } else /* value == VALID_TERMINAL_2022 */ { + switch (var) { + case ISO_2022_JP: { + byte tempState = nextStateToUnicodeJP[offset[0]]; + switch (tempState) { + case INVALID_STATE: + err = CoderResult.malformedForLength(malformLength); + break; + case SS2_STATE: + if (myConverterData.toU2022State.cs[2] != 0) { + if (myConverterData.toU2022State.g < 2) { + myConverterData.toU2022State.prevG = myConverterData.toU2022State.g; + } + myConverterData.toU2022State.g = 2; + } else { + /* illegal to have SS2 before a matching designator */ + err = CoderResult.malformedForLength(malformLength); + } + break; + /* case SS3_STATE: not used in ISO-2022-JP-x */ + case ISO8859_1: + case ISO8859_7: + if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) { + err = CoderResult.unmappableForLength(malformLength); + } else { + /* G2 charset for SS2 */ + myConverterData.toU2022State.cs[2] = tempState; + } + break; + default: + if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) { + err = 
CoderResult.unmappableForLength(source.position() - 1); + } else { + /* G0 charset */ + myConverterData.toU2022State.cs[0] = tempState; + } + break; + } // end of switch + break; + } + case ISO_2022_CN: { + byte tempState = nextStateToUnicodeCN[offset[0]]; + switch (tempState) { + case INVALID_STATE: + err = CoderResult.unmappableForLength(malformLength); + break; + case SS2_STATE: + if (myConverterData.toU2022State.cs[2] != 0) { + if (myConverterData.toU2022State.g < 2) { + myConverterData.toU2022State.prevG = myConverterData.toU2022State.g; + } + myConverterData.toU2022State.g = 2; + } else { + /* illegal to have SS2 before a matching designator */ + err = CoderResult.malformedForLength(malformLength); + } + break; + case SS3_STATE: + if (myConverterData.toU2022State.cs[3] != 0) { + if (myConverterData.toU2022State.g < 2) { + myConverterData.toU2022State.prevG = myConverterData.toU2022State.g; + } + myConverterData.toU2022State.g = 3; + } else { + /* illegal to have SS3 before a matching designator */ + err = CoderResult.malformedForLength(malformLength); + } + break; + case ISO_IR_165: + if (myConverterData.version == 0) { + err = CoderResult.unmappableForLength(malformLength); + break; + } + /* fall through */ + case GB2312_1: + /* fall through */ + case CNS_11643_1: + myConverterData.toU2022State.cs[1] = tempState; + break; + case CNS_11643_2: + myConverterData.toU2022State.cs[2] = tempState; + break; + default: + /* other CNS 11643 planes */ + if (myConverterData.version == 0) { + err = CoderResult.unmappableForLength(source.position() - 1); + } else { + myConverterData.toU2022State.cs[3] = tempState; + } + break; + } //end of switch + } + break; + case ISO_2022_KR: + if (offset[0] == 0x30) { + /* nothing to be done, just accept this one escape sequence */ + } else { + err = CoderResult.unmappableForLength(malformLength); + } + break; + default: + err = CoderResult.malformedForLength(malformLength); + break; + } // end of switch + } + if (!err.isError()) { + 
decoder.toULength = 0; /* escape sequence accepted: the bytes collected in toUBytesArray are no longer needed */ + } else if (err.isMalformed()) { + if (decoder.toULength > 1) { + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte (ESC) in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * In escape sequences, all following bytes are "printable", that is, + * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS), + * they are valid single/lead bytes. + * For simplicity, we always only report the initial ESC byte as the + * illegal sequence and back out all other bytes we looked at. + */ + /* Back out some bytes. */ + int backOutDistance = decoder.toULength - 1; + int bytesFromThisBuffer = decoder.toULength - initialToULength; + if (backOutDistance <= bytesFromThisBuffer) { + /* same as initialToULength<=1 */ + source.position(source.position() - backOutDistance); + } else { + /* Back out bytes from the previous buffer: Need to replay them. */ + decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance); + /* same as -(initialToULength-1) */ + /* preToULength is negative! 
*/ + for (int i = 0; i < -(decoder.preToULength); i++) { + decoder.preToUArray[i] = decoder.toUBytesArray[i+1]; /* index 0 is skipped: that first byte stays behind as the reported sequence (toULength is set to 1 below) */ + } + source.position(source.position() - bytesFromThisBuffer); + } + decoder.toULength = 1; + } + } + + return err; + } + /* + * Advances the escape-sequence state machine by one byte. + * c is normalized through normalize_esq_chars_2022, indexed by its unsigned value; + * a normalized value of 0 means the byte cannot occur in any escape sequence, + * in which case key[0] and offset[0] are cleared and INVALID_2022 is returned. + * Otherwise the candidate key, (key[0] << 5) plus the normalized value, is looked up + * in escSeqStateTable_Key_2022 by binary search. On a match key[0] receives the new key, + * offset[0] receives the table index, and the escSeqStateTable_Value_2022 entry at that + * index is returned. Without a match INVALID_2022 is returned and key[0] and offset[0] + * are left untouched. + * key and offset are one-element arrays used as in/out parameters. + */ + private static byte getKey_2022(byte c, int[]key, int[]offset) { + int togo; + int low = 0; + int hi = MAX_STATES_2022; + int oldmid = 0; + + togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK]; + + if (togo == 0) { + /* not a valid character anywhere in an escape sequence */ + key[0] = 0; + offset[0] = 0; + return INVALID_2022; + } + togo = (key[0] << 5) + togo; + + while (hi != low) { /* binary search */ + int mid = (hi+low) >> 1; /* midpoint of the current [low, hi) interval */ + + if (mid == oldmid) { /* the interval can no longer shrink: togo is not in the table */ + break; + } + + if (escSeqStateTable_Key_2022[mid] > togo) { + hi = mid; + } else if (escSeqStateTable_Key_2022[mid] < togo) { + low = mid; + } else /* we found it */ { + key[0] = togo; + offset[0] = mid; + return escSeqStateTable_Value_2022[mid]; + } + oldmid = mid; + } + return INVALID_2022; + } + + /* + * To Unicode Callback helper function. + * Records the offending source byte(s) in cnv.toUBytesArray and cnv.toULength: + * two bytes, high byte first, when sourceChar > 0xff, otherwise a single byte. + * Returns an unmappable result when targetUniChar equals missingCharMarker-1 + * (0xfffe per the inline note below) and a malformed result for any other value; + * both results are created with length 1. + */ + private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) { + CoderResult err = CoderResult.UNDERFLOW; + if (sourceChar > 0xff) { + cnv.toUBytesArray[0] = (byte)(sourceChar>>8); + cnv.toUBytesArray[1] = (byte)sourceChar; + cnv.toULength = 2; + } else { + cnv.toUBytesArray[0] = (byte)sourceChar; + cnv.toULength = 1; + } + + if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) { + err = CoderResult.unmappableForLength(1); + } else { + err = CoderResult.malformedForLength(1); + } + + return err; + } + + /****************************ISO-2022-JP************************************/ + private class CharsetDecoderISO2022JP extends CharsetDecoderICU { + public CharsetDecoderISO2022JP(CharsetICU cs) { + super(cs); + } + + protected void implReset() { + super.implReset(); + myConverterData.reset(); + } + /* + * Map 00..7F to 
Unicode according to JIS X 0201. + * */ + private int jisx201ToU(int value) { + if (value < 0x5c) { + return value; + } else if (value == 0x5c) { + return 0xa5; + } else if (value == 0x7e) { + return 0x203e; + } else { /* value <= 0x7f */ + return value; + } + } + /* + * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS. + * If either byte is outside 21..7E make sure that the result is not valid + * for Shift-JIS so that the converter catches it. + * Some invalid byte values already turn into equally invalid Shift-JIS + * byte values and need not be tested explicitly. + */ + private void _2022ToSJIS(char c1, char c2, byte []bytes) { + if ((c1&1) > 0) { + ++c1; + if (c2 <= 0x5f) { + c2 += 0x1f; + } else if (c2 <= 0x7e) { + c2 += 0x20; + } else { + c2 = 0; /* invalid */ + } + } else { + if ((c2 >= 0x21) && (c2 <= 0x7e)) { + c2 += 0x7e; + } else { + c2 = 0; /* invalid */ + } + } + + c1 >>=1; + if (c1 <= 0x2f) { + c1 += 0x70; + } else if (c1 <= 0x3f) { + c1 += 0xb0; + } else { + c1 = 0; /* invalid */ + } + bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1); + bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2); + } + + @SuppressWarnings("fallthrough") + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + boolean gotoGetTrail = false; + boolean gotoEscape = false; + CoderResult err = CoderResult.UNDERFLOW; + byte []tempBuf = new byte[2]; + int targetUniChar = 0x0000; + int mySourceChar = 0x0000; + int mySourceCharTemp = 0x0000; // use for getTrail label call. + byte cs; /* StateEnum */ + byte csTemp= 0; // use for getTrail label call. 
+ + if (myConverterData.key != 0) { + /* continue with a partial escape sequence */ + // goto escape; + gotoEscape = true; + } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) { + /* continue with a partial double-byte character */ + mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK); + toULength = 0; + cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g]; + // goto getTrailByte; + mySourceCharTemp = 0x99; + gotoGetTrail = true; + } + + while (source.hasRemaining() || gotoEscape || gotoGetTrail) { + // This code is here for the goto escape label call above. + if (gotoEscape) { + mySourceCharTemp = ESC_2022; + } + + targetUniChar = UConverterConstants.missingCharMarker; + + if (gotoEscape || gotoGetTrail || target.hasRemaining()) { + if (!gotoEscape && !gotoGetTrail) { + mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK; + mySourceCharTemp = mySourceChar; + } + + switch (mySourceCharTemp) { + case UConverterConstants.SI: + if (myConverterData.version == 3) { + myConverterData.toU2022State.g = 0; + continue; + } else { + /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ + myConverterData.isEmptySegment = false; + break; + } + + case UConverterConstants.SO: + if (myConverterData.version == 3) { + /* JIS7: switch to G1 half-width Katakana */ + myConverterData.toU2022State.cs[1] = HWKANA_7BIT; + myConverterData.toU2022State.g = 1; + continue; + } else { + /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ + myConverterData.isEmptySegment = false; /* reset this, we have a different error */ + break; + } + + case ESC_2022: + if (!gotoEscape) { + source.position(source.position() - 1); + } else { + gotoEscape = false; + } +// escape: + { + int mySourceBefore = source.position(); + int toULengthBefore = this.toULength; + + err = changeState_2022(this, source, variant); + + /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */ + 
if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) { + err = CoderResult.malformedForLength(source.position() - mySourceBefore); + this.toULength = toULengthBefore + (source.position() - mySourceBefore); + } + } + + /* invalid or illegal escape sequence */ + if(err.isError()){ + myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */ + return err; + } + /* If we successfully completed an escape sequence, we begin a new segment, empty so far */ + if(myConverterData.key == 0) { + myConverterData.isEmptySegment = true; + } + + continue; + /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ + case CR: + /* falls through */ + case LF: + /* automatically reset to single-byte mode */ + if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) { + myConverterData.toU2022State.cs[0] = ASCII; + } + myConverterData.toU2022State.cs[2] = 0; + myConverterData.toU2022State.g = 0; + /* falls through */ + default : + /* convert one or two bytes */ + myConverterData.isEmptySegment = false; + cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g]; + csTemp = cs; + if (gotoGetTrail) { + csTemp = (byte)0x99; + } + if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) { + /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ + targetUniChar = mySourceChar + (HWKANA_START - 0xa1); + + /* return from a single-shift state to the previous one */ + if (myConverterData.toU2022State.g >= 2) { + myConverterData.toU2022State.g = myConverterData.toU2022State.prevG; + } + } else { + switch(csTemp) { + case ASCII: + if (mySourceChar <= 0x7f) { + targetUniChar = mySourceChar; + } + break; + case ISO8859_1: + if (mySourceChar <= 0x7f) { + targetUniChar = mySourceChar + 0x80; + } + /* return from a single-shift state to the prevous one */ + myConverterData.toU2022State.g = 
myConverterData.toU2022State.prevG; + break; + case ISO8859_7: + if (mySourceChar <= 0x7f) { + /* convert mySourceChar+0x80 to use a normal 8-bit table */ + targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs, + mySourceChar+0x80); + } + /* return from a single-shift state to the previous one */ + myConverterData.toU2022State.g = myConverterData.toU2022State.prevG; + break; + case JISX201: + if (mySourceChar <= 0x7f) { + targetUniChar = jisx201ToU(mySourceChar); + } + break; + case HWKANA_7BIT: + if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) { + /* 7-bit halfwidth Katakana */ + targetUniChar = mySourceChar + (HWKANA_START - 0x21); + break; + } + default : + /* G0 DBCS */ + if (gotoGetTrail || source.hasRemaining()) { +// getTrailByte: + int tmpSourceChar; + gotoGetTrail = false; + short trailByte; + boolean leadIsOk, trailIsOk; + + trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK); + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21); + trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + source.get(); + tmpSourceChar = (mySourceChar << 8) | trailByte; + if (cs == JISX208) { + _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf); + mySourceChar = tmpSourceChar; + } else { + /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. 
*/ + mySourceChar = tmpSourceChar; + if (cs == KSC5601) { + tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ + } + tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8)); + tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar); + } + targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false); + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + source.get(); + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; + } + } else { + toUBytesArray[0] = (byte)mySourceChar; + toULength = 1; + // goto endloop + return err; + } + } /* end of inner switch */ + } + break; + } /* end of outer switch */ + + if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) { + if (offsets != null) { + offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2)); + } + target.put((char)targetUniChar); + } else if (targetUniChar > UConverterConstants.missingCharMarker) { + /* disassemble the surrogate pair and write to output */ + targetUniChar -= 0x0010000; + target.put((char)(0xd800 + (char)(targetUniChar>>10))); + target.position(target.position()-1); + if (offsets != null) { + offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2)); + } + target.get(); + if (target.hasRemaining()) { + target.put((char)(0xdc00+(char)(targetUniChar&0x3ff))); + target.position(target.position()-1); + if (offsets != null) { + offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 
1 : 2)); + } + target.get(); + } else { + charErrorBufferArray[charErrorBufferLength++] = + (char)(0xdc00+(char)(targetUniChar&0x3ff)); + } + } else { + /* Call the callback function */ + err = toUnicodeCallback(this, mySourceChar, targetUniChar); + break; + } + } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */ + err = CoderResult.OVERFLOW; + break; + } + } +//endloop: + return err; + } + } // end of class CharsetDecoderISO2022JP + + /****************************ISO-2022-CN************************************/ + private class CharsetDecoderISO2022CN extends CharsetDecoderICU { + public CharsetDecoderISO2022CN(CharsetICU cs) { + super(cs); + } + + protected void implReset() { + super.implReset(); + myConverterData.reset(); + } + + @SuppressWarnings("fallthrough") + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + byte[] tempBuf = new byte[3]; + int targetUniChar = 0x0000; + int mySourceChar = 0x0000; + int mySourceCharTemp = 0x0000; + boolean gotoEscape = false; + boolean gotoGetTrailByte = false; + + if (myConverterData.key != 0) { + /* continue with a partial escape sequence */ + // goto escape; + gotoEscape = true; + } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) { + /* continue with a partial double-byte character */ + mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK); + toULength = 0; + targetUniChar = UConverterConstants.missingCharMarker; + // goto getTrailByte + gotoGetTrailByte = true; + } + + while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) { + targetUniChar = UConverterConstants.missingCharMarker; + + if (target.hasRemaining() || gotoEscape) { + if (gotoEscape) { + mySourceChar = ESC_2022; // goto escape label + mySourceCharTemp = mySourceChar; + } else if (gotoGetTrailByte) { + mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp 
to go to default + } else { + mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get(); + mySourceCharTemp = mySourceChar; + } + + switch (mySourceCharTemp) { + case UConverterConstants.SI: + myConverterData.toU2022State.g = 0; + if (myConverterData.isEmptySegment) { + myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */ + err = CoderResult.malformedForLength(1); + this.toUBytesArray[0] = (byte)mySourceChar; + this.toULength = 1; + return err; + } + continue; + + case UConverterConstants.SO: + if (myConverterData.toU2022State.cs[1] != 0) { + myConverterData.toU2022State.g = 1; + myConverterData.isEmptySegment = true; /* Begin a new segment, empty so far */ + continue; + } else { + /* illegal to have SO before a matching designator */ + myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */ + break; + } + + case ESC_2022: + if (!gotoEscape) { + source.position(source.position()-1); + } +// escape label + gotoEscape = false; + { + int mySourceBefore = source.position(); + int toULengthBefore = this.toULength; + + err = changeState_2022(this, source, ISO_2022_CN); + + /* After SO there must be at least one character before a designator (designator error handled separately) */ + if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) { + err = CoderResult.malformedForLength(source.position() - mySourceBefore); + this.toULength = toULengthBefore + (source.position() - mySourceBefore); + } + } + + /* invalid or illegal escape sequence */ + if(err.isError()){ + myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */ + return err; + } + continue; + + /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ + case CR: + /* falls through */ + case LF: + myConverterData.toU2022State.reset(); + /* falls through */ + default: + /* converter one or two bytes */ + myConverterData.isEmptySegment = false; + 
if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) { + if (source.hasRemaining() || gotoGetTrailByte) { + UConverterSharedData cnv; + byte tempState; + int tempBufLen; + boolean leadIsOk, trailIsOk; + short trailByte; +// getTrailByte: label + gotoGetTrailByte = false; // reset gotoGetTrailByte + + trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK); + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21); + trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + source.get(); + tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g]; + if (tempState > CNS_11643_0) { + cnv = myConverterData.myConverterArray[CNS_11643]; + tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0)); + tempBuf[1] = (byte)mySourceChar; + tempBuf[2] = (byte)trailByte; + tempBufLen = 3; + } else { + cnv = myConverterData.myConverterArray[tempState]; + tempBuf[0] = (byte)mySourceChar; + tempBuf[1] = (byte)trailByte; + tempBufLen = 2; + } + ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf); + tempBuffer.limit(tempBufLen); + targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false); + mySourceChar = (mySourceChar << 8) | trailByte; + + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + source.get(); + /* add another bit so that the code below writes 2 bytes in 
case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; + } + if (myConverterData.toU2022State.g >= 2) { + /* return from a single-shift state to the previous one */ + myConverterData.toU2022State.g = myConverterData.toU2022State.prevG; + } + } else { + toUBytesArray[0] = (byte)mySourceChar; + toULength = 1; + // goto endloop; + return err; + } + } else { + if (mySourceChar <= 0x7f) { + targetUniChar = (char)mySourceChar; + } + } + break; + } + if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) { + if (offsets != null) { + offsets.array()[target.position()] = source.remaining() - (mySourceChar <= 0xff ? 1 : 2); + } + target.put((char)targetUniChar); + } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) { + /* disassemble the surrogate pair and write to output */ + targetUniChar -= 0x0010000; + target.put((char)(0xd800+(char)(targetUniChar>>10))); + if (offsets != null) { + offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2); + } + if (target.hasRemaining()) { + target.put((char)(0xdc00+(char)(targetUniChar&0x3ff))); + if (offsets != null) { + offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 
1 : 2); + } + } else { + charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff)); + } + } else { + /* Call the callback function */ + err = toUnicodeCallback(this, mySourceChar, targetUniChar); + break; + } + + } else { + err = CoderResult.OVERFLOW; + break; + } + } + + return err; + } + + } + /************************ ISO-2022-KR ********************/ + private class CharsetDecoderISO2022KR extends CharsetDecoderICU { + public CharsetDecoderISO2022KR(CharsetICU cs) { + super(cs); + } + + protected void implReset() { + super.implReset(); + setInitialStateToUnicodeKR(); + myConverterData.reset(); + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + int mySourceChar = 0x0000; + int targetUniChar = 0x0000; + byte[] tempBuf = new byte[2]; + boolean usingFallback; + boolean gotoGetTrailByte = false; + boolean gotoEscape = false; + + if (myConverterData.version == 1) { + return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush); + } + + /* initialize state */ + usingFallback = isFallbackUsed(); + + if (myConverterData.key != 0) { + /* continue with a partial escape sequence */ + gotoEscape = true; + } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) { + /* continue with a partial double-byte character */ + mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK); + toULength = 0; + gotoGetTrailByte = true; + } + + while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) { + if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) { + if (!gotoGetTrailByte && !gotoEscape) { + mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + } + + if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) { + myConverterData.toU2022State.g = 0; + if (myConverterData.isEmptySegment) { + myConverterData.isEmptySegment = 
false; /* we are handling it, reset to avoid future spurious errors */ + err = CoderResult.malformedForLength(1); + this.toUBytesArray[0] = (byte)mySourceChar; + this.toULength = 1; + return err; + } + /* consume the source */ + continue; + } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) { + myConverterData.toU2022State.g = 1; + myConverterData.isEmptySegment = true; + /* consume the source */ + continue; + } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) { + if (!gotoEscape) { + source.position(source.position()-1); + } +// escape label + gotoEscape = false; // reset gotoEscape flag + myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */ + err = changeState_2022(this, source, ISO_2022_KR); + if (err.isError()) { + return err; + } + continue; + } + myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */ + if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) { + if (source.hasRemaining() || gotoGetTrailByte) { + boolean leadIsOk, trailIsOk; + short trailByte; +// getTrailByte label + gotoGetTrailByte = false; // reset gotoGetTrailByte flag + + trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK); + targetUniChar = UConverterConstants.missingCharMarker; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. 
+ */ + leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21); + trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + source.get(); + tempBuf[0] = (byte)(mySourceChar + 0x80); + tempBuf[1] = (byte)(trailByte + 0x80); + targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback); + mySourceChar = (char)((mySourceChar << 8) | trailByte); + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + source.get(); + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte); + } + } else { + toUBytesArray[0] = (byte)mySourceChar; + toULength = 1; + break; + } + } else if (mySourceChar <= 0x7f) { + int savedSourceLimit = source.limit(); + int savedSourcePosition = source.position(); + source.limit(source.position()); + source.position(source.position()-1); + targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback); + source.limit(savedSourceLimit); + source.position(savedSourcePosition); + } else { + targetUniChar = 0xffff; + } + if (targetUniChar < 0xfffe) { + target.put((char)targetUniChar); + if (offsets != null) { + offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 
1 : 2); + } + } else { + /* Call the callback function */ + err = toUnicodeCallback(this, mySourceChar, targetUniChar); + break; + } + } else { + err = CoderResult.OVERFLOW; + break; + } + } + + return err; + } + + protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + int sourceStart; + int sourceLimit; + int argSource; + int argTarget; + boolean gotoEscape = false; + int oldSourceLimit; + + /* remember the original start of the input for offsets */ + sourceStart = argSource = source.position(); + + if (myConverterData.key != 0) { + /* continue with a partial escape sequence */ + gotoEscape = true; + } + + while (gotoEscape || (!err.isError() && source.hasRemaining())) { + if (!gotoEscape) { + /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */ + int oldSourcePos = source.position(); + sourceLimit = getEndOfBuffer_2022(source); + source.position(oldSourcePos); + if (source.position() != sourceLimit) { + /* + * get the current partial byte sequence + * + * it needs to be moved between the public and the subconverter + * so that the conversion frameword, which only sees the public + * converter, can handle truncated and illegal input etc. + */ + if (toULength > 0) { + cnv.toUBytesArray = toUBytesArray.clone(); + } + cnv.toULength = toULength; + + /* + * Convert up to the end of the input, or to before the next escape character. + * Does not handle conversion extensions because the preToU[] state etc. + * is not copied. 
+ */ + argTarget = target.position(); + oldSourceLimit = source.limit(); // save the old source limit change to new one + source.limit(sourceLimit); + err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush); + source.limit(oldSourceLimit); // restore source limit; + if (offsets != null && sourceStart != argSource) { + /* update offsets to base them on the actual start of the input */ + int delta = argSource - sourceStart; + while (argTarget < target.position()) { + int currentOffset = offsets.get(); + offsets.position(offsets.position()-1); + if (currentOffset >= 0) { + offsets.put(currentOffset + delta); + offsets.position(offsets.position()-1); + } + offsets.get(); + target.get(); + } + } + argSource = source.position(); + + /* copy input/error/overflow buffers */ + if (cnv.toULength > 0) { + toUBytesArray = cnv.toUBytesArray.clone(); + } + toULength = cnv.toULength; + + if (err.isOverflow()) { + if (cnv.charErrorBufferLength > 0) { + charErrorBufferArray = cnv.charErrorBufferArray.clone(); + } + charErrorBufferLength = cnv.charErrorBufferLength; + cnv.charErrorBufferLength = 0; + } + } + + if (err.isError() || err.isOverflow() || (source.position() == source.limit())) { + return err; + } + } +// escape label + gotoEscape = false; + err = changeState_2022(this, source, ISO_2022_KR); + } + return err; + } + } + + /******************** from unicode **********************/ + /* preference order of JP charsets */ + private final static byte []jpCharsetPref = { + ASCII, + JISX201, + ISO8859_1, + ISO8859_7, + JISX208, + JISX212, + GB2312, + KSC5601, + HWKANA_7BIT + }; + /* + * The escape sequences must be in order of the enum constants like JISX201 = 3, + * not in order of jpCharsetPref[]! 
+ */ + private final static byte [][]escSeqChars = { + { 0x1B, 0x28, 0x42}, /* (B ASCII */ + { 0x1B, 0x2E, 0x41}, /* .A ISO-8859-1 */ + { 0x1B, 0x2E, 0x46}, /* .F ISO-8859-7 */ + { 0x1B, 0x28, 0x4A}, /* (J JISX-201 */ + { 0x1B, 0x24, 0x42}, /* $B JISX-208 */ + { 0x1B, 0x24, 0x28, 0x44}, /* $(D JISX-212 */ + { 0x1B, 0x24, 0x41}, /* $A GB2312 */ + { 0x1B, 0x24, 0x28, 0x43}, /* $(C KSC5601 */ + { 0x1B, 0x28, 0x49} /* (I HWKANA_7BIT */ + }; + /* + * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS) + * Katakana. + * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks + * because Shift-JIS roundtrips half-width Katakana to single bytes. + * These were the only fallbacks in ICU's jisx-208.ucm file. + */ + private final static char []hwkana_fb = { + 0x2123, /* U+FF61 */ + 0x2156, + 0x2157, + 0x2122, + 0x2126, + 0x2572, + 0x2521, + 0x2523, + 0x2525, + 0x2527, + 0x2529, + 0x2563, + 0x2565, + 0x2567, + 0x2543, + 0x213C, /* U+FF70 */ + 0x2522, + 0x2524, + 0x2526, + 0x2528, + 0x252A, + 0x252B, + 0x252D, + 0x252F, + 0x2531, + 0x2533, + 0x2535, + 0x2537, + 0x2539, + 0x253B, + 0x253D, + 0x253F, /* U+FF80 */ + 0x2541, + 0x2544, + 0x2546, + 0x2548, + 0x254A, + 0x254B, + 0x254C, + 0x254D, + 0x254E, + 0x254F, + 0x2552, + 0x2555, + 0x2558, + 0x255B, + 0x255E, + 0x255F, /* U+FF90 */ + 0x2560, + 0x2561, + 0x2562, + 0x2564, + 0x2566, + 0x2568, + 0x2569, + 0x256A, + 0x256B, + 0x256C, + 0x256D, + 0x256F, + 0x2573, + 0x212B, + 0x212C /* U+FF9F */ + }; + + protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} }; + /****************************ISO-2022-JP************************************/ + private class CharsetEncoderISO2022JP extends CharsetEncoderICU { + public CharsetEncoderISO2022JP(CharsetICU cs) { + super(cs, fromUSubstitutionChar[0]); + } + + protected void implReset() { + super.implReset(); + myConverterData.reset(); + } + /* Map Unicode to 00..7F according to JIS X 0201. 
Return U+FFFE if unmappable. */ + private int jisx201FromU(int value) { + if (value <= 0x7f) { + if (value != 0x5c && value != 0x7e) { + return value; + } + } else if (value == 0xa5) { + return 0x5c; + } else if (value == 0x203e) { + return 0x7e; + } + return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe); + } + + /* + * Take a valid Shift-JIS byte pair, check that it is in the range corresponding + * to JIS X 0208, and convert it to a pair of 21..7E bytes. + * Return 0 if the byte pair is out of range. + */ + private int _2022FromSJIS(int value) { + short trail; + + if (value > 0xEFFC) { + return 0; /* beyond JIS X 0208 */ + } + + trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK); + + value &= 0xff00; /* lead byte */ + if (value <= 0x9f00) { + value -= 0x7000; + } else { /* 0xe000 <= value <= 0xef00 */ + value -= 0xb000; + } + + value <<= 1; + + if (trail <= 0x9e) { + value -= 0x100; + if (trail <= 0x7e) { + value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK); + } else { + value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK); + } + } else { /* trail <= 0xfc */ + value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK); + } + + return value; + } + /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */ + CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, + CharBuffer source, ByteBuffer target, IntBuffer offsets){ + CoderResult err = CoderResult.UNDERFLOW; + byte[] buffer = new byte[8]; + int i = 0; + byte[] subchar; + subchar = encoder.replacement(); + + byte cs; + if (myConverterData.fromU2022State.g == 1) { + /* JIS7: switch from G1 to G0 */ + myConverterData.fromU2022State.g = 0; + buffer[i++] = UConverterConstants.SI; + } + cs = myConverterData.fromU2022State.cs[0]; + + if (cs != ASCII && cs != JISX201) { + /* not in ASCII or JIS X 0201: switch to ASCII */ + myConverterData.fromU2022State.cs[0] = ASCII; + buffer[i++] = 0x1B; + buffer[i++] = 0x28; + buffer[i++] = 0x42; + } + + 
buffer[i++] = subchar[0]; + + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1); + + return err; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + int sourceChar; + byte cs, g; + int choiceCount; + int len, outLen; + byte[] choices = new byte[10]; + int targetValue = 0; + boolean usingFallback; + byte[] buffer = new byte[8]; + boolean getTrail = false; // use for getTrail label + int oldSourcePos; // for proper error handling + + choiceCount = 0; + + /* check if the last codepoint of previous buffer was a lead surrogate */ + if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { + getTrail = true; + } + + while (getTrail || source.hasRemaining()) { + if (getTrail || target.hasRemaining()) { + oldSourcePos = source.position(); + if (!getTrail) { /* skip if going to getTrail label */ + sourceChar = source.get(); + } + /* check if the char is a First surrogate */ + if (getTrail || UTF16.isSurrogate((char)sourceChar)) { + if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) { +// getTrail: + if (getTrail) { + getTrail = false; + } + /* look ahead to find the trail surrogate */ + if (source.hasRemaining()) { + /* test the following code unit */ + char trail = source.get(); + /* go back to the previous position */ + source.position(source.position()-1); + if (UTF16.isTrailSurrogate(trail)) { + source.get(); + sourceChar = UCharacter.getCodePoint((char)sourceChar, trail); + fromUChar32 = 0x00; + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + } else { + /* no more input */ + fromUChar32 = sourceChar; + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* 
callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + } + + /* do not convert SO/SI/ESC */ + if (IS_2022_CONTROL(sourceChar)) { + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + + /* do the conversion */ + + if (choiceCount == 0) { + char csm; + /* + * The csm variable keeps track of which charsets are allowed + * and not used yet while building the choices[]. + */ + csm = (char)jpCharsetMasks[myConverterData.version]; + choiceCount = 0; + + /* JIS7/8: try single-byte half-width Katakana before JISX208 */ + if (myConverterData.version == 3 || myConverterData.version == 4) { + choices[choiceCount++] = HWKANA_7BIT; + } + /* Do not try single-bit half-width Katakana for other versions. */ + csm &= ~CSM(HWKANA_7BIT); + + /* try the current G0 charset */ + choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0]; + csm &= ~CSM(cs); + + /* try the current G2 charset */ + if ((cs = myConverterData.fromU2022State.cs[2]) != 0) { + choices[choiceCount++] = cs; + csm &= ~CSM(cs); + } + + /* try all the other charsets */ + for (int i = 0; i < jpCharsetPref.length; i++) { + cs = jpCharsetPref[i]; + if ((CSM(cs) & csm) != 0) { + choices[choiceCount++] = cs; + csm &= ~CSM(cs); + } + } + } + + cs = g = 0; + /* + * len==0: no mapping found yet + * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks + * len>0: found a roundtrip result, done + */ + len = 0; + /* + * We will turn off usingFallBack after finding a fallback, + * but we still get fallbacks from PUA code points as usual. + * Therefore, we will also need to check that we don't overwrite + * an early fallback with a later one. 
+ */ + usingFallback = useFallback; + + for (int i = 0; i < choiceCount && len <= 0; i++) { + int[] value = new int[1]; + int len2; + byte cs0 = choices[i]; + switch (cs0) { + case ASCII: + if (sourceChar <= 0x7f) { + targetValue = sourceChar; + len = 1; + cs = cs0; + g = 0; + } + break; + case ISO8859_1: + if (GR96_START <= sourceChar && sourceChar <= GR96_END) { + targetValue = sourceChar - 0x80; + len = 1; + cs = cs0; + g = 2; + } + break; + case HWKANA_7BIT: + if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) { + if (myConverterData.version == 3) { + /* JIS7: use G1 (SO) */ + /* Shift U+FF61..U+FF9F to bytes 21..5F. */ + targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21))); + len = 1; + myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */ + g = 1; + } else if (myConverterData.version == 4) { + /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */ + /* Shift U+FF61..U+FF9F to bytes A1..DF. 
*/ + targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1))); + len = 1; + + cs = myConverterData.fromU2022State.cs[0]; + if (IS_JP_DBCS(cs)) { + /* switch from a DBCS charset to JISX201 */ + cs = JISX201; + } + /* else stay in the current G0 charset */ + g = 0; + } + /* else do not use HWKANA_7BIT with other versions */ + } + break; + case JISX201: + /* G0 SBCS */ + value[0] = jisx201FromU(sourceChar); + if (value[0] <= 0x7f) { + targetValue = value[0]; + len = 1; + cs = cs0; + g = 0; + usingFallback = false; + } + break; + case JISX208: + /* G0 DBCS from JIS table */ + myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0]; + myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2; + len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback); + //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); + if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */ + value[0] = _2022FromSJIS(value[0]); + if (value[0] != 0) { + targetValue = value[0]; + len = len2; + cs = cs0; + g = 0; + usingFallback = false; + } + } else if (len == 0 && usingFallback && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) { + targetValue = hwkana_fb[sourceChar - HWKANA_START]; + len = -2; + cs = cs0; + g = 0; + usingFallback = false; + } + break; + case ISO8859_7: + /* G0 SBCS forced to 7-bit output */ + len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback); + if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) { + targetValue = value[0] - 0x80; + len = len2; + cs = cs0; + g = 2; + usingFallback = false; + } + break; + default : + /* G0 DBCS */ + myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0]; + 
myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2; + len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback); + //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); + if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ + if (cs0 == KSC5601) { + /* + * Check for valid bytes for the encoding scheme. + * This is necessary because the sub-converter (windows-949) + * has a broader encoding scheme than is valid for 2022. + */ + value[0] = _2022FromGR94DBCS(value[0]); + if (value[0] == 0) { + break; + } + } + targetValue = value[0]; + len = len2; + cs = cs0; + g = 0; + usingFallback = false; + } + break; + } + } + + if (len != 0) { + if (len < 0) { + len = -len; /* fallback */ + } + outLen = 0; + + /* write SI if necessary (only for JIS7 */ + if (myConverterData.fromU2022State.g == 1 && g == 0) { + buffer[outLen++] = UConverterConstants.SI; + myConverterData.fromU2022State.g = 0; + } + + /* write the designation sequence if necessary */ + if (cs != myConverterData.fromU2022State.cs[g]) { + for (int i = 0; i < escSeqChars[cs].length; i++) { + buffer[outLen++] = escSeqChars[cs][i]; + } + myConverterData.fromU2022State.cs[g] = cs; + + /* invalidate the choices[] */ + choiceCount = 0; + } + + /* write the shift sequence if necessary */ + if (g != myConverterData.fromU2022State.g) { + switch (g) { + /* case 0 handled before writing escapes */ + case 1: + buffer[outLen++] = UConverterConstants.SO; + myConverterData.fromU2022State.g = 1; + break; + default : /* case 2 */ + buffer[outLen++] = 0x1b; + buffer[outLen++] = 0x4e; + break; + /* case 3: no SS3 in ISO-2022-JP-x */ + } + } + + /* write the output bytes */ + if (len == 1) { + buffer[outLen++] = (byte)targetValue; + } else { /* len == 2 */ + buffer[outLen++] = (byte)(targetValue >> 8); + buffer[outLen++] = (byte)targetValue; + } + }else { + /* + * if 
we cannot find the character after checking all codepages + * then this is an error. + */ + err = CoderResult.unmappableForLength(source.position()-oldSourcePos); + fromUChar32 = sourceChar; + break; + } + + if (sourceChar == CR || sourceChar == LF) { + /* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */ + myConverterData.fromU2022State.cs[2] = 0; + choiceCount = 0; + } + + /* output outLen>0 bytes in buffer[] */ + if (outLen == 1) { + target.put(buffer[0]); + if (offsets != null) { + offsets.put(source.remaining() - 1); /* -1 known to be ASCII */ + } + } else if (outLen == 2 && (target.position() + 2) <= target.limit()) { + target.put(buffer[0]); + target.put(buffer[1]); + if (offsets != null) { + int sourceIndex = source.position() - 1; + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + } else { + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1); + } + } else { + err = CoderResult.OVERFLOW; + break; + } + } + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for ISO-2022-JP conversion + * we need to be in ASCII mode at the very end + * + * conditions: + * successful + * in SO mode or not in ASCII mode + * end of input and no truncated input + */ + if (!err.isError() && + (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) && + flush && !source.hasRemaining() && fromUChar32 == 0) { + int sourceIndex; + + outLen = 0; + + if (myConverterData.fromU2022State.g != 0) { + buffer[outLen++] = UConverterConstants.SI; + myConverterData.fromU2022State.g = 0; + } + + if (myConverterData.fromU2022State.cs[0] != ASCII) { + for (int i = 0; i < escSeqChars[ASCII].length; i++) { + buffer[outLen++] = escSeqChars[ASCII][i]; + } + myConverterData.fromU2022State.cs[0] = ASCII; + } + + /* get the source index of the last input character */ + sourceIndex = source.position(); + if 
(sourceIndex > 0) { + --sourceIndex; + if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && + (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) { + --sourceIndex; + } + } else { + sourceIndex = -1; + } + + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex); + } + return err; + } + } + /****************************ISO-2022-CN************************************/ + /* + * Rules for ISO-2022-CN Encoding: + * i) The designator sequence must appear once on a line before any instance + * of chracter set it designates. + * ii) If two lines contain characters from the same character set, both lines + * must include the designator sequence. + * iii) Once the designator sequence is known, a shifting sequence has to be found + * to invoke the shifting + * iv) All lines start in ASCII and end in ASCII. + * v) Four shifting sequences are employed for this purpose: + * Sequence ASCII Eq Charsets + * --------- --------- -------- + * SI US-ASCII + * SO CNS-11643-1992 Plane 1, GB2312, ISO-IR-165 + * SS2 N CNS-11643-1992 Plane 2 + * SS3 O CNS-11643-1992 Planes 3-7 + * vi) + * SOdesignator : ESC "$" ")" finalchar_for_SO + * SS2designator : ESC "$" "*" finalchar_for_SS2 + * SS3designator : ESC "$" "+" finalchar_for_SS3 + * + * ESC $ ) A Indicates the bytes following SO are Chinese + * characters as defined in GB 2312-80, until + * another SOdesignation appears + * + * ESC $ ) E Indicates the bytes following SO are as defined + * in ISO-IR-165 (for details, see section 2.1), + * until another SOdesignation appears + * + * ESC $ ) G Indicates the bytes following SO are as defined + * in CNS 11643-plane-1, until another SOdesignation appears + * + * ESC $ * H Indicates teh two bytes immediately following + * SS2 is a Chinese character as defined in CNS + * 11643-plane-2, until another SS2designation + * appears + * (Meaning N must preceed ever 2 byte sequence.) 
+ * + * ESC $ + I Indicates the immediate two bytes following SS3 + * is a Chinese character as defined in CNS + * 11643-plane-3, until another SS3designation + * appears + * (Meaning O must preceed every 2 byte sequence.) + * + * ESC $ + J Indicates the immediate two bytes following SS3 + * is a Chinese character as defined in CNS + * 11643-plane-4, until another SS3designation + * appears + * (In English: O must preceed every 2 byte sequence.) + * + * ESC $ + K Indicates the immediate two bytes following SS3 + * is a Chinese character as defined in CNS + * 11643-plane-5, until another SS3designation + * appears + * + * ESC $ + L Indicates the immediate two bytes following SS3 + * is a Chinese character as defined in CNS + * 11643-plane-6, until another SS3designation + * appears + * + * ESC $ + M Indicates the immediate two bytes following SS3 + * is a Chinese character as defined in CNS + * 11643-plane-7, until another SS3designation + * appears + * + * As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and + * has its own designation information before any Chinese chracters + * appears + */ + + /* The following are defined this way to make strings truely readonly */ + private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 }; + private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 }; + private final static byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, 0x24, 0x29, 0x47 }; + private final static byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, 0x24, 0x2A, 0x48 }; + private final static byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, 0x24, 0x2B, 0x49 }; + private final static byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, 0x24, 0x2B, 0x4A }; + private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B }; + private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C }; + private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D }; + + /************************ 
ISO2022-CN Data *****************************/ + private final static byte[][] escSeqCharsCN = { + SHIFT_IN_STR, + GB_2312_80_STR, + ISO_IR_165_STR, + CNS_11643_1992_Plane_1_STR, + CNS_11643_1992_Plane_2_STR, + CNS_11643_1992_Plane_3_STR, + CNS_11643_1992_Plane_4_STR, + CNS_11643_1992_Plane_5_STR, + CNS_11643_1992_Plane_6_STR, + CNS_11643_1992_Plane_7_STR, + }; + + private class CharsetEncoderISO2022CN extends CharsetEncoderICU { + public CharsetEncoderISO2022CN(CharsetICU cs) { + super(cs, fromUSubstitutionChar[0]); + } + + protected void implReset() { + super.implReset(); + myConverterData.reset(); + } + + /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */ + CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, + CharBuffer source, ByteBuffer target, IntBuffer offsets){ + CoderResult err = CoderResult.UNDERFLOW; + byte[] buffer = new byte[8]; + int i = 0; + byte[] subchar; + subchar = encoder.replacement(); + + if (myConverterData.fromU2022State.g != 0) { + /* not in ASCII mode: switch to ASCII */ + myConverterData.fromU2022State.g = 0; + buffer[i++] = UConverterConstants.SI; + } + buffer[i++] = subchar[0]; + + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1); + + return err; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + int sourceChar; + byte[] buffer = new byte[8]; + int len; + byte[] choices = new byte[3]; + int choiceCount; + int targetValue = 0; + boolean usingFallback; + boolean gotoGetTrail = false; + int oldSourcePos; // For proper error handling + + choiceCount = 0; + + /* check if the last codepoint of previous buffer was a lead surrogate */ + if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { + // goto getTrail label + gotoGetTrail = true; + } + + while (source.hasRemaining() || gotoGetTrail) { + if (target.hasRemaining() || gotoGetTrail) { + oldSourcePos = 
source.position(); + if (!gotoGetTrail) { + sourceChar = source.get(); + } + /* check if the char is a First surrogate */ + if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) { + if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) { +// getTrail label + /* reset gotoGetTrail flag*/ + gotoGetTrail = false; + + /* look ahead to find the trail surrogate */ + if (source.hasRemaining()) { + /* test the following code unit */ + char trail = source.get(); + source.position(source.position()-1); + if (UTF16.isTrailSurrogate(trail)) { + source.get(); + sourceChar = UCharacter.getCodePoint((char)sourceChar, trail); + fromUChar32 = 0x00; + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + } else { + /* no more input */ + fromUChar32 = sourceChar; + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + } + + /* do the conversion */ + if (sourceChar <= 0x007f) { + /* do not converter SO/SI/ESC */ + if (IS_2022_CONTROL(sourceChar)) { + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + + /* US-ASCII */ + if (myConverterData.fromU2022State.g == 0) { + buffer[0] = (byte)sourceChar; + len = 1; + } else { + buffer[0] = UConverterConstants.SI; + buffer[1] = (byte)sourceChar; + len = 2; + myConverterData.fromU2022State.g = 0; + choiceCount = 0; + } + + if (sourceChar == CR || sourceChar == LF) { + /* reset the state at the end of a line */ + myConverterData.fromU2022State.reset(); + choiceCount = 0; + } + } else { + /* convert U+0080..U+10ffff */ + int i; + byte cs, g; + + if (choiceCount == 0) { + /* try the current SO/G1 converter first */ + choices[0] = 
myConverterData.fromU2022State.cs[1]; + + /* default to GB2312_1 if none is designated yet */ + if (choices[0] == 0) { + choices[0] = GB2312_1; + } + if (myConverterData.version == 0) { + /* ISO-2022-CN */ + /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */ + if (choices[0] == GB2312_1) { + choices[1] = CNS_11643_1; + } else { + choices[1] = GB2312_1; + } + + choiceCount = 2; + } else { + /* ISO-2022-CN-EXT */ + + /* try one of the other converters */ + switch (choices[0]) { + case GB2312_1: + choices[1] = CNS_11643_1; + choices[2] = ISO_IR_165; + break; + case ISO_IR_165: + choices[1] = GB2312_1; + choices[2] = CNS_11643_1; + break; + default : + choices[1] = GB2312_1; + choices[2] = ISO_IR_165; + break; + } + + choiceCount = 3; + } + } + + cs = g = 0; + /* + * len==0: no mapping found yet + * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks + * len>0: found a roundtrip result, done + */ + len = 0; + /* + * We will turn off usingFallback after finding a fallback, + * but we still get fallbacks from PUA code points as usual. + * Therefore, we will also need to check that we don't overwrite + * an early fallback with a later one. 
+ */ + usingFallback = useFallback; + + for (i = 0; i < choiceCount && len <= 0; ++i) { + byte cs0 = choices[i]; + if (cs0 > 0) { + int[] value = new int[1]; + int len2; + if (cs0 > CNS_11643_0) { + myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643]; + myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3; + len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback); + //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643], + // sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3); + if (len2 == 3 || (len2 == -3 && len == 0)) { + targetValue = value[0]; + cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80); + if (len2 >= 0) { + len = 2; + } else { + len = -2; + usingFallback = false; + } + if (cs == CNS_11643_1) { + g = 1; + } else if (cs == CNS_11643_2) { + g = 2; + } else if (myConverterData.version == 1) { /* plane 3..7 */ + g = 3; + } else { + /* ISO-2022-CN (without -EXT) does not support plane 3..7 */ + len = 0; + } + } + } else { + /* GB2312_1 or ISO-IR-165 */ + myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0]; + myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2; + len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback); + //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], + // sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); + if (len2 == 2 || (len2 == -2 && len == 0)) { + targetValue = value[0]; + len = len2; + cs = cs0; + g = 1; + usingFallback = false; + } + } + } + } + + if (len != 0) { + len = 0; /* count output bytes; it must have ben abs(len) == 2 */ + + /* write the designation sequence if necessary */ + if (cs != myConverterData.fromU2022State.cs[g]) { + if (cs < CNS_11643) { + for (int n = 0; n < escSeqCharsCN[cs].length; n++) { + buffer[n] = escSeqCharsCN[cs][n]; + } + } else { + for (int 
n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) { + buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n]; + } + } + len = 4; + myConverterData.fromU2022State.cs[g] = cs; + if (g == 1) { + /* changing the SO/G1 charset invalidates the choices[] */ + choiceCount = 0; + } + } + + /* write the shift sequence if necessary */ + if (g != myConverterData.fromU2022State.g) { + switch (g) { + case 1: + buffer[len++] = UConverterConstants.SO; + + /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */ + myConverterData.fromU2022State.g = 1; + break; + case 2: + buffer[len++] = 0x1b; + buffer[len++] = 0x4e; + break; + default: /* case 3 */ + buffer[len++] = 0x1b; + buffer[len++] = 0x4f; + break; + } + } + + /* write the two output bytes */ + buffer[len++] = (byte)(targetValue >> 8); + buffer[len++] = (byte)targetValue; + } else { + /* if we cannot find the character after checking all codepages + * then this is an error + */ + err = CoderResult.unmappableForLength(source.position()-oldSourcePos); + fromUChar32 = sourceChar; + break; + } + } + /* output len>0 bytes in buffer[] */ + if (len == 1) { + target.put(buffer[0]); + if (offsets != null) { + offsets.put(source.position()-1); + } + } else if (len == 2 && (target.remaining() >= 2)) { + target.put(buffer[0]); + target.put(buffer[1]); + if (offsets != null) { + int sourceIndex = source.position(); + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + } else { + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position()-1); + if (err.isError()) { + break; + } + } + } else { + err = CoderResult.OVERFLOW; + break; + } + } /* end while (source.hasRemaining() */ + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for ISO-2022-CN conversion + * we need to be in ASCII mode at the very end + * + * condtions: + * succesful + * not in ASCII mode + * end of input and no 
truncated input + */ + if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) { + int sourceIndex; + + /* we are switching to ASCII */ + myConverterData.fromU2022State.g = 0; + + /* get the source index of the last input character */ + sourceIndex = source.position(); + if (sourceIndex > 0) { + --sourceIndex; + if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && + (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) { + --sourceIndex; + } + } else { + sourceIndex = -1; + } + + err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex); + } + + return err; + } + } + /******************************** ISO-2022-KR *****************************/ + /* + * Rules for ISO-2022-KR encoding + * i) The KSC5601 designator sequence should appear only once in a file, + * at the begining of a line before any KSC5601 characters. This usually + * means that it appears by itself on the first line of the file + * ii) There are only 2 shifting sequences SO to shift into double byte mode + * and SI to shift into single byte mode + */ + private class CharsetEncoderISO2022KR extends CharsetEncoderICU { + public CharsetEncoderISO2022KR(CharsetICU cs) { + super(cs, fromUSubstitutionChar[myConverterData.version]); + } + + protected void implReset() { + super.implReset(); + myConverterData.reset(); + setInitialStateFromUnicodeKR(this); + } + + /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */ + CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, + CharBuffer source, ByteBuffer target, IntBuffer offsets){ + CoderResult err = CoderResult.UNDERFLOW; + byte[] buffer = new byte[8]; + int length, i = 0; + byte[] subchar; + + subchar = encoder.replacement(); + length = subchar.length; + + if (myConverterData.version == 0) { + if (length == 1) { + if (encoder.fromUnicodeStatus != 0) { + /* in DBCS mode: switch to SBCS */ + encoder.fromUnicodeStatus = 0; + 
buffer[i++] = UConverterConstants.SI; + } + buffer[i++] = subchar[0]; + } else { /* length == 2 */ + if (encoder.fromUnicodeStatus == 0) { + /* in SBCS mode: switch to DBCS */ + encoder.fromUnicodeStatus = 1; + buffer[i++] = UConverterConstants.SO; + } + buffer[i++] = subchar[0]; + buffer[i++] = subchar[1]; + } + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1); + } else { + /* save the subvonverter's substitution string */ + byte[] currentSubChars = myConverterData.currentEncoder.replacement(); + + /* set our substitution string into the subconverter */ + myConverterData.currentEncoder.replaceWith(subchar); + myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0]; + /* let the subconverter write the subchar, set/retrieve fromUChar32 state */ + myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32; + err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets); + encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32; + + /* restore the subconverter's substitution string */ + myConverterData.currentEncoder.replaceWith(currentSubChars); + + if (err.isOverflow()) { + if (myConverterData.currentEncoder.errorBufferLength > 0) { + encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone(); + } + encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength; + myConverterData.currentEncoder.errorBufferLength = 0; + } + } + + return err; + } + + private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + + myConverterData.currentEncoder.fromUChar32 = fromUChar32; + err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush); + fromUChar32 = myConverterData.currentEncoder.fromUChar32; + + if (err.isOverflow()) { + if (myConverterData.currentEncoder.errorBufferLength > 0) { + 
errorBuffer = myConverterData.currentEncoder.errorBuffer.clone(); + } + errorBufferLength = myConverterData.currentEncoder.errorBufferLength; + myConverterData.currentEncoder.errorBufferLength = 0; + } + + return err; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + int[] targetByteUnit = { 0x0000 }; + int sourceChar = 0x0000; + boolean isTargetByteDBCS; + boolean oldIsTargetByteDBCS; + boolean usingFallback; + int length = 0; + boolean gotoGetTrail = false; // for goto getTrail label call + + /* + * if the version is 1 then the user is requesting + * conversion with ibm-25546 pass the argument to + * MBCS converter and return + */ + if (myConverterData.version == 1) { + return encodeLoopIBM(source, target, offsets, flush); + } + + usingFallback = useFallback; + isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true; + if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { + gotoGetTrail = true; + } + + while (source.hasRemaining() || gotoGetTrail) { + targetByteUnit[0] = UConverterConstants.missingCharMarker; + + if (target.hasRemaining() || gotoGetTrail) { + if (!gotoGetTrail) { + sourceChar = source.get(); + + /* do not convert SO/SI/ESC */ + if (IS_2022_CONTROL(sourceChar)) { + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + fromUChar32 = sourceChar; + break; + } + myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2; + length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback); + //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); + if (length < 0) { + length = -length; /* fallback */ + } + /* only DBCS or SBCS characters are expected */ + /* DB characters with high bit set to 1 are expected */ + if (length > 2 || length == 0 || + (length == 1 && 
targetByteUnit[0] > 0x7f) || + (length ==2 && + ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) || + ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) { + targetByteUnit[0] = UConverterConstants.missingCharMarker; + } + } + if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) { + oldIsTargetByteDBCS = isTargetByteDBCS; + isTargetByteDBCS = (targetByteUnit[0] > 0x00FF); + /* append the shift sequence */ + if (oldIsTargetByteDBCS != isTargetByteDBCS) { + if (isTargetByteDBCS) { + target.put((byte)UConverterConstants.SO); + } else { + target.put((byte)UConverterConstants.SI); + } + if (offsets != null) { + offsets.put(source.position()-1); + } + } + /* write the targetUniChar to target */ + if (targetByteUnit[0] <= 0x00FF) { + if (target.hasRemaining()) { + target.put((byte)targetByteUnit[0]); + if (offsets != null) { + offsets.put(source.position()-1); + } + } else { + errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0]; + err = CoderResult.OVERFLOW; + } + } else { + if (target.hasRemaining()) { + target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80))); + if (offsets != null) { + offsets.put(source.position()-1); + } + if (target.hasRemaining()) { + target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80))); + if (offsets != null) { + offsets.put(source.position()-1); + } + } else { + errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80)); + err = CoderResult.OVERFLOW; + } + + } else { + errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)); + errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)); + err = CoderResult.OVERFLOW; + } + } + } else { + /* oops.. 
the code point is unassigned + * set the error and reason + */ + + /* check if the char is a First surrogate */ + if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) { + if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) { +// getTrail label + // reset gotoGetTrail flag + gotoGetTrail = false; + + /* look ahead to find the trail surrogate */ + if (source.hasRemaining()) { + /* test the following code unit */ + char trail = source.get(); + source.position(source.position()-1); + if (UTF16.isTrailSurrogate(trail)) { + source.get(); + sourceChar = UCharacter.getCodePoint((char)sourceChar, trail); + err = CoderResult.unmappableForLength(2); + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + } + } else { + /* no more input */ + err = CoderResult.UNDERFLOW; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate ) */ + /* callback(illegal) */ + err = CoderResult.malformedForLength(1); + } + } else { + /* callback(unassigned) for a BMP code point */ + err = CoderResult.unmappableForLength(1); + } + + fromUChar32 = sourceChar; + break; + } + } else { + err = CoderResult.OVERFLOW; + break; + } + } + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for ISO-2022-KR conversion + * we need to be inASCII mode at the very end + * + * conditions: + * successful + * not in ASCII mode + * end of input and no truncated input + */ + if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) { + int sourceIndex; + + /* we are switching to ASCII */ + isTargetByteDBCS = false; + + /* get the source index of the last input character */ + sourceIndex = source.position(); + if (sourceIndex > 0) { + --sourceIndex; + if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && 
UTF16.isLeadSurrogate(source.get(sourceIndex-1))) { + --sourceIndex; + } + } else { + sourceIndex = -1; + } + + CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex); + } + /*save the state and return */ + fromUnicodeStatus = isTargetByteDBCS ? 1 : 0; + + return err; + } + } + + public CharsetDecoder newDecoder() { + switch (variant) { + case ISO_2022_JP: + return new CharsetDecoderISO2022JP(this); + + case ISO_2022_CN: + return new CharsetDecoderISO2022CN(this); + + case ISO_2022_KR: + setInitialStateToUnicodeKR(); + return new CharsetDecoderISO2022KR(this); + + default: /* should not happen */ + return null; + } + } + + public CharsetEncoder newEncoder() { + CharsetEncoderICU cnv; + + switch (variant) { + case ISO_2022_JP: + return new CharsetEncoderISO2022JP(this); + + case ISO_2022_CN: + return new CharsetEncoderISO2022CN(this); + + case ISO_2022_KR: + cnv = new CharsetEncoderISO2022KR(this); + setInitialStateFromUnicodeKR(cnv); + return cnv; + + default: /* should not happen */ + return null; + } + } + + private void setInitialStateToUnicodeKR() { + if (myConverterData.version == 1) { + myConverterData.currentDecoder.toUnicodeStatus = 0; /* offset */ + myConverterData.currentDecoder.mode = 0; /* state */ + myConverterData.currentDecoder.toULength = 0; /* byteIndex */ + } + } + private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) { + /* ISO-2022-KR the designator sequence appears only once + * in a file so we append it only once + */ + if (cnv.errorBufferLength == 0) { + cnv.errorBufferLength = 4; + cnv.errorBuffer[0] = 0x1b; + cnv.errorBuffer[1] = 0x24; + cnv.errorBuffer[2] = 0x29; + cnv.errorBuffer[3] = 0x43; + } + if (myConverterData.version == 1) { + ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A; + myConverterData.currentEncoder.fromUChar32 = 0; + myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */ + } + } + + void getUnicodeSetImpl(UnicodeSet setFillIn, int which) 
{ + int i; + /*open a set and initialize it with code points that are algorithmically round-tripped */ + + switch(variant){ + case ISO_2022_JP: + /*include JIS X 0201 which is hardcoded */ + setFillIn.add(0xa5); + setFillIn.add(0x203e); + if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){ + /*include Latin-1 some variants of JP */ + setFillIn.add(0, 0xff); + + } + else { + /* include ASCII for JP */ + setFillIn.add(0, 0x7f); + } + if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){ + /* + * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit + * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana. + * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width + * Katakana via ESC. + * However, we only emit (fromUnicode) half-width Katakana according to the + * definition of each variant. + * + * When including fallbacks, + * we need to include half-width Katakana Unicode code points for all JP variants because + * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). + */ + /* include half-width Katakana for JP */ + setFillIn.add(HWKANA_START, HWKANA_END); + } + break; + case ISO_2022_CN: + /* Include ASCII for CN */ + setFillIn.add(0, 0x7f); + break; + case ISO_2022_KR: + /* there is only one converter for KR */ + myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which); + break; + default: + break; + } + + //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until + for(i=0; i 0x80 in an otherwise double-byte + * character set. So, for example, the LMBCS sequence x10 x10 xAE is the + * same as '0xAE' in the Japanese code page 943. + * + * Next, you will notice that the list of group bytes has some gaps. + * These are used in various ways. + * + * We reserve a few special single byte values for common control + * characters. 
These are in the same place as their ANSI equivalents for speed. + */ + private static final short ULMBCS_HT = 0x09; /* Fixed control-char - Horizontal Tab */ + private static final short ULMBCS_LF = 0x0A; /* Fixed control-char - Line Feed */ + private static final short ULMBCS_CR = 0x0D; /* Fixed control-char - Carriage Return */ + /* + * Then, 1-2-3 reserved a special single-byte character to put at the + * beginning of internal 'system' range names: + */ + private static final short ULMBCS_123SYSTEMRANGE = 0x19; + /* + * Then we needed a place to put all the other ansi control characters + * that must be moved to different values because LMBCS reserves those + * values for other purposes. To represent the control characters, we start + * with a first byte of 0x0F & add the control character value as the + * second byte. + */ + private static final short ULMBCS_GRP_CTRL = 0x0F; + /* + * For the C0 controls (less than 0x20), we add 0x20 to preserve the + * useful doctrine that any byte less than 0x20 in a LMBCS char must be + * the first byte of a character: + */ + private static final short ULMBCS_CTRLOFFSET = 0x20; + /* + * Where to put the characters that aren't part of any of the 12 national + * character sets? The first thing that was done, in the earlier years of + * LMBCS, was to use up the spaces of the form + * [G] D1, + * where 'G' was one of the single-byte character groups, and + * D1 was less than 0x80. These sequences are gathered together + * into a Lotus-invented doublebyte character set to represent a + * lot of stray values. Internally, in this implementation, we track this + * as group '0', as a place to tuck this exceptions list. 
+ */ + private static final short ULMBCS_GRP_EXCEPT = 0x00; + /* + * Finally, as the durability and usefulness of UNICODE became clear, + * LOTUS added a new group 0x14 to hold Unicode values not otherwise + * represented in LMBCS: + */ + private static final short ULMBCS_GRP_UNICODE = 0x14; + /* + * The two bytes appearing after a 0x14 are interpreted as UTF-16 BE + * (Big Endian) characters. The exception comes when UTF16 + * representation would have a zero as the second byte. In that case, + * 'F6' is used in its place, and the bytes are swapped. (This prevents + * LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: + * 0xF6xx is in the middle of the Private Use Area.) + */ + private static char ULMBCS_UNICOMPATZERO = 0x00F6; + /* + * It is also useful in our code to have a constant for the size of + * a LMBCS char that holds a literal Unicode value. + */ + private static final short ULMBCS_UNICODE_SIZE = 3; + /* + * To squish the LMBCS representation down even further, and to make + * translations even faster, sometimes the optimization group byte can be dropped + * from a LMBCS character. This is decided on a process-by-process basis. The + * group byte that is dropped is called the 'optimization group.' + * + * For Notes, the optimization group is always 0x1. + */ + //private static final short ULMBCS_DEFAULTOPTGROUP = 0x01; + /* For 1-2-3 files, the optimization group is stored in the header of the 1-2-3 + * file. + * In any case, when using ICU, you either pass in the + * optimization group as part of the name of the converter (LMBCS-1, LMBCS-2, + * etc.). Using plain 'LMBCS' as the name of the converter will give you + * LMBCS-1. + */ + + /* Implementation strategy */ + /* + * Because of the extensive use of other character sets, the LMBCS converter + * keeps a mapping between optimization groups and IBM character sets, so that + * ICU converters can be created and used as needed. 
+ * + * As you can see, even though any byte below 0x20 could be an optimization + * byte, only those at 0x13 or below can map to an actual converter. To limit + * some loops and searches, we define a value for that last group converter: + */ + private static final short ULMBCS_GRP_LAST = 0x13; /* last LMBCS group that has a converter */ + + private static final String[] OptGroupByteToCPName = { + /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ + /* 0x0001 */ "ibm-850", + /* 0x0002 */ "ibm-851", + /* 0x0003 */ "windows-1255", + /* 0x0004 */ "windows-1256", + /* 0x0005 */ "windows-1251", + /* 0x0006 */ "ibm-852", + /* 0x0007 */ null, /* Unused */ + /* 0x0008 */ "windows-1254", + /* 0x0009 */ null, /* Control char HT */ + /* 0x000A */ null, /* Control char LF */ + /* 0x000B */ "windows-874", + /* 0x000C */ null, /* Unused */ + /* 0x000D */ null, /* Control char CR */ + /* 0x000E */ null, /* Unused */ + /* 0x000F */ null, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */ + /* 0x0010 */ "windows-932", + /* 0x0011 */ "windows-949", + /* 0x0012 */ "windows-950", + /* 0x0013 */ "windows-936", + /* The rest are null, including the 0x0014 Unicode compatibility region + * and 0x0019, the 1-2-3 system range control char */ + /* 0x0014 */ null + }; + + /* That's approximately all the data that's needed for translating + * LMBCS to Unicode. + * + * However, to translate Unicode to LMBCS, we need some more support. + * + * That's because there are often more than one possible mappings from a Unicode + * code point back into LMBCS. The first thing we do is look up into a table + * to figure out if there are more than one possible mapplings. This table, + * arranged by Unicode values (including ranges) either lists which group + * to use, or says that it could go into one or more of the SBCS sets, or + * into one or more of the DBCS sets. 
(If the character exists in both DBCS & + * SBCS, the table will place it in the SBCS sets, to make the LMBCS code point + * length as small as possible. Here's the two special markers we use to indicate + * ambiguous mappings: + */ + private static final short ULMBCS_AMBIGUOUS_SBCS = 0x80; /* could fit in more than one + LMBCS sbcs native encoding + (example: most accented latin) */ + private static final short ULMBCS_AMBIGUOUS_MBCS = 0x81; /* could fit in more than one + LMBCS mbcs native encoding + (example: Unihan) */ + private static final short ULMBCS_AMBIGUOUS_ALL = 0x82; + + /* Tells whether group xgroup falls in the class of groups denoted by the ambiguity marker agroup: + * ULMBCS_AMBIGUOUS_ALL accepts every group, ULMBCS_AMBIGUOUS_SBCS only groups below + * ULMBCS_DOUBLEOPTGROUP_START, ULMBCS_AMBIGUOUS_MBCS only groups at or above it. + */ + private boolean ULMBCS_AMBIGUOUS_MATCH(short agroup, short xgroup) { + if (agroup == ULMBCS_AMBIGUOUS_ALL) { + return true; + } + boolean doubleByteGroup = xgroup >= ULMBCS_DOUBLEOPTGROUP_START; + if (agroup == ULMBCS_AMBIGUOUS_SBCS) { + return !doubleByteGroup; + } + return (agroup == ULMBCS_AMBIGUOUS_MBCS) && doubleByteGroup; + } + + /* The table & some code to use it: */ + private static class _UniLMBCSGrpMap { + int uniStartRange; + int uniEndRange; + short GrpType; + _UniLMBCSGrpMap(int uniStartRange, int uniEndRange, short GrpType) { + this.uniStartRange = uniStartRange; + this.uniEndRange = uniEndRange; + this.GrpType = GrpType; + } + } + + private static final _UniLMBCSGrpMap[] UniLMBCSGrpMap = { + new _UniLMBCSGrpMap(0x0001, 0x001F, ULMBCS_GRP_CTRL), + new _UniLMBCSGrpMap(0x0080, 0x009F, ULMBCS_GRP_CTRL), + new _UniLMBCSGrpMap(0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x00B7, 0x00D6, 
ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x01CE, 0x01CE, ULMBCS_GRP_TW ), + new _UniLMBCSGrpMap(0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x02BA, 0x02BA, ULMBCS_GRP_CN), + new _UniLMBCSGrpMap(0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x0400, 0x0400, ULMBCS_GRP_RU), + new _UniLMBCSGrpMap(0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x0402, 0x040F, ULMBCS_GRP_RU), + new _UniLMBCSGrpMap(0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x0432, 0x044E, ULMBCS_GRP_RU), + new _UniLMBCSGrpMap(0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x0450, 0x0491, ULMBCS_GRP_RU), + new _UniLMBCSGrpMap(0x05B0, 0x05F2, ULMBCS_GRP_HE), + new _UniLMBCSGrpMap(0x060C, 0x06AF, ULMBCS_GRP_AR), + new _UniLMBCSGrpMap(0x0E01, 0x0E5B, ULMBCS_GRP_TH), + new _UniLMBCSGrpMap(0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS), + new 
_UniLMBCSGrpMap(0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x2027, 0x2027, ULMBCS_GRP_TW), + new _UniLMBCSGrpMap(0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x203C, 0x203C, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2074, 0x2074, ULMBCS_GRP_KO), + new _UniLMBCSGrpMap(0x207F, 0x207F, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2081, 0x2084, ULMBCS_GRP_KO), + new _UniLMBCSGrpMap(0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS), + /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/ + new _UniLMBCSGrpMap(0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2153, 0x2154, ULMBCS_GRP_KO), + new _UniLMBCSGrpMap(0x215B, 0x215E, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x2194, 0x2195, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x21B8, 0x21B9, ULMBCS_GRP_CN), + new 
_UniLMBCSGrpMap(0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x21E7, 0x21E7, ULMBCS_GRP_CN), + new _UniLMBCSGrpMap(0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2201, 0x2201, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2204, 0x2206, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2209, 0x220A, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2219, 0x2219, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x221B, 0x221C, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x221F, 0x221F, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2245, 0x2248, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x224C, 0x224C, ULMBCS_GRP_TW), + new _UniLMBCSGrpMap(0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2262, 0x2265, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2284, 0x2285, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2288, 0x2297, ULMBCS_GRP_EXCEPT), + new 
_UniLMBCSGrpMap(0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2310, 0x2310, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2318, 0x2321, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2318, 0x2321, ULMBCS_GRP_CN), + new _UniLMBCSGrpMap(0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x2504, 0x2505, ULMBCS_GRP_TW), + new _UniLMBCSGrpMap(0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x2666, 0x2666, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL), + new _UniLMBCSGrpMap(0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x266F, 0x266F, ULMBCS_GRP_JA), + new _UniLMBCSGrpMap(0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT), + new _UniLMBCSGrpMap(0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS), + new _UniLMBCSGrpMap(0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS), + new _UniLMBCSGrpMap(0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE) + }; + + /* + * Looks up the group type recorded in UniLMBCSGrpMap for uniChar. + * Scans forward to the first entry whose uniEndRange is not below uniChar (the final + * 0xFFFF entry bounds the scan) and returns its GrpType when uniChar also reaches that + * entry's uniStartRange; a character that falls in a gap yields ULMBCS_GRP_UNICODE. + */ + static short FindLMBCSUniRange(char uniChar) { + int slot; + for (slot = 0; UniLMBCSGrpMap[slot].uniEndRange < uniChar; slot++) { + /* skip ranges that end before uniChar */ + } + + _UniLMBCSGrpMap candidate = UniLMBCSGrpMap[slot]; + return (candidate.uniStartRange <= uniChar) ? candidate.GrpType : ULMBCS_GRP_UNICODE; + } + + /* + * We also ask the creator of a converter to send in a preferred locale + * that we can use 
in resolving ambiguous mappings. They send the locale + * in as a string, and we map it, if possible, to one of the + * LMBCS groups. We use this table, and the associated code, to + * do the lookup: + * + * This table maps locale ID's to LMBCS opt groups. + * The default return is group 0x01. Note that for + * performance reasons, the table is sorted in + * increasing alphabetic order, with the notable + * exception of zhTW. This is to force the check + * for Traditional Chinese before dropping back to + * Simplified. + * Note too that the Latin-1 groups have been + * commented out because it's the default, and + * this shortens the table, allowing a serial + * search to go quickly. + */ + private static class _LocaleLMBCSGrpMap { + String LocaleID; + short OptGroup; + _LocaleLMBCSGrpMap(String LocaleID, short OptGroup) { + this.LocaleID = LocaleID; + this.OptGroup = OptGroup; + } + } + private static final _LocaleLMBCSGrpMap[] LocaleLMBCSGrpMap = { + new _LocaleLMBCSGrpMap("ar", ULMBCS_GRP_AR), + new _LocaleLMBCSGrpMap("be", ULMBCS_GRP_RU), + new _LocaleLMBCSGrpMap("bg", ULMBCS_GRP_L2), + // new _LocaleLMBCSGrpMap("ca", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("cs", ULMBCS_GRP_L2), + // new _LocaleLMBCSGrpMap("da", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("de", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("el", ULMBCS_GRP_GR), + // new _LocaleLMBCSGrpMap("en", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("es", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("et", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("fi", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("fr", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("he", ULMBCS_GRP_HE), + new _LocaleLMBCSGrpMap("hu", ULMBCS_GRP_L2), + // new _LocaleLMBCSGrpMap("is", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("it", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("iw", ULMBCS_GRP_HE), + new _LocaleLMBCSGrpMap("ja", ULMBCS_GRP_JA), + new _LocaleLMBCSGrpMap("ko", ULMBCS_GRP_KO), + // new _LocaleLMBCSGrpMap("lt", ULMBCS_GRP_L1), + // new 
_LocaleLMBCSGrpMap("lv", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("mk", ULMBCS_GRP_RU), + // new _LocaleLMBCSGrpMap("nl", ULMBCS_GRP_L1), + // new _LocaleLMBCSGrpMap("no", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("pl", ULMBCS_GRP_L2), + // new _LocaleLMBCSGrpMap("pt", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("ro", ULMBCS_GRP_L2), + new _LocaleLMBCSGrpMap("ru", ULMBCS_GRP_RU), + new _LocaleLMBCSGrpMap("sh", ULMBCS_GRP_L2), + new _LocaleLMBCSGrpMap("sk", ULMBCS_GRP_L2), + new _LocaleLMBCSGrpMap("sl", ULMBCS_GRP_L2), + new _LocaleLMBCSGrpMap("sq", ULMBCS_GRP_L2), + new _LocaleLMBCSGrpMap("sr", ULMBCS_GRP_RU), + // new _LocaleLMBCSGrpMap("sv", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("th", ULMBCS_GRP_TH), + new _LocaleLMBCSGrpMap("tr", ULMBCS_GRP_TR), + new _LocaleLMBCSGrpMap("uk", ULMBCS_GRP_RU), + // new _LocaleLMBCSGrpMap("vi", ULMBCS_GRP_L1), + new _LocaleLMBCSGrpMap("zhTW", ULMBCS_GRP_TW), + new _LocaleLMBCSGrpMap("zh", ULMBCS_GRP_CN), + new _LocaleLMBCSGrpMap(null, ULMBCS_GRP_L1) + }; + static short FindLMBCSLocale(String LocaleID) { + int index = 0; + + if (LocaleID == null) { + return 0; + } + + while (LocaleLMBCSGrpMap[index].LocaleID != null) { + if (LocaleLMBCSGrpMap[index].LocaleID == LocaleID) { + return LocaleLMBCSGrpMap[index].OptGroup; + } else if (LocaleLMBCSGrpMap[index].LocaleID.compareTo(LocaleID) > 0){ + break; + } + index++; + } + return ULMBCS_GRP_L1; + } + + /* + * Before we get to the main body of code, here's how we hook up the rest + * of ICU. ICU converters are required to define a structure that includes + * some function pointers, and some common data, in the style of a C++ + * vtable. There is also room in there for converter-specific data. LMBCS + * uses that converter-specific data to keep track of the 12 subconverters + * we use, the optimization group, and the group (if any) that matches the + * locale. We have one structure instantiated for each of the 12 possible + * optimization groups. 
+ */ + private class UConverterDataLMBCS { + UConverterSharedData[] OptGrpConverter; /* Converter per Opt. grp. */ + short OptGroup; /* default Opt. grp. for this LMBCS session */ + short localeConverterIndex; /* reasonable locale match for index */ + CharsetDecoderMBCS decoder; + CharsetEncoderMBCS encoder; + CharsetMBCS charset; + UConverterDataLMBCS() { + OptGrpConverter = new UConverterSharedData[ULMBCS_GRP_LAST + 1]; + charset = (CharsetMBCS)CharsetICU.forNameICU("ibm-850"); + encoder = (CharsetEncoderMBCS)charset.newEncoder(); + decoder = (CharsetDecoderMBCS)charset.newDecoder(); + } + } + + private UConverterDataLMBCS extraInfo; /* extraInfo in ICU4C implementation */ + + public CharsetLMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + maxBytesPerChar = ULMBCS_CHARSIZE_MAX; + minBytesPerChar = 1; + maxCharsPerByte = 1; + + extraInfo = new UConverterDataLMBCS(); + + for (int i = 0; i <= ULMBCS_GRP_LAST; i++) { + if (OptGroupByteToCPName[i] != null) { + extraInfo.OptGrpConverter[i] = ((CharsetMBCS)CharsetICU.forNameICU(OptGroupByteToCPName[i])).sharedData; + } + } + + //get the Opt Group number for the LMBCS converter + int option = Integer.parseInt(icuCanonicalName.substring(6)); + extraInfo.OptGroup = (short)option; + extraInfo.localeConverterIndex = FindLMBCSLocale(ULocale.getDefault().getBaseName()); + } + + class CharsetDecoderLMBCS extends CharsetDecoderICU { + public CharsetDecoderLMBCS(CharsetICU cs) { + super(cs); + implReset(); + } + + protected void implReset() { + super.implReset(); + } + + /* A function to call when we are looking at the Unicode group byte in LMBCS */ + private char GetUniFromLMBCSUni(ByteBuffer ppLMBCSin) { + short HighCh = (short)(ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + short LowCh = (short)(ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + + if (HighCh == ULMBCS_UNICOMPATZERO) { + HighCh = LowCh; + LowCh = 0; /* 
zero-byte in LSB special character */ + } + + return (char)((HighCh << 8) | LowCh); + } + + private int LMBCS_SimpleGetNextUChar(UConverterSharedData cnv, ByteBuffer source, int positionOffset, int length) { + int uniChar; + int oldSourceLimit; + int oldSourcePos; + + extraInfo.charset.sharedData = cnv; + + oldSourceLimit = source.limit(); + oldSourcePos = source.position(); + + source.position(oldSourcePos + positionOffset); + source.limit(source.position() + length); + + uniChar = extraInfo.decoder.simpleGetNextUChar(source, false); + + source.limit(oldSourceLimit); + source.position(oldSourcePos); + + return uniChar; + } + /* Return the Unicode representation for the current LMBCS character. */ + /* + * Note: Because there is no U_TRUNCATED_CHAR_FOUND error code in ICU4J, we + * are going to use BufferOverFlow. The error will be handled correctly + * by the calling function. + */ + private int LMBCSGetNextUCharWorker(ByteBuffer source, CoderResult[] err) { + int uniChar = 0; /* an output Unicode char */ + short CurByte; /* A byte from the input stream */ + + /* error check */ + if (!source.hasRemaining()) { + err[0] = CoderResult.malformedForLength(0); + return 0xffff; + } + /* Grab first byte & save address for error recovery */ + CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + + /* + * at entry of each if clause: + * 1. 'CurByte' points at the first byte of a LMBCS character + * 2. 'source' points to the next byte of the source stream after 'CurByte' + * + * the job of each if clause is: + * 1. set 'source' to the point at the beginning of the next char (not if LMBCS char is only 1 byte) + * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately + */ + /* First lets check the simple fixed values. 
*/ + if ((CurByte > ULMBCS_C0END && CurByte < ULMBCS_C1START) /* ascii range */ || + CurByte == 0 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR || CurByte == ULMBCS_LF || + CurByte == ULMBCS_123SYSTEMRANGE) { + + uniChar = CurByte; + } else { + short group; + UConverterSharedData cnv; + + if (CurByte == ULMBCS_GRP_CTRL) { /* Control character group - no opt group update */ + short C0C1byte; + /* CHECK_SOURCE_LIMIT(1) */ + if (source.position() + 1 > source.limit()) { + err[0] = CoderResult.OVERFLOW; + source.position(source.limit()); + return 0xFFFF; + } + C0C1byte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte; + } else if (CurByte == ULMBCS_GRP_UNICODE) { /* Unicode Compatibility group: Big Endian UTF16 */ + /* CHECK_SOURCE_LIMIT(2) */ + if (source.position() + 2 > source.limit()) { + err[0] = CoderResult.OVERFLOW; + source.position(source.limit()); + return 0xFFFF; + } + + /* don't check for error indicators fffe/ffff below */ + return GetUniFromLMBCSUni(source); + } else if (CurByte <= ULMBCS_CTRLOFFSET) { + group = CurByte; + if (group > ULMBCS_GRP_LAST || (cnv = extraInfo.OptGrpConverter[group]) == null) { + /* this is not a valid group byte - no converter */ + err[0] = CoderResult.unmappableForLength(1); + } else if (group >= ULMBCS_DOUBLEOPTGROUP_START) { + /* CHECK_SOURCE_LIMIT(2) */ + if (source.position() + 2 > source.limit()) { + err[0] = CoderResult.OVERFLOW; + source.position(source.limit()); + return 0xFFFF; + } + + /* check for LMBCS doubled-group-byte case */ + if (source.get(source.position()) == group) { + /* single byte */ + source.get(); + uniChar = LMBCS_SimpleGetNextUChar(cnv, source, 0, 1); + source.get(); + } else { + /* double byte */ + uniChar = LMBCS_SimpleGetNextUChar(cnv, source, 0, 2); + source.get(); + source.get(); + } + } else { /* single byte conversion */ + /* CHECK_SOURCE_LIMIT(1) */ + if (source.position() + 1 > 
source.limit()) { + err[0] = CoderResult.OVERFLOW; + source.position(source.limit()); + return 0xFFFF; + } + CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + + if (CurByte >= ULMBCS_C1START) { + uniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv.mbcs, CurByte); + } else { + /* + * The non-optimizable oddballs where there is an explicit byte + * AND the second byte is not in the upper ascii range + */ + byte[] bytes = new byte[2]; + + cnv = extraInfo.OptGrpConverter[ULMBCS_GRP_EXCEPT]; + + /* Lookup value must include opt group */ + bytes[0] = (byte)group; + bytes[1] = (byte)CurByte; + uniChar = LMBCS_SimpleGetNextUChar(cnv, ByteBuffer.wrap(bytes), 0, 2); + } + } + + } else if (CurByte >= ULMBCS_C1START) { /* group byte is implicit */ + group = extraInfo.OptGroup; + cnv = extraInfo.OptGrpConverter[group]; + if (group >= ULMBCS_DOUBLEOPTGROUP_START) { /* double byte conversion */ + if (CharsetMBCS.MBCS_ENTRY_IS_TRANSITION(cnv.mbcs.stateTable[0][CurByte]) /* isLeadByte */) { + /* CHECK_SOURCE_LIMIT(0) */ + if (source.position() + 0 > source.limit()) { + err[0] = CoderResult.OVERFLOW; + source.position(source.limit()); + return 0xFFFF; + } + + /* let the MBCS conversion consume CurByte again */ + uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 1); + } else { + /* CHECK_SOURCE_LIMIT(1) */ + if (source.position() + 1 > source.limit()) { + err[0] = CoderResult.OVERFLOW; + source.position(source.limit()); + return 0xFFFF; + } + + /* let the MBCS conversion consume CurByte again */ + uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 2); + source.get(); + } + } else { + uniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv.mbcs, CurByte); + } + } + } + + return uniChar; + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult[] err = new CoderResult[1]; + err[0] = CoderResult.UNDERFLOW; + byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX * 2]; /* Increase the size 
for proper handling in subsequent calls to MBCS functions */ + char uniChar; /* one output Unicode char */ + int saveSource; /* beginning of current code point */ + int errSource = 0; /* index to actual input in case an error occurs */ + byte savebytes = 0; + + /* Process from source to limit, or until error */ + while (err[0].isUnderflow() && source.hasRemaining() && target.hasRemaining()) { + saveSource = source.position(); /* beginning of current code point */ + if (toULength > 0) { /* reassemble char from previous call */ + int size_old = toULength; + ByteBuffer tmpSourceBuffer; + + /* limit from source is either remainder of temp buffer, or user limit on source */ + int size_new_maybe_1 = ULMBCS_CHARSIZE_MAX - size_old; + int size_new_maybe_2 = source.remaining(); + int size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2; + savebytes = (byte)(size_old + size_new); + for (int i = 0; i < savebytes; i++) { + if (i < size_old) { + LMBCS[i] = toUBytesArray[i]; + } else { + LMBCS[i] = source.get(); + } + } + tmpSourceBuffer = ByteBuffer.wrap(LMBCS); + tmpSourceBuffer.limit(savebytes); + uniChar = (char)LMBCSGetNextUCharWorker(tmpSourceBuffer, err); + source.position(saveSource + tmpSourceBuffer.position() - size_old); + errSource = saveSource - size_old; + + if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */ + /* evil special case: source buffers so small a char spans more than 2 buffers */ + toULength = savebytes; + for (int i = 0; i < savebytes; i++) { + toUBytesArray[i] = LMBCS[i]; + } + source.position(source.limit()); + err[0] = CoderResult.UNDERFLOW; + return err[0]; + } else { + /* clear the partial-char marker */ + toULength = 0; + } + } else { + errSource = saveSource; + uniChar = (char)LMBCSGetNextUCharWorker(source, err); + savebytes = (byte)(source.position() - saveSource); + } + + if (err[0].isUnderflow()) { + if (uniChar < 0x0fffe) { + target.put(uniChar); + if (offsets != null) { + 
offsets.put(saveSource); + } + } else if (uniChar == 0xfffe) { + err[0] = CoderResult.unmappableForLength(source.position() - saveSource); + } else /* if (uniChar == 0xffff) */ { + err[0] = CoderResult.malformedForLength(source.position() - saveSource); + } + } + } + /* If target ran out before source, return over flow buffer error. */ + if (err[0].isUnderflow() && source.hasRemaining() && !target.hasRemaining()) { + err[0] = CoderResult.OVERFLOW; + } else if (!err[0].isUnderflow()) { + /* If character incomplete or unmappable/illegal, store it in toUBytesArray[] */ + toULength = savebytes; + if (savebytes > 0) { + for (int i = 0; i < savebytes; i++) { + toUBytesArray[i] = source.get(errSource + i); + } + } + if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */ + err[0] = CoderResult.UNDERFLOW; + } + } + return err[0]; + } + } + + class CharsetEncoderLMBCS extends CharsetEncoderICU { + public CharsetEncoderLMBCS(CharsetICU cs) { + super(cs, fromUSubstitution); + implReset(); + } + + protected void implReset() { + super.implReset(); + } + /* + * Here's the basic helper function that we use when converting from + * Unicode to LMBCS, and we suspect that a Unicode character will fit into + * one of the 12 groups. The return value is the number of bytes written + * starting at pStartLMBCS (if any). 
+ */ + @SuppressWarnings("fallthrough") + private int LMBCSConversionWorker(short group, byte[] LMBCS, char pUniChar, short[] lastConverterIndex, boolean[] groups_tried) { + byte pLMBCS = 0; + UConverterSharedData xcnv = extraInfo.OptGrpConverter[group]; + + int bytesConverted; + int[] value = new int[1]; + short firstByte; + + extraInfo.charset.sharedData = xcnv; + bytesConverted = extraInfo.encoder.fromUChar32(pUniChar, value, false); + + /* get the first result byte */ + if (bytesConverted > 0) { + firstByte = (short)((value[0] >> ((bytesConverted - 1) * 8)) & UConverterConstants.UNSIGNED_BYTE_MASK); + } else { + /* most common failure mode is an unassigned character */ + groups_tried[group] = true; + return 0; + } + + lastConverterIndex[0] = group; + + /* + * All initial byte values in lower ascii range should have been caught by now, + * except with the exception group. + */ + + /* use converted data: first write 0, 1 or two group bytes */ + if (group != ULMBCS_GRP_EXCEPT && extraInfo.OptGroup != group) { + LMBCS[pLMBCS++] = (byte)group; + if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) { + LMBCS[pLMBCS++] = (byte)group; + } + } + + /* don't emit control chars */ + if (bytesConverted == 1 && firstByte < 0x20) { + return 0; + } + + /* then move over the converted data */ + switch (bytesConverted) { + case 4: + LMBCS[pLMBCS++] = (byte)(value[0] >> 24); + case 3: + LMBCS[pLMBCS++] = (byte)(value[0] >> 16); + case 2: + LMBCS[pLMBCS++] = (byte)(value[0] >> 8); + case 1: + LMBCS[pLMBCS++] = (byte)value[0]; + default: + /* will never occur */ + break; + } + + return pLMBCS; + } + /* + * This is a much simpler version of above, when we + * know we are writing LMBCS using the Unicode group. 
+ */ + private int LMBCSConvertUni(byte[] LMBCS, char uniChar) { + int index = 0; + short LowCh = (short)(uniChar & UConverterConstants.UNSIGNED_BYTE_MASK); + short HighCh = (short)((uniChar >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK); + + LMBCS[index++] = (byte)ULMBCS_GRP_UNICODE; + + if (LowCh == 0) { + LMBCS[index++] = (byte)ULMBCS_UNICOMPATZERO; + LMBCS[index++] = (byte)HighCh; + } else { + LMBCS[index++] = (byte)HighCh; + LMBCS[index++] = (byte)LowCh; + } + return ULMBCS_UNICODE_SIZE; + } + /* The main Unicode to LMBCS conversion function */ + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult err = CoderResult.UNDERFLOW; + short[] lastConverterIndex = new short[1]; + char uniChar; + byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX]; + byte pLMBCS; + int bytes_written; + boolean[] groups_tried = new boolean[ULMBCS_GRP_LAST+1]; + int sourceIndex = 0; + + /* + * Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) + * If that succeeds, see if it will all fit into the target & copy it over + * if it does. + * + * We try conversions in the following order: + * 1. Single-byte ascii & special fixed control chars (&null) + * 2. Look up group in table & try that (could b + * A) Unicode group + * B) control group + * C) national encodeing + * or ambiguous SBCS or MBCS group (on to step 4...) + * 3. If its ambiguous, try this order: + * A) The optimization group + * B) The locale group + * C) The last group that succeeded with this string. + * D) every other group that's relevant + * E) If its single-byte ambiguous, try the exceptions group + * 4. 
And as a grand fallback: Unicode + */ + + short OldConverterIndex = 0; + + while (source.hasRemaining() && err.isUnderflow()) { + OldConverterIndex = extraInfo.localeConverterIndex; + + if (!target.hasRemaining()) { + err = CoderResult.OVERFLOW; + break; + } + + uniChar = source.get(source.position()); + bytes_written = 0; + pLMBCS = 0; + + /* check cases in rough order of how common they are, for speed */ + + /* single-byte matches: strategy 1 */ + if((uniChar>=0x80) && (uniChar<=0xff) && (uniChar!=0xB1) && (uniChar!=0xD7) && (uniChar!=0xF7) && + (uniChar!=0xB0) && (uniChar!=0xB4) && (uniChar!=0xB6) && (uniChar!=0xA7) && (uniChar!=0xA8)) { + extraInfo.localeConverterIndex = ULMBCS_GRP_L1; + } + if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) || + uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR || + uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE) { + LMBCS[pLMBCS++] = (byte)uniChar; + bytes_written = 1; + } + + if (bytes_written == 0) { + /* Check by Unicode rage (Strategy 2) */ + short group = FindLMBCSUniRange(uniChar); + if (group == ULMBCS_GRP_UNICODE) { /* (Strategy 2A) */ + bytes_written = LMBCSConvertUni(LMBCS, uniChar); + } else if (group == ULMBCS_GRP_CTRL) { /* Strategy 2B) */ + /* Handle control characters here */ + if (uniChar <= ULMBCS_C0END) { + LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL; + LMBCS[pLMBCS++] = (byte)(ULMBCS_CTRLOFFSET + uniChar); + } else if (uniChar >= ULMBCS_C1START && uniChar <= (ULMBCS_C1START + ULMBCS_CTRLOFFSET)) { + LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL; + LMBCS[pLMBCS++] = (byte)uniChar; + } + bytes_written = pLMBCS; + } else if (group < ULMBCS_GRP_UNICODE) { /* (Strategy 2C) */ + /* a specific converter has been identified - use it */ + bytes_written = LMBCSConversionWorker(group, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + if (bytes_written == 0) { /* the ambiguous group cases (Strategy 3) */ + groups_tried = new boolean[ULMBCS_GRP_LAST+1]; + + /* check for non-default optimization group 
(Strategy 3A) */ + if (extraInfo.OptGroup != 1 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.OptGroup)) { + if(extraInfo.localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) { + bytes_written = LMBCSConversionWorker (ULMBCS_GRP_L1, LMBCS, uniChar, lastConverterIndex, groups_tried); + + if(bytes_written == 0) { + bytes_written = LMBCSConversionWorker (ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + if(bytes_written == 0) { + bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + } else { + bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + } + /* check for locale optimization group (Strategy 3B) */ + if (bytes_written == 0 && extraInfo.localeConverterIndex > 0 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.localeConverterIndex)) { + + bytes_written = LMBCSConversionWorker(extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + /* check for last optimization group used for this string (Strategy 3C) */ + if (bytes_written == 0 && lastConverterIndex[0] > 0 && ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex[0])) { + bytes_written = LMBCSConversionWorker(lastConverterIndex[0], LMBCS, uniChar, lastConverterIndex, groups_tried); + } + if (bytes_written == 0) { + /* just check every possible matching converter (Strategy 3D) */ + short grp_start; + short grp_end; + short grp_ix; + + grp_start = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_DOUBLEOPTGROUP_START : ULMBCS_GRP_L1; + grp_end = (group == ULMBCS_AMBIGUOUS_MBCS) ? 
ULMBCS_GRP_LAST : ULMBCS_GRP_TH; + + if(group == ULMBCS_AMBIGUOUS_ALL) { + grp_start = ULMBCS_GRP_L1; + grp_end = ULMBCS_GRP_LAST; + } + + for (grp_ix = grp_start; grp_ix <= grp_end && bytes_written == 0; grp_ix++) { + if (extraInfo.OptGrpConverter[grp_ix] != null && !groups_tried[grp_ix]) { + bytes_written = LMBCSConversionWorker(grp_ix, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + } + /* + * a final conversion fallback to the exceptions group if its likely + * to be single byte (Strategy 3E) + */ + if (bytes_written == 0 && grp_start == ULMBCS_GRP_L1) { + bytes_written = LMBCSConversionWorker(ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried); + } + } + /* all of our other strategies failed. Fallback to Unicode. (Strategy 4) */ + if (bytes_written == 0) { + bytes_written = LMBCSConvertUni(LMBCS, uniChar); + } + } + } + /* we have a translation. increment source and write as much as possible to target */ + source.get(); + pLMBCS = 0; + while (target.hasRemaining() && bytes_written > 0) { + bytes_written--; + target.put(LMBCS[pLMBCS++]); + if (offsets != null) { + offsets.put(sourceIndex); + } + } + sourceIndex++; + if (bytes_written > 0) { + /* + * write any bytes that didn't fit in target to the error buffer, + * common code will move this to target if we get called back with + * enough target room + */ + err = CoderResult.OVERFLOW; + errorBufferLength = bytes_written; + for (int i = 0; bytes_written > 0; i++, bytes_written--) { + errorBuffer[i] = LMBCS[pLMBCS++]; + } + } + extraInfo.localeConverterIndex = OldConverterIndex; + } + + return err; + } + } + public CharsetDecoder newDecoder() { + return new CharsetDecoderLMBCS(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderLMBCS(this); + } + + void getUnicodeSetImpl(UnicodeSet setFillIn, int which){ + getCompleteUnicodeSet(setFillIn); + } + private byte[] fromUSubstitution = new byte[]{ 0x3F }; +} diff --git 
a/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java new file mode 100644 index 00000000000..55d3834ea67 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java @@ -0,0 +1,5126 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.Buffer; +import java.nio.BufferOverflowException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.charset.UConverterSharedData.UConverterType; +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.InvalidFormatException; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +class CharsetMBCS extends CharsetICU { + + private byte[] fromUSubstitution = null; + UConverterSharedData sharedData = null; + private static final int MAX_VERSION_LENGTH = 4; + + // these variables are used in getUnicodeSet() and may be changed in future + // typedef enum UConverterSetFilter { + static final int UCNV_SET_FILTER_NONE = 1; + static final int UCNV_SET_FILTER_DBCS_ONLY = 2; + static final int UCNV_SET_FILTER_2022_CN = 3; + static final int UCNV_SET_FILTER_SJIS= 4 ; + static final int UCNV_SET_FILTER_GR94DBCS = 5; + static final int UCNV_SET_FILTER_HZ = 6; + static final int UCNV_SET_FILTER_COUNT = 7; + // } 
UConverterSetFilter; + + /** + * Fallbacks to Unicode are stored outside the normal state table and code point structures in a vector of items of + * this type. They are sorted by offset. + */ + final class MBCSToUFallback { + int offset; + int codePoint; + } + + /** + * This is the MBCS part of the UConverterTable union (a runtime data structure). It keeps all the per-converter + * data and points into the loaded mapping tables. + */ + static final class UConverterMBCSTable { + /* toUnicode */ + short countStates; + byte dbcsOnlyState; + boolean stateTableOwned; + int countToUFallbacks; + + int stateTable[/* countStates */][/* 256 */]; + int swapLFNLStateTable[/* countStates */][/* 256 */]; /* for swaplfnl */ + char unicodeCodeUnits[/* countUnicodeResults */]; + MBCSToUFallback toUFallbacks[/* countToUFallbacks */]; + + /* fromUnicode */ + char fromUnicodeTable[]; + byte fromUnicodeBytes[]; + byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */ + int fromUBytesLength; + short outputType, unicodeMask; + + /* converter name for swaplfnl */ + String swapLFNLName; + + /* extension data */ + UConverterSharedData baseSharedData; + // int extIndexes[]; + ByteBuffer extIndexes; // create int[] view etc. 
as needed + + CharBuffer mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */ + char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */ + boolean utf8Friendly; /* for utf8Friendly data */ + char maxFastUChar; /* for utf8Friendly data */ + + /* roundtrips */ + long asciiRoundtrips; + + UConverterMBCSTable() { + utf8Friendly = false; + mbcsIndex = null; + sbcsIndex = new char[SBCS_FAST_LIMIT>>6]; + } + + /* + * UConverterMBCSTable(UConverterMBCSTable t) { countStates = t.countStates; dbcsOnlyState = t.dbcsOnlyState; + * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable; + * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks = + * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes; + * swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; fromUBytesLength = t.fromUBytesLength; outputType = + * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData; + * extIndexes = t.extIndexes; } + */ + } + + /* Constants used in MBCS data header */ + // enum { + static final int MBCS_OPT_LENGTH_MASK=0x3f; + static final int MBCS_OPT_NO_FROM_U=0x40; + /* + * If any of the following options bits are set, + * then the file must be rejected. + */ + static final int MBCS_OPT_INCOMPATIBLE_MASK=0xffc0; + /* + * Remove bits from this mask as more options are recognized + * by all implementations that use this constant. + */ + static final int MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80; + // }; + /* Constants for fast and UTF-8-friendly conversion. 
*/ + // enum { + static final int SBCS_FAST_MAX=0x0fff; /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */ + static final int SBCS_FAST_LIMIT=SBCS_FAST_MAX+1; /* =0x1000 */ + static final int MBCS_FAST_MAX=0xd7ff; /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */ + static final int MBCS_FAST_LIMIT=MBCS_FAST_MAX+1; /* =0xd800 */ + // }; + /** + * MBCS data header. See data format description above. + */ + final class MBCSHeader { + byte version[/* U_MAX_VERSION_LENGTH */]; + int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes; + int flags; + int fromUBytesLength; + + /* new and required in version 5 */ + int options; + + /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */ + int fullStage2Length; /* number of 32-bit units */ + + MBCSHeader() { + version = new byte[MAX_VERSION_LENGTH]; + } + } + + public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath, + ClassLoader loader) throws InvalidFormatException { + super(icuCanonicalName, javaCanonicalName, aliases); + + /* See if the icuCanonicalName contains certain option information. 
*/ + if (icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) { + options = UConverterConstants.OPTION_SWAP_LFNL; + icuCanonicalName = icuCanonicalName.substring(0, icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING)); + super.icuCanonicalName = icuCanonicalName; + } + + // now try to load the data + sharedData = loadConverter(1, icuCanonicalName, classPath, loader); + + maxBytesPerChar = sharedData.staticData.maxBytesPerChar; + minBytesPerChar = sharedData.staticData.minBytesPerChar; + maxCharsPerByte = 1; + fromUSubstitution = sharedData.staticData.subChar; + subChar = sharedData.staticData.subChar; + subCharLen = sharedData.staticData.subCharLen; + subChar1 = sharedData.staticData.subChar1; + fromUSubstitution = new byte[sharedData.staticData.subCharLen]; + System.arraycopy(sharedData.staticData.subChar, 0, fromUSubstitution, 0, sharedData.staticData.subCharLen); + + initializeConverter(options); + } + + public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) + throws InvalidFormatException { + this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null); + } + + private UConverterSharedData loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader) + throws InvalidFormatException { + boolean noFromU = false; + // Read converter data from file + UConverterStaticData staticData = new UConverterStaticData(); + UConverterDataReader reader = null; + try { + String resourceName = classPath + "/" + myName + "." 
+ UConverterSharedData.DATA_TYPE; + InputStream i; + + if (loader != null) { + i = ICUData.getRequiredStream(loader, resourceName); + } else { + i = ICUData.getRequiredStream(resourceName); + } + BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE); + reader = new UConverterDataReader(b); + reader.readStaticData(staticData); + } catch (IOException e) { + throw new InvalidFormatException(); + } catch (Exception e) { + throw new InvalidFormatException(); + } + + UConverterSharedData data = null; + int type = staticData.conversionType; + + if (type != UConverterSharedData.UConverterType.MBCS + || staticData.structSize != UConverterStaticData.SIZE_OF_UCONVERTER_STATIC_DATA) { + throw new InvalidFormatException(); + } + + data = new UConverterSharedData(1, null, false, 0); + data.dataReader = reader; + data.staticData = staticData; + data.sharedDataCached = false; + + // Load data + UConverterMBCSTable mbcsTable = data.mbcs; + MBCSHeader header = new MBCSHeader(); + try { + reader.readMBCSHeader(header); + } catch (IOException e) { + throw new InvalidFormatException(); + } + + int offset; + // int[] extIndexesArray = null; + String baseNameString = null; + int[][] stateTableArray = null; + MBCSToUFallback[] toUFallbacksArray = null; + char[] unicodeCodeUnitsArray = null; + char[] fromUnicodeTableArray = null; + byte[] fromUnicodeBytesArray = null; + + if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) { + noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0); + } else if (header.version[0] != 4) { + throw new InvalidFormatException(); + } + + mbcsTable.outputType = (byte) header.flags; + + /* extension data, header version 4.2 and higher */ + offset = header.flags >>> 8; + // if(offset!=0 && mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) { + if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) { + try { + baseNameString = reader.readBaseTableName(); + if (offset != 0) { 
+ // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null + // terminator byte all already read; + mbcsTable.extIndexes = reader.readExtIndexes(offset + - (reader.bytesRead - reader.staticDataBytesRead)); + } + } catch (IOException e) { + throw new InvalidFormatException(); + } + } + + // agljport:add this would be unnecessary if extIndexes were memory mapped + /* + * if(mbcsTable.extIndexes != null) { + * + * try { //int nbytes = mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_LENGTH]*4 + + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_UCHARS_LENGTH]*2 + + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_LENGTH]*6 + + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_BYTES_LENGTH] + + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_12_LENGTH]*2 + + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3_LENGTH]*2 + + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3B_LENGTH]*4; //int nbytes = + * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_SIZE] //byte[] extTables = dataReader.readExtTables(nbytes); + * //mbcsTable.extTables = ByteBuffer.wrap(extTables); } catch(IOException e) { System.err.println("Caught + * IOException: " + e.getMessage()); pErrorCode[0] = UErrorCode.U_INVALID_FORMAT_ERROR; return; } } + */ + if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) { + UConverterSharedData baseSharedData = null; + ByteBuffer extIndexes; + String baseName; + + /* extension-only file, load the base table and set values appropriately */ + extIndexes = mbcsTable.extIndexes; + if (extIndexes == null) { + /* extension-only file without extension */ + throw new InvalidFormatException(); + } + + if (nestedLoads != 1) { + /* an extension table must not be loaded as a base table */ + throw new InvalidFormatException(); + } + + /* load the base table */ + baseName = baseNameString; + if (baseName.equals(staticData.name)) { + /* forbid loading this same extension-only file */ + throw new 
InvalidFormatException(); + } + + // agljport:fix args.size=sizeof(UConverterLoadArgs); + baseSharedData = loadConverter(2, baseName, classPath, loader); + + if (baseSharedData.staticData.conversionType != UConverterType.MBCS + || baseSharedData.mbcs.baseSharedData != null) { + // agljport:fix ucnv_unload(baseSharedData); + throw new InvalidFormatException(); + } + + /* copy the base table data */ + // agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't + // need the deep copy so can just make sure mbcs and its local reference both refer to the same new object + mbcsTable = data.mbcs = baseSharedData.mbcs; + + /* overwrite values with relevant ones for the extension converter */ + mbcsTable.baseSharedData = baseSharedData; + mbcsTable.extIndexes = extIndexes; + + /* + * It would be possible to share the swapLFNL data with a base converter, but the generated name would have + * to be different, and the memory would have to be free'd only once. It is easier to just create the data + * for the extension converter separately when it is requested. + */ + mbcsTable.swapLFNLStateTable = null; + mbcsTable.swapLFNLFromUnicodeBytes = null; + mbcsTable.swapLFNLName = null; + + /* + * Set a special, runtime-only outputType if the extension converter is a DBCS version of a base converter + * that also maps single bytes. 
+ */ + if (staticData.conversionType == UConverterType.DBCS + || (staticData.conversionType == UConverterType.MBCS && staticData.minBytesPerChar >= 2)) { + + if (baseSharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) { + /* the base converter is SI/SO-stateful */ + int entry; + + /* get the dbcs state from the state table entry for SO=0x0e */ + entry = mbcsTable.stateTable[0][0xe]; + if (MBCS_ENTRY_IS_FINAL(entry) && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_CHANGE_ONLY + && MBCS_ENTRY_FINAL_STATE(entry) != 0) { + mbcsTable.dbcsOnlyState = (byte) MBCS_ENTRY_FINAL_STATE(entry); + + mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY; + } + } else if (baseSharedData.staticData.conversionType == UConverterType.MBCS + && baseSharedData.staticData.minBytesPerChar == 1 + && baseSharedData.staticData.maxBytesPerChar == 2 && mbcsTable.countStates <= 127) { + + /* non-stateful base converter, need to modify the state table */ + int newStateTable[][/* 256 */]; + int state[]; // this works because java 2-D array is array of references and we can have state = + // newStateTable[i]; + int i, count; + + /* allocate a new state table and copy the base state table contents */ + count = mbcsTable.countStates; + newStateTable = new int[(count + 1) * 1024][256]; + + for (i = 0; i < mbcsTable.stateTable.length; ++i) + System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, + mbcsTable.stateTable[i].length); + + /* change all final single-byte entries to go to a new all-illegal state */ + state = newStateTable[0]; + for (i = 0; i < 256; ++i) { + if (MBCS_ENTRY_IS_FINAL(state[i])) { + state[i] = MBCS_ENTRY_TRANSITION(count, 0); + } + } + + /* build the new all-illegal state */ + state = newStateTable[count]; + for (i = 0; i < 256; ++i) { + state[i] = MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0); + } + mbcsTable.stateTable = newStateTable; + mbcsTable.countStates = (byte) (count + 1); + mbcsTable.stateTableOwned = true; + + mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY; + } + } + + /* 
+ * unlike below for files with base tables, do not get the unicodeMask from the sharedData; instead, use the + * base table's unicodeMask, which we copied in the memcpy above; this is necessary because the static data + * unicodeMask, especially the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data + */ + } else { + /* conversion file with a base table; an additional extension table is optional */ + /* make sure that the output type is known */ + switch (mbcsTable.outputType) { + case MBCS_OUTPUT_1: + case MBCS_OUTPUT_2: + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4: + case MBCS_OUTPUT_3_EUC: + case MBCS_OUTPUT_4_EUC: + case MBCS_OUTPUT_2_SISO: + /* OK */ + break; + default: + throw new InvalidFormatException(); + } + + stateTableArray = new int[header.countStates][256]; + toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks]; + for (int i = 0; i < toUFallbacksArray.length; ++i) + toUFallbacksArray[i] = new MBCSToUFallback(); + unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits) / 2]; + fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable) / 2]; + fromUnicodeBytesArray = new byte[header.fromUBytesLength]; + try { + reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray, + fromUnicodeBytesArray); + } catch (IOException e) { + throw new InvalidFormatException(); + } + + mbcsTable.countStates = (byte) header.countStates; + mbcsTable.countToUFallbacks = header.countToUFallbacks; + mbcsTable.stateTable = stateTableArray; + mbcsTable.toUFallbacks = toUFallbacksArray; + mbcsTable.unicodeCodeUnits = unicodeCodeUnitsArray; + + mbcsTable.fromUnicodeTable = fromUnicodeTableArray; + mbcsTable.fromUnicodeBytes = fromUnicodeBytesArray; + mbcsTable.fromUBytesLength = header.fromUBytesLength; + + /* + * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient + * function implementations + */ + // 
agljport:fix info.size=sizeof(UDataInfo); + // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info); + // agljport:fix if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) { + /* mask off possible future extensions to be safe */ + mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3); + // agljport:fix } else { + /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */ + // agljport:fix mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES; + // agljport:fix } + if (offset != 0) { + try { + // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null + // terminator byte all already read; + // int namelen = baseNameString != null? baseNameString.length() + 1: 0; + mbcsTable.extIndexes = reader.readExtIndexes(offset + - (reader.bytesRead - reader.staticDataBytesRead)); + } catch (IOException e) { + throw new InvalidFormatException(); + } + } + + if (header.version[1] >= 3 && (mbcsTable.unicodeMask & UConverterConstants.HAS_SURROGATES) == 0 && + (mbcsTable.countStates == 1 ? ((char)header.version[2] >= (SBCS_FAST_MAX>>8)) : ((char)header.version[2] >= (MBCS_FAST_MAX>>8)))) { + mbcsTable.utf8Friendly = true; + + if (mbcsTable.countStates == 1) { + /* + * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher. + * Build a table with indexes to each block, to be used instaed of + * the regular stage 1/2 table. + */ + for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) { + mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)]; + } + /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */ + mbcsTable.maxFastUChar = SBCS_FAST_MAX; + } else { + /* + * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher. 
+ * The .cnv file is prebuilt with an additional stage table with indexes to each block. + */ + if (noFromU) { + mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer(); + } + mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff); + } + } + /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */ + { + long asciiRoundtrips = 0xffffffff; + for (int i = 0; i < 0x80; ++i) { + if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) { + asciiRoundtrips&=~((long)1<<(i>>2))&UConverterConstants.UNSIGNED_INT_MASK; + } + } + mbcsTable.asciiRoundtrips = asciiRoundtrips&UConverterConstants.UNSIGNED_INT_MASK; + } + + if (noFromU) { + int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40; + int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2; + reconstituteData(mbcsTable, stage1Length, stage2Length, header.fullStage2Length); + } + if (mbcsTable.outputType == MBCS_OUTPUT_DBCS_ONLY || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) { + /* + * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip. + * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly. 
+ */ + mbcsTable.asciiRoundtrips = 0; + } + } + return data; + } + + private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) { + char[] table; + byte[] bytes; + int stage2; + int p; + int c; + int i, st3; + long temp; + + table = mbcsTable.fromUnicodeTable; + bytes = mbcsTable.fromUnicodeBytes; + + /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */ + switch(mbcsTable.outputType) { + case MBCS_OUTPUT_3_EUC: + if(value<=0xffff) { + /* short sequences are stored directly */ + /* code set 0 or 1 */ + } else if(value<=0x8effff) { + /* code set 2 */ + value&=0x7fff; + } else /* first byte is 0x8f */ { + /* code set 3 */ + value&=0xff7f; + } + break; + case MBCS_OUTPUT_4_EUC: + if(value<=0xffffff) { + /* short sequences are stored directly */ + /* code set 0 or 1 */ + } else if(value<=0x8effffff) { + /* code set 2 */ + value&=0x7fffff; + } else /* first byte is 0x8f */ { + /* code set 3 */ + value&=0xff7fff; + } + break; + default: + break; + } + + for(i=0; i<=0x1f; ++value, ++i) { + c=codePoints[i]; + if(c<0) { + continue; + } + + /* locate the stage 2 & 3 data */ + stage2 = table[c>>10] + ((c>>4)&0x3f); + st3 = table[stage2*2]<<16|table[stage2*2 + 1]; + st3 = (int)(char)(st3 * 16 + (c&0xf)); + + /* write the codepage bytes into stage 3 */ + switch(mbcsTable.outputType) { + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4_EUC: + p = st3*3; + bytes[p] = (byte)(value>>16); + bytes[p+1] = (byte)(value>>8); + bytes[p+2] = (byte)value; + break; + case MBCS_OUTPUT_4: + bytes[st3*4] = (byte)(value >> 24); + bytes[st3*4 + 1] = (byte)(value >> 16); + bytes[st3*4 + 2] = (byte)(value >> 8); + bytes[st3*4 + 3] = (byte)value; + break; + default: + /* 2 bytes per character */ + bytes[st3*2] = (byte)(value >> 8); + bytes[st3*2 + 1] = (byte)value; + break; + } + + /* set the roundtrip flag */ + temp = (1L<<(16+(c&0xf))); + table[stage2*2] |= (char)(temp>>16); + table[stage2*2 + 1] |= (char)temp; + } + return true; + } + + 
private static void reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length) { + int datalength = stage1Length*2+fullStage2Length*4+mbcsTable.fromUBytesLength; + int offset = 0; + byte[] stage = new byte[datalength]; + + for (int i = 0; i < stage1Length; ++i) { + stage[i*2] = (byte)(mbcsTable.fromUnicodeTable[i]>>8); + stage[i*2+1] = (byte)(mbcsTable.fromUnicodeTable[i]); + } + + offset = ((fullStage2Length - stage2Length) * 4) + (stage1Length * 2); + for (int i = 0; i < stage2Length; ++i) { + stage[offset + i*4] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]>>8); + stage[offset + i*4+1] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]); + stage[offset + i*4+2] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]>>8); + stage[offset + i*4+3] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]); + } + + /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ + + /* reconsitute the initial part of stage 2 from the mbcsIndex */ + { + int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6; + int stageUTF8Index=0; + int st1, st2, st3, i; + + for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) { + st2 = ((char)stage[2*st1]<<8) | stage[2*st1+1]; + if (st2 != stage1Length/2) { + /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */ + for (i = 0; i < 16; ++i) { + st3 = mbcsTable.mbcsIndex.get(stageUTF8Index++); + if (st3 != 0) { + /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */ + st3>>=4; + /* + * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are + * allocated together as a single 64-block for access from the mbcsIndex + */ + stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++; + stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; 
st3++; + stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++; + stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); + } else { + /* no stage 3 block, skip */ + st2+=4; + } + } + } else { + /* no stage 2 block, skip */ + stageUTF8Index+=16; + } + } + } + + char[] stage1 = new char[stage.length/2]; + for (int i = 0; i < stage1.length; ++i) { + stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK)); + } + byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))]; + System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length); + + mbcsTable.fromUnicodeTable = stage1; + mbcsTable.fromUnicodeBytes = stage2; + + /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */ + MBCSEnumToUnicode(mbcsTable); + } + + /* + * Internal function enumerating the toUnicode data of an MBCS converter. + * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U + * table, but could also be used for a future getUnicodeSet() option + * that includes reverse fallbacks (after updating this function's implementation). + * Currently only handles roundtrip mappings. + * Does not currently handle extensions. + */ + private static void MBCSEnumToUnicode(UConverterMBCSTable mbcsTable) { + /* + * Properties for each state, to speed up the enumeration. + * Ignorable actions are unassigned/illegal/state-change-only: + * They do not lead to mappings. + * + * Bits 7..6 + * 1 direct/initial state (stateful converters have mulitple) + * 0 non-initial state with transitions or with nonignorable result actions + * -1 final state with only ignorable actions + * + * Bits 5..3 + * The lowest byte value with non-ignorable actions is + * value<<5 (rounded down). 
+ * + * Bits 2..0: + * The highest byte value with non-ignorable actions is + * (value<<5)&0x1f (rounded up). + */ + byte stateProps[] = new byte[MBCS_MAX_STATE_COUNT]; + int state; + + /* recurse from state 0 and set all stateProps */ + getStateProp(mbcsTable.stateTable, stateProps, 0); + + for (state = 0; state < mbcsTable.countStates; ++state) { + if (stateProps[state] >= 0x40) { + /* start from each direct state */ + enumToU(mbcsTable, stateProps, state, 0, 0); + } + } + + + } + + private static boolean enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value) { + int[] codePoints = new int[32]; + int[] row; + char[] unicodeCodeUnits; + int anyCodePoints; + int b, limit; + + row = mbcsTable.stateTable[state]; + unicodeCodeUnits = mbcsTable.unicodeCodeUnits; + + value<<=8; + anyCodePoints = -1; /* becomes non-negative if there is a mapping */ + + b = (stateProps[state]&0x38)<<2; + if (b == 0 && stateProps[state] >= 0x40) { + /* skip byte sequences with leading zeros because they are note stored in the fromUnicode table */ + codePoints[0] = UConverterConstants.U_SENTINEL; + b = 1; + } + limit = ((stateProps[state]&7)+1)<<5; + while (b < limit) { + int entry = row[b]; + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + int nextState = MBCS_ENTRY_TRANSITION_STATE(entry); + if (stateProps[nextState] >= 0) { + /* recurse to a state with non-ignorable actions */ + if (!enumToU(mbcsTable, stateProps, nextState, offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), value|b)) { + return false; + } + } + codePoints[b&0x1f] = UConverterConstants.U_SENTINEL; + } else { + int c; + int action; + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. 
+ */ + action = MBCS_ENTRY_FINAL_ACTION(entry); + if (action == MBCS_STATE_VALID_DIRECT_16) { + /* output BMP code point */ + c = MBCS_ENTRY_FINAL_VALUE_16(entry); + } else if (action == MBCS_STATE_VALID_16) { + int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + c = unicodeCodeUnits[finalOffset]; + if (c < 0xfffe) { + /* output BMP code point */ + } else { + c = UConverterConstants.U_SENTINEL; + } + } else if (action == MBCS_STATE_VALID_16_PAIR) { + int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + c = unicodeCodeUnits[finalOffset++]; + if (c < 0xd800) { + /* output BMP code point below 0xd800 */ + } else if (c <= 0xdbff) { + /* output roundtrip or fallback supplementary code point */ + c = ((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00); + } else if (c == 0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + c = unicodeCodeUnits[finalOffset]; + } else { + c = UConverterConstants.U_SENTINEL; + } + } else if (action == MBCS_STATE_VALID_DIRECT_20) { + /* output supplementary code point */ + c = MBCS_ENTRY_FINAL_VALUE(entry)+0x10000; + } else { + c = UConverterConstants.U_SENTINEL; + } + + codePoints[b&0x1f] = c; + anyCodePoints&=c; + } + if (((++b)&0x1f) == 0) { + if(anyCodePoints>=0) { + if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20)&UConverterConstants.UNSIGNED_INT_MASK, codePoints)) { + return false; + } + anyCodePoints=-1; + } + } + } + + return true; + } + + /* + * Only called if stateProps[state]==-1. + * A recursive call may do stateProps[state]|=0x40 if this state is the target of an + * MBCS_STATE_CHANGE_ONLY. 
+ */ + private static byte getStateProp(int stateTable[][], byte stateProps[], int state) { + int[] row; + int min, max, entry, nextState; + + row = stateTable[state]; + stateProps[state] = 0; + + /* find first non-ignorable state */ + for (min = 0;;++min) { + entry = row[min]; + nextState = MBCS_ENTRY_STATE(entry); + if (stateProps[nextState] == -1) { + getStateProp(stateTable, stateProps, nextState); + } + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + if (stateProps[nextState] >- 0) { + break; + } + } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) { + break; + } + if (min == 0xff) { + stateProps[state] = -0x40; /* (byte)0xc0 */ + return stateProps[state]; + } + } + stateProps[state]|=(byte)((min>>5)<<3); + + /* find last non-ignorable state */ + for (max = 0xff; min < max; --max) { + entry = row[max]; + nextState = MBCS_ENTRY_STATE(entry); + if (stateProps[nextState] == -1) { + getStateProp(stateTable, stateProps, nextState); + } + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + if (stateProps[nextState] >- 0) { + break; + } + } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) { + break; + } + } + stateProps[state]|=(byte)(max>>5); + + /* recurse further and collect direct-state information */ + while (min <= max) { + entry = row[min]; + nextState = MBCS_ENTRY_STATE(entry); + if (stateProps[nextState] == -1) { + getStateProp(stateTable, stateProps, nextState); + } + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + stateProps[nextState]|=0x40; + if (MBCS_ENTRY_FINAL_ACTION(entry) <= MBCS_STATE_FALLBACK_DIRECT_20) { + stateProps[state]|=0x40; + } + } + ++min; + } + return stateProps[state]; + } + + protected void initializeConverter(int myOptions) { + UConverterMBCSTable mbcsTable; + ByteBuffer extIndexes; + short outputType; + byte maxBytesPerUChar; + + mbcsTable = sharedData.mbcs; + outputType = mbcsTable.outputType; + + if (outputType == MBCS_OUTPUT_DBCS_ONLY) { + /* the swaplfnl option does not apply, remove it */ + this.options = myOptions &= 
~UConverterConstants.OPTION_SWAP_LFNL; + } + + if ((myOptions & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + /* do this because double-checked locking is broken */ + boolean isCached; + + // agljport:todo umtx_lock(NULL); + isCached = mbcsTable.swapLFNLStateTable != null; + // agljport:todo umtx_unlock(NULL); + + if (!isCached) { + try { + if (!EBCDICSwapLFNL()) { + /* this option does not apply, remove it */ + this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL; + } + } catch (Exception e) { + /* something went wrong. */ + return; + } + } + } + + if (icuCanonicalName.toLowerCase().indexOf("gb18030") >= 0) { + /* set a flag for GB 18030 mode, which changes the callback behavior */ + this.options |= MBCS_OPTION_GB18030; + } else if (icuCanonicalName.toLowerCase().indexOf("keis") >= 0) { + this.options |= MBCS_OPTION_KEIS; + } else if (icuCanonicalName.toLowerCase().indexOf("jef") >= 0) { + this.options |= MBCS_OPTION_JEF; + } else if (icuCanonicalName.toLowerCase().indexOf("jips") >= 0) { + this.options |= MBCS_OPTION_JIPS; + } + + /* fix maxBytesPerUChar depending on outputType and options etc. */ + if (outputType == MBCS_OUTPUT_2_SISO) { + maxBytesPerChar = 3; /* SO+DBCS */ + } + + extIndexes = mbcsTable.extIndexes; + if (extIndexes != null) { + maxBytesPerUChar = (byte) GET_MAX_BYTES_PER_UCHAR(extIndexes); + if (outputType == MBCS_OUTPUT_2_SISO) { + ++maxBytesPerUChar; /* SO + multiple DBCS */ + } + + if (maxBytesPerUChar > maxBytesPerChar) { + maxBytesPerChar = maxBytesPerUChar; + } + } + } + /* EBCDIC swap LF<->NL--------------------------------------------------------------------------------*/ + /* + * This code modifies a standard EBCDIC<->Unicode mappling table for + * OS/390 (z/OS) Unix System Services (Open Edition). 
+ * The difference is in the mapping of Line Feed and New Line control codes: + * Standard EBDIC maps + * + * \x25 |0 + * \x15 |0 + * + * but OS/390 USS EBCDIC swaps the control codes for LF and NL, + * mapping + * + * \x15 |0 + * \x25 |0 + * + * This code modifies a loaded standard EBCDIC<->Unicode mapping table + * by copying it into allocated memory and swapping the LF and NL values. + * It allows to support the same EBCDIC charset in both version without + * duplicating the entire installed table. + */ + /* standard EBCDIC codes */ + private static final short EBCDIC_LF = 0x0025; + private static final short EBCDIC_NL = 0x0015; + + /* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */ + private static final short EBCDIC_RT_LF = 0x0f25; + private static final short EBCDIC_RT_NL = 0x0f15; + + /* Unicode code points */ + private static final short U_LF = 0x000A; + private static final short U_NL = 0x0085; + + private boolean EBCDICSwapLFNL() throws Exception { + UConverterMBCSTable mbcsTable; + + char[] table; + byte[] results; + byte[] bytes; + + int[][] newStateTable; + byte[] newResults; + String newName; + + int stage2Entry; +// int size; + int sizeofFromUBytes; + + mbcsTable = sharedData.mbcs; + + table = mbcsTable.fromUnicodeTable; + bytes = mbcsTable.fromUnicodeBytes; + results = bytes; + + /* + * Check that this is an EBCDIC table with SBCS portion - + * SBCS or EBCDIC with standard EBCDIC LF and NL mappings. + * + * If not, ignore the option Options are always ignored if they do not apply. 
+ */ + if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) && + mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) && + mbcsTable.stateTable[0][EBCDIC_NL] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL))) { + return false; + } + + if (mbcsTable.outputType == MBCS_OUTPUT_1) { + if (!(EBCDIC_RT_LF == MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) && + EBCDIC_RT_NL == MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL))) { + return false; + } + } else /* MBCS_OUTPUT_2_SISO */ { + stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF); + if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) && + EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF))) { + return false; + } + + stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL); + if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) && + EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL))) { + return false; + } + } + + if (mbcsTable.fromUBytesLength > 0) { + /* + * We _know_ the number of bytes in the fromUnicodeBytes array + * starting with header.version 4.1. + */ + sizeofFromUBytes = mbcsTable.fromUBytesLength; + } else { + /* + * Otherwise: + * There used to be code to enumerate the fromUnicode + * trie and find the highest entry, but it was removed in ICU 3.2 + * because it was not tested and caused a low code coverage number. + */ + throw new Exception("U_INVALID_FORMAT_ERROR"); + } + + /* + * The table has an appropriate format. 
+ * Allocate and build + * - a modified to-Unicode state table + * - a modified from-Unicode output array + * - a converter name string with the swap option appended + */ +// size = mbcsTable.countStates * 1024 + sizeofFromUBytes + UConverterConstants.MAX_CONVERTER_NAME_LENGTH + 20; + + /* copy and modify the to-Unicode state table */ + newStateTable = new int[mbcsTable.stateTable.length][mbcsTable.stateTable[0].length]; + for (int i = 0; i < newStateTable.length; i++) { + System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, newStateTable[i].length); + } + + newStateTable[0][EBCDIC_LF] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL); + newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF); + + /* copy and modify the from-Unicode result table */ + newResults = new byte[sizeofFromUBytes]; + System.arraycopy(bytes, 0, newResults, 0, sizeofFromUBytes); + /* conveniently, the table access macros work on the left side of expressions */ + if (mbcsTable.outputType == MBCS_OUTPUT_1) { + MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL); + MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF); + } else /* MBCS_OUTPUT_2_SISO */ { + stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF); + MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL); + + stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL); + MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF); + } + + /* set the canonical converter name */ + newName = new String(icuCanonicalName); + newName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING); + + if (mbcsTable.swapLFNLStateTable == null) { + mbcsTable.swapLFNLStateTable = newStateTable; + mbcsTable.swapLFNLFromUnicodeBytes = newResults; + mbcsTable.swapLFNLName = newName; + } + return true; + } + + /** + * MBCS output types for conversions from Unicode. 
 * These per-converter types determine the storage method in stage 3
 * of the lookup table, mostly how many bytes are stored per entry.
 */
static final int MBCS_OUTPUT_1 = 0; /* 0 */
static final int MBCS_OUTPUT_2 = MBCS_OUTPUT_1 + 1; /* 1 */
static final int MBCS_OUTPUT_3 = MBCS_OUTPUT_2 + 1; /* 2 */
static final int MBCS_OUTPUT_4 = MBCS_OUTPUT_3 + 1; /* 3 */
static final int MBCS_OUTPUT_3_EUC = 8; /* 8 */
static final int MBCS_OUTPUT_4_EUC = MBCS_OUTPUT_3_EUC + 1; /* 9 */
static final int MBCS_OUTPUT_2_SISO = 12; /* c */
static final int MBCS_OUTPUT_2_HZ = MBCS_OUTPUT_2_SISO + 1; /* d */
static final int MBCS_OUTPUT_EXT_ONLY = MBCS_OUTPUT_2_HZ + 1; /* e */
// static final int MBCS_OUTPUT_COUNT = MBCS_OUTPUT_EXT_ONLY + 1;
static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */

/* GB 18030 data ------------------------------------------------------------ */

/*
 * Helper (a macro in the C code) for linear values of GB 18030 four-byte sequences:
 * combines the four bytes a, b, c, d (only the low 8 bits of each are used) as
 * ((a*10 + b)*126 + c)*10 + d.
 */
private static long LINEAR_18030(long a, long b, long c, long d) {
    return ((((a & 0xff) * 10 + (b & 0xff)) * 126L + (c & 0xff)) * 10L + (d & 0xff));
}

/* linear value of the four-byte sequence 81 30 81 30 */
private static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);

/* linear value of a four-byte sequence packed into x, most significant byte first */
private static long LINEAR(long x) {
    return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
}

/*
 * Some ranges of GB 18030 where both the Unicode code points and the GB four-byte sequences are contiguous and are
 * handled algorithmically by the special callback functions below. The values are start & end of Unicode & GB
 * codes.
 *
 * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
+ */ + private static final long gb18030Ranges[][] = new long[/* 13 */][/* 4 */] { + { 0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L) }, + { 0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L) }, + { 0x0452L, 0x200FL, LINEAR(0x8130D330L), LINEAR(0x8136A531L) }, + { 0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L) }, + { 0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L) }, + { 0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L) }, + { 0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L) }, + { 0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L) }, + { 0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L) }, + { 0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L) }, + { 0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L) }, + { 0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L) }, + { 0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L) } }; + + /* bit flag for UConverter.options indicating GB 18030 special handling */ + private static final int MBCS_OPTION_GB18030 = 0x8000; + + /* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ + private static final int MBCS_OPTION_KEIS = 0x01000; + private static final int MBCS_OPTION_JEF = 0x02000; + private static final int MBCS_OPTION_JIPS = 0x04000; + + private static enum SISO_Option { + SI, + SO + } + + private static final byte[] KEIS_SO_CHAR = { 0x0A, 0x42 }; + private static final byte[] KEIS_SI_CHAR = { 0x0A, 0x41 }; + private static final byte JEF_SO_CHAR = 0x28; + private static final byte JEF_SI_CHAR = 0x29; + private static final byte[] JIPS_SO_CHAR = { 0x1A, 0x70 }; + private static final byte[] JIPS_SI_CHAR = { 0x1A, 0x71 }; + + private static int getSISOBytes(SISO_Option option, int cnvOption, byte[] value) { + int SISOLength = 0; + + switch (option) { + case SI: + if ((cnvOption&MBCS_OPTION_KEIS)!=0) { + value[0] = KEIS_SI_CHAR[0]; + value[1] = KEIS_SI_CHAR[1]; + SISOLength = 2; + } else if 
((cnvOption&MBCS_OPTION_JEF)!=0) { + value[0] = JEF_SI_CHAR; + SISOLength = 1; + } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) { + value[0] = JIPS_SI_CHAR[0]; + value[1] = JIPS_SI_CHAR[1]; + SISOLength = 2; + } else { + value[0] = UConverterConstants.SI; + SISOLength = 1; + } + break; + case SO: + if ((cnvOption&MBCS_OPTION_KEIS)!=0) { + value[0] = KEIS_SO_CHAR[0]; + value[1] = KEIS_SO_CHAR[1]; + SISOLength = 2; + } else if ((cnvOption&MBCS_OPTION_JEF)!=0) { + value[0] = JEF_SO_CHAR; + SISOLength = 1; + } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) { + value[0] = JIPS_SO_CHAR[0]; + value[1] = JIPS_SO_CHAR[1]; + SISOLength = 2; + } else { + value[0] = UConverterConstants.SO; + SISOLength = 1; + } + break; + default: + /* Should never happen. */ + break; + } + + return SISOLength; + } + // enum { + static final int MBCS_MAX_STATE_COUNT = 128; + // }; + /** + * MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries. + */ + static final int MBCS_STATE_VALID_DIRECT_16 = 0; + static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1; + static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1; + static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1; + static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1; + static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1; + static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1; + static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1; + static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1; + + static int MBCS_ENTRY_SET_STATE(int entry, int state) { + return (entry&0x80ffffff)|(state<<24L); + } + + static int MBCS_ENTRY_STATE(int entry) { + return (((entry)>>24)&0x7f); + } + + /* Methods for state table entries */ + static int MBCS_ENTRY_TRANSITION(int state, int offset) { + return (state << 24L) | offset; + } + + static int MBCS_ENTRY_FINAL(int 
state, int action, int value) { + return 0x80000000 | (state << 24L) | (action << 20L) | value; + } + + /* true when bit 31 of entry is clear, i.e. the int is non-negative */ + static boolean MBCS_ENTRY_IS_TRANSITION(int entry) { + return (entry) >= 0; + } + + /* true when bit 31 of entry is set, i.e. the int is negative */ + static boolean MBCS_ENTRY_IS_FINAL(int entry) { + return (entry) < 0; + } + + /* bits 31..24 of entry (unsigned shift) */ + static int MBCS_ENTRY_TRANSITION_STATE(int entry) { + return ((entry) >>> 24); + } + + /* low 24 bits of entry */ + static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) { + return ((entry) & 0xffffff); + } + + /* bits 30..24 of entry */ + static int MBCS_ENTRY_FINAL_STATE(int entry) { + return ((entry) >>> 24) & 0x7f; + } + + /* signed comparison with the (negative) int literal 0x80100000; for an entry with bit 31 set this is true only when bits 30..20 are all zero */ + static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) { + return ((entry) < 0x80100000); + } + + /* bits 23..20 of entry */ + static int MBCS_ENTRY_FINAL_ACTION(int entry) { + return ((entry) >>> 20) & 0xf; + } + + /* low 20 bits of entry */ + static int MBCS_ENTRY_FINAL_VALUE(int entry) { + return ((entry) & 0xfffff); + } + + /* low 16 bits of entry as a char */ + static char MBCS_ENTRY_FINAL_VALUE_16(int entry) { + return (char) (entry); + } + + /* tests bit (b>>2) of asciiRoundtrips; the mask 1<<(b>>2) is computed in int arithmetic, so the shift count is taken modulo 32 - presumably callers only pass b in 0..0x7f, TODO confirm */ + static boolean MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips) { + return (((asciiRoundtrips) & (1<<((b)>>2)))!=0); + } + + /** + * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. It works for single-byte, + * single-state codepages that only map to and from BMP code points, and it always returns fallback values.
+ */ + static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) { + return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b & UConverterConstants.UNSIGNED_BYTE_MASK]); + } + + /* single-byte fromUnicode: get the 16-bit result word (stored high byte first in results[]) */ + static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) { + int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f); + int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array + return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK)); + } + + /* single-byte fromUnicode: set the 16-bit result word with newValue (stored high byte first in results[]) */ + static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, byte[] results, int c, int newValue) { + int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f); + int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array + results[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK); + results[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK); + } + + /* multi-byte fromUnicode: get the 32-bit stage 2 entry (two consecutive chars of table[], high half first) */ + static int MBCS_STAGE_2_FROM_U(char[] table, int c) { + int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as + // int[] array + return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16) + | (table[i + 1] & UConverterConstants.UNSIGNED_SHORT_MASK); + } + + /* tests bit (16 + (c & 0xf)) of the stage 2 entry */ + private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) { + return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0); + } + + /* reads the 16-bit value stored high byte first at byte index 2 * (16 * (low 16 bits of stage2Entry) + (c & 0xf)) */ + static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) { + int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf)); + return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK)); + } + + static void
MBCS_VALUE_2_FROM_STAGE_2_SET(byte[] bytes, int stage2Entry, int c, int newValue) { + int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf)); + bytes[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK); + bytes[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK); + } + + /* reads the 32-bit value stored high byte first at byte index 4 * (16 * (low 16 bits of stage2Entry) + (c & 0xf)) */ + private static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) { + int i = 4 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf)); + return ((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 24) + | ((bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16) + | ((bytes[i + 2] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) + | (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK); + } + + /* returns the index 3 * (16 * (low 16 bits of stage2Entry) + (c & 0xf)); the bytes argument is not read here */ + static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) { + return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3); + } + + // ------------UConverterExt------------------------------------------------------- + + /* the EXT_* index constants below are consecutive ints starting at 0 */ + static final int EXT_INDEXES_LENGTH = 0; /* 0 */ + + static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */ + static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1; + static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1; + static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1; + + static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */ + static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1; + static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1; + static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1; + static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1; + + static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */ + static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1; + static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1; +
static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1; + static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1; + static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1; + static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1; + + private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */ + // private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1; + // private static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1; + // + // private static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */ + // + // private static final int EXT_SIZE=31; + // private static final int EXT_INDEXES_MIN_LENGTH=32; + + static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3; + + /* toUnicode helpers -------------------------------------------------------- */ + + private static final int TO_U_BYTE_SHIFT = 24; + private static final int TO_U_VALUE_MASK = 0xffffff; + private static final int TO_U_MIN_CODE_POINT = 0x1f0000; + private static final int TO_U_MAX_CODE_POINT = 0x2fffff; + private static final int TO_U_ROUNDTRIP_FLAG = (1 << 23); + private static final int TO_U_INDEX_MASK = 0x3ffff; + private static final int TO_U_LENGTH_SHIFT = 18; + private static final int TO_U_LENGTH_OFFSET = 12; + + /* maximum number of indexed UChars */ + static final int MAX_UCHARS = 19; + + /* top 8 bits of word (unsigned shift by TO_U_BYTE_SHIFT) */ + static int TO_U_GET_BYTE(int word) { + return word >>> TO_U_BYTE_SHIFT; + } + + /* low 24 bits of word */ + static int TO_U_GET_VALUE(int word) { + return word & TO_U_VALUE_MASK; + } + + /* tests bit 23 of value */ + static boolean TO_U_IS_ROUNDTRIP(int value) { + return (value & TO_U_ROUNDTRIP_FLAG) != 0; + } + + /* true when value, masked with UNSIGNED_INT_MASK, is below TO_U_MIN_CODE_POINT */ + static boolean TO_U_IS_PARTIAL(int value) { + return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT; + } + + /* a partial value is itself the index; returned unchanged */ + static int TO_U_GET_PARTIAL_INDEX(int value) { + return value; + } + + /* clears bit 23 (the roundtrip flag) of value */ + static int TO_U_MASK_ROUNDTRIP(int value) { + return value & ~TO_U_ROUNDTRIP_FLAG; + } + + private static int
TO_U_MAKE_WORD(byte b, int value) { + return ((b & UConverterConstants.UNSIGNED_BYTE_MASK) << TO_U_BYTE_SHIFT) | value; + } + + /* use after masking off the roundtrip flag */ + static boolean TO_U_IS_CODE_POINT(int value) { + return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT; + } + + /* subtracts TO_U_MIN_CODE_POINT from value */ + static int TO_U_GET_CODE_POINT(int value) { + return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT); + } + + /* low 18 bits of value */ + private static int TO_U_GET_INDEX(int value) { + return value & TO_U_INDEX_MASK; + } + + /* bits 18 and up of value, minus TO_U_LENGTH_OFFSET */ + private static int TO_U_GET_LENGTH(int value) { + return (value >>> TO_U_LENGTH_SHIFT) - TO_U_LENGTH_OFFSET; + } + + /* fromUnicode helpers ------------------------------------------------------ */ + + /* most trie constants are shared with ucnvmbcs.h */ + private static final int STAGE_2_LEFT_SHIFT = 2; + + // private static final int STAGE_3_GRANULARITY = 4; + + /* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */ + static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) { + return stage3.get(((int) stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT) + + (c & 0xf)); + } + + private static final int FROM_U_LENGTH_SHIFT = 24; + private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31; + static final int FROM_U_RESERVED_MASK = 0x60000000; + private static final int FROM_U_DATA_MASK = 0xffffff; + + /* special value for "no mapping" to (impossible roundtrip to 0 bytes, value 01) */ + static final int FROM_U_SUBCHAR1 = 0x80000001; + + /* at most 3 bytes in the lower part of the value */ + private static final int FROM_U_MAX_DIRECT_LENGTH = 3; + + /* maximum number of indexed bytes */ + static final int MAX_BYTES = 0x1f; + + /* true when bits 31..24 of value are all zero */ + static boolean FROM_U_IS_PARTIAL(int value) { + return (value >>> FROM_U_LENGTH_SHIFT) == 0; + } + + /* a partial value is itself the index; returned unchanged */ + static int FROM_U_GET_PARTIAL_INDEX(int value) { + return value; + } + + /* tests bit 31 of value */ + static boolean FROM_U_IS_ROUNDTRIP(int value) { + return (value &
FROM_U_ROUNDTRIP_FLAG) != 0; + } + + /* clears bit 31 (the roundtrip flag) of value */ + private static int FROM_U_MASK_ROUNDTRIP(int value) { + return value & ~FROM_U_ROUNDTRIP_FLAG; + } + + /* use after masking off the roundtrip flag */ + static int FROM_U_GET_LENGTH(int value) { + return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES; + } + + /* get bytes or bytes index */ + static int FROM_U_GET_DATA(int value) { + return value & FROM_U_DATA_MASK; + } + + /* get the pointer to an extension array from indexes[index]: the int stored at byte offset index*4 is used as the byte position of the array, itemType selects the view (int, char, short, otherwise a byte slice), and the position of indexes is restored before returning */ + static Buffer ARRAY(ByteBuffer indexes, int index, Class<?> itemType) { + int oldpos = indexes.position(); + Buffer b; + + indexes.position(indexes.getInt(index << 2)); + if (itemType == int.class) + b = indexes.asIntBuffer(); + else if (itemType == char.class) + b = indexes.asCharBuffer(); + else if (itemType == short.class) + b = indexes.asShortBuffer(); + else + // default or (itemType == byte.class) + b = indexes.slice(); + indexes.position(oldpos); + return b; + } + + /* returns the low 8 bits of indexes[EXT_COUNT_BYTES]; as a side effect the position of indexes is reset to 0 */ + private static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) { + indexes.position(0); + /* EXT_COUNT_BYTES is an int index, but ByteBuffer.getInt(int) takes a byte offset: scale by 4, as ARRAY() does */ + return indexes.getInt(EXT_COUNT_BYTES << 2) & 0xff; + } + + /* + * @return index of the UChar, if found; else <0 + */ + static int findFromU(CharBuffer fromUSection, int length, char u) { + int i, start, limit; + + /* binary search */ + start = 0; + limit = length; + for (;;) { + i = limit - start; + if (i <= 1) { + break; /* done */ + } + /* NOTE(review): the remainder of findFromU() and the text up to SISO_STATE() appear to be missing from this copy of the patch */ + /* startmode==0 is equivalent to firstLength==1. + */ + /* returns (byte) mode for MBCS_OUTPUT_2_SISO, 1 for MBCS_OUTPUT_DBCS_ONLY, and -1 for every other output type */ + private static int SISO_STATE(UConverterSharedData sharedData, int mode) { + return sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO ? (byte) mode + : sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1; + } + + class CharsetDecoderMBCS extends CharsetDecoderICU { + + CharsetDecoderMBCS(CharsetICU cs) { + super(cs); + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + /* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code.
+ */ + return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush); + } + + /* + * continue partial match with new input; never called for simple, single-character conversion + */ + private CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex, + boolean flush) { + CoderResult cr = CoderResult.UNDERFLOW; + + /* one-element array: matchToU() stores the matched value in value[0] */ + int[] value = new int[1]; + int match, length; + + match = matchToU((byte) SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source, + value, isToUUseFallback(), flush); + + if (match > 0) { + if (match >= preToULength) { + /* advance src pointer for the consumed input */ + source.position(source.position() + match - preToULength); + preToULength = 0; + } else { + /* the match did not use all of preToU[] - keep the rest for replay */ + length = preToULength - match; + System.arraycopy(preToUArray, preToUBegin + match, preToUArray, preToUBegin, length); + /* the kept length is stored negated */ + preToULength = (byte) -length; + } + + /* write result */ + cr = writeToU(value[0], target, offsets, srcIndex); + } else if (match < 0) { + /* save state for partial match */ + int j, sArrayIndex; + + /* just _append_ the newly consumed input to preToU[] */ + sArrayIndex = source.position(); + /* a negative match is the negated total number of bytes matched so far */ + match = -match; + /* NOTE(review): preToUArray is indexed here without preToUBegin, unlike the arraycopy calls in this method - confirm preToUBegin is always 0 */ + for (j = preToULength; j < match; ++j) { + preToUArray[j] = source.get(sArrayIndex++); + } + source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */ + preToULength = (byte) match; + } else /* match==0 */{ + /* + * no match + * + * We need to split the previous input into two parts: + * + * 1. The first codepage character is unmappable - that's how we got into trying the extension data in + * the first place. We need to move it from the preToU buffer to the error buffer, set an error code, + * and prepare the rest of the previous input for 2. + * + * 2. The rest of the previous input must be converted once we come back from the callback for the first + * character.
At that time, we have to try again from scratch to convert these input characters. The + * replay will be handled by the ucnv.c conversion code. + */ + + /* move the first codepage character to the error field */ + System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength); + toULength = preToUFirstLength; + + /* move the rest up inside the buffer */ + length = preToULength - preToUFirstLength; + if (length > 0) { + System.arraycopy(preToUArray, preToUBegin + preToUFirstLength, preToUArray, preToUBegin, length); + } + + /* mark preToU for replay */ + preToULength = (byte) -length; + + /* set the error code for unassigned */ + cr = CoderResult.unmappableForLength(preToUFirstLength); + } + return cr; + } + + /* + * this works like matchFromU() except - the first character is in pre - no trie is used - the returned + * matchLength is not offset by 2 + */ + /* Returns the number of matched bytes (>0) with the result stored in pMatchValue[0], 0 when nothing matches, or the negated number of bytes consumed so far when more input is needed to decide. */ + private int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source, + int[] pMatchValue, boolean isUseFallback, boolean flush) { + ByteBuffer cx = sharedData.mbcs.extIndexes; + IntBuffer toUTable, toUSection; + + int value, matchValue, srcLength = 0; + int i, j, index, length, matchLength; + short b; + + if (cx == null || cx.asIntBuffer().get(EXT_TO_U_LENGTH) <= 0) { + return 0; /* no extension data, no match */ + } + + /* initialize */ + toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class); + index = 0; + + matchValue = 0; + /* i counts bytes taken from preArray, j counts bytes taken from source */ + i = j = matchLength = 0; + if (source != null) { + srcLength = source.remaining(); + } + + if (sisoState == 0) { + /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ + if (preLength > 1) { + return 0; /* no match of a DBCS sequence in SBCS mode */ + } else if (preLength == 1) { + srcLength = 0; + } else /* preLength==0 */{ + if (srcLength > 1) { + srcLength = 1; + } + } + /* with flush set, running out of input below ends the search instead of returning a partial match */ + flush = true; + } + + /* we must not remember fallback matches when not using fallbacks */ + + /* match input units until
there is a full match or the input is consumed */ + for (;;) { + /* go to the next section */ + int oldpos = toUTable.position(); + toUSection = ((IntBuffer) toUTable.position(index)).slice(); + toUTable.position(oldpos); + + /* read first pair of the section */ + value = toUSection.get(); + /* the top byte of the first word is handed to findToU() below as the section length */ + length = TO_U_GET_BYTE(value); + value = TO_U_GET_VALUE(value); + if (value != 0 && (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) + && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) { + /* remember longest match so far */ + matchValue = value; + matchLength = i + j; + } + + /* match pre[] then src[] */ + if (i < preLength) { + b = (short) (preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK); + } else if (j < srcLength) { + /* absolute get: the position of source is not advanced here */ + b = (short) (source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK); + } else { + /* all input consumed, partial match */ + /* note: length is assigned i + j only when flush is false (short-circuit ||) */ + if (flush || (length = (i + j)) > MAX_BYTES) { + /* + * end of the entire input stream, stop with the longest match so far or: partial match must not + * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers + */ + break; + } else { + /* continue with more input next time */ + return -length; + } + } + + /* search for the current UChar */ + value = findToU(toUSection, length, b); + if (value == 0) { + /* no match here, stop with the longest match so far */ + break; + } else { + if (TO_U_IS_PARTIAL(value)) { + /* partial match, continue */ + index = TO_U_GET_PARTIAL_INDEX(value); + } else { + if ((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) { + /* full match, stop with result */ + matchValue = value; + matchLength = i + j; + } else { + /* full match on fallback not taken, stop with the longest match so far */ + } + break; + } + } + } + + if (matchLength == 0) { + /* no match at all */ + return 0; + } + + /* return result */ + pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue); + return matchLength; + } + +
/* + * Writes the result of an extension toUnicode match to target. If value encodes a single code point + * (TO_U_IS_CODE_POINT) it is written with toUWriteCodePoint(); otherwise TO_U_GET_LENGTH(value) chars are + * copied from the EXT_TO_U_UCHARS_INDEX array of the extension data, starting at TO_U_GET_INDEX(value), and + * written with toUWriteUChars(). offsets and srcIndex are passed through to those writers. Returns the + * CoderResult of the writer that was used. + */ private CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex) { + ByteBuffer cx = sharedData.mbcs.extIndexes; + /* output the result */ + if (TO_U_IS_CODE_POINT(value)) { + /* output a single code point */ + return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex); + } else { + /* output a string - with correct data we have resultLength>0 */ + + char[] a = new char[TO_U_GET_LENGTH(value)]; + CharBuffer cb = ((CharBuffer) ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class)); + cb.position(TO_U_GET_INDEX(value)); + cb.get(a, 0, a.length); + return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex); + } + } + /* + * Writes the code point c to target as one UTF-16 code unit (c <= 0xffff) or as a lead/trail surrogate + * pair. When offsets is non-null, sourceIndex is recorded once for each code unit actually written to + * target. Whatever did not fit - the whole code point if target was already full, or only the trail + * surrogate - is stored at the start of charErrorBufferArray via UTF16.append() and OVERFLOW is returned; + * otherwise UNDERFLOW is returned. + * NOTE(review): the final c >= 0 test relies on U_SENTINEL being negative - confirm. + */ + private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) { + CoderResult cr = CoderResult.UNDERFLOW; + int tBeginIndex = target.position(); + + if (target.hasRemaining()) { + if (c <= 0xffff) { + target.put((char) c); + c = UConverterConstants.U_SENTINEL; /* nothing left to write */ + } else /* c is a supplementary code point */{ + target.put(UTF16.getLeadSurrogate(c)); + c = UTF16.getTrailSurrogate(c); /* c now holds only the part still to be written */ + if (target.hasRemaining()) { + target.put((char) c); + c = UConverterConstants.U_SENTINEL; + } + } + + /* write offsets */ + if (offsets != null) { + offsets.put(sourceIndex); + if ((tBeginIndex + 1) < target.position()) { /* a second code unit was written */ + offsets.put(sourceIndex); + } + } + } + + /* write overflow from c */ + if (c >= 0) { + charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c); + cr = CoderResult.OVERFLOW; + } + + return cr; + } + + /* + * Input sequence: cnv->toUBytes[0..length[ @return if(U_FAILURE) return the length (toULength, byteIndex) for + * the input else return 0 after output has been written to the target + */ + private int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex, + boolean flush, CoderResult[] cr) { + // ByteBuffer cx; + + if (sharedData.mbcs.extIndexes != null + && initialMatchToU(length, source, target, offsets, sourceIndex,
flush, cr)) { + return 0; /* an extension mapping handled the input */ + } + + /* GB 18030 */ + if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) { + long[] range; + long linear; + int i; + + linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]); + for (i = 0; i < gb18030Ranges.length; ++i) { + range = gb18030Ranges[i]; + if (range[2] <= linear && linear <= range[3]) { + /* found the sequence, output the Unicode code point for it */ + cr[0] = CoderResult.UNDERFLOW; + + /* add the linear difference between the input and start sequences to the start code point */ + linear = range[0] + (linear - range[2]); + + /* output this code point */ + cr[0] = toUWriteCodePoint((int) linear, target, offsets, sourceIndex); + + return 0; + } + } + } + + /* no mapping */ + cr[0] = CoderResult.unmappableForLength(length); + return length; + } + + /* + * target 0) { + /* advance src pointer for the consumed input */ + source.position(source.position() + match - firstLength); + + /* write result to target */ + cr[0] = writeToU(value[0], target, offsets, srcIndex); + return true; + } else if (match < 0) { + /* save state for partial match */ + byte[] sArray; + int sArrayIndex; + int j; + + /* copy the first code point */ + sArray = toUBytesArray; + sArrayIndex = toUBytesBegin; + preToUFirstLength = (byte) firstLength; + for (j = 0; j < firstLength; ++j) { + preToUArray[j] = sArray[sArrayIndex++]; + } + + /* now copy the newly consumed input */ + sArrayIndex = source.position(); + match = -match; + for (; j < match; ++j) { + preToUArray[j] = source.get(sArrayIndex++); + } + source.position(sArrayIndex); + preToULength = (byte) match; + return true; + } else /* match==0 no match */{ + return false; + } + } + + /* + * Simple extension lookup for exactly one character: tries to map all bytes remaining in source to a single + * code point through matchToU(), with flush set and without SI/SO state. The position of source is left + * unchanged. + * + * Returns the code point on success, 0xffff if source has no remaining bytes, and 0xfffe if there is no + * match, the match does not cover exactly the remaining bytes, or the match maps to a string. + */ + private int simpleMatchToU(ByteBuffer source, boolean useFallback) { + int[] value = new int[1]; + int match; + + if (source.remaining() <= 0) { + return 0xffff; + } + + /* try to match */ + byte[] sourceArray; + int sourcePosition,
sourceLength; + if (!source.hasArray()) { + /* + * read-only and direct buffers expose no backing array (source.array() would throw), so copy at most + * EXT_MAX_BYTES of the remaining input; reading through duplicate() leaves the position of source alone + */ + sourceArray = new byte[Math.min(source.remaining(), EXT_MAX_BYTES)]; + source.duplicate().get(sourceArray); + sourcePosition = 0; /* relative to sourceArray */ + sourceLength = sourceArray.length; + } else { + sourceArray = source.array(); + /* index into the backing array: position() alone is wrong for buffers with a non-zero arrayOffset() */ + sourcePosition = source.arrayOffset() + source.position(); + /* matchToU() takes the number of bytes to match here, not the buffer limit */ + sourceLength = source.remaining(); + } + match = matchToU((byte) -1, sourceArray, sourcePosition, sourceLength, null, value, useFallback, true); + + if (match == source.remaining()) { + /* write result for simple, single-character conversion */ + if (TO_U_IS_CODE_POINT(value[0])) { + return TO_U_GET_CODE_POINT(value[0]); + } + } + + /* + * return no match because - match>0 && value points to string: simple conversion cannot handle multiple + * code points - match>0 && match!=length: not all input consumed, forbidden for this function - match==0: + * no match found in the first place - match<0: partial match, not supported for simple conversion (and + * flush==TRUE) + */ + return 0xfffe; + } + + CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult[] cr = { CoderResult.UNDERFLOW }; + + int sourceArrayIndex, sourceArrayIndexStart; + int stateTable[][/* 256 */]; + char[] unicodeCodeUnits; + + int offset; + byte state; + int byteIndex; + byte[] bytes; + + int sourceIndex, nextSourceIndex; + + int entry = 0; + char c; + byte action; + + if (preToULength > 0) { + /* + * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with + * continuous offsets + */ + cr[0] = continueMatchToU(source, target, offsets, -1, flush); + + if (cr[0].isError() || preToULength < 0) { + return cr[0]; + } + } + + if (sharedData.mbcs.countStates == 1) { + if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + cr[0] =
cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush); + } else { + cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush); + } + return cr[0]; + } + + /* set up the local pointers */ + sourceArrayIndex = sourceArrayIndexStart = source.position(); + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + stateTable = sharedData.mbcs.swapLFNLStateTable; + } else { + stateTable = sharedData.mbcs.stateTable; + } + unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits; + + /* get the converter state from UConverter */ + offset = toUnicodeStatus; + byteIndex = toULength; + bytes = toUBytesArray; + + /* + * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data + * (dbcsOnlyState==0 if it is not a DBCS-only converter) + */ + state = (byte)mode; + if (state == 0) { + state = sharedData.mbcs.dbcsOnlyState; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex = byteIndex == 0 ? 0 : -1; + nextSourceIndex = 0; + + /* conversion loop */ + while (sourceArrayIndex < source.limit()) { + /* + * This following test is to see if available input would overflow the output. It does not catch output + * of more than one code unit that overflows as a result of a surrogate pair or callback output from the + * last source byte. Therefore, those situations also test for overflows and will then break the loop, + * too. 
+ */ + if (!target.hasRemaining()) { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + break; + } + + if (byteIndex == 0) { + /* optimized loop for 1/2-byte input and BMP output */ + // agljport:todo see ucnvmbcs.c for deleted block + do { + entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK]; + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry); + offset = MBCS_ENTRY_TRANSITION_OFFSET(entry); + ++sourceArrayIndex; + if (sourceArrayIndex < source.limit() + && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK]) + && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16 + && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) { + ++sourceArrayIndex; + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + sourceIndex = (nextSourceIndex += 2); + } + state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + offset = 0; + } else { + /* set the state and leave the optimized loop */ + ++nextSourceIndex; + bytes[0] = source.get(sourceArrayIndex - 1); + byteIndex = 1; + break; + } + } else { + if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + ++sourceArrayIndex; + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + if (offsets != null) { + offsets.put(sourceIndex); + sourceIndex = ++nextSourceIndex; + } + state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + } else { + /* leave the optimized loop */ + break; + } + } + } while (sourceArrayIndex < source.limit() && target.hasRemaining()); + /* + * these tests and break statements could be put inside the loop if C had "break outerLoop" like + * Java + */ + if (sourceArrayIndex >= source.limit()) { + break; + } + if (!target.hasRemaining()) { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + break; + } + + ++nextSourceIndex; + bytes[byteIndex++] = source.get(sourceArrayIndex++); + } else 
/* byteIndex>0 */{ + ++nextSourceIndex; + entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++)) + & UConverterConstants.UNSIGNED_BYTE_MASK]; + } + + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry); + offset += MBCS_ENTRY_TRANSITION_OFFSET(entry); + continue; + } + + /* save the previous state for proper extension mapping with SI/SO-stateful converters */ + mode = state; + + /* set the next state early so that we can reuse the entry variable */ + state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + + /* + * An if-else-if chain provides more reliable performance for the most common cases compared to a + * switch. + */ + action = (byte)MBCS_ENTRY_FINAL_ACTION(entry); + if (action == MBCS_STATE_VALID_16) { + offset += MBCS_ENTRY_FINAL_VALUE_16(entry); + c = unicodeCodeUnits[offset]; + if (c < 0xfffe) { + /* output BMP code point */ + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + } else if (c == 0xfffe) { + if (isFallbackUsed() && (entry = getFallback(sharedData.mbcs, offset)) != 0xfffe) { + /* output fallback BMP code point */ + target.put((char)entry); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + } + } else { + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(byteIndex); + } + } else if (action == MBCS_STATE_VALID_DIRECT_16) { + /* output BMP code point */ + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + } else if (action == MBCS_STATE_VALID_16_PAIR) { + offset += MBCS_ENTRY_FINAL_VALUE_16(entry); + c = unicodeCodeUnits[offset++]; + if (c < 0xd800) { + /* output BMP code point below 0xd800 */ + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + } else if (isFallbackUsed() ? 
c <= 0xdfff : c <= 0xdbff) { + /* output roundtrip or fallback surrogate pair */ + target.put((char)(c & 0xdbff)); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + if (target.hasRemaining()) { + target.put(unicodeCodeUnits[offset]); + if (offsets != null) { + offsets.put(sourceIndex); + } + } else { + /* target overflow */ + charErrorBufferArray[0] = unicodeCodeUnits[offset]; + charErrorBufferLength = 1; + cr[0] = CoderResult.OVERFLOW; + + offset = 0; + break; + } + } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + target.put(unicodeCodeUnits[offset]); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + } else if (c == 0xffff) { + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(byteIndex); + } + } else if (action == MBCS_STATE_VALID_DIRECT_20 + || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) { + entry = MBCS_ENTRY_FINAL_VALUE(entry); + /* output surrogate pair */ + target.put((char)(0xd800 | (char)(entry >> 10))); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + c = (char)(0xdc00 | (char)(entry & 0x3ff)); + if (target.hasRemaining()) { + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + } + } else { + /* target overflow */ + charErrorBufferArray[0] = c; + charErrorBufferLength = 1; + cr[0] = CoderResult.OVERFLOW; + + offset = 0; + break; + } + } else if (action == MBCS_STATE_CHANGE_ONLY) { + /* + * This serves as a state change without any output. It is useful for reading simple stateful + * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used + * for more sophisticated state transitions. 
+ */ + if (sharedData.mbcs.dbcsOnlyState == 0) { + byteIndex = 0; + } else { + /* SI/SO are illegal for DBCS-only conversion */ + state = (byte)(mode); /* restore the previous state */ + + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(byteIndex); + } + } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) { + if (isFallbackUsed()) { + /* output BMP code point */ + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + if (offsets != null) { + offsets.put(sourceIndex); + } + byteIndex = 0; + } + } else if (action == MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if (action == MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(byteIndex); + } else { + /* reserved, must never occur */ + byteIndex = 0; + } + + /* end of action codes: prepare for a new character */ + offset = 0; + + if (byteIndex == 0) { + sourceIndex = nextSourceIndex; + } else if (cr[0].isError()) { + /* callback(illegal) */ + if (byteIndex > 1) { + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0); + byte i; + for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {} + if (i < byteIndex) { + byte backOutDistance = (byte)(byteIndex - i); + int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart; + byteIndex = i; /* length of reported illegal byte sequence */ + if (backOutDistance <= bytesFromThisBuffer) { + sourceArrayIndex -= backOutDistance; + } else { + /* Back out bytes from the previous buffer: Need to replay them. */ + this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance); + /* preToULength is negative! 
*/ + for (int n = 0; n < -this.preToULength; n++) { + this.preToUArray[n] = bytes[i+n]; + } + sourceArrayIndex = sourceArrayIndexStart; + } + } + } + break; + } else /* unassigned sequences indicated with byteIndex>0 */{ + /* try an extension mapping */ + int sourceBeginIndex = sourceArrayIndex; + source.position(sourceArrayIndex); + byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr); + sourceArrayIndex = source.position(); + sourceIndex = nextSourceIndex += (sourceArrayIndex - sourceBeginIndex); + + if (cr[0].isError() || cr[0].isOverflow()) { + /* not mappable or buffer overflow */ + break; + } + } + } + + /* set the converter state back into UConverter */ + toUnicodeStatus = offset; + mode = state; + toULength = byteIndex; + + /* write back the updated pointers */ + source.position(sourceArrayIndex); + + return cr[0]; + } + /* + * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages that + * only map to and from the BMP. In addition to single-byte optimizations, the offset calculations become much + * easier. 
+ */ + private CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, + boolean flush) { + CoderResult[] cr = { CoderResult.UNDERFLOW }; + + int sourceArrayIndex, lastSource; + int targetCapacity, length; + int[][] stateTable; + + int sourceIndex; + + int entry; + byte action; + + /* set up the local pointers */ + sourceArrayIndex = source.position(); + targetCapacity = target.remaining(); + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + stateTable = sharedData.mbcs.swapLFNLStateTable; + } else { + stateTable = sharedData.mbcs.stateTable; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex = 0; + lastSource = sourceArrayIndex; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the + * sourceLength and targetCapacity + */ + length = source.remaining(); + if (length < targetCapacity) { + targetCapacity = length; + } + + /* conversion loop */ + while (targetCapacity > 0 && sourceArrayIndex < source.limit()) { + entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK]; + /* MBCS_ENTRY_IS_FINAL(entry) */ + + /* test the most common case first */ + if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + --targetCapacity; + continue; + } + + /* + * An if-else-if chain provides more reliable performance for the most common cases compared to a + * switch. 
+ */ + action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry)); + if (action == MBCS_STATE_FALLBACK_DIRECT_16) { + if (isFallbackUsed()) { + /* output BMP code point */ + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + --targetCapacity; + continue; + } + } else if (action == MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if (action == MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(sourceArrayIndex - lastSource); + } else { + /* reserved, must never occur */ + continue; + } + + /* set offsets since the start or the last extension */ + if (offsets != null) { + int count = sourceArrayIndex - lastSource; + + /* predecrement: do not set the offset for the callback-causing character */ + while (--count > 0) { + offsets.put(sourceIndex++); + } + /* offset and sourceIndex are now set for the current character */ + } + + if (cr[0].isError()) { + /* callback(illegal) */ + break; + } else /* unassigned sequences indicated with byteIndex>0 */{ + /* try an extension mapping */ + lastSource = sourceArrayIndex; + toUBytesArray[0] = source.get(sourceArrayIndex - 1); + source.position(sourceArrayIndex); + toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr); + sourceArrayIndex = source.position(); + sourceIndex += 1 + (sourceArrayIndex - lastSource); + + if (cr[0].isError()) { + /* not mappable or buffer overflow */ + break; + } + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity = target.remaining(); + length = source.remaining(); + if (length < targetCapacity) { + targetCapacity = length; + } + } + } + + if (!cr[0].isError() && sourceArrayIndex < source.limit() && !target.hasRemaining()) { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + } + + /* set offsets since the start or the last callback */ + if (offsets != null) { + int count = sourceArrayIndex - lastSource; + while (count > 0) { + offsets.put(sourceIndex++); + --count; + } + } + + /* write back the updated 
pointers */ + source.position(sourceArrayIndex); + + return cr[0]; + } + + /* This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */ + private CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, + boolean flush) { + CoderResult[] cr = { CoderResult.UNDERFLOW }; + + int sourceArrayIndex; + int[][] stateTable; + + int sourceIndex; + + int entry; + char c; + byte action; + + /* set up the local pointers */ + sourceArrayIndex = source.position(); + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + stateTable = sharedData.mbcs.swapLFNLStateTable; + } else { + stateTable = sharedData.mbcs.stateTable; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex = 0; + + /* conversion loop */ + while (sourceArrayIndex < source.limit()) { + /* + * This following test is to see if available input would overflow the output. It does not catch output + * of more than one code unit that overflows as a result of a surrogate pair or callback output from the + * last source byte. Therefore, those situations also test for overflows and will then break the loop, + * too. + */ + if (!target.hasRemaining()) { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + break; + } + + entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK]; + /* MBCS_ENTRY_IS_FINAL(entry) */ + + /* test the most common case first */ + if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + if (offsets != null) { + offsets.put(sourceIndex); + } + + /* normal end of action codes: prepare for a new character */ + ++sourceIndex; + continue; + } + + /* + * An if-else-if chain provides more reliable performance for the most common cases compared to a + * switch. 
+ */ + action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry)); + if (action == MBCS_STATE_VALID_DIRECT_20 + || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) { + + entry = MBCS_ENTRY_FINAL_VALUE(entry); + /* output surrogate pair */ + target.put((char) (0xd800 | (char) (entry >>> 10))); + if (offsets != null) { + offsets.put(sourceIndex); + } + c = (char) (0xdc00 | (char) (entry & 0x3ff)); + if (target.hasRemaining()) { + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + } + } else { + /* target overflow */ + charErrorBufferArray[0] = c; + charErrorBufferLength = 1; + cr[0] = CoderResult.OVERFLOW; + break; + } + + ++sourceIndex; + continue; + } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) { + if (isFallbackUsed()) { + /* output BMP code point */ + target.put(MBCS_ENTRY_FINAL_VALUE_16(entry)); + if (offsets != null) { + offsets.put(sourceIndex); + } + + ++sourceIndex; + continue; + } + } else if (action == MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if (action == MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + } else { + /* reserved, must never occur */ + ++sourceIndex; + continue; + } + + if (cr[0].isError()) { + /* callback(illegal) */ + break; + } else /* unassigned sequences indicated with byteIndex>0 */{ + /* try an extension mapping */ + int sourceBeginIndex = sourceArrayIndex; + toUBytesArray[0] = source.get(sourceArrayIndex - 1); + source.position(sourceArrayIndex); + toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr); + sourceArrayIndex = source.position(); + sourceIndex += 1 + (sourceArrayIndex - sourceBeginIndex); + + if (cr[0].isError()) { + /* not mappable or buffer overflow */ + break; + } + } + } + + /* write back the updated pointers */ + source.position(sourceArrayIndex); + + return cr[0]; + } + + private int getFallback(UConverterMBCSTable mbcsTable, int offset) { + MBCSToUFallback[] toUFallbacks; + int i, start, limit; + + 
limit = mbcsTable.countToUFallbacks; + if (limit > 0) { + /* do a binary search for the fallback mapping */ + toUFallbacks = mbcsTable.toUFallbacks; + start = 0; + while (start < limit - 1) { + i = (start + limit) / 2; + if (offset < toUFallbacks[i].offset) { + limit = i; + } else { + start = i; + } + } + + /* did we really find it? */ + if (offset == toUFallbacks[start].offset) { + return toUFallbacks[start].codePoint; + } + } + + return 0xfffe; + } + + /** + * This is a simple version of _MBCSGetNextUChar() that is used by other converter implementations. It only + * returns an "assigned" result if it consumes the entire input. It does not use state from the converter, nor + * error codes. It does not handle the EBCDIC swaplfnl option (set in UConverter). It handles conversion + * extensions but not GB 18030. + * + * @return U+fffe unassigned U+ffff illegal otherwise the Unicode code point + */ + int simpleGetNextUChar(ByteBuffer source, boolean useFallback) { + + // #if 0 + // /* + // * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus + // * TODO In future releases, verify that this function is never called for SBCS + // * conversions, i.e., that sharedData->mbcs.countStates==1 is still true. + // * Removal improves code coverage. 
+ // */ + // /* use optimized function if possible */ + // if(sharedData->mbcs.countStates==1) { + // if(length==1) { + // return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback); + // } else { + // return 0xffff; /* illegal: more than a single byte for an SBCS converter */ + // } + // } + // #endif + + /* set up the local pointers */ + int[][] stateTable = sharedData.mbcs.stateTable; + char[] unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits; + + /* converter state */ + int offset = 0; + int state = sharedData.mbcs.dbcsOnlyState; + + int action; + int entry; + int c; + int i = source.position(); + int length = source.limit() - i; + + /* conversion loop */ + while (true) { + // entry=stateTable[state][(uint8_t)source[i++]]; + entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK]; + + if (MBCS_ENTRY_IS_TRANSITION(entry)) { + state = MBCS_ENTRY_TRANSITION_STATE(entry); + offset += MBCS_ENTRY_TRANSITION_OFFSET(entry); + + if (i == source.limit()) { + return 0xffff; /* truncated character */ + } + } else { + /* + * An if-else-if chain provides more reliable performance for the most common cases compared to a + * switch. + */ + action = MBCS_ENTRY_FINAL_ACTION(entry); + if (action == MBCS_STATE_VALID_16) { + offset += MBCS_ENTRY_FINAL_VALUE_16(entry); + c = unicodeCodeUnits[offset]; + if (c != 0xfffe) { + /* done */ + } else if (isToUUseFallback()) { + c = getFallback(sharedData.mbcs, offset); + } + /* else done with 0xfffe */ + } else if (action == MBCS_STATE_VALID_DIRECT_16) { + // /* output BMP code point */ + c = MBCS_ENTRY_FINAL_VALUE_16(entry); + } else if (action == MBCS_STATE_VALID_16_PAIR) { + offset += MBCS_ENTRY_FINAL_VALUE_16(entry); + c = unicodeCodeUnits[offset++]; + if (c < 0xd800) { + /* output BMP code point below 0xd800 */ + } else if (isToUUseFallback() ? 
c <= 0xdfff : c <= 0xdbff) { + /* output roundtrip or fallback supplementary code point */ + c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00)); + } else if (isToUUseFallback() ? (c & 0xfffe) == 0xe000 : c == 0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + c = unicodeCodeUnits[offset]; + } else if (c == 0xffff) { + return 0xffff; + } else { + c = 0xfffe; + } + } else if (action == MBCS_STATE_VALID_DIRECT_20) { + /* output supplementary code point */ + c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry); + } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) { + if (!isToUUseFallback(useFallback)) { + c = 0xfffe; + } else { + /* output BMP code point */ + c = MBCS_ENTRY_FINAL_VALUE_16(entry); + } + } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) { + if (!isToUUseFallback(useFallback)) { + c = 0xfffe; + } else { + /* output supplementary code point */ + c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry); + } + } else if (action == MBCS_STATE_UNASSIGNED) { + c = 0xfffe; + } else { + /* + * forbid MBCS_STATE_CHANGE_ONLY for this function, and MBCS_STATE_ILLEGAL and reserved action + * codes + */ + return 0xffff; + } + break; + } + } + + if (i != source.limit()) { + /* illegal for this function: not all input consumed */ + return 0xffff; + } + + if (c == 0xfffe) { + /* try an extension mapping */ + if (sharedData.mbcs.extIndexes != null) { + /* Increase the limit for proper handling. Used in LMBCS. */ + if (source.limit() > i + length) { + source.limit(i + length); + } + return simpleMatchToU(source, useFallback); + } + } + + return c; + } + private boolean hasValidTrailBytes(int[][] stateTable, short state) { + int[] row = stateTable[state]; + int b, entry; + /* First test for final entries in this state for some commonly valid byte values. 
*/ + entry = row[0xa1]; + if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) { + return true; + } + entry = row[0x41]; + if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) { + return true; + } + /* Then test for final entries in this state. */ + for (b = 0; b <= 0xff; b++) { + entry = row[b]; + if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) { + return true; + } + } + /* Then recurse for transition entries. */ + for (b = 0; b <= 0xff; b++) { + entry = row[b]; + if (MBCS_ENTRY_IS_TRANSITION(entry) && + hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) { + return true; + } + } + return false; + } + /* + * Reports whether byte value b, looked up in row state of stateTable, can begin a character. For a + * transition entry (a lead byte) the answer is whether the state it leads to has any valid trail bytes, + * as determined by hasValidTrailBytes(). For a final entry the answer is true unless the entry's action is + * MBCS_STATE_ILLEGAL, or it is MBCS_STATE_CHANGE_ONLY while isDBCSOnly is set. + * b is used directly as an index into the row, so callers pass it already masked to 0..0xff. + */ + private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) { + int[] row = stateTable[state]; + int entry = row[b]; + if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ + return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK)); + } else { + short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK); + if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { + return false; /* SI/SO are illegal for DBCS-only conversion */ + } else { + return (action != MBCS_STATE_ILLEGAL); + } + } + } + + + } + + class CharsetEncoderMBCS extends CharsetEncoderICU { + private boolean allowReplacementChanges = false; + + CharsetEncoderMBCS(CharsetICU cs) { + super(cs, fromUSubstitution); + allowReplacementChanges = true; // allow changes in implReplaceWith + implReset(); + } + + protected void implReset() { + super.implReset(); + preFromUFirstCP = UConverterConstants.U_SENTINEL; + } + + @SuppressWarnings("fallthrough") + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult[] cr = {
CoderResult.UNDERFLOW }; + // if (!source.hasRemaining() && fromUChar32 == 0) + // return cr[0]; + + int sourceArrayIndex; + char[] table; + byte[] pArray, bytes; + int pArrayIndex, outputType, c; + int prevSourceIndex, sourceIndex, nextSourceIndex; + int stage2Entry = 0, value = 0, length = 0, prevLength; + short uniMask; + // long asciiRoundtrips; + + byte[] si_value = new byte[2]; + byte[] so_value = new byte[2]; + int si_value_length = 0, so_value_length = 0; + + boolean gotoUnassigned = false; + + try { + + if (!flush && preFromUFirstCP >= 0) { + /* + * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change + * with continuous offsets + */ + cr[0] = continueMatchFromU(source, target, offsets, flush, -1); + + if (cr[0].isError() || preFromULength < 0) { + return cr[0]; + } + } + + /* use optimized function if possible */ + outputType = sharedData.mbcs.outputType; + uniMask = sharedData.mbcs.unicodeMask; + if (outputType == MBCS_OUTPUT_1 && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) { + if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush); + } else { + cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush); + } + return cr[0]; + } else if (outputType == MBCS_OUTPUT_2) { + cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush); + return cr[0]; + } + + table = sharedData.mbcs.fromUnicodeTable; + sourceArrayIndex = source.position(); + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes; + } else { + bytes = sharedData.mbcs.fromUnicodeBytes; + } + + // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips; + + /* get the converter state from UConverter */ + c = fromUChar32; + + if (outputType == MBCS_OUTPUT_2_SISO) { + prevLength = fromUnicodeStatus; + if (prevLength == 0) { + /* set the real value */ + prevLength = 1; + } + } else { + 
/* prevent fromUnicodeStatus from being set to something non-0 */ + prevLength = 0; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + prevSourceIndex = -1; + sourceIndex = c == 0 ? 0 : -1; + nextSourceIndex = 0; + + /* Get the SI/SO character for the converter */ + si_value_length = getSISOBytes(SISO_Option.SI, options, si_value); + so_value_length = getSISOBytes(SISO_Option.SO, options, so_value); + + /* conversion loop */ + /* + * This is another piece of ugly code: A goto into the loop if the converter state contains a first + * surrogate from the previous function call. It saves me to check in each loop iteration a check of + * if(c==0) and duplicating the trail-surrogate-handling code in the else branch of that check. I could + * not find any other way to get around this other than using a function call for the conversion and + * callback, which would be even more inefficient. + * + * Markus Scherer 2000-jul-19 + */ + boolean doloop = true; + boolean doread = true; + if (c != 0 && target.hasRemaining()) { + if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) { + // c is a lead surrogate, read another input + SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, + prevSourceIndex, prevLength); + doloop = getTrail(source, target, uniMask, x, flush, cr); + doread = x.doread; + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + prevSourceIndex = x.prevSourceIndex; + prevLength = x.prevLength; + } else { + // c is not a lead surrogate, do not read another input + doread = false; + } + } + + if (doloop) { + while (!doread || sourceArrayIndex < source.limit()) { + /* + * This following test is to see if available input would overflow the output. 
It does not catch + * output of more than one byte that overflows as a result of a multi-byte character or callback + * output from the last source character. Therefore, those situations also test for overflows + * and will then break the loop, too. + */ + if (target.hasRemaining()) { + /* + * Get a correct Unicode code point: a single UChar for a BMP code point or a matched + * surrogate pair for a "supplementary code point". + */ + + if (doread) { + // doread might be false only on the first looping + + c = source.get(sourceArrayIndex++); + ++nextSourceIndex; + + /* + * This also tests if the codepage maps single surrogates. If it does, then surrogates + * are not paired but mapped separately. Note that in this case unmatched surrogates are + * not detected. + */ + if (UTF16.isSurrogate((char) c) + && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) { + if (UTF16.isLeadSurrogate((char) c)) { + // getTrail: + SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, + nextSourceIndex, prevSourceIndex, prevLength); + doloop = getTrail(source, target, uniMask, x, flush, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + prevSourceIndex = x.prevSourceIndex; + + if (x.doread) { + if (doloop) + continue; + else + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + break; + } + } + } else { + doread = true; + } + /* convert the Unicode code point in c into codepage bytes */ + + /* + * The basic lookup is a triple-stage compact array (trie) lookup. For details see the + * beginning of this file. + * + * Single-byte codepages are handled with a different data structure by _MBCSSingle... + * functions. + * + * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are + * stored per character. The pointer points to the character's bytes in stage 3. 
Bits 15..0 + * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are + * flags for which of the 16 characters in the block are roundtrip-assigned. + * + * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as + * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in + * big-endian order. + * + * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest + * byte sequences, the first two bytes in this third stage indicate with their 7th bits + * whether these bytes are to be written directly or actually need to be preceeded by one of + * the two Single-Shift codes. With this, the third stage stores one byte fewer per + * character than the actual maximum length of EUC byte sequences. + * + * Other than that, leading zero bytes are removed and the other bytes output. A single zero + * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not + * support zero byte output as a fallback, and also does not allow output of leading zeros. + */ + stage2Entry = MBCS_STAGE_2_FROM_U(table, c); + + /* get the bytes and the length for the output */ + switch (outputType) { + /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */ + /* case MBCS_OUTPUT_2: + value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + length = 1; + } else { + length = 2; + } + break; */ + case MBCS_OUTPUT_2_SISO: + /* 1/2-byte stateful with Shift-In/Shift-Out */ + /* + * Save the old state in the converter object right here, then change the local + * prevLength state variable if necessary. Then, if this character turns out to be + * unassigned or a fallback that is not taken, the callback code must not save the new + * state in the converter because the new state is for a character that is not output. 
+ * However, the callback must still restore the state from the converter in case the + * callback function changed it for its output. + */ + fromUnicodeStatus = prevLength; /* save the old state */ + value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) { + /* no mapping, leave value==0 */ + length = 0; + } else if (prevLength <= 1) { + length = 1; + } else { + /* change from double-byte mode to single-byte */ + if (si_value_length == 1) { + value|=si_value[0]<<8; + length = 2; + } else if (si_value_length == 2) { + value|=si_value[1]<<8; + value|=si_value[0]<<16; + length = 3; + } + prevLength = 1; + } + } else { + if (prevLength == 2) { + length = 2; + } else { + /* change from single-byte mode to double-byte */ + if (so_value_length == 1) { + value|=so_value[0]<<16; + length = 3; + } else if (so_value_length == 2) { + value|=so_value[1]<<16; + value|=so_value[0]<<24; + length = 4; + } + prevLength = 2; + } + } + break; + case MBCS_OUTPUT_DBCS_ONLY: + /* table with single-byte results, but only DBCS mappings used */ + value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + /* no mapping or SBCS result, not taken for DBCS-only */ + value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */ + length = 0; + } else { + length = 2; + } + break; + case MBCS_OUTPUT_3: + pArray = bytes; + pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); + value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16) + | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) + | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK); + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + length = 1; + } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) { + length = 2; + } else { 
+ length = 3; + } + break; + case MBCS_OUTPUT_4: + value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c); + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + length = 1; + } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) { + length = 2; + } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) { + length = 3; + } else { + length = 4; + } + break; + case MBCS_OUTPUT_3_EUC: + value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + /* EUC 16-bit fixed-length representation */ + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + length = 1; + } else if ((value & 0x8000) == 0) { + value |= 0x8e8000; + length = 3; + } else if ((value & 0x80) == 0) { + value |= 0x8f0080; + length = 3; + } else { + length = 2; + } + break; + case MBCS_OUTPUT_4_EUC: + pArray = bytes; + pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); + value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16) + | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) + | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK); + /* EUC 16-bit fixed-length representation applied to the first two bytes */ + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + length = 1; + } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) { + length = 2; + } else if ((value & 0x800000) == 0) { + value |= 0x8e800000; + length = 4; + } else if ((value & 0x8000) == 0) { + value |= 0x8f008000; + length = 4; + } else { + length = 3; + } + break; + default: + /* must not occur */ + /* + * To avoid compiler warnings that value & length may be used without having been + * initialized, we set them here. In reality, this is unreachable code. Not having a + * default branch also causes warnings with some compilers. 
+ */ + value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */ + length = 0; + break; + } + + /* is this code point assigned, or do we use fallbacks? */ + if (gotoUnassigned || (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0)))) { + gotoUnassigned = false; + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way + * with this data structure for fallback output to be a zero byte. + */ + + // unassigned: + SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, + prevSourceIndex, prevLength); + doloop = unassigned(source, target, offsets, x, flush, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + prevSourceIndex = x.prevSourceIndex; + prevLength = x.prevLength; + if (doloop) + continue; + else + break; + } + + /* write the output character bytes from value and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if (length <= target.remaining()) { + switch (length) { + /* each branch falls through to the next one */ + case 4: + target.put((byte) (value >>> 24)); + if (offsets != null) { + offsets.put(sourceIndex); + } + case 3: + target.put((byte) (value >>> 16)); + if (offsets != null) { + offsets.put(sourceIndex); + } + case 2: + target.put((byte) (value >>> 8)); + if (offsets != null) { + offsets.put(sourceIndex); + } + case 1: + target.put((byte) value); + if (offsets != null) { + offsets.put(sourceIndex); + } + default: + /* will never occur */ + break; + } + } else { + int errorBufferArrayIndex; + + /* + * We actually do this backwards here: In order to save an intermediate variable, we + * output first to the overflow buffer what does not fit into the regular target. 
+ */ + /* we know that 1<=targetCapacity>> 16); + case 2: + errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 8); + case 1: + errorBuffer[errorBufferArrayIndex] = (byte) value; + default: + /* will never occur */ + break; + } + errorBufferLength = (byte) length; + + /* now output what fits into the regular target */ + value >>>= 8 * length; /* length was reduced by targetCapacity */ + switch (target.remaining()) { + /* each branch falls through to the next one */ + case 3: + target.put((byte) (value >>> 16)); + if (offsets != null) { + offsets.put(sourceIndex); + } + case 2: + target.put((byte) (value >>> 8)); + if (offsets != null) { + offsets.put(sourceIndex); + } + case 1: + target.put((byte) value); + if (offsets != null) { + offsets.put(sourceIndex); + } + default: + /* will never occur */ + break; + } + + /* target overflow */ + cr[0] = CoderResult.OVERFLOW; + c = 0; + break; + } + + /* normal end of conversion: prepare for a new character */ + c = 0; + if (offsets != null) { + prevSourceIndex = sourceIndex; + sourceIndex = nextSourceIndex; + } + continue; + } else { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + break; + } + } + } + + /* + * the end of the input stream and detection of truncated input are handled by the framework, but for + * EBCDIC_STATEFUL conversion we need to emit an SI at the very end + * + * conditions: successful EBCDIC_STATEFUL in DBCS mode end of input and no truncated input + */ + if (outputType == MBCS_OUTPUT_2_SISO && prevLength == 2 && flush && sourceArrayIndex >= source.limit() + && c == 0) { + + /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */ + if (target.hasRemaining()) { + target.put(si_value[0]); + if (si_value_length == 2) { + if (target.remaining() > 0) { + target.put(si_value[1]); + } else { + errorBuffer[0] = si_value[1]; + errorBufferLength = 1; + cr[0] = CoderResult.OVERFLOW; + } + } + if (offsets != null) { + /* set the last source character's index (sourceIndex 
points at sourceLimit now) */ + offsets.put(prevSourceIndex); + } + } else { + /* target is full */ + errorBuffer[0] = si_value[0]; + if (si_value_length == 2) { + errorBuffer[1] = si_value[1]; + } + errorBufferLength = si_value_length; + cr[0] = CoderResult.OVERFLOW; + } + prevLength = 1; /* we switched into SBCS */ + } + + /* set the converter state back into UConverter */ + fromUChar32 = c; + fromUnicodeStatus = prevLength; + + source.position(sourceArrayIndex); + } catch (BufferOverflowException ex) { + cr[0] = CoderResult.OVERFLOW; + } + + return cr[0]; + } + + /* + * This is another simple conversion function for internal use by other conversion implementations. It does not + * use the converter state nor call callbacks. It does not handle the EBCDIC swaplfnl option (set in + * UConverter). It handles conversion extensions but not GB 18030. + * + * It converts one single Unicode code point into codepage bytes, encoded as one 32-bit value. The function + * returns the number of bytes in *pValue: 1..4 the number of bytes in *pValue 0 unassigned (*pValue undefined) + * -1 illegal (currently not used, *pValue undefined) + * + * *pValue will contain the resulting bytes with the last byte in bits 7..0, the second to last byte in bits + * 15..8, etc. Currently, the function assumes but does not check that 0<=c<=0x10ffff. 
+ */ + int fromUChar32(int c, int[] pValue, boolean isUseFallback) { + // #if 0 + // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ + // const uint8_t *p; + // #endif + + char[] table; + int stage2Entry; + int value; + int length; + int p; + + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + if (c <= 0xffff || ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0)) { + table = sharedData.mbcs.fromUnicodeTable; + + /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ + if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) { + value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c); + /* is this code point assigned, or do we use fallbacks? */ + if (isUseFallback ? value >= 0x800 : value >= 0xc00) { + pValue[0] = value & 0xff; + return 1; + } + } else /* outputType!=MBCS_OUTPUT_1 */{ + stage2Entry = MBCS_STAGE_2_FROM_U(table, c); + + /* get the bytes and the length for the output */ + switch (sharedData.mbcs.outputType) { + case MBCS_OUTPUT_2: + value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c); + if (value <= 0xff) { + length = 1; + } else { + length = 2; + } + break; + // #if 0 + // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ + // case MBCS_OUTPUT_DBCS_ONLY: + // /* table with single-byte results, but only DBCS mappings used */ + // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + // if(value<=0xff) { + // /* no mapping or SBCS result, not taken for DBCS-only */ + // value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ + // length=0; + // } else { + // length=2; + // } + // break; + case MBCS_OUTPUT_3: + byte[] bytes = sharedData.mbcs.fromUnicodeBytes; + p = CharsetMBCS.MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); + value = ((bytes[p] & 
UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | + ((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) | + (bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK); + if (value <= 0xff) { + length = 1; + } else if (value <= 0xffff) { + length = 2; + } else { + length = 3; + } + break; + // case MBCS_OUTPUT_4: + // value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + // if(value<=0xff) { + // length=1; + // } else if(value<=0xffff) { + // length=2; + // } else if(value<=0xffffff) { + // length=3; + // } else { + // length=4; + // } + // break; + // case MBCS_OUTPUT_3_EUC: + // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + // /* EUC 16-bit fixed-length representation */ + // if(value<=0xff) { + // length=1; + // } else if((value&0x8000)==0) { + // value|=0x8e8000; + // length=3; + // } else if((value&0x80)==0) { + // value|=0x8f0080; + // length=3; + // } else { + // length=2; + // } + // break; + // case MBCS_OUTPUT_4_EUC: + // p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + // value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + // /* EUC 16-bit fixed-length representation applied to the first two bytes */ + // if(value<=0xff) { + // length=1; + // } else if(value<=0xffff) { + // length=2; + // } else if((value&0x800000)==0) { + // value|=0x8e800000; + // length=4; + // } else if((value&0x8000)==0) { + // value|=0x8f008000; + // length=4; + // } else { + // length=3; + // } + // break; + // #endif + default: + /* must not occur */ + return -1; + } + + /* is this code point assigned, or do we use fallbacks? */ + if (MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) + || (CharsetEncoderICU.isFromUUseFallback(isUseFallback, c) && value != 0)) { + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way with + * this data structure for fallback output to be a zero byte. 
+ */ + /* assigned */ + pValue[0] = value; + return length; + } + } + } + + if (sharedData.mbcs.extIndexes != null) { + length = simpleMatchFromU(c, pValue, isUseFallback); + return length >= 0 ? length : -length; /* return abs(length); */ + } + + /* unassigned */ + return 0; + } + + /* + * continue partial match with new input, requires cnv->preFromUFirstCP>=0 never called for simple, + * single-character conversion + */ + private CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush, + int srcIndex) { + CoderResult cr = CoderResult.UNDERFLOW; + int[] value = new int[1]; + int match; + + match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, value, useFallback, flush); + if (match >= 2) { + match -= 2; /* remove 2 for the initial code point */ + + if (match >= preFromULength) { + /* advance src pointer for the consumed input */ + source.position(source.position() + match - preFromULength); + preFromULength = 0; + } else { + /* the match did not use all of preFromU[] - keep the rest for replay */ + int length = preFromULength - match; + System.arraycopy(preFromUArray, preFromUBegin + match, preFromUArray, preFromUBegin, length); + preFromULength = (byte) -length; + } + + /* finish the partial match */ + preFromUFirstCP = UConverterConstants.U_SENTINEL; + + /* write result */ + writeFromU(value[0], target, offsets, srcIndex); + } else if (match < 0) { + /* save state for partial match */ + int sArrayIndex; + int j; + + /* just _append_ the newly consumed input to preFromU[] */ + sArrayIndex = source.position(); + match = -match - 2; /* remove 2 for the initial code point */ + for (j = preFromULength; j < match; ++j) { + preFromUArray[j] = source.get(sArrayIndex++); + } + source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */ + preFromULength = (byte) match; + } else { /* match==0 or 1 */ + /* + * no match + * + * We need to split the previous 
input into two parts: + * + * 1. The first code point is unmappable - that's how we got into trying the extension data in the first + * place. We need to move it from the preFromU buffer to the error buffer, set an error code, and + * prepare the rest of the previous input for 2. + * + * 2. The rest of the previous input must be converted once we come back from the callback for the first + * code point. At that time, we have to try again from scratch to convert these input characters. The + * replay will be handled by the ucnv.c conversion code. + */ + + if (match == 1) { + /* matched, no mapping but request for */ + useSubChar1 = true; + } + + /* move the first code point to the error field */ + fromUChar32 = preFromUFirstCP; + preFromUFirstCP = UConverterConstants.U_SENTINEL; + + /* mark preFromU for replay */ + preFromULength = (byte) -preFromULength; + + /* set the error code for unassigned */ + // TODO: figure out what the unmappable length really should be + cr = CoderResult.unmappableForLength(1); + } + return cr; + } + + /** + * @param cx + * pointer to extension data; if NULL, returns 0 + * @param firstCP + * the first code point before all the other UChars + * @param pre + * UChars that must match; !initialMatch: partial match with them + * @param preLength + * length of pre, >=0 + * @param src + * UChars that can be used to complete a match + * @param srcLength + * length of src, >=0 + * @param pMatchValue + * [out] output result value for the match from the data structure + * @param useFallback + * "use fallback" flag, usually from cnv->useFallback + * @param flush + * TRUE if the end of the input stream is reached + * @return >1: matched, return value=total match length (number of input units matched) 1: matched, no mapping + * but request for (only for the first code point) 0: no match <0: partial match, return + * value=negative total match length (partial matches are never returned for flush==TRUE) (partial + * matches are never returned as being 
longer than UCNV_EXT_MAX_UCHARS) the matchLength is 2 if only + * firstCP matched, and >2 if firstCP and further code units matched + */ + // static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength, + // const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush) + private int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source, + int[] pMatchValue, boolean isUseFallback, boolean flush) { + ByteBuffer cx = sharedData.mbcs.extIndexes; + + CharBuffer stage12, stage3; + IntBuffer stage3b; + + CharBuffer fromUTableUChars, fromUSectionUChars; + IntBuffer fromUTableValues, fromUSectionValues; + + int value, matchValue; + int i, j, index, length, matchLength; + char c; + + if (cx == null) { + return 0; /* no extension data, no match */ + } + + /* trie lookup of firstCP */ + index = firstCP >>> 10; /* stage 1 index */ + if (index >= cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) { + return 0; /* the first code point is outside the trie */ + } + + stage12 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class); + stage3 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class); + index = FROM_U(stage12, stage3, index, firstCP); + + stage3b = (IntBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class); + value = stage3b.get(stage3b.position() + index); + if (value == 0) { + return 0; + } + + if (TO_U_IS_PARTIAL(value)) { + /* partial match, enter the loop below */ + index = FROM_U_GET_PARTIAL_INDEX(value); + + /* initialize */ + fromUTableUChars = (CharBuffer) ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class); + fromUTableValues = (IntBuffer) ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class); + + matchValue = 0; + i = j = matchLength = 0; + + /* we must not remember fallback matches when not using fallbacks */ + + /* match input units until there is a full match or the input is consumed */ + for (;;) { + /* go to the next section */ + int oldpos 
= fromUTableUChars.position(); + fromUSectionUChars = ((CharBuffer) fromUTableUChars.position(index)).slice(); + fromUTableUChars.position(oldpos); + oldpos = fromUTableValues.position(); + fromUSectionValues = ((IntBuffer) fromUTableValues.position(index)).slice(); + fromUTableValues.position(oldpos); + + /* read first pair of the section */ + length = fromUSectionUChars.get(); + value = fromUSectionValues.get(); + if (value != 0 && (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP))) { + /* remember longest match so far */ + matchValue = value; + matchLength = 2 + i + j; + } + + /* match pre[] then src[] */ + if (i < preLength) { + c = preArray[preArrayBegin + i++]; + } else if (source != null && j < source.remaining()) { + c = source.get(source.position() + j++); + } else { + /* all input consumed, partial match */ + if (flush || (length = (i + j)) > MAX_UCHARS) { + /* + * end of the entire input stream, stop with the longest match so far or: partial match must + * not be longer than UCNV_EXT_MAX_UCHARS because it must fit into state buffers + */ + break; + } else { + /* continue with more input next time */ + return -(2 + length); + } + } + + /* search for the current UChar */ + index = findFromU(fromUSectionUChars, length, c); + if (index < 0) { + /* no match here, stop with the longest match so far */ + break; + } else { + value = fromUSectionValues.get(fromUSectionValues.position() + index); + if (FROM_U_IS_PARTIAL(value)) { + /* partial match, continue */ + index = FROM_U_GET_PARTIAL_INDEX(value); + } else { + if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) { + /* full match, stop with result */ + matchValue = value; + matchLength = 2 + i + j; + } else { + /* full match on fallback not taken, stop with the longest match so far */ + } + break; + } + } + } + + if (matchLength == 0) { + /* no match at all */ + return 0; + } + } else /* result from firstCP trie lookup */{ + if (FROM_U_IS_ROUNDTRIP(value) || 
isFromUUseFallback(isUseFallback, firstCP)) { + /* full match, stop with result */ + matchValue = value; + matchLength = 2; + } else { + /* fallback not taken */ + return 0; + } + } + + if ((matchValue & FROM_U_RESERVED_MASK) != 0) { + /* do not interpret values with reserved bits used, for forward compatibility */ + return 0; + } + + /* return result */ + if (matchValue == FROM_U_SUBCHAR1) { + return 1; /* assert matchLength==2 */ + } + + pMatchValue[0] = FROM_U_MASK_ROUNDTRIP(matchValue); + return matchLength; + } + + private int simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback) { + int[] value = new int[1]; + int match; // signed + + /* try to match */ + match = matchFromU(cp, null, 0, 0, null, value, isUseFallback, true); + if (match >= 2) { + /* write result for simple, single-character conversion */ + int length; + boolean isRoundtrip; + + isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]); + length = FROM_U_GET_LENGTH(value[0]); + value[0] = FROM_U_GET_DATA(value[0]); + + if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) { + pValue[0] = value[0]; + return isRoundtrip ? length : -length; + // #if 0 /* not currently used */ + // } else if(length==4) { + // /* de-serialize a 4-byte result */ + // const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; + // *pValue= + // ((uint32_t)result[0]<<24)| + // ((uint32_t)result[1]<<16)| + // ((uint32_t)result[2]<<8)| + // result[3]; + // return isRoundtrip ? 
4 : -4; + // #endif + } + } + + /* + * return no match because - match>1 && resultLength>4: result too long for simple conversion - match==1: no + * match found, preferred - match==0: no match found in the first place - match<0: partial + * match, not supported for simple conversion (and flush==TRUE) + */ + return 0; + } + + @SuppressWarnings("fallthrough") + private CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex) { + ByteBuffer cx = sharedData.mbcs.extIndexes; + + byte bufferArray[] = new byte[1 + MAX_BYTES]; + int bufferArrayIndex = 0; + byte[] resultArray; + int resultArrayIndex; + int length, prevLength; + + length = FROM_U_GET_LENGTH(value); + value = FROM_U_GET_DATA(value); + + /* output the result */ + if (length <= FROM_U_MAX_DIRECT_LENGTH) { + /* + * Generate a byte array and then write it below. This is not the fastest possible way, but it should be + * ok for extension mappings, and it is much simpler. Offset and overflow handling are only done once + * this way. 
+ */ + int p = bufferArrayIndex + 1; /* reserve buffer[0] for shiftByte below */ + switch (length) { + case 3: + bufferArray[p++] = (byte) (value >>> 16); + case 2: + bufferArray[p++] = (byte) (value >>> 8); + case 1: + bufferArray[p++] = (byte) value; + default: + break; /* will never occur */ + } + resultArray = bufferArray; + resultArrayIndex = bufferArrayIndex + 1; + } else { + byte[] slice = new byte[length]; + + ByteBuffer bb = ((ByteBuffer) ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class)); + bb.position(value); + bb.get(slice, 0, slice.length); + + resultArray = slice; + resultArrayIndex = 0; + } + + /* with correct data we have length>0 */ + + if ((prevLength = fromUnicodeStatus) != 0) { + /* handle SI/SO stateful output */ + byte shiftByte; + + if (prevLength > 1 && length == 1) { + /* change from double-byte mode to single-byte */ + shiftByte = (byte) UConverterConstants.SI; + fromUnicodeStatus = 1; + } else if (prevLength == 1 && length > 1) { + /* change from single-byte mode to double-byte */ + shiftByte = (byte) UConverterConstants.SO; + fromUnicodeStatus = 2; + } else { + shiftByte = 0; + } + + if (shiftByte != 0) { + /* prepend the shift byte to the result bytes */ + bufferArray[0] = shiftByte; + if (resultArray != bufferArray || resultArrayIndex != bufferArrayIndex + 1) { + System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex + 1, length); + } + resultArray = bufferArray; + resultArrayIndex = bufferArrayIndex; + ++length; + } + } + + return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex); + } + + /* + * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written + * to the target + */ + private int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex, + int length, boolean flush, CoderResult[] cr) { + // ByteBuffer cx; + long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK; + + useSubChar1 = false; + + 
if (sharedData.mbcs.extIndexes != null + && initialMatchFromU((int) cp, source, target, offsets, sourceIndex, flush, cr)) { + return 0; /* an extension mapping handled the input */ + } + + /* GB 18030 */ + if ((options & MBCS_OPTION_GB18030) != 0) { + long[] range; + int i; + + for (i = 0; i < gb18030Ranges.length; ++i) { + range = gb18030Ranges[i]; + if (range[0] <= cp && cp <= range[1]) { + /* found the Unicode code point, output the four-byte sequence for it */ + long linear; + byte bytes[] = new byte[4]; + + /* get the linear value of the first GB 18030 code in this range */ + linear = range[2] - LINEAR_18030_BASE; + + /* add the offset from the beginning of the range */ + linear += (cp - range[0]); + + bytes[3] = (byte) (0x30 + linear % 10); + linear /= 10; + bytes[2] = (byte) (0x81 + linear % 126); + linear /= 126; + bytes[1] = (byte) (0x30 + linear % 10); + linear /= 10; + bytes[0] = (byte) (0x81 + linear); + + /* output this sequence */ + cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex); + return 0; + } + } + } + + /* no mapping */ + cr[0] = CoderResult.unmappableForLength(length); + return (int) cp; + } + + /* + * target= 2 + && !(FROM_U_GET_LENGTH(value[0]) == 1 && sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY)) { + /* advance src pointer for the consumed input */ + source.position(source.position() + match - 2); /* remove 2 for the initial code point */ + + /* write result to target */ + cr[0] = writeFromU(value[0], target, offsets, srcIndex); + return true; + } else if (match < 0) { + /* save state for partial match */ + int sArrayIndex; + int j; + + /* copy the first code point */ + preFromUFirstCP = cp; + + /* now copy the newly consumed input */ + sArrayIndex = source.position(); + match = -match - 2; /* remove 2 for the initial code point */ + for (j = 0; j < match; ++j) { + preFromUArray[j] = source.get(sArrayIndex++); + } + source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */ 
+ preFromULength = (byte) match; + return true; + } else if (match == 1) { + /* matched, no mapping but request for */ + useSubChar1 = true; + return false; + } else /* match==0 no match */{ + return false; + } + } + + CoderResult cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + // Just call encodeLoop to remove duplicate code. + return encodeLoop(source, target, offsets, flush); + } + + /* + * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages that map only to and from the + * BMP. In addition to single-byte/state optimizations, the offset calculations become much easier. + */ + private CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, + boolean flush) { + + CoderResult[] cr = { CoderResult.UNDERFLOW }; + + int sourceArrayIndex, lastSource; + int targetCapacity, length; + char[] table; + byte[] results; + + int c, sourceIndex; + char value, minValue; + + /* set up the local pointers */ + sourceArrayIndex = source.position(); + targetCapacity = target.remaining(); + table = sharedData.mbcs.fromUnicodeTable; + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes + // be a ByteBuffer so results can be a 16-bit view + // of it? + } else { + results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a + // ByteBuffer so results can be a 16-bit view of it? + } + + if (useFallback) { + /* use all roundtrip and fallback results */ + minValue = 0x800; + } else { + /* use only roundtrips and fallbacks from private-use characters */ + minValue = 0xc00; + } + + /* get the converter state from UConverter */ + c = fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex = c == 0 ? 
0 : -1; + lastSource = sourceArrayIndex; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the + * sourceLength and targetCapacity + */ + length = source.limit() - sourceArrayIndex; + if (length < targetCapacity) { + targetCapacity = length; + } + + boolean doloop = true; + if (c != 0 && targetCapacity > 0) { + SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex); + doloop = getTrailSingleBMP(source, x, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + } + + if (doloop) { + while (targetCapacity > 0) { + /* + * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate pair + * for a "supplementary code point". + */ + c = source.get(sourceArrayIndex++); + /* + * Do not immediately check for single surrogates: Assume that they are unassigned and check for + * them in that case. This speeds up the conversion of assigned characters. + */ + /* convert the Unicode code point in c into codepage bytes */ + value = MBCS_SINGLE_RESULT_FROM_U(table, results, c); + + /* is this code point assigned, or do we use fallbacks? 
*/ + if (value >= minValue) { + /* assigned, write the output character bytes from value and length */ + /* length==1 */ + /* this is easy because we know that there is enough space */ + target.put((byte) value); + --targetCapacity; + + /* normal end of conversion: prepare for a new character */ + c = 0; + continue; + } else if (!UTF16.isSurrogate((char) c)) { + /* normal, unassigned BMP character */ + } else if (UTF16.isLeadSurrogate((char) c)) { + // getTrail: + SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex); + doloop = getTrailSingleBMP(source, x, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + if (!doloop) + break; + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + break; + } + + /* c does not have a mapping */ + + /* get the number of code units for c to correctly advance sourceIndex */ + length = UTF16.getCharCount(c); + + /* set offsets since the start or the last extension */ + if (offsets != null) { + int count = sourceArrayIndex - lastSource; + + /* do not set the offset for this character */ + count -= length; + + while (count > 0) { + offsets.put(sourceIndex++); + --count; + } + /* offsets and sourceIndex are now set for the current character */ + } + + /* try an extension mapping */ + lastSource = sourceArrayIndex; + source.position(sourceArrayIndex); + c = fromU(c, source, target, offsets, sourceIndex, length, flush, cr); + sourceArrayIndex = source.position(); + sourceIndex += length + (sourceArrayIndex - lastSource); + lastSource = sourceArrayIndex; + + if (cr[0].isError()) { + /* not mappable or buffer overflow */ + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity = target.remaining(); + length = source.limit() - sourceArrayIndex; + if (length < targetCapacity) { + targetCapacity = length; + } + } + } + } + + if 
(sourceArrayIndex < source.limit() && !target.hasRemaining()) { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + } + + /* set offsets since the start or the last callback */ + if (offsets != null) { + int count = sourceArrayIndex - lastSource; + while (count > 0) { + offsets.put(sourceIndex++); + --count; + } + } + + /* set the converter state back into UConverter */ + fromUChar32 = c; + + /* write back the updated pointers */ + source.position(sourceArrayIndex); + + return cr[0]; + } + + /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */ + private CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, + IntBuffer offsets, boolean flush) { + + CoderResult[] cr = { CoderResult.UNDERFLOW }; + + int sourceArrayIndex; + + char[] table; + byte[] results; // agljport:comment results is used to to get 16-bit values out of byte[] array + + int c; + int sourceIndex, nextSourceIndex; + + char value, minValue; + + /* set up the local pointers */ + short uniMask; + sourceArrayIndex = source.position(); + + table = sharedData.mbcs.fromUnicodeTable; + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes + // be a ByteBuffer so results can be a 16-bit view + // of it? + } else { + results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a + // ByteBuffer so results can be a 16-bit view of it? 
+ } + + if (useFallback) { + /* use all roundtrip and fallback results */ + minValue = 0x800; + } else { + /* use only roundtrips and fallbacks from private-use characters */ + minValue = 0xc00; + } + // agljport:comment hasSupplementary only used in getTrail block which now simply repeats the mask operation + uniMask = sharedData.mbcs.unicodeMask; + + /* get the converter state from UConverter */ + c = fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex = c == 0 ? 0 : -1; + nextSourceIndex = 0; + + boolean doloop = true; + boolean doread = true; + if (c != 0 && target.hasRemaining()) { + if (UTF16.isLeadSurrogate((char) c)) { + SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex); + doloop = getTrailDouble(source, target, uniMask, x, flush, cr); + doread = x.doread; + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + } else { + doread = false; + } + } + + if (doloop) { + while (!doread || sourceArrayIndex < source.limit()) { + /* + * This following test is to see if available input would overflow the output. It does not catch + * output of more than one byte that overflows as a result of a multi-byte character or callback + * output from the last source character. Therefore, those situations also test for overflows and + * will then break the loop, too. + */ + if (target.hasRemaining()) { + /* + * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate + * pair for a "supplementary code point". 
+ */ + + if (doread) { + c = source.get(sourceArrayIndex++); + ++nextSourceIndex; + if (UTF16.isSurrogate((char) c)) { + if (UTF16.isLeadSurrogate((char) c)) { + // getTrail: + SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, + nextSourceIndex); + doloop = getTrailDouble(source, target, uniMask, x, flush, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + if (x.doread) { + if (doloop) + continue; + else + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + break; + } + } + } else { + doread = true; + } + + /* convert the Unicode code point in c into codepage bytes */ + value = MBCS_SINGLE_RESULT_FROM_U(table, results, c); + + /* is this code point assigned, or do we use fallbacks? */ + if (value >= minValue) { + /* assigned, write the output character bytes from value and length */ + /* length==1 */ + /* this is easy because we know that there is enough space */ + target.put((byte) value); + if (offsets != null) { + offsets.put(sourceIndex); + } + + /* normal end of conversion: prepare for a new character */ + c = 0; + sourceIndex = nextSourceIndex; + } else { /* unassigned */ + /* try an extension mapping */ + SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, + nextSourceIndex); + doloop = unassignedDouble(source, target, x, flush, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + if (!doloop) + break; + } + } else { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + break; + } + } + } + + /* set the converter state back into UConverter */ + fromUChar32 = c; + + /* write back the updated pointers */ + source.position(sourceArrayIndex); + + return cr[0]; + } + + /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte 
codepages. */ + /* + * Converts UTF-16 from source into one- or two-byte codepage output in target. The pending code point is read + * from fromUChar32 on entry and stored back on exit, and the consumed position is written back to source. + * When only the lead byte of a two-byte result fits, the trail byte is parked in errorBuffer and OVERFLOW is + * returned. Unassigned code points and, for tables without HAS_SUPPLEMENTARY, supplementary code points are + * handed to unassignedDouble(). + */ + private CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, + IntBuffer offsets, boolean flush) { + CoderResult[] cr = { CoderResult.UNDERFLOW }; + + int sourceArrayIndex; + + char[] table; + byte[] bytes; + + int c, sourceIndex, nextSourceIndex; + + int stage2Entry; + int value; + int length; + short uniMask; + + /* unicodeMask flags are tested below (HAS_SURROGATES) and in getTrailDouble (HAS_SUPPLEMENTARY) */ + uniMask = sharedData.mbcs.unicodeMask; + + /* set up the local pointers */ + sourceArrayIndex = source.position(); + + table = sharedData.mbcs.fromUnicodeTable; + + if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { + bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes; + } else { + bytes = sharedData.mbcs.fromUnicodeBytes; + } + + /* get the converter state from UConverter */ + c = fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex = c == 0 ? 0 : -1; + nextSourceIndex = 0; + + /* conversion loop */ + boolean doloop = true; + boolean doread = true; + if (c != 0 && target.hasRemaining()) { + if (UTF16.isLeadSurrogate((char) c)) { + SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex); + doloop = getTrailDouble(source, target, uniMask, x, flush, cr); + doread = x.doread; + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + } else { + doread = false; + } + } + + if (doloop) { + while (!doread || sourceArrayIndex < source.limit()) { + /* + * This following test is to see if available input would overflow the output. It does not catch + * output of more than one byte that overflows as a result of a multi-byte character or callback + * output from the last source character. Therefore, those situations also test for overflows and + * will then break the loop, too.
+ */ + if (target.hasRemaining()) { + if (doread) { + /* + * Get a correct Unicode code point: a single UChar for a BMP code point or a matched + * surrogate pair for a "supplementary code point". + */ + c = source.get(sourceArrayIndex++); + ++nextSourceIndex; + /* + * This also tests if the codepage maps single surrogates. If it does, then surrogates are + * not paired but mapped separately. Note that in this case unmatched surrogates are not + * detected. + */ + if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) { + if (UTF16.isLeadSurrogate((char) c)) { + // getTrail: + SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, + nextSourceIndex); + doloop = getTrailDouble(source, target, uniMask, x, flush, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + + if (x.doread) { + if (doloop) + continue; + else + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + break; + } + } + } else { + doread = true; + } + + /* convert the Unicode code point in c into codepage bytes */ + stage2Entry = MBCS_STAGE_2_FROM_U(table, c); + + /* get the bytes and the length for the output */ + /* MBCS_OUTPUT_2 */ + value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + length = 1; + } else { + length = 2; + } + + /* is this code point assigned, or do we use fallbacks? */ + if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0))) { + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way + * with this data structure for fallback output to be a zero byte.
+ */ + + // unassigned: + SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, + nextSourceIndex); + + doloop = unassignedDouble(source, target, x, flush, cr); + c = x.c; + sourceArrayIndex = x.sourceArrayIndex; + sourceIndex = x.sourceIndex; + nextSourceIndex = x.nextSourceIndex; + if (doloop) + continue; + else + break; + } + + /* write the output character bytes from value and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if (length == 1) { + /* this is easy because we know that there is enough space */ + target.put((byte) value); + if (offsets != null) { + offsets.put(sourceIndex); + } + } else /* length==2 */{ + target.put((byte) (value >>> 8)); + /* the lead byte has just been written, so room for one more byte is all the trail byte needs */ + if (target.hasRemaining()) { + target.put((byte) value); + if (offsets != null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + } else { + if (offsets != null) { + offsets.put(sourceIndex); + } + errorBuffer[0] = (byte) value; + errorBufferLength = 1; + + /* target overflow */ + cr[0] = CoderResult.OVERFLOW; + c = 0; + break; + } + } + + /* normal end of conversion: prepare for a new character */ + c = 0; + sourceIndex = nextSourceIndex; + continue; + } else { + /* target is full */ + cr[0] = CoderResult.OVERFLOW; + break; + } + } + } + + /* set the converter state back into UConverter */ + fromUChar32 = c; + + /* write back the updated pointers */ + source.position(sourceArrayIndex); + + return cr[0]; + } + + private final class SideEffectsSingleBMP { + int c, sourceArrayIndex; + + SideEffectsSingleBMP(int c_, int sourceArrayIndex_) { + c = c_; + sourceArrayIndex = sourceArrayIndex_; + } + } + + // function made out of block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets + // assumes input c is lead surrogate + private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) { + if (x.sourceArrayIndex < source.limit()) { + /* test the following code unit */ + char trail =
source.get(x.sourceArrayIndex); + if (UTF16.isTrailSurrogate(trail)) { + ++x.sourceArrayIndex; + x.c = UCharacter.getCodePoint((char) x.c, trail); + /* this codepage does not map supplementary code points */ + /* callback(unassigned) */ + cr[0] = CoderResult.unmappableForLength(2); + return false; + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + return false; + } + } else { + /* no more input */ + return false; + } + // return true; + } + + private final class SideEffects { + int c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength; + boolean doread = true; + + SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_, + int prevLength_) { + c = c_; + sourceArrayIndex = sourceArrayIndex_; + sourceIndex = sourceIndex_; + nextSourceIndex = nextSourceIndex_; + prevSourceIndex = prevSourceIndex_; + prevLength = prevLength_; + } + } + + // function made out of block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets + // assumes input c is lead surrogate + private final boolean getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x, + boolean flush, CoderResult[] cr) { + if (x.sourceArrayIndex < source.limit()) { + /* test the following code unit */ + char trail = source.get(x.sourceArrayIndex); + if (UTF16.isTrailSurrogate(trail)) { + ++x.sourceArrayIndex; + ++x.nextSourceIndex; + /* convert this supplementary code point */ + x.c = UCharacter.getCodePoint((char) x.c, trail); + if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + fromUnicodeStatus = x.prevLength; /* save the old state */ + /* callback(unassigned) */ + x.doread = true; + return unassigned(source, target, null, x, flush, cr); + } else { + x.doread = false; + return true; + } + } else { + /* this is an unmatched lead code
unit (1st surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + return false; + } + } else { + /* no more input */ + return false; + } + } + + // function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets + private final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x, + boolean flush, CoderResult[] cr) { + /* try an extension mapping */ + int sourceBegin = x.sourceArrayIndex; + source.position(x.sourceArrayIndex); + x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr); + x.sourceArrayIndex = source.position(); + x.nextSourceIndex += x.sourceArrayIndex - sourceBegin; + x.prevLength = fromUnicodeStatus; + + if (cr[0].isError()) { + /* not mappable or buffer overflow */ + return false; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex; + /* normal end of conversion: prepare for a new character */ + if (offsets != null) { + x.prevSourceIndex = x.sourceIndex; + x.sourceIndex = x.nextSourceIndex; + } + return true; + } + } + + private final class SideEffectsDouble { + int c, sourceArrayIndex, sourceIndex, nextSourceIndex; + boolean doread = true; + + SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_) { + c = c_; + sourceArrayIndex = sourceArrayIndex_; + sourceIndex = sourceIndex_; + nextSourceIndex = nextSourceIndex_; + } + } + + // function made out of block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets + // assumes input c is lead surrogate + private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask, + SideEffectsDouble x, boolean flush, CoderResult[] cr) { + if (x.sourceArrayIndex < source.limit()) { + /* test the following code unit */ + char trail = source.get(x.sourceArrayIndex); + if (UTF16.isTrailSurrogate(trail)) { +
++x.sourceArrayIndex; + ++x.nextSourceIndex; + /* convert this supplementary code point */ + x.c = UCharacter.getCodePoint((char) x.c, trail); + if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + /* callback(unassigned) */ + x.doread = true; + return unassignedDouble(source, target, x, flush, cr); + } else { + x.doread = false; + return true; + } + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + cr[0] = CoderResult.malformedForLength(1); + return false; + } + } else { + /* no more input */ + return false; + } + } + + // function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets + private final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x, + boolean flush, CoderResult[] cr) { + /* try an extension mapping */ + int sourceBegin = x.sourceArrayIndex; + source.position(x.sourceArrayIndex); + x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr); + x.sourceArrayIndex = source.position(); + x.nextSourceIndex += x.sourceArrayIndex - sourceBegin; + + if (cr[0].isError()) { + /* not mappable or buffer overflow */ + return false; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex; + /* normal end of conversion: prepare for a new character */ + x.sourceIndex = x.nextSourceIndex; + return true; + } + } + + /** + * Overrides super class method. + * Writes the substitution bytes for an unmappable code point. subChar1 is used when it is non-zero and either + * the extension data requested it (useSubChar1) or, without extension data, the first invalid code unit is at + * most U+00ff; otherwise subChar is used. For MBCS_OUTPUT_2_SISO an SI or SO byte is written first when the + * substitution length does not match the mode recorded in fromUnicodeStatus, which is then updated. + * + * @param encoder the encoder whose charset supplies subChar/subChar1 and whose state is updated + * @param source only its position is used, as the source index passed to fromUWriteBytes + * @param target receives the substitution bytes + * @param offsets passed through to fromUWriteBytes + * @return the result of fromUWriteBytes + * @throws IllegalArgumentException for MBCS_OUTPUT_2_SISO when the substitution length is neither 1 nor 2 + */ + protected CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target, + IntBuffer offsets) { + CharsetMBCS cs = (CharsetMBCS) encoder.charset(); + byte[] subchar; + int length; + + if (cs.subChar1 != 0 + &&
(cs.sharedData.mbcs.extIndexes != null ? encoder.useSubChar1 + : (encoder.invalidUCharBuffer[0] <= 0xff))) { + /* + * select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS + * behavior) + */ + subchar = new byte[] { cs.subChar1 }; + length = 1; + } else { + /* select subChar in all other cases */ + subchar = cs.subChar; + length = cs.subCharLen; + } + + /* reset the selector for the next code point */ + encoder.useSubChar1 = false; + + if (cs.sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) { + byte[] buffer = new byte[4]; + int i = 0; + + /* fromUnicodeStatus contains prevLength */ + switch (length) { + case 1: + if (encoder.fromUnicodeStatus == 2) { + /* DBCS mode and SBCS sub char: change to SBCS */ + encoder.fromUnicodeStatus = 1; + buffer[i++] = UConverterConstants.SI; + } + buffer[i++] = subchar[0]; + break; + case 2: + if (encoder.fromUnicodeStatus <= 1) { + /* SBCS mode and DBCS sub char: change to DBCS */ + encoder.fromUnicodeStatus = 2; + buffer[i++] = UConverterConstants.SO; + } + buffer[i++] = subchar[0]; + buffer[i++] = subchar[1]; + break; + default: + throw new IllegalArgumentException(); + } + + subchar = buffer; + length = i; + } + return CharsetEncoderICU.fromUWriteBytes(encoder, subchar, 0, length, target, offsets, source.position()); + } + + /** + * Gets called whenever CharsetEncoder.replaceWith gets called. allowReplacementChanges only allows subChar and + * subChar1 to be modified outside construction (since replaceWith is called once during construction). + * + * @param replacement + * The replacement for subchar.
+ */ + protected void implReplaceWith(byte[] replacement) { + if (allowReplacementChanges) { + CharsetMBCS cs = (CharsetMBCS) this.charset(); + + System.arraycopy(replacement, 0, cs.subChar, 0, replacement.length); + cs.subCharLen = (byte) replacement.length; + cs.subChar1 = 0; + } + } + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderMBCS(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderMBCS(this); + } + + @SuppressWarnings("fallthrough") + void MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter){ + UConverterMBCSTable mbcsTable; + char[] table; + char st1,maxStage1, st2; + int st3; + int c ; + + mbcsTable = data.mbcs; + table = mbcsTable.fromUnicodeTable; + if((mbcsTable.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY)!=0){ + maxStage1 = 0x440; + } + else{ + maxStage1 = 0x40; + } + c=0; /* keep track of current code point while enumerating */ + + if(mbcsTable.outputType==MBCS_OUTPUT_1){ + char stage2, stage3; + char minValue; + CharBuffer results; + results = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer(); + + if(which==ROUNDTRIP_SET) { + /* use only roundtrips */ + minValue=0xf00; + } else { + /* use all roundtrip and fallback results */ + minValue=0x800; + } + for(st1=0;st1maxStage1){ + stage2 = st2; + for(st2=0; st2<64; ++st2){ + st3 = table[stage2 + st2]; + if(st3!=0){ + /*read the stage 3 block */ + stage3 = (char)st3; + do { + if(results.get(stage3++)>=minValue){ + setFillIn.add(c); + } + + }while((++c&0xf) !=0); + } else { + c+= 16; /*empty stage 2 block */ + } + } + } else { + c+=1024; /* empty stage 2 block */ + } + } + } else { + int stage2,stage3; + byte[] bytes; + int st3Multiplier; + int value; + boolean useFallBack; + bytes = mbcsTable.fromUnicodeBytes; + useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET); + switch(mbcsTable.outputType) { + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4_EUC: + st3Multiplier = 3; + break; + case 
MBCS_OUTPUT_4: + st3Multiplier =4; + break; + default: + st3Multiplier =2; + break; + } + //ByteBuffer buffer = (ByteBuffer)charTobyte(table); + + for(st1=0;st1(maxStage1>>1)){ + stage2 = st2 ; + for(st2=0;st2<128;++st2){ + /*read the stage 3 block */ + st3 = table[stage2*2 + st2]<<16; + st3+=table[stage2*2 + ++st2]; + if(st3!=0){ + //if((st3=table[stage2+st2])!=0){ + stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK); + + /* get the roundtrip flags for the stage 3 block */ + st3>>=16; + st3 &= UConverterConstants.UNSIGNED_SHORT_MASK; + switch(filter) { + case UCNV_SET_FILTER_NONE: + do { + + if((st3&1)!=0){ + setFillIn.add(c); + stage3+=st3Multiplier; + }else if (useFallBack) { + + char b =0; + switch(st3Multiplier) { + case 4 : + + b|= ByteBuffer.wrap(bytes).getChar(stage3++); + + case 3 : + + b|= ByteBuffer.wrap(bytes).getChar(stage3++); + + case 2 : + + b|= ByteBuffer.wrap(bytes).getChar(stage3) | ByteBuffer.wrap(bytes).getChar(stage3+1); + stage3+=2; + default: + break; + } + if(b!=0) { + setFillIn.add(c); + } + } + st3>>=1; + }while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_DBCS_ONLY: + /* Ignore single bytes results (<0x100). */ + do { + if(((st3&1) != 0 || useFallBack) && + (UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))) >= 0x100){ + setFillIn.add(c); + } + st3>>=1; + stage3+=2; + }while((++c&0xf) != 0); + break; + case UCNV_SET_FILTER_2022_CN : + /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2202-CN. */ + do { + if(((st3&1) != 0 || useFallBack) && + ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & (ByteBuffer.wrap(bytes).get(stage3))))==0x81 || value==0x82) ){ + setFillIn.add(c); + } + st3>>=1; + stage3+=3; + }while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_SJIS: + /* only add code points that map tp Shift-JIS codes corrosponding to JIS X 0280. 
*/ + do{ + + if(((st3&1) != 0 || useFallBack) && (value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))>=0x8140 && value<=0xeffc){ + setFillIn.add(c); + } + st3>>=1; + stage3+=2; + }while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_GR94DBCS: + /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/ + do { + if(((st3&1) != 0 || useFallBack) && + (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))- 0xa1a1))<=(0xfefe - 0xa1a1) && + (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){ + setFillIn.add(c); + } + st3>>=1; + stage3+=2; + }while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_HZ: + /*Only add code points that are suitable for HZ DBCS*/ + do { + if( ((st3&1) != 0 || useFallBack) && + (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))-0xa1a1))<=(0xfdfe - 0xa1a1) && + (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){ + setFillIn.add(c); + } + st3>>=1; + stage3+=2; + }while((++c&0xf) != 0); + break; + default: + return; + } + } else { + c+=16; /* empty stage 3 block */ + } + } + } else { + c+=1024; /*empty stage2 block */ + } + } + } + extGetUnicodeSet(setFillIn, which, filter, data); + } + + static void extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback, + int minLength, int c, char s[],int length,int sectionIndex){ + CharBuffer fromUSectionUChar; + IntBuffer fromUSectionValues; + fromUSectionUChar = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX,char.class ); + fromUSectionValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX,int.class ); + int fromUSectionUCharIndex = fromUSectionUChar.position()+sectionIndex; + int fromUSectionValuesIndex = fromUSectionValues.position()+sectionIndex; + int value, i, count; + + /* read first pair of the section */ + count = 
fromUSectionUChar.get(fromUSectionUCharIndex++); + value = fromUSectionValues.get(fromUSectionValuesIndex++); + if(value!=0 && (FROM_U_IS_ROUNDTRIP(value) || useFallback) && FROM_U_GET_LENGTH(value)>=minLength) { + if(c>=0){ + setFillIn.add(c); + } else { + String normalizedString=""; // String for composite characters + for(int j=0; j=minLength) { + String normalizedString=""; // String for composite characters + for(int j=0; j<(length+1);j++){ + normalizedString+=s[j]; + } + setFillIn.add(normalizedString); + } + } + + } + + + static void extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data){ + int st1, stage1Length, st2, st3, minLength; + int ps2, ps3; + + CharBuffer stage12, stage3; + int value, length; + IntBuffer stage3b; + boolean useFallback; + char s[] = new char[MAX_UCHARS]; + int c; + ByteBuffer cx = Data.mbcs.extIndexes; + if(cx == null){ + return; + } + stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX,char.class ); + stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX,char.class ); + stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX,int.class ); + + stage1Length = cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH); + useFallback = (which==ROUNDTRIP_AND_FALLBACK_SET); + + c = 0; + if(filter == UCNV_SET_FILTER_2022_CN) { + minLength = 3; + } else if (Data.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY || filter != UCNV_SET_FILTER_NONE) { + /* DBCS-only, ignore single-byte results */ + minLength = 2; + } else { + minLength = 1; + } + + for(st1=0; st1< stage1Length; ++st1){ + st2 = stage12.get(st1); + if(st2>stage1Length) { + ps2 = st2; + for(st2=0;st2<64;++st2){ + st3=((int) stage12.get(ps2+st2))<=minLength){ + + switch(filter) { + case UCNV_SET_FILTER_2022_CN: + if(!(FROM_U_GET_LENGTH(value)==3 && FROM_U_GET_DATA(value)<=0x82ffff)){ + continue; + } + break; + case UCNV_SET_FILTER_SJIS: + if(!(FROM_U_GET_LENGTH(value)==2 && (value=FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)){ + continue; + } + break; + 
case UCNV_SET_FILTER_GR94DBCS: + if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1) + && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){ + + continue; + } + break; + case UCNV_SET_FILTER_HZ: + if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1) + && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){ + continue; + } + break; + default: + /* + * UCNV_SET_FILTER_NONE, + * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength + */ + break; + } + setFillIn.add(c); + + } + }while((++c&0xf) != 0); + + } else { + c+=16; /* emplty stage3 block */ + } + } + } else { + c+=1024; /* empty stage 2 block*/ + } + } + } + + void MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which){ + MBCSGetFilteredUnicodeSetForUnicode(data, setFillIn, which, + this.sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? UCNV_SET_FILTER_DBCS_ONLY : UCNV_SET_FILTER_NONE ); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + if((options & MBCS_OPTION_GB18030)!=0){ + setFillIn.add(0, 0xd7ff); + setFillIn.add(0xe000, 0x10ffff); + } + else { + this.MBCSGetUnicodeSetForUnicode(sharedData, setFillIn, which); + } + } + +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java new file mode 100644 index 00000000000..71c530ae8d1 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java @@ -0,0 +1,334 @@ +/** +******************************************************************************* +* Copyright (C) 2006-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +******************************************************************************* +*/ + +package com.ibm.icu.charset; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.UnsupportedCharsetException; +import java.nio.charset.spi.CharsetProvider; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import com.ibm.icu.impl.InvalidFormatException; + + +/** + * A concrete subclass of CharsetProvider for loading and providing charset converters + * in ICU. + * @stable ICU 3.6 + */ +public final class CharsetProviderICU extends CharsetProvider{ + private static String optionsString = null; + private static boolean gettingJavaCanonicalName = false; + + /** + * Default constructor + * @stable ICU 3.6 + */ + public CharsetProviderICU() { + } + + /** + * Constructs a charset for the given charset name. + * Implements the abstract method of super class. + * @param charsetName charset name + * @return charset objet for the given charset name, null if unsupported + * @stable ICU 3.6 + */ + public final Charset charsetForName(String charsetName){ + try{ + // extract the options from the charset name + charsetName = processOptions(charsetName); + // get the canonical name + String icuCanonicalName = getICUCanonicalName(charsetName); + + // create the converter object and return it + if(icuCanonicalName==null || icuCanonicalName.length()==0){ + // Try the original name, may be something added and not in the alias table. + // Will get an unsupported encoding exception if it doesn't work. + return getCharset(charsetName); + } + return getCharset(icuCanonicalName); + }catch(UnsupportedCharsetException ex){ + }catch(IOException ex){ + } + return null; + } + + /** + * Constructs a charset for the given ICU conversion table from the specified class path. 
+ * Example use: cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");. + * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package. + * Conversion tables can be made with ICU4C's makeconv tool. + * This function allows you to allows you to load user defined conversion + * tables that are outside of ICU's core data. + * @param charsetName The name of the charset conversion table. + * @param classPath The class path that contain the conversion table. + * @return charset object for the given charset name, null if unsupported + * @stable ICU 3.8 + */ + public final Charset charsetForName(String charsetName, String classPath) { + return charsetForName(charsetName, classPath, null); + } + + /** + * Constructs a charset for the given ICU conversion table from the specified class path. + * This function is similar to {@link #charsetForName(String, String)}. + * @param charsetName The name of the charset conversion table. + * @param classPath The class path that contain the conversion table. + * @param loader the class object from which to load the charset conversion table + * @return charset object for the given charset name, null if unsupported + * @stable ICU 3.8 + */ + public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) { + CharsetMBCS cs = null; + try { + cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader); + } catch (InvalidFormatException e) { + // return null; + } + return cs; + } + + /** + * Gets the canonical name of the converter as defined by Java + * @param enc converter name + * @return canonical name of the converter + * @internal + * @deprecated This API is ICU internal only. 
+ */ + public static final String getICUCanonicalName(String enc) + throws UnsupportedCharsetException{ + String canonicalName = null; + String ret = null; + try{ + if(enc!=null){ + if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){ + ret = canonicalName; + } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){ + ret = canonicalName; + } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){ + /* we have some aliases in the form x-blah .. match those */ + ret = canonicalName; + }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){ + ret = canonicalName; + }*/else if(enc.indexOf("x-")==0){ + /* TODO: Match with getJavaCanonicalName method */ + /* + char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0}; + strcpy(temp, encName+2); + */ + // Remove the 'x-' and get the ICU canonical name + if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) { + ret = canonicalName; + } else { + ret = ""; + } + + }else{ + /* unsupported encoding */ + ret = ""; + } + } + return ret; + }catch(IOException ex){ + throw new UnsupportedCharsetException(enc); + } + } + private static final Charset getCharset(String icuCanonicalName) throws IOException{ + String[] aliases = getAliases(icuCanonicalName); + String canonicalName = getJavaCanonicalName(icuCanonicalName); + + /* Concat the option string to the icuCanonicalName so that the options can be handled properly + * by the actual charset. + * Note: getJavaCanonicalName() may eventually call this method so skip the concatenation part + * during getJavaCanonicalName() call. 
+ */ + if (!gettingJavaCanonicalName && optionsString != null) { + icuCanonicalName = icuCanonicalName.concat(optionsString); + optionsString = null; + } + + return (CharsetICU.getCharset(icuCanonicalName,canonicalName, aliases)); + } + /** + * Gets the canonical name of the converter as defined by Java + * @param charsetName converter name + * @return canonical name of the converter + * @internal + * @deprecated This API is ICU internal only. + */ + public static String getJavaCanonicalName(String charsetName){ + /* + If a charset listed in the IANA Charset Registry is supported by an implementation + of the Java platform then its canonical name must be the name listed in the registry. + Many charsets are given more than one name in the registry, in which case the registry + identifies one of the names as MIME-preferred. If a charset has more than one registry + name then its canonical name must be the MIME-preferred name and the other names in + the registry must be valid aliases. If a supported charset is not listed in the IANA + registry then its canonical name must begin with one of the strings "X-" or "x-". 
+ */ + if(charsetName==null ){ + return null; + } + try{ + String cName = null; + /* find out the alias with MIME tag */ + if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){ + /* find out the alias with IANA tag */ + }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){ + }else { + /* + check to see if an alias already exists with x- prefix, if yes then + make that the canonical name + */ + int aliasNum = UConverterAlias.countAliases(charsetName); + String name; + for(int i=0;i=0;) { + ret[j] = aliasArray[j]; + } + + } + return (ret); + + } + + private static final void putCharsets(Map map){ + int num = UConverterAlias.countAvailable(); + for(int i=0;i charsets(){ + HashMap map = new HashMap(); + putCharsets(map); + return map.keySet().iterator(); + } + + /** + * Gets the canonical names of available converters + * @return array of available converter names + * @internal + * @deprecated This API is ICU internal only. + */ + public static final String[] getAvailableNames(){ + HashMap map = new HashMap(); + putCharsets(map); + return map.values().toArray(new String[0]); + } + + /** + * Return all names available + * @return String[] an array of all available names + * @internal + * @deprecated This API is ICU internal only. + */ + public static final String[] getAllNames(){ + int num = UConverterAlias.countAvailable(); + String[] names = new String[num]; + for(int i=0;i -1) { + /* Remove and save the swap lfnl option string portion of the charset name. 
*/ + optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING; + + charsetName = charsetName.substring(0, charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING)); + } + + return charsetName; + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java new file mode 100644 index 00000000000..dab80a77412 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java @@ -0,0 +1,1267 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author krajwade + * + */ +class CharsetSCSU extends CharsetICU{ + /* SCSU definitions --------------------------------------------------------- */ + + /* SCSU command byte values */ + //enum { + private static final short SQ0=0x01; /* Quote from window pair 0 */ + private static final short SQ7=0x08; /* Quote from window pair 7 */ + private static final short SDX=0x0B; /* Define a window as extended */ + //private static final short Srs=0x0C; /* reserved */ + private static final short SQU=0x0E; /* Quote a single Unicode character */ + private static final short SCU=0x0F; /* Change to Unicode mode */ + private static final short SC0=0x10; /* Select window 0 */ + private static final short SC7=0x17; /* Select window 7 */ + private static final short SD0=0x18; /* Define and select window 0 */ + //private static final short SD7=0x1F; /* 
Define and select window 7 */ + + private static final short UC0=0xE0; /* Select window 0 */ + private static final short UC7=0xE7; /* Select window 7 */ + private static final short UD0=0xE8; /* Define and select window 0 */ + private static final short UD7=0xEF; /* Define and select window 7 */ + private static final short UQU=0xF0; /* Quote a single Unicode character */ + private static final short UDX=0xF1; /* Define a Window as extended */ + private static final short Urs=0xF2; /* reserved */ + // }; + + // enum { + /* + * Unicode code points from 3400 to E000 are not addressable by + * dynamic window, since in these areas no short run alphabets are + * found. Therefore add gapOffset to all values from gapThreshold. + */ + private static final int gapThreshold=0x68; + private static final int gapOffset = 0xAC00 ; + /* values between reservedStart and fixedThreshold are reserved */ + private static final int reservedStart=0xA8; + /* use table of predefined fixed offsets for values from fixedThreshold */ + private static final int fixedThreshold=0xF9; + //}; + + protected byte[] fromUSubstitution = new byte[]{(byte)0x0E,(byte)0xFF, (byte)0xFD}; + + /* constant offsets for the 8 static windows */ + private static final int staticOffsets[]={ + 0x0000, /* ASCII for quoted tags */ + 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ + 0x0100, /* Latin Extended-A */ + 0x0300, /* Combining Diacritical Marks */ + 0x2000, /* General Punctuation */ + 0x2080, /* Currency Symbols */ + 0x2100, /* Letterlike Symbols and Number Forms */ + 0x3000 /* CJK Symbols and punctuation */ + }; + + /* initial offsets for the 8 dynamic (sliding) windows */ + private static final int initialDynamicOffsets[]={ + 0x0080, /* Latin-1 */ + 0x00C0, /* Latin-1 letters and half of Latin Extended A */ + 0x0400, /* Cyrillic */ + 0x0600, /* Arabic */ + 0x0900, /* Devanagari */ + 0x3040, /* Hiragana */ + 0x30A0, /* Katakana */ + 0xFF00 /* Fullwidth ASCII */ + }; + + /* Table of fixed predefined Offsets */ + private
static final int fixedOffsets[]={ + /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ + /* 0xFA */ 0x0250, /* IPA extensions */ + /* 0xFB */ 0x0370, /* Greek */ + /* 0xFC */ 0x0530, /* Armenian */ + /* 0xFD */ 0x3040, /* Hiragana */ + /* 0xFE */ 0x30A0, /* Katakana */ + /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ + }; + + /* state values */ + //enum { + private static final int readCommand=0; + private static final int quotePairOne=1; + private static final int quotePairTwo=2; + private static final int quoteOne=3; + private static final int definePairOne=4; + private static final int definePairTwo=5; + private static final int defineOne=6; + // }; + + /* Converter state for both directions: the decoder reads and writes the toU* fields, the encoder the fromU* fields, windowUse and nextWindowUseIndex. NOTE(review): CharsetSCSU holds one instance (extraInfo) that every decoder and encoder it creates uses and re-initializes in implReset() - confirm this sharing is intended. */ + @SuppressWarnings("unused") + private final class SCSUData{ + /* dynamic window offsets, initialized to default values from initialDynamicOffsets */ + int toUDynamicOffsets[] = new int[8] ; + int fromUDynamicOffsets[] = new int[8] ; + + /* state machine state - toUnicode */ + boolean toUIsSingleByteMode; + short toUState; + byte toUQuoteWindow, toUDynamicWindow; + short toUByteOne; + short toUPadding[]; + + /* state machine state - fromUnicode */ + boolean fromUIsSingleByteMode; + byte fromUDynamicWindow; + + /* + * windowUse[] keeps track of the use of the dynamic windows: + * At nextWindowUseIndex there is the least recently used window, + * and the following windows (in a wrapping manner) are more and more + * recently used. + * At nextWindowUseIndex-1 there is the most recently used window. 
+ */ + byte locale; + byte nextWindowUseIndex; + byte windowUse[] = new byte[8]; + + SCSUData(){ + initialize(); + } + + void initialize(){ + for(int i=0;i<8;i++){ + this.toUDynamicOffsets[i] = initialDynamicOffsets[i]; + } + this.toUIsSingleByteMode = true; + this.toUState = readCommand; + this.toUQuoteWindow = 0; + this.toUDynamicWindow = 0; + this.toUByteOne = 0; + this.fromUIsSingleByteMode = true; + this.fromUDynamicWindow = 0; + for(int i=0;i<8;i++){ + this.fromUDynamicOffsets[i] = initialDynamicOffsets[i]; + } + this.nextWindowUseIndex = 0; + switch(this.locale){ + /* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */ + /* case l_ja: + for(int i=0;i<8;i++){ + this.windowUse[i] = initialWindowUse_ja[i]; + } + break; */ + default: + for(int i=0;i<8;i++){ + this.windowUse[i] = initialWindowUse[i]; + } + + } + } + } + + static final byte initialWindowUse[]={ 7, 0, 3, 2, 4, 5, 6, 1 }; + /* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */ + // static final byte initialWindowUse_ja[]={ 3, 2, 4, 1, 0, 7, 5, 6 }; + + //enum { + //private static final int lGeneric = 0; + /* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. 
*/ + // private static final int l_ja = 1; + //}; + + private SCSUData extraInfo = null; + + public CharsetSCSU(String icuCanonicalName, String javaCanonicalName, String[] aliases){ + super(icuCanonicalName, javaCanonicalName, aliases); + maxBytesPerChar = 3; + minBytesPerChar = 1; + maxCharsPerByte = 1; + extraInfo = new SCSUData(); + } + + class CharsetDecoderSCSU extends CharsetDecoderICU { + /* label values for supporting behavior similar to goto in C */ + private static final int FastSingle=0; + private static final int SingleByteMode=1; + private static final int EndLoop=2; + + /* Mode Type */ + private static final int ByteMode = 0; + private static final int UnicodeMode =1; + + public CharsetDecoderSCSU(CharsetICU cs) { + super(cs); + implReset(); + } + + //private SCSUData data ; + protected void implReset(){ + super.implReset(); + toULength = 0; + extraInfo.initialize(); + } + + short b; + + //Get the state machine state + private boolean isSingleByteMode ; + private short state ; + private byte quoteWindow ; + private byte dynamicWindow ; + private short byteOne; + + + //sourceIndex=-1 if the current character began in the previous buffer + private int sourceIndex ; + private int nextSourceIndex ; + + CoderResult cr; + SCSUData data ; + private boolean LabelLoop;// used to break the while loop + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, + boolean flush){ + data = extraInfo; + + //Get the state machine state + isSingleByteMode = data.toUIsSingleByteMode; + state = data.toUState; + quoteWindow = data.toUQuoteWindow; + dynamicWindow = data.toUDynamicWindow; + byteOne = data.toUByteOne; + + LabelLoop = true; + + //sourceIndex=-1 if the current character began in the previous buffer + sourceIndex = data.toUState == readCommand ? 
0: -1 ; + nextSourceIndex = 0; + + cr = CoderResult.UNDERFLOW; + int labelType = 0; + while(LabelLoop){ + if(isSingleByteMode){ + switch(labelType){ + case FastSingle: + /*fast path for single-byte mode*/ + labelType = fastSingle(source, target, offsets, ByteMode); + break; + case SingleByteMode: + /* normal state machine for single-byte mode, minus handling for what fastSingleCovers */ + labelType = singleByteMode(source, target, offsets, ByteMode); + break; + case EndLoop: + endLoop(source, target, offsets); + break; + } + }else{ + switch(labelType){ + case FastSingle: + /*fast path for single-byte mode*/ + labelType = fastSingle(source, target, offsets, UnicodeMode); + break; + case SingleByteMode: + /* normal state machine for single-byte mode, minus handling for what fastSingleCovers */ + labelType = singleByteMode(source, target, offsets, UnicodeMode); + break; + case EndLoop: + endLoop(source, target, offsets); + break; + } + //LabelLoop = false; + } + } + return cr; + } + + private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){ + int label = 0; + if(modeType==ByteMode){ + + if(state==readCommand){ + while(source.hasRemaining() && target.hasRemaining() && (b=(short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK)) >= 0x20){ + source.position(source.position()+1); + ++nextSourceIndex; + if(b <= 0x7f){ + /*Write US graphic character or DEL*/ + target.put((char)b); + if(offsets != null){ + offsets.put(sourceIndex); + } + }else{ + /*Write from dynamic window*/ + int c = data.toUDynamicOffsets[dynamicWindow] + (b&0x7f); + if(c <= 0xffff){ + target.put((char)c); + if(offsets != null){ + offsets.put(sourceIndex); + } + }else{ + /*Output surrogate pair */ + target.put((char)(0xd7c0 + (c>>10))); + if(target.hasRemaining()){ + target.put((char)(0xdc00 | (c&0x3ff))); + if(offsets != null){ + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + }else{ + /* target overflow */ + if(offsets != null){ + 
offsets.put(sourceIndex); + } + charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff)); + charErrorBufferLength = 1; + label = EndLoop; + cr = CoderResult.OVERFLOW; + return label; + } + } + } + sourceIndex = nextSourceIndex; + } + // label = SingleByteMode; + } + }else if(modeType==UnicodeMode){ + /* fast path for unicode mode */ + if(state == readCommand){ + while((source.position()+1)(Urs-UC0)){ + target.put((char)((b<<8)|(source.get(source.position()+1)&UConverterConstants.UNSIGNED_BYTE_MASK))); + if(offsets != null){ + offsets.put(sourceIndex); + } + sourceIndex = nextSourceIndex; + nextSourceIndex+=2; + source.position(source.position()+2); + } + } + } + label = SingleByteMode; + return label; + } + + private int singleByteMode(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){ + int label = SingleByteMode; + if(modeType == ByteMode){ + while(source.hasRemaining()){ + if(!target.hasRemaining()){ + cr = CoderResult.OVERFLOW; + LabelLoop = false; + return label; + } + b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + ++nextSourceIndex; + switch(state){ + case readCommand: + /*redundant conditions are commented out */ + if(((1L<>10))); + if(target.hasRemaining()){ + target.put((char)(0xdc00 | (c&0x3ff))); + if(offsets != null){ + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + }else { + /* target overflow */ + if(offsets != null){ + offsets.put(sourceIndex); + } + charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff)); + charErrorBufferLength = 1; + label = EndLoop; + cr = CoderResult.OVERFLOW; + LabelLoop = false; + return label; + } + } + } + sourceIndex = nextSourceIndex; + state = readCommand; + label = FastSingle; + return label; + case definePairOne: + dynamicWindow = (byte)((b>>5)&7); + byteOne = (byte)(b&0x1f); + toUBytesArray[1] = (byte)b; + toULength = 2; + state = definePairTwo; + break; + case definePairTwo: + data.toUDynamicOffsets[dynamicWindow] = 0x10000 + (byteOne<<15L | b<<7L); + sourceIndex = 
nextSourceIndex; + state = readCommand; + label = FastSingle; + return label; + case defineOne: + if(b==0){ + /*callback (illegal)*/ + toUBytesArray[1] = (byte)b; + toULength =2; + label = EndLoop; + return label; + }else if(b=fixedThreshold){ + data.toUDynamicOffsets[dynamicWindow] = fixedOffsets[b-fixedThreshold]; + }else{ + /*callback (illegal)*/ + toUBytesArray[1] = (byte)b; + toULength =2; + label = EndLoop; + return label; + } + sourceIndex = nextSourceIndex; + state = readCommand; + label = FastSingle; + return label; + } + } + + }else if(modeType==UnicodeMode){ + while(source.hasRemaining()){ + if(!target.hasRemaining()){ + cr = CoderResult.OVERFLOW; + LabelLoop = false; + return label; + } + b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); + ++nextSourceIndex; + switch(state){ + case readCommand: + if((short)((b -UC0)&UConverterConstants.UNSIGNED_BYTE_MASK)>(Urs - UC0)){ + byteOne = b; + toUBytesArray[0] = (byte)b; + toULength = 1; + state = quotePairTwo; + }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UC7){ + dynamicWindow = (byte)(b - UC0); + sourceIndex = nextSourceIndex; + isSingleByteMode = true; + label = FastSingle; + return label; + }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UD7){ + dynamicWindow = (byte)(b - UD0); + isSingleByteMode = true; + toUBytesArray[0] = (byte)b; + toULength = 1; + state = defineOne; + label = SingleByteMode; + return label; + }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UDX){ + isSingleByteMode = true; + toUBytesArray[0] = (byte)b; + toULength = 1; + state = definePairOne; + label = SingleByteMode; + return label; + }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UQU){ + toUBytesArray[0] = (byte)b; + toULength = 1; + state = quotePairOne; + }else { + /* callback (illegal)*/ + cr = CoderResult.malformedForLength(1); + toUBytesArray[0] = (byte)b; + toULength = 1; + label = EndLoop; + return label; + } + break; + case quotePairOne: + byteOne = b; + toUBytesArray[1] = 
(byte)b; + toULength = 2; + state = quotePairTwo; + break; + case quotePairTwo: + target.put((char)((byteOne<<8) | b)); + if(offsets != null){ + offsets.put(sourceIndex); + } + sourceIndex = nextSourceIndex; + state = readCommand; + label = FastSingle; + return label; + } + } + } + label = EndLoop; + return label; + } + + private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){ + if(cr==CoderResult.OVERFLOW){ + state = readCommand; + }else if(state == readCommand){ + toULength = 0; + } + data.toUIsSingleByteMode = isSingleByteMode; + data.toUState = state; + data.toUQuoteWindow = quoteWindow; + data.toUDynamicWindow = dynamicWindow; + data.toUByteOne = byteOne; + LabelLoop = false; + } + } + + class CharsetEncoderSCSU extends CharsetEncoderICU{ + public CharsetEncoderSCSU(CharsetICU cs) { + super(cs, fromUSubstitution); + implReset(); + } + + //private SCSUData data; + protected void implReset() { + super.implReset(); + extraInfo.initialize(); + } + + /* label values for supporting behavior similar to goto in C */ + private static final int Loop=0; + private static final int GetTrailUnicode=1; + private static final int OutputBytes=2; + private static final int EndLoop =3; + + private int delta; + private int length; + + ///variables of compression heuristics + private int offset; + private char lead, trail; + private int code; + private byte window; + + //Get the state machine state + private boolean isSingleByteMode; + private byte dynamicWindow ; + private int currentOffset; + int c; + + SCSUData data ; + + //sourceIndex=-1 if the current character began in the previous buffer + private int sourceIndex ; + private int nextSourceIndex; + private int targetCapacity; + + private boolean LabelLoop;//used to break the while loop + private boolean AfterGetTrail;// its value is set to true in order to ignore the code before getTrailSingle: + private boolean AfterGetTrailUnicode;// is value is set to true in order to ignore the code before 
getTrailUnicode: + + CoderResult cr; + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + data = extraInfo; + cr = CoderResult.UNDERFLOW; + + //Get the state machine state + isSingleByteMode = data.fromUIsSingleByteMode; + dynamicWindow = data.fromUDynamicWindow; + currentOffset = data.fromUDynamicOffsets[dynamicWindow]; + c = fromUChar32; + + sourceIndex = c== 0 ? 0: -1 ; + nextSourceIndex = 0; + + + targetCapacity = target.limit()-target.position(); + + //sourceIndex=-1 if the current character began in the previous buffer + sourceIndex = c== 0 ? 0: -1 ; + nextSourceIndex = 0; + + int labelType = Loop; // set to Loop so that the code starts from loop: + LabelLoop = true; + AfterGetTrail = false; + AfterGetTrailUnicode = false; + + while(LabelLoop){ + switch(labelType){ + case Loop: + labelType = loop(source, target, offsets); + break; + case GetTrailUnicode: + labelType = getTrailUnicode(source, target, offsets); + break; + case OutputBytes: + labelType = outputBytes(source, target, offsets); + break; + case EndLoop: + endLoop(source, target, offsets); + break; + } + } + return cr; + } + + private byte getWindow(int[] offsets){ + int i; + for (i=0;i<8;i++){ + if(((c-offsets[i]) & UConverterConstants.UNSIGNED_INT_MASK) <= 0x7f){ + return (byte)i; + } + } + return -1; + } + + private boolean isInOffsetWindowOrDirect(int offsetValue, int a){ + return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f & + ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) || + ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20 + || ((1L<<(a & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0))); + } + + private byte getNextDynamicWindow(){ + byte windowValue = data.windowUse[data.nextWindowUseIndex]; + if(++data.nextWindowUseIndex==8){ + data.nextWindowUseIndex=0; + } + 
return windowValue; + } + + private void useDynamicWindow(byte windowValue){ + /*first find the index of the window*/ + int i,j; + i = data.nextWindowUseIndex; + do{ + if(--i<0){ + i=7; + } + }while(data.windowUse[i]!=windowValue); + + /*now copy each window[i+1] to [i]*/ + j= i+1; + if(j==8){ + j=0; + } + while(j!=data.nextWindowUseIndex){ + data.windowUse[i] = data.windowUse[j]; + i=j; + if(++j==8){ + j=0; + } + } + + /*finally, set the window into the most recently used index*/ + data.windowUse[i]= windowValue; + } + + + private int getDynamicOffset(){ + int i; + for(i=0;i<7;++i){ + if(((c-fixedOffsets[i])&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){ + offset = fixedOffsets[i]; + return 0xf9+i; + } + } + if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x80){ + /*No dynamic window for US-ASCII*/ + return -1; + }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) || + ((c-0x1d000)&UConverterConstants.UNSIGNED_INT_MASK)<=(0x1ffff-0x1d000)){ + /*This character is in the code range for a "small", i.e, reasonably windowable, script*/ + offset = c&0x7fffff80; + return (c>>7); + }else if(0xe000<=(c&UConverterConstants.UNSIGNED_INT_MASK) && (c&UConverterConstants.UNSIGNED_INT_MASK)!=0xfeff && (c&UConverterConstants.UNSIGNED_INT_MASK) < 0xfff0){ + /*for these characters we need to take the gapOffset into account*/ + offset=(c)&0x7fffff80; + return ((c-gapOffset)>>7); + }else{ + return -1; + } + } + + private int loop(CharBuffer source, ByteBuffer target, IntBuffer offsets){ + int label = 0; + if(isSingleByteMode){ + if(c!=0 && targetCapacity>0 && !AfterGetTrail){ + label = getTrail(source, target, offsets); + return label; + } + /*state machine for single byte mode*/ + while(AfterGetTrail || source.hasRemaining()){ + if(targetCapacity<=0 && !AfterGetTrail){ + /*target is full*/ + cr = CoderResult.OVERFLOW; + label = EndLoop; + return label; + } + if(!AfterGetTrail){ + c = source.get(); + 
++nextSourceIndex; + + } + if(((c -0x20)&UConverterConstants.UNSIGNED_INT_MASK)<=0x5f && !AfterGetTrail){ + /*pass US-ASCII graphic character through*/ + target.put((byte)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + --targetCapacity; + }else if((c & UConverterConstants.UNSIGNED_INT_MASK)<0x20 && !AfterGetTrail){ + if(((1L<<(c & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0){ + /*CR/LF/TAB/NUL*/ + target.put((byte)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + --targetCapacity; + } else { + /*quote c0 control character*/ + c|=SQ0<<8; + length = 2; + label = OutputBytes; + return label; + } + } else if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && !AfterGetTrail){ + /*use the current dynamic window*/ + target.put((byte)(delta|0x80)); + if(offsets!=null){ + offsets.put(sourceIndex); + } + --targetCapacity; + } else if(AfterGetTrail || UTF16.isSurrogate((char)c)){ + if(!AfterGetTrail){ + if(UTF16.isLeadSurrogate((char)c)){ + label = getTrail(source, target, offsets); + if(label==EndLoop){ + return label; + } + } else { + /*this is unmatched lead code unit (2nd Surrogate)*/ + /*callback(illegal)*/ + cr = CoderResult.malformedForLength(1); + label = EndLoop; + return label; + } + } + + + if(AfterGetTrail){ + AfterGetTrail = false; + } + + /*Compress supplementary character U+10000...U+10ffff */ + if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){ + /*use the current dynamic window*/ + target.put((byte)(delta|0x80)); + if(offsets!=null){ + offsets.put(sourceIndex); + } + --targetCapacity; + } else if((window=getWindow(data.fromUDynamicOffsets))>=0){ + /*there is a dynamic window that contains this character, change to it*/ + dynamicWindow = window; + currentOffset = data.fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(dynamicWindow); + c = ((SC0+dynamicWindow)<<8 | (c-currentOffset)|0x80); + length = 2; + label = OutputBytes; + return label; + } else 
if((code=getDynamicOffset())>=0){ + /*might check if there are come character in this window to come */ + /*define an extended window with this character*/ + code-=0x200; + dynamicWindow=getNextDynamicWindow(); + currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(dynamicWindow); + c = ((SDX<<24) | (dynamicWindow<<21)| + (code<<8)| (c- currentOffset) |0x80); + // c = (((SDX)<<25) | (dynamicWindow<<21)| + // (code<<8)| (c- currentOffset) |0x80 ); + length = 4; + label = OutputBytes; + return label; + } else { + /*change to unicode mode and output this (lead, trail) pair*/ + isSingleByteMode = false; + target.put((byte)SCU); + if(offsets!=null){ + offsets.put(sourceIndex); + } + --targetCapacity; + c = (lead<<16)|trail; + length = 4; + label = OutputBytes; + return label; + } + } else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0xa0){ + /*quote C1 control character*/ + c = (c&0x7f) | (SQ0+1)<<8; /*SQ0+1 == SQ1*/ + length = 2; + label = OutputBytes; + return label; + } else if((c&UConverterConstants.UNSIGNED_INT_MASK)==0xfeff || (c&UConverterConstants.UNSIGNED_INT_MASK)>= 0xfff0){ + /*quote signature character = byte order mark and specials*/ + c |= SQU<<16; + length = 3; + label = OutputBytes; + return label; + } else { + /*compress all other BMP characters*/ + if((window=getWindow(data.fromUDynamicOffsets))>=0){ + /*there is a window defined that contains this character - switch to it or quote from it*/ + if(source.position()>=source.limit() || isInOffsetWindowOrDirect(data.fromUDynamicOffsets[window], source.get(source.position()))){ + /*change to dynamic window*/ + dynamicWindow = window; + currentOffset = data.fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(dynamicWindow); + c = ((SC0+window)<<8) | (c- currentOffset) | 0x80; + length = 2; + label = OutputBytes; + return label; + } else { + /*quote from dynamic window*/ + c = ((SQ0+window)<<8) | (c - data.fromUDynamicOffsets[window]) | + 0x80; + length = 2; + label = 
OutputBytes; + return label; + } + } else if((window = getWindow(staticOffsets))>=0){ + /*quote from static window*/ + c = ((SQ0+window)<<8) | (c - staticOffsets[window]); + length = 2; + label = OutputBytes; + return label; + }else if((code=getDynamicOffset())>=0){ + /*define a dynamic window with this character*/ + dynamicWindow = getNextDynamicWindow(); + currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(dynamicWindow); + c = ((SD0+dynamicWindow)<<16) | (code<<8)| + (c - currentOffset) | 0x80; + length = 3; + label = OutputBytes; + return label; + } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() || + ((int)((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))< (0xd800 - 0x3400))){ + + /* + * this character is not compressible (a BMP ideograph of similar) + * switch to Unicode mode if this is the last character in the block + * or there is at least one more ideograph following immediately + */ + isSingleByteMode = false; + c|=SCU<<16; + length =3; + label = OutputBytes; + return label; + } else { + /*quote Unicode*/ + c|=SQU<<16; + length = 3; + label = OutputBytes; + return label; + } + } + /*normal end of conversion : prepare for new character */ + c = 0; + sourceIndex = nextSourceIndex; + } + } else { + if(c!=0 && targetCapacity>0 && !AfterGetTrailUnicode){ + label = GetTrailUnicode; + return label; + } + + /*state machine for Unicode*/ + /*unicodeByteMode*/ + while(AfterGetTrailUnicode || source.hasRemaining()){ + if(targetCapacity<=0 && !AfterGetTrailUnicode){ + /*target is full*/ + cr = CoderResult.OVERFLOW; + LabelLoop = false; + break; + } + if(!AfterGetTrailUnicode){ + c = source.get(); + ++nextSourceIndex; + } + + if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && !AfterGetTrailUnicode){ + /*not compressible, write character directly */ + if(targetCapacity>=2){ + target.put((byte)(c>>8)); + 
target.put((byte)c); + if(offsets!=null){ + offsets.put(sourceIndex); + offsets.put(sourceIndex); + } + targetCapacity-=2; + } else { + length =2; + label = OutputBytes; + return label; + } + } else if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300*/&& !AfterGetTrailUnicode){ + /*compress BMP character if the following one is not an uncompressible ideograph*/ + if(!(source.hasRemaining() && (((source.get(source.position())-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400))){ + if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26 + || (((c-0x41)&UConverterConstants.UNSIGNED_INT_MASK))<26)){ + /*ASCII digit or letter*/ + isSingleByteMode = true; + c |=((UC0+dynamicWindow)<<8)|c; + length = 2; + label = OutputBytes; + return label; + } else if((window=getWindow(data.fromUDynamicOffsets))>=0){ + /*there is a dynamic window that contains this character, change to it*/ + isSingleByteMode = true; + dynamicWindow = window; + currentOffset = data.fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(dynamicWindow); + c = ((UC0+dynamicWindow)<<8) | (c- currentOffset) | 0x80; + length = 2; + label = OutputBytes; + return label; + } else if((code=getDynamicOffset())>=0){ + /*define a dynamic window with this character*/ + isSingleByteMode = true; + dynamicWindow = getNextDynamicWindow(); + currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(dynamicWindow); + c = ((UD0+dynamicWindow)<<16) | (code<<8) + |(c - currentOffset) | 0x80; + length = 3; + label = OutputBytes; + return label; + } + } + + /*don't know how to compress these character, just write it directly*/ + length = 2; + label = OutputBytes; + return label; + } else if(c<0xe000 && !AfterGetTrailUnicode){ + label = GetTrailUnicode; + return label; + } else if (!AfterGetTrailUnicode){ + /*quote to avoid SCSU tags*/ + c|=UQU<<16; + length = 3; + label = OutputBytes; 
+ return label; + } + + if(AfterGetTrailUnicode){ + AfterGetTrailUnicode = false; + } + /*normal end of conversion, prepare for a new character*/ + c = 0; + sourceIndex = nextSourceIndex; + } + } + label = EndLoop; + return label; + } + + private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){ + lead = (char)c; + int label = Loop; + if(source.hasRemaining()){ + /*test the following code unit*/ + trail = source.get(source.position()); + if(UTF16.isTrailSurrogate(trail)){ + source.position(source.position()+1); + ++nextSourceIndex; + c = UCharacter.getCodePoint((char)c, trail); + label = Loop; + } else { + /*this is unmatched lead code unit (1st Surrogate)*/ + /*callback(illegal)*/ + cr = CoderResult.malformedForLength(1); + label = EndLoop; + } + }else { + /*no more input*/ + label = EndLoop; + } + AfterGetTrail = true; + return label; + } + + private int getTrailUnicode(CharBuffer source, ByteBuffer target, IntBuffer offsets){ + int label = EndLoop; + AfterGetTrailUnicode = true; + /*c is surrogate*/ + if(UTF16.isLeadSurrogate((char)c)){ + // getTrailUnicode: + lead = (char)c; + if(source.hasRemaining()){ + /*test the following code unit*/ + trail = source.get(source.position()); + if(UTF16.isTrailSurrogate(trail)){ + source.get(); + ++nextSourceIndex; + c = UCharacter.getCodePoint((char)c, trail); + /*convert this surrogate code point*/ + /*exit this condition tree*/ + } else { + /*this is unmatched lead code unit(1st surrogate)*/ + /*callback(illegal)*/ + cr = CoderResult.malformedForLength(1); + label = EndLoop; + return label; + } + } else { + /*no more input*/ + label = EndLoop; + return label; + } + } else { + /*this is an unmatched trail code point (2nd surrogate)*/ + /*callback (illegal)*/ + cr = CoderResult.malformedForLength(1); + label = EndLoop; + return label; + } + + /*compress supplementary character*/ + if((window=getWindow(data.fromUDynamicOffsets))>=0 && + !(source.hasRemaining() && 
((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) < + (0xd800 - 0x3400))){ + /* + * this is the dynamic window that contains this character and the following + * character is not uncompressible, + * change to the window + */ + isSingleByteMode = true; + dynamicWindow = window; + currentOffset = data.fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(dynamicWindow); + c = ((UC0+dynamicWindow)<<8 | (c-currentOffset) | 0x80); + length = 2; + label = OutputBytes; + return label; + } else if(source.hasRemaining() && lead == source.get(source.position()) && (code=getDynamicOffset())>=0){ + /*two supplementary characters in (probably) the same window - define an extended one*/ + isSingleByteMode = true; + dynamicWindow = getNextDynamicWindow(); + currentOffset = data.fromUDynamicOffsets[dynamicWindow] = offset; + useDynamicWindow(dynamicWindow); + c = (UDX<<24) | (dynamicWindow<<21) |(code<<8) |(c - currentOffset) | 0x80; + length = 4; + label = OutputBytes; + return label; + } else { + /*don't know how to compress this character, just write it directly*/ + c = (lead<<16)|trail; + length = 4; + label = OutputBytes; + return label; + } + + } + + private void endLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){ + /*set the converter state back to UConverter*/ + data.fromUIsSingleByteMode = isSingleByteMode; + data.fromUDynamicWindow = dynamicWindow; + fromUChar32 = c; + LabelLoop = false; + } + + @SuppressWarnings("fallthrough") + private int outputBytes(CharBuffer source, ByteBuffer target, IntBuffer offsets){ + int label; + //int targetCapacity = target.limit()-target.position(); + /*write the output character byte from c and length*/ + /*from the first if in the loop we know that targetCapacity>0*/ + if(length<=targetCapacity){ + switch(length){ + /*each branch falls through the next one*/ + case 4: + target.put((byte)(c>>24)); + if(offsets!=null){ + offsets.put(sourceIndex); + } + case 3: + target.put((byte)(c>>16)); + 
if(offsets!=null){ + offsets.put(sourceIndex); + } + case 2: + target.put((byte)(c>>8)); + if(offsets!=null){ + offsets.put(sourceIndex); + } + case 1: + target.put((byte)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + default: + /*will never occur*/ + break; + } + targetCapacity-=length; + + /*normal end of conversion: prepare for a new character*/ + c = 0; + sourceIndex = nextSourceIndex; + label = Loop; + return label; + } else { + ByteBuffer p = ByteBuffer.wrap(errorBuffer); + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target + */ + /* we know that 0<=targetCapacity>24)); + case 3: + p.put((byte)(c>>16)); + case 2: + p.put((byte)(c>>8)); + case 1: + p.put((byte)c); + default: + /*will never occur*/ + break; + } + errorBufferLength = length; + + /*now output what fits into the regular target*/ + c>>=8*length; //length was reduced by targetCapacity + switch(targetCapacity){ + /*each branch falls through the next one*/ + case 3: + target.put((byte)(c>>16)); + if(offsets!=null){ + offsets.put(sourceIndex); + } + case 2: + target.put((byte)(c>>8)); + if(offsets!=null){ + offsets.put(sourceIndex); + } + case 1: + target.put((byte)c); + if(offsets!=null){ + offsets.put(sourceIndex); + } + default: + break; + } + + /*target overflow*/ + targetCapacity = 0; + cr = CoderResult.OVERFLOW; + c = 0; + label = EndLoop; + return label; + } + } + + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderSCSU(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderSCSU(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + CharsetICU.getCompleteUnicodeSet(setFillIn); + } + +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java new file mode 100644 index 00000000000..8eeaae015c1 --- /dev/null 
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java @@ -0,0 +1,215 @@ +/* + ****************************************************************************** + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +/* + * This is a port of the C++ class UConverterSelector. + * + * Methods related to serialization are not ported in this version. In addition, + * the selectForUTF8 method is not going to be ported, as UTF8 is seldom used + * in Java. + * + * @author Shaopeng Jia + */ + +package com.ibm.icu.charset; + +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.List; +import java.util.Vector; + +import com.ibm.icu.impl.IntTrie; +import com.ibm.icu.impl.PropsVectors; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * Charset Selector + * + * A charset selector is built with a list of charset names and given an input + * CharSequence returns the list of names the corresponding charsets which can + * convert the CharSequence. 
+ * + * @stable ICU 4.2 + */ + public final class CharsetSelector { + private IntTrie trie; + private int[] pv; // table of bits + private String[] encodings; // encodings users ask to use + + // Sets, for each encoding i, bit (i % 32) of column (i / 32) over every range of the set filled in by getUnicodeSet(), forces all bits on for the excluded code points, then compacts pvec into trie and pv. + private void generateSelectorData(PropsVectors pvec, + UnicodeSet excludedCodePoints, int mappingTypes) { + int columns = (encodings.length + 31) / 32; + + // set errorValue to all-ones + for (int col = 0; col < columns; ++col) { + pvec.setValue(PropsVectors.ERROR_VALUE_CP, + PropsVectors.ERROR_VALUE_CP, col, ~0, ~0); + } + + for (int i = 0; i < encodings.length; ++i) { + Charset testCharset = CharsetICU.forNameICU(encodings[i]); + UnicodeSet unicodePointSet = new UnicodeSet(); // empty set + ((CharsetICU) testCharset).getUnicodeSet(unicodePointSet, + mappingTypes); + int column = i / 32; + int mask = 1 << (i % 32); + // now iterate over intervals on set i + int itemCount = unicodePointSet.getRangeCount(); + for (int j = 0; j < itemCount; ++j) { + int startChar = unicodePointSet.getRangeStart(j); + int endChar = unicodePointSet.getRangeEnd(j); + pvec.setValue(startChar, endChar, column, ~0, mask); + } + } + + // handle excluded code points + // Simply set their values to all 1's in the pvec + if (!excludedCodePoints.isEmpty()) { + int itemCount = excludedCodePoints.getRangeCount(); + for (int j = 0; j < itemCount; ++j) { + int startChar = excludedCodePoints.getRangeStart(j); + int endChar = excludedCodePoints.getRangeEnd(j); + for (int col = 0; col < columns; col++) { + pvec.setValue(startChar, endChar, col, ~0, ~0); + } + } + } + + trie = pvec.compactToTrieWithRowIndexes(); + pv = pvec.getCompactedArray(); + } + + // internal function to intersect two sets of masks + // returns whether the mask has reduced to all zeros. 
The + // second set of mask consists of len elements in pv starting from + // pvIndex + private boolean intersectMasks(int[] dest, int pvIndex, int len) { + int oredDest = 0; + for (int i = 0; i < len; ++i) { + oredDest |= (dest[i] &= pv[pvIndex + i]); + } + return oredDest == 0; + } + + // internal function: translates a bit mask (bit i % 32 of mask[i / 32] + // stands for encodings[i]) into the list of selected encoding names, + // keeping the order of the encodings array + private List<String> selectForMask(int[] mask) { + Vector<String> result = new Vector<String>(); + int columns = (encodings.length + 31) / 32; + + // nothing to collect when no bit survived the intersection + if (countOnes(mask, columns) > 0) { + int k = 0; + for (int j = 0; j < columns; j++) { + int v = mask[j]; + // k stops at encodings.length, so padding bits of the last + // column are never reported + for (int i = 0; i < 32 && k < encodings.length; i++, k++) { + if ((v & 1) != 0) { + result.addElement(encodings[k]); + } + v >>= 1; + } + } + } + + // an empty (never null) list is returned when nothing matched + return result; + } + + // internal function to count how many 1's there are in the first len + // elements of a mask + private int countOnes(int[] mask, int len) { + int totalOnes = 0; + for (int i = 0; i < len; ++i) { + totalOnes += Integer.bitCount(mask[i]); + } + return totalOnes; + } + + /** + * Construct a CharsetSelector from a list of charset names. + * + * @param charsetList + * a list of charset names in the form of strings. If charsetList + * is empty, a selector for all available charset is constructed. + * @param excludedCodePoints + * a set of code points to be excluded from consideration. + * Excluded code points appearing in the input CharSequence do + * not change the selection result. It could be empty when no + * code point should be excluded. + * @param mappingTypes + * an int which determines whether to consider only roundtrip + * mappings or also fallbacks, e.g. CharsetICU.ROUNDTRIP_SET. 
See + * CharsetICU.java for the constants that are currently + * supported. + * @throws IllegalArgumentException + * if mappingTypes is not one of the supported constants. + * @throws IllegalCharsetNameException + * If the given charset name is illegal. + * @throws UnsupportedCharsetException + * If no support for the named charset is available in this + * instance of the Java virtual machine. + * @stable ICU 4.2 + */ + public CharsetSelector(List<String> charsetList, UnicodeSet excludedCodePoints, + int mappingTypes) { + if (mappingTypes != CharsetICU.ROUNDTRIP_AND_FALLBACK_SET + && mappingTypes != CharsetICU.ROUNDTRIP_SET) { + throw new IllegalArgumentException("Unsupported mappingTypes"); + } + + // an empty list means: select among all available charsets + int encodingCount = charsetList.size(); + if (encodingCount > 0) { + encodings = charsetList.toArray(new String[0]); + } else { + encodings = CharsetProviderICU.getAvailableNames(); + encodingCount = encodings.length; + } + + // one bit per encoding, 32 encodings per int column + PropsVectors pvec = new PropsVectors((encodingCount + 31) / 32); + generateSelectorData(pvec, excludedCodePoints, mappingTypes); + } + + /** + * Select charsets that can map all characters in a CharSequence, ignoring + * the excluded code points. + * + * @param unicodeText + * a CharSequence. It could be empty. + * @return a list that contains charset names in the form of strings. The + * returned encoding names and their order will be the same as + * supplied when building the selector. + * + * @stable ICU 4.2 + */ + public List<String> selectForString(CharSequence unicodeText) { + int columns = (encodings.length + 31) / 32; + int[] mask = new int[columns]; + for (int i = 0; i < columns; i++) { + mask[i] = - 1; // set each bit to 1 + // Note: All integers are signed in Java, assigning + // 2 ^ 32 -1 to mask is wrong! 
+ } + int index = 0; + while (index < unicodeText.length()) { + int c = UTF16.charAt(unicodeText, index); + int pvIndex = trie.getCodePointValue(c); + index += UTF16.getCharCount(c); + if (intersectMasks(mask, pvIndex, columns)) { + break; + } + } + return selectForMask(mask); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java new file mode 100644 index 00000000000..556cda16222 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java @@ -0,0 +1,288 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author Niti Hantaweepant + */ +class CharsetUTF16 extends CharsetICU { + + private static final int SIGNATURE_LENGTH = 2; + private static final byte[] fromUSubstitution_BE = { (byte) 0xff, (byte) 0xfd }; + private static final byte[] fromUSubstitution_LE = { (byte) 0xfd, (byte) 0xff }; + private static final byte[] BOM_BE = { (byte) 0xfe, (byte) 0xff }; + private static final byte[] BOM_LE = { (byte) 0xff, (byte) 0xfe }; + private static final int ENDIAN_XOR_BE = 0; + private static final int ENDIAN_XOR_LE = 1; + private static final int NEED_TO_WRITE_BOM = 1; + + private boolean isEndianSpecified; + private boolean isBigEndian; + private int endianXOR; + private byte[] bom; + private byte[] fromUSubstitution; + + public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] 
aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + + this.isEndianSpecified = (this instanceof CharsetUTF16BE || this instanceof CharsetUTF16LE); + this.isBigEndian = !(this instanceof CharsetUTF16LE); + + if (isBigEndian) { + this.bom = BOM_BE; + this.fromUSubstitution = fromUSubstitution_BE; + this.endianXOR = ENDIAN_XOR_BE; + } else { + this.bom = BOM_LE; + this.fromUSubstitution = fromUSubstitution_LE; + this.endianXOR = ENDIAN_XOR_LE; + } + + maxBytesPerChar = 2; + minBytesPerChar = 2; + maxCharsPerByte = 1; + } + + class CharsetDecoderUTF16 extends CharsetDecoderICU { + + private boolean isBOMReadYet; + private int actualEndianXOR; + private byte[] actualBOM; + + public CharsetDecoderUTF16(CharsetICU cs) { + super(cs); + } + + protected void implReset() { + super.implReset(); + isBOMReadYet = false; + actualBOM = null; + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + /* + * If we detect a BOM in this buffer, then we must add the BOM size to the offsets because the actual + * converter function will not see and count the BOM. offsetDelta will have the number of the BOM bytes that + * are in the current buffer. 
+ */ + if (!isBOMReadYet) { + while (true) { + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + + toUBytesArray[toULength++] = source.get(); + + if (toULength == 1) { + // on the first byte, we haven't decided whether or not it's bigEndian yet + if ((!isEndianSpecified || isBigEndian) + && toUBytesArray[toULength - 1] == BOM_BE[toULength - 1]) { + actualBOM = BOM_BE; + actualEndianXOR = ENDIAN_XOR_BE; + } else if ((!isEndianSpecified || !isBigEndian) + && toUBytesArray[toULength - 1] == BOM_LE[toULength - 1]) { + actualBOM = BOM_LE; + actualEndianXOR = ENDIAN_XOR_LE; + } else { + // we do not have a BOM (and we have toULength==1 bytes) + actualBOM = null; + actualEndianXOR = endianXOR; + break; + } + } else if (toUBytesArray[toULength - 1] != actualBOM[toULength - 1]) { + // we do not have a BOM (and we have toULength bytes) + actualBOM = null; + actualEndianXOR = endianXOR; + break; + } else if (toULength == SIGNATURE_LENGTH) { + // we found a BOM! at last! + // too bad we have to ignore it now (like it was unwanted or something) + toULength = 0; + break; + } + } + + isBOMReadYet = true; + } + + // now that we no longer need to look for a BOM, let's do some work + + // if we have unfinished business (decodeTrail stored a lead surrogate + // in toUnicodeStatus when it ran out of input) + if (toUnicodeStatus != 0) { + CoderResult cr = decodeTrail(source, target, offsets, (char) toUnicodeStatus); + if (cr != null) + return cr; + } + + char char16; + + while (true) { + while (toULength < 2) { + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + toUBytesArray[toULength++] = source.get(); + } + + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + char16 = (char) (((toUBytesArray[0 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | ((toUBytesArray[1 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK))); + + if (!UTF16.isSurrogate(char16)) { + toULength = 0; + target.put(char16); + } else { + CoderResult cr = decodeTrail(source, target, offsets, char16); + if (cr != null) + return cr; + } + 
} + } + + private final CoderResult decodeTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets, char lead) { + if (!UTF16.isLeadSurrogate(lead)) { + // 2 bytes, lead malformed + toUnicodeStatus = 0; + return CoderResult.malformedForLength(2); + } + + while (toULength < 4) { + if (!source.hasRemaining()) { + // let this be unfinished business + toUnicodeStatus = lead; + return CoderResult.UNDERFLOW; + } + toUBytesArray[toULength++] = source.get(); + } + + char trail = (char) (((toUBytesArray[2 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | ((toUBytesArray[3 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK))); + + if (!UTF16.isTrailSurrogate(trail)) { + // pretend like we didnt read the last 2 bytes + toULength = 2; + source.position(source.position() - 2); + + // 2 bytes, lead malformed + toUnicodeStatus = 0; + return CoderResult.malformedForLength(2); + } + + toUnicodeStatus = 0; + toULength = 0; + + target.put(lead); + + if (target.hasRemaining()) { + target.put(trail); + return null; + } else { + /* Put in overflow buffer (not handled here) */ + charErrorBufferArray[0] = trail; + charErrorBufferLength = 1; + return CoderResult.OVERFLOW; + } + } + } + + class CharsetEncoderUTF16 extends CharsetEncoderICU { + private final byte[] temp = new byte[4]; + + public CharsetEncoderUTF16(CharsetICU cs) { + super(cs, fromUSubstitution); + fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM; + } + + protected void implReset() { + super.implReset(); + fromUnicodeStatus = isEndianSpecified ? 
0 : NEED_TO_WRITE_BOM; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult cr; + + /* write the BOM if necessary */ + if (fromUnicodeStatus == NEED_TO_WRITE_BOM) { + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + fromUnicodeStatus = 0; + cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1); + if (cr.isOverflow()) + return cr; + } + + if (fromUChar32 != 0) { + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + // a note: fromUChar32 will either be 0 or a lead surrogate + cr = encodeChar(source, target, offsets, (char) fromUChar32); + if (cr != null) + return cr; + } + + while (true) { + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + cr = encodeChar(source, target, offsets, source.get()); + if (cr != null) + return cr; + } + } + + private final CoderResult encodeChar(CharBuffer source, ByteBuffer target, IntBuffer offsets, char ch) { + int sourceIndex = source.position() - 1; + CoderResult cr; + + if (UTF16.isSurrogate(ch)) { + cr = handleSurrogates(source, ch); + if (cr != null) + return cr; + + char trail = UTF16.getTrailSurrogate(fromUChar32); + fromUChar32 = 0; + + // 4 bytes + temp[0 ^ endianXOR] = (byte) (ch >>> 8); + temp[1 ^ endianXOR] = (byte) (ch); + temp[2 ^ endianXOR] = (byte) (trail >>> 8); + temp[3 ^ endianXOR] = (byte) (trail); + cr = fromUWriteBytes(this, temp, 0, 4, target, offsets, sourceIndex); + } else { + // 2 bytes + temp[0 ^ endianXOR] = (byte) (ch >>> 8); + temp[1 ^ endianXOR] = (byte) (ch); + cr = fromUWriteBytes(this, temp, 0, 2, target, offsets, sourceIndex); + } + return (cr.isUnderflow() ? 
null : cr); + } + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderUTF16(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderUTF16(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + getNonSurrogateUnicodeSet(setFillIn); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java new file mode 100644 index 00000000000..b1bb374d8cc --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java @@ -0,0 +1,17 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +/** + * The purpose of this class is to set isBigEndian to true and isEndianSpecified to true in the super class, and to + * allow the Charset framework to open the variant UTF-16 converter without extra setup work. + */ +class CharsetUTF16BE extends CharsetUTF16 { + public CharsetUTF16BE(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java new file mode 100644 index 00000000000..07607a0156d --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java @@ -0,0 +1,17 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +/** + * The purpose of this class is to set isBigEndian to false and isEndianSpecified to true in the super class, and to + * allow the Charset framework to open the variant UTF-16 converter without extra setup work. + */ +class CharsetUTF16LE extends CharsetUTF16 { + public CharsetUTF16LE(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java new file mode 100644 index 00000000000..d2a8a5f2898 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java @@ -0,0 +1,251 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author Niti Hantaweepant + */ +class CharsetUTF32 extends CharsetICU { + + private static final int SIGNATURE_LENGTH = 4; + private static final byte[] fromUSubstitution_BE = { (byte) 0, (byte) 0, (byte) 0xff, (byte) 0xfd }; + private static final byte[] fromUSubstitution_LE = { (byte) 0xfd, (byte) 0xff, (byte) 0, (byte) 0 }; + private static final byte[] BOM_BE = { 0, 0, (byte) 0xfe, (byte) 0xff }; + private static final byte[] BOM_LE = { (byte) 0xff, (byte) 0xfe, 0, 0 }; + private static final int ENDIAN_XOR_BE = 0; + private static final int ENDIAN_XOR_LE = 3; + private static final int NEED_TO_WRITE_BOM = 1; + + private boolean isEndianSpecified; + private boolean isBigEndian; + private int endianXOR; + private byte[] bom; + private byte[] fromUSubstitution; + + public CharsetUTF32(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + + this.isEndianSpecified = (this instanceof CharsetUTF32BE || this instanceof CharsetUTF32LE); + this.isBigEndian = !(this instanceof CharsetUTF32LE); + + if (isBigEndian) { + this.bom = BOM_BE; + this.fromUSubstitution = fromUSubstitution_BE; + this.endianXOR = ENDIAN_XOR_BE; + } else { + this.bom = BOM_LE; + this.fromUSubstitution = fromUSubstitution_LE; + this.endianXOR = ENDIAN_XOR_LE; + } + + maxBytesPerChar = 4; + minBytesPerChar = 4; + maxCharsPerByte = 1; + } + + class CharsetDecoderUTF32 extends CharsetDecoderICU { + + private boolean isBOMReadYet; + private int actualEndianXOR; + private byte[] actualBOM; + + public CharsetDecoderUTF32(CharsetICU 
cs) { + super(cs); + } + + protected void implReset() { + super.implReset(); + isBOMReadYet = false; + actualBOM = null; + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + /* + * If we detect a BOM in this buffer, then we must add the BOM size to the offsets because the actual + * converter function will not see and count the BOM. offsetDelta will have the number of the BOM bytes that + * are in the current buffer. + */ + if (!isBOMReadYet) { + while (true) { + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + + toUBytesArray[toULength++] = source.get(); + + if (toULength == 1) { + // on the first byte, we haven't decided whether or not it's bigEndian yet + if ((!isEndianSpecified || isBigEndian) + && toUBytesArray[toULength - 1] == BOM_BE[toULength - 1]) { + actualBOM = BOM_BE; + actualEndianXOR = ENDIAN_XOR_BE; + } else if ((!isEndianSpecified || !isBigEndian) + && toUBytesArray[toULength - 1] == BOM_LE[toULength - 1]) { + actualBOM = BOM_LE; + actualEndianXOR = ENDIAN_XOR_LE; + } else { + // we do not have a BOM (and we have toULength==1 bytes) + actualBOM = null; + actualEndianXOR = endianXOR; + break; + } + } else if (toUBytesArray[toULength - 1] != actualBOM[toULength - 1]) { + // we do not have a BOM (and we have toULength bytes) + actualBOM = null; + actualEndianXOR = endianXOR; + break; + } else if (toULength == SIGNATURE_LENGTH) { + // we found a BOM! at last! 
+ // too bad we have to ignore it now (like it was unwanted or something) + toULength = 0; + break; + } + } + + isBOMReadYet = true; + } + + // now that we no longer need to look for a BOM, let's do some work + int char32; + + while (true) { + while (toULength < 4) { + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + toUBytesArray[toULength++] = source.get(); + } + + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + char32 = 0; + for (int i = 0; i < 4; i++) + char32 = (char32 << 8) + | (toUBytesArray[i ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK); + + if (0 <= char32 && char32 <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(char32)) { + toULength = 0; + if (char32 <= UConverterConstants.MAXIMUM_UCS2) { + /* fits in 16 bits */ + target.put((char) char32); + } else { + /* write out the surrogates */ + target.put(UTF16.getLeadSurrogate(char32)); + char32 = UTF16.getTrailSurrogate(char32); + if (target.hasRemaining()) { + target.put((char) char32); + } else { + /* Put in overflow buffer (not handled here) */ + charErrorBufferArray[0] = (char) char32; + charErrorBufferLength = 1; + return CoderResult.OVERFLOW; + } + } + } else { + return CoderResult.malformedForLength(toULength); + } + } + } + } + + class CharsetEncoderUTF32 extends CharsetEncoderICU { + private final byte[] temp = new byte[4]; + + public CharsetEncoderUTF32(CharsetICU cs) { + super(cs, fromUSubstitution); + fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM; + } + + protected void implReset() { + super.implReset(); + fromUnicodeStatus = isEndianSpecified ? 
0 : NEED_TO_WRITE_BOM; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult cr; + + /* write the BOM if necessary */ + if (fromUnicodeStatus == NEED_TO_WRITE_BOM) { + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + fromUnicodeStatus = 0; + cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1); + if (cr.isOverflow()) + return cr; + } + + if (fromUChar32 != 0) { + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + // a note: fromUChar32 will either be 0 or a lead surrogate + cr = encodeChar(source, target, offsets, (char) fromUChar32); + if (cr != null) + return cr; + } + + while (true) { + if (!source.hasRemaining()) + return CoderResult.UNDERFLOW; + if (!target.hasRemaining()) + return CoderResult.OVERFLOW; + + cr = encodeChar(source, target, offsets, source.get()); + if (cr != null) + return cr; + } + } + + private final CoderResult encodeChar(CharBuffer source, ByteBuffer target, IntBuffer offsets, char ch) { + int sourceIndex = source.position() - 1; + CoderResult cr; + int char32; + + if (UTF16.isSurrogate(ch)) { + cr = handleSurrogates(source, ch); + if (cr != null) + return cr; + + char32 = fromUChar32; + fromUChar32 = 0; + } else { + char32 = ch; + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + // temp[0 ^ endianXOR] = (byte) (char32 >>> 24); // (always 0) + temp[1 ^ endianXOR] = (byte) (char32 >>> 16); // same as (byte)((char32 >>> 16) & 0x1f) + temp[2 ^ endianXOR] = (byte) (char32 >>> 8); + temp[3 ^ endianXOR] = (byte) (char32); + cr = fromUWriteBytes(this, temp, 0, 4, target, offsets, sourceIndex); + return (cr.isUnderflow() ? 
null : cr); + } + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderUTF32(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderUTF32(this); + } + + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + getNonSurrogateUnicodeSet(setFillIn); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java new file mode 100644 index 00000000000..177b1f7eeb6 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java @@ -0,0 +1,17 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.charset; + +/** + * The purpose of this class is to set isBigEndian to true and isEndianSpecified to true in the super class, and to + * allow the Charset framework to open the variant UTF-32 converter without extra setup work. + */ +class CharsetUTF32BE extends CharsetUTF32 { + public CharsetUTF32BE(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java new file mode 100644 index 00000000000..beb8303f22e --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java @@ -0,0 +1,17 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +/** + * The purpose of this class is to set isBigEndian to false and isEndianSpecified to true in the super class, and to + * allow the Charset framework to open the variant UTF-32 converter without extra setup work. + */ +class CharsetUTF32LE extends CharsetUTF32 { + public CharsetUTF32LE(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java new file mode 100644 index 00000000000..9447bdb8b03 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java @@ -0,0 +1,756 @@ +/* + ******************************************************************************* + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UnicodeSet; + +/** + * @author Michael Ow + * + */ +class CharsetUTF7 extends CharsetICU { + private final String IMAP_NAME="IMAP-mailbox-name"; + private boolean useIMAP; + protected byte[] fromUSubstitution=new byte[]{0x3F}; + + public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64) */ + minBytesPerChar=1; + maxCharsPerByte=1; + + useIMAP=false; + + if (icuCanonicalName.equals(IMAP_NAME)) { + useIMAP=true; + } + } + + //private static boolean inSetD(char c) { + // return ( + // (char)(c - 97) < 26 || (char)(c - 65) < 26 || /* letters */ + // (char)(c - 48) < 10 || /* digits */ + // (char)(c - 39) < 3 || /* ' () */ + // (char)(c - 44) < 4 || /* ,-./ */ + // (c==58) || (c==63) /* :? 
*/ + // ); + //} + + //private static boolean inSetO(char c) { + // return ( + // (char)(c - 33) < 6 || /* !"#$%& */ + // (char)(c - 59) < 4 || /* ;<=> */ + // (char)(c - 93) < 4 || /* ]^_` */ + // (char)(c - 123) < 3 || /* {|} */ + // (c==58) || (c==63) /* *@[ */ + // ); + //} + + private static boolean isCRLFTAB(char c) { + return ( + (c==13) || (c==10) || (c==9) + ); + } + + //private static boolean isCRLFSPTAB(char c) { + // return ( + // (c==32) || (c==13) || (c==10) || (c==9) + // ); + //} + + private static final byte PLUS=43; + private static final byte MINUS=45; + private static final byte BACKSLASH=92; + //private static final byte TILDE=126; + private static final byte AMPERSAND=0x26; + private static final byte COMMA=0x2c; + private static final byte SLASH=0x2f; + + // legal byte values: all US-ASCII graphic characters 0x20..0x7e + private static boolean isLegal(char c, boolean useIMAP) { + if (useIMAP) { + return ( + (0x20 <= c) && (c <= 0x7e) + ); + } else { + return ( + ((char)(c - 32) < 94 && (c != BACKSLASH)) || isCRLFTAB(c) + ); + } + } + + // directly encode all of printable ASCII 0x20..0x7e except '&' 0x26 + private static boolean inSetDIMAP(char c) { + return ( + (isLegal(c, true) && c != AMPERSAND) + ); + } + + private static byte TO_BASE64_IMAP(int n) { + return (n < 63 ? TO_BASE_64[n] : COMMA); + } + + private static byte FROM_BASE64_IMAP(char c) { + return (c==COMMA ? 63 : c==SLASH ? 
-1 : FROM_BASE_64[c]); + } + + /* encode directly sets D and O and CR LF SP TAB */ + private static final byte ENCODE_DIRECTLY_MAXIMUM[] = + { + /*0 1 2 3 4 5 6 7 8 9 a b c d e f*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 + }; + + /* encode directly set D and CR LF SP TAB but not set O */ + private static final byte ENCODE_DIRECTLY_RESTRICTED[] = + { + /*0 1 2 3 4 5 6 7 8 9 a b c d e f*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 + }; + + private static final byte TO_BASE_64[] = + { + /* A-Z */ + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + /* a-z */ + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, + /* 0-9 */ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* +/ */ + 43, 47 + }; + + private static final byte FROM_BASE_64[] = + { + /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ + -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + /* general punctuation with + and / and a special value (-2) for - */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63, + /* digits */ + 52, 53, 54, 55, 56, 57, 58, 59, 
60, 61, -1, -1, -1, -1, -1, -1, + /* A-Z */ + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, + /* a-z*/ + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 + }; + + class CharsetDecoderUTF7 extends CharsetDecoderICU { + public CharsetDecoderUTF7(CharsetICU cs) { + super(cs); + implReset(); + } + + protected void implReset() { + super.implReset(); + toUnicodeStatus=(toUnicodeStatus & 0xf0000000) | 0x1000000; + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + CoderResult cr=CoderResult.UNDERFLOW; + byte base64Value; + byte base64Counter; + byte inDirectMode; + char bits; + int byteIndex; + int sourceIndex, nextSourceIndex; + + int length; + + char b; + char c; + + int sourceArrayIndex=source.position(); + + //get the state of the machine state + { + int status=toUnicodeStatus; + inDirectMode=(byte)((status >> 24) & 1); + base64Counter=(byte)(status >> 16); + bits=(char)status; + } + byteIndex=toULength; + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + directMode: while (true) { + if (inDirectMode==1) { + /* + * In Direct Mode, most US-ASCII characters are encoded directly, i.e., + * with their US-ASCII byte values. + * Backslash and Tilde and most control characters are not alled in UTF-7. + * A plus sign starts Unicode (or "escape") Mode. + * An ampersand starts Unicode Mode for IMAP. + * + * In Direct Mode, only the sourceIndex is used. 
+ */ + byteIndex=0; + length=source.remaining(); + //targetCapacity=target.remaining(); + //Commented out because length of source may be larger than target when it comes to bytes + /*if (useIMAP && length > targetCapacity) { + length=targetCapacity; + }*/ + while (length > 0) { + b=(char)(source.get()); + sourceArrayIndex++; + if (!isLegal(b, useIMAP)) { + toUBytesArray[0]=(byte)b; + byteIndex=1; + cr=CoderResult.malformedForLength(sourceArrayIndex); + break; + } else if ((!useIMAP && b!=PLUS) || (useIMAP && b!=AMPERSAND)) { + // write directly encoded character + if (target.hasRemaining()) { // Check to make sure that there is room in target. + target.put(b); + if (offsets!= null) { + offsets.put(sourceIndex++); + } + } else { // Get out and set the CoderResult. + break; + } + } else { /* PLUS or (AMPERSAND in IMAP)*/ + /* switch to Unicode mode */ + nextSourceIndex=++sourceIndex; + inDirectMode=0; + byteIndex=0; + bits=0; + base64Counter=-1; + continue directMode; + } + --length; + }//end of while + if (source.hasRemaining() && target.position() >= target.limit()) { + /* target is full */ + cr=CoderResult.OVERFLOW; + } + break directMode; + } else { /* Unicode Mode*/ + /* + * In Unicode Mode, UTF-16BE is base64-encoded. + * The base64 sequence ends with any character that is not in the base64 alphabet. + * A terminating minus sign is consumed. + * + * In Unicode Mode, the sourceIndex has the index to the start of the current + * base64 bytes, while nextSourceIndex is precisely parallel to source, + * keeping the index to the following byte. 
+ */ + while(source.hasRemaining()) { + if (target.hasRemaining()) { + b=(char)source.get(); + sourceArrayIndex++; + toUBytesArray[byteIndex++]=(byte)b; + if ((!useIMAP && b>=126) || (useIMAP && b>0x7e)) { + /* illegal - test other illegal US-ASCII values by base64Value==-3 */ + inDirectMode=1; + cr=CoderResult.malformedForLength(sourceArrayIndex); + break directMode; + } else if (((base64Value=FROM_BASE_64[b])>=0 && !useIMAP) || ((base64Value=FROM_BASE64_IMAP(b))>=0) && useIMAP) { + /* collect base64 bytes */ + switch (base64Counter) { + case -1: /* -1 is immediately after the + */ + case 0: + bits=(char)base64Value; + base64Counter=1; + break; + case 1: + case 3: + case 4: + case 6: + bits=(char)((bits<<6) | base64Value); + ++base64Counter; + break; + case 2: + c=(char)((bits<<4) | (base64Value>>2)); + if (useIMAP && isLegal(c, useIMAP)) { + // illegal + inDirectMode=1; + cr=CoderResult.malformedForLength(sourceArrayIndex); + // goto endloop; + break directMode; + } + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + sourceIndex=nextSourceIndex - 1; + } + toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(char)(base64Value&3); + base64Counter=3; + break; + case 5: + c=(char)((bits<<2) | (base64Value>>4)); + if(useIMAP && isLegal(c, useIMAP)) { + // illegal + inDirectMode=1; + cr=CoderResult.malformedForLength(sourceArrayIndex); + // goto endloop; + break directMode; + } + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); + sourceIndex=nextSourceIndex - 1; + } + toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(char)(base64Value&15); + base64Counter=6; + break; + case 7: + c=(char)((bits<<6) | base64Value); + if (useIMAP && isLegal(c, useIMAP)) { + // illegal + inDirectMode=1; + cr=CoderResult.malformedForLength(sourceArrayIndex); + // goto endloop; + break directMode; + } + target.put(c); + if (offsets != null) { + offsets.put(sourceIndex); 
+ sourceIndex=nextSourceIndex; + } + byteIndex=0; + bits=0; + base64Counter=0; + break; + //default: + /* will never occur */ + //break; + }//end of switch + } else if (base64Value==-2) { + /* minus sign terminates the base64 sequence */ + inDirectMode=1; + if (base64Counter==-1) { + /* +- i.e. a minus immediately following a plus */ + target.put(useIMAP ? (char)AMPERSAND : (char)PLUS); + if (offsets != null) { + offsets.put(sourceIndex - 1); + } + } else { + /* absorb the minus and leave the Unicode Mode */ + if (bits!=0 || (useIMAP && base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) { + /*bits are illegally left over, a unicode character is incomplete */ + cr=CoderResult.malformedForLength(sourceArrayIndex); + break; + } + } + sourceIndex=nextSourceIndex; + continue directMode; + } else if (!useIMAP && base64Value==-1) { /* for any legal character except base64 and minus sign */ + /* leave the Unicode Mode */ + inDirectMode=1; + if (base64Counter==-1) { + /* illegal: + immediately followed by something other than base64 minus sign */ + /* include the plus sign in the reported sequence */ + --sourceIndex; + toUBytesArray[0]=PLUS; + toUBytesArray[1]=(byte)b; + byteIndex=2; + cr=CoderResult.malformedForLength(sourceArrayIndex); + break; + } else if (bits==0) { + /* un-read the character in case it is a plus sign */ + source.position(--sourceArrayIndex); + sourceIndex=nextSourceIndex - 1; + continue directMode; + } else { + /* bits are illegally left over, a unicode character is incomplete */ + cr=CoderResult.malformedForLength(sourceArrayIndex); + break; + } + } else { + if (useIMAP && base64Counter==-1) { + // illegal: & immediately followed by something other than base64 or minus sign + // include the ampersand in the reported sequence + --sourceIndex; + toUBytesArray[0]=AMPERSAND; + toUBytesArray[1]=(byte)b; + byteIndex=2; + } + /* base64Value==-3 for illegal characters */ + /* illegal */ + inDirectMode=1; + 
cr=CoderResult.malformedForLength(sourceArrayIndex); + break; + } + } else { + /* target is full */ + cr=CoderResult.OVERFLOW; + break; + } + } //end of while + break directMode; + } + }//end of direct mode label + if (useIMAP) { + if (!cr.isError() && inDirectMode==0 && flush && byteIndex==0 && !source.hasRemaining()) { + if (base64Counter==-1) { + /* & at the very end of the input */ + /* make the ampersand the reported sequence */ + toUBytesArray[0]=AMPERSAND; + byteIndex=1; + } + /* else if (base64Counter!=-1) byteIndex remains 0 because ther is no particular byte sequence */ + inDirectMode=1; + cr=CoderResult.malformedForLength(sourceIndex); + } + + } else { + if (!cr.isError() && flush && !source.hasRemaining() && bits ==0) { + /* + * if we are in Unicode Mode, then the byteIndex might not be 0, + * but that is ok if bits -- 0 + * -> we set byteIndex=0 at the end of the stream to avoid a truncated error + * (not true for IMAP-mailbox-name where we must end in direct mode) + */ + if (!cr.isOverflow()) { + byteIndex=0; + } + } + } + /* set the converter state */ + toUnicodeStatus=(inDirectMode<<24 | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (int)bits); + toULength=byteIndex; + + return cr; + } + } + + class CharsetEncoderUTF7 extends CharsetEncoderICU { + public CharsetEncoderUTF7(CharsetICU cs) { + super(cs, fromUSubstitution); + implReset(); + } + + protected void implReset() { + super.implReset(); + fromUnicodeStatus=(fromUnicodeStatus & 0xf0000000) | 0x1000000; + } + + protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { + CoderResult cr=CoderResult.UNDERFLOW; + byte inDirectMode; + byte encodeDirectly[]; + int status; + + int length, targetCapacity, sourceIndex; + + byte base64Counter; + char bits; + char c; + char b; + /* get the state machine state */ + { + status=fromUnicodeStatus; + encodeDirectly=(((long)status) < 0x10000000) ? 
ENCODE_DIRECTLY_MAXIMUM : ENCODE_DIRECTLY_RESTRICTED; + inDirectMode=(byte)((status >> 24) & 1); + base64Counter=(byte)(status >> 16); + bits=(char)((byte)status); + } + /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ + sourceIndex=0; + + directMode: while(true) { + if(inDirectMode==1) { + length=source.remaining(); + targetCapacity=target.remaining(); + if(length > targetCapacity) { + length=targetCapacity; + } + while (length > 0) { + c=source.get(); + /* UTF7: currently always encode CR LF SP TAB directly */ + /* IMAP: encode 0x20..0x7e except '&' directly */ + if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && inSetDIMAP(c))) { + /* encode directly */ + target.put((byte)c); + if (offsets != null) { + offsets.put(sourceIndex++); + } + } else if ((!useIMAP && c==PLUS) || (useIMAP && c==AMPERSAND)) { + /* IMAP: output &- for & */ + /* UTF-7: output +- for + */ + target.put(useIMAP ? AMPERSAND : PLUS); + if (target.hasRemaining()) { + target.put(MINUS); + if (offsets != null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex++); + } + /* realign length and targetCapacity */ + continue directMode; + } else { + if (offsets != null) { + offsets.put(sourceIndex++); + } + errorBuffer[0]=MINUS; + errorBufferLength=1; + cr=CoderResult.OVERFLOW; + break; + } + } else { + /* un-read this character and switch to unicode mode */ + source.position(source.position() - 1); + target.put(useIMAP ? 
AMPERSAND : PLUS); + if (offsets != null) { + offsets.put(sourceIndex); + } + inDirectMode=0; + base64Counter=0; + continue directMode; + } + --length; + } //end of while + if (source.hasRemaining() && !target.hasRemaining()) { + /* target is full */ + cr=CoderResult.OVERFLOW; + } + break directMode; + } else { + /* Unicode Mode */ + while (source.hasRemaining()) { + if (target.hasRemaining()) { + c=source.get(); + if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && isLegal(c, useIMAP))) { + /* encode directly */ + inDirectMode=1; + + /* trick: back out this character to make this easier */ + source.position(source.position() - 1); + + /* terminate the base64 sequence */ + if (base64Counter!=0) { + /* write remaining bits for the previous character */ + target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]); + if (offsets!=null) { + offsets.put(sourceIndex-1); + } + } + if (FROM_BASE_64[c]!=-1 || useIMAP) { + /* need to terminate with a minus */ + if (target.hasRemaining()) { + target.put(MINUS); + if (offsets!=null) { + offsets.put(sourceIndex-1); + } + } else { + errorBuffer[0]=MINUS; + errorBufferLength=1; + cr=CoderResult.OVERFLOW; + break; + } + } + continue directMode; + } else { + /* + * base64 this character: + * Output 2 or 3 base64 bytres for the remaining bits of the previous character + * and the bits of this character, each implicitly in UTF-16BE. + * + * Here, bits is an 8-bit variable because only 6 bits need to be kept from one + * character to the next. The actual 2 or 4 bits are shifted to the left edge + * of the 6-bits filed 5..0 to make the termination of the base64 sequence easier. + */ + switch (base64Counter) { + case 0: + b=(char)(c>>10); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (target.hasRemaining()) { + b=(char)((c>>4)&0x3f); + target.put(useIMAP ? 
TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (offsets!=null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex++); + } + } else { + if (offsets!=null) { + offsets.put(sourceIndex++); + } + b=(char)((c>>4)&0x3f); + errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]; + errorBufferLength=1; + cr=CoderResult.OVERFLOW; + } + bits=(char)((c&15)<<2); + base64Counter=1; + break; + case 1: + b=(char)(bits|(c>>14)); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (target.hasRemaining()) { + b=(char)((c>>8)&0x3f); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (target.hasRemaining()) { + b=(char)((c>>2)&0x3f); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (offsets!=null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex); + offsets.put(sourceIndex++); + } + } else { + if (offsets!=null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex++); + } + b=(char)((c>>2)&0x3f); + errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]; + errorBufferLength=1; + cr=CoderResult.OVERFLOW; + } + } else { + if (offsets!=null) { + offsets.put(sourceIndex++); + } + b=(char)((c>>8)&0x3f); + errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]; + b=(char)((c>>2)&0x3f); + errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]; + errorBufferLength=2; + cr=CoderResult.OVERFLOW; + } + bits=(char)((c&3)<<4); + base64Counter=2; + break; + case 2: + b=(char)(bits|(c>>12)); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (target.hasRemaining()) { + b=(char)((c>>6)&0x3f); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (target.hasRemaining()) { + b=(char)(c&0x3f); + target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]); + if (offsets!=null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex); + offsets.put(sourceIndex++); + } + } else { + if (offsets!=null) { + offsets.put(sourceIndex); + offsets.put(sourceIndex++); + } + b=(char)(c&0x3f); + errorBuffer[0]=useIMAP ? 
TO_BASE64_IMAP(b) : TO_BASE_64[b]; + errorBufferLength=1; + cr=CoderResult.OVERFLOW; + } + } else { + if (offsets!=null) { + offsets.put(sourceIndex++); + } + b=(char)((c>>6)&0x3f); + errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]; + b=(char)(c&0x3f); + errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]; + errorBufferLength=2; + cr=CoderResult.OVERFLOW; + } + bits=0; + base64Counter=0; + break; + //default: + /* will never occur */ + //break; + } //end of switch + } + } else { + /* target is full */ + cr=CoderResult.OVERFLOW; + break; + } + } //end of while + break directMode; + } + } //end of directMode label + + if (flush && !source.hasRemaining()) { + /* flush remaining bits to the target */ + if (inDirectMode==0) { + if (base64Counter!=0) { + if (target.hasRemaining()) { + target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]); + if (offsets!=null) { + offsets.put(sourceIndex - 1); + } + } else { + errorBuffer[errorBufferLength++]=useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]; + cr=CoderResult.OVERFLOW; + } + } + if (useIMAP) { + /* IMAP: need to terminate with a minus */ + if (target.hasRemaining()) { + target.put(MINUS); + if (offsets!=null) { + offsets.put(sourceIndex - 1); + } + } else { + errorBuffer[errorBufferLength++]=MINUS; + cr=CoderResult.OVERFLOW; + } + } + } + /*reset the state for the next conversion */ + fromUnicodeStatus=((status&0xf0000000) | 0x1000000); /* keep version, inDirectMode=TRUE */ + } else { + /* set the converter state back */ + fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | ((int)bits)); + } + + return cr; + } + } + + public CharsetDecoder newDecoder() { + return new CharsetDecoderUTF7(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderUTF7(this); + } + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + getCompleteUnicodeSet(setFillIn); + } +} diff --git 
a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java new file mode 100644 index 00000000000..60f33a723ce --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java @@ -0,0 +1,694 @@ +/** + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ + +package com.ibm.icu.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author Niti Hantaweepant + */ +class CharsetUTF8 extends CharsetICU { + + private static final byte[] fromUSubstitution = new byte[] { (byte) 0xef, (byte) 0xbf, (byte) 0xbd }; + + public CharsetUTF8(String icuCanonicalName, String javaCanonicalName, String[] aliases) { + super(icuCanonicalName, javaCanonicalName, aliases); + /* max 3 bytes per code unit from UTF-8 (4 bytes from surrogate _pair_) */ + maxBytesPerChar = 3; + minBytesPerChar = 1; + maxCharsPerByte = 1; + } + + private static final int BITMASK_FROM_UTF8[] = { -1, 0x7f, 0x1f, 0xf, 0x7, 0x3, 0x1 }; + + private static final byte BYTES_FROM_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 + }; + + /* + * Starting with Unicode 3.0.1: UTF-8 byte sequences of length N _must_ encode code points of or + * above utf8_minChar32[N]; byte sequences with more than 4 bytes are illegal in UTF-8, which is + * tested with impossible values for them + */ + private static final int UTF8_MIN_CHAR32[] = { 0, 0, 0x80, 0x800, 0x10000, + Integer.MAX_VALUE, Integer.MAX_VALUE }; + + private final boolean isCESU8 = this instanceof CharsetCESU8; + + class CharsetDecoderUTF8 extends CharsetDecoderICU { + + public CharsetDecoderUTF8(CharsetICU cs) { + super(cs); + } + + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, + boolean flush) { + if (!source.hasRemaining()) { + /* no input, nothing to do */ + return CoderResult.UNDERFLOW; + } + if (!target.hasRemaining()) { + /* no output available, can't do anything */ + return CoderResult.OVERFLOW; + } + + if (source.hasArray() && target.hasArray()) { + /* source and target are backed by arrays, so use the arrays for optimal performance */ + byte[] sourceArray = source.array(); + int sourceIndex = source.arrayOffset() + source.position(); + int sourceLimit = source.arrayOffset() + source.limit(); + char[] targetArray = target.array(); + int targetIndex = target.arrayOffset() + target.position(); + int targetLimit = target.arrayOffset() + target.limit(); + + byte ch; + int char32, bytesExpected, bytesSoFar; + CoderResult cr; + + if (mode == 0) { + /* nothing is stored in toUnicodeStatus, read a byte as input */ + char32 = (toUBytesArray[0] = sourceArray[sourceIndex++]) & 0xff; + bytesExpected = BYTES_FROM_UTF8[char32]; 
+ char32 &= BITMASK_FROM_UTF8[bytesExpected]; + bytesSoFar = 1; + } else { + /* a partially or fully built code point is stored in toUnicodeStatus */ + char32 = toUnicodeStatus; + bytesExpected = mode; + bytesSoFar = toULength; + + toUnicodeStatus = 0; + mode = 0; + toULength = 0; + } + + outer: while (true) { + if (bytesSoFar < bytesExpected) { + /* read a trail byte and insert its relevant bits into char32 */ + if (sourceIndex >= sourceLimit) { + /* no source left, save the state for later and break out of the loop */ + toUnicodeStatus = char32; + mode = bytesExpected; + toULength = bytesSoFar; + cr = CoderResult.UNDERFLOW; + break; + } + if (((ch = toUBytesArray[bytesSoFar] = sourceArray[sourceIndex++]) & 0xc0) != 0x80) { + /* not a trail byte (is not of the form 10xxxxxx) */ + sourceIndex--; + toULength = bytesSoFar; + cr = CoderResult.malformedForLength(bytesSoFar); + break; + } + char32 = (char32 << 6) | (ch & 0x3f); + bytesSoFar++; + } else if (bytesSoFar == bytesExpected && UTF8_MIN_CHAR32[bytesExpected] <= char32 && char32 <= 0x10ffff + && (isCESU8 ? bytesExpected <= 3 : !UTF16.isSurrogate((char) char32))) { + /* + * char32 is a valid code point and is composed of the correct number of + * bytes ... 
we now need to output it in UTF-16 + */ + + if (char32 <= UConverterConstants.MAXIMUM_UCS2) { + /* fits in 16 bits */ + targetArray[targetIndex++] = (char) char32; + } else { + /* fit char32 into 20 bits */ + char32 -= UConverterConstants.HALF_BASE; + + /* write out the surrogates */ + targetArray[targetIndex++] = (char) ((char32 >>> UConverterConstants.HALF_SHIFT) + UConverterConstants.SURROGATE_HIGH_START); + + if (targetIndex >= targetLimit) { + /* put in overflow buffer (not handled here) */ + charErrorBufferArray[charErrorBufferBegin++] = (char) char32; + cr = CoderResult.OVERFLOW; + break; + } + targetArray[targetIndex++] = (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START); + } + + /* + * we're finished outputing, so now we need to read in the first byte of the + * next byte sequence that could form a code point + */ + + if (sourceIndex >= sourceLimit) { + cr = CoderResult.UNDERFLOW; + break; + } + if (targetIndex >= targetLimit) { + cr = CoderResult.OVERFLOW; + break; + } + + /* keep reading the next input (and writing it) while bytes == 1 */ + while ((bytesExpected = BYTES_FROM_UTF8[char32 = (toUBytesArray[0] = sourceArray[sourceIndex++]) & 0xff]) == 1) { + targetArray[targetIndex++] = (char) char32; + if (sourceIndex >= sourceLimit) { + cr = CoderResult.UNDERFLOW; + break outer; + } + if (targetIndex >= targetLimit) { + cr = CoderResult.OVERFLOW; + break outer; + } + } + + /* remove the bits that indicate the number of bytes */ + char32 &= BITMASK_FROM_UTF8[bytesExpected]; + bytesSoFar = 1; + } else { + /* + * either the lead byte in the code sequence is invalid (bytes == 0) or the + * lead byte combined with all the trail chars does not form a valid code + * point + */ + toULength = bytesSoFar; + cr = CoderResult.malformedForLength(bytesSoFar); + break; + } + } + + source.position(sourceIndex - source.arrayOffset()); + target.position(targetIndex - target.arrayOffset()); + return cr; + + } else { + + int 
sourceIndex = source.position(); + int sourceLimit = source.limit(); + int targetIndex = target.position(); + int targetLimit = target.limit(); + + byte ch; + int char32, bytesExpected, bytesSoFar; + CoderResult cr; + + if (mode == 0) { + /* nothing is stored in toUnicodeStatus, read a byte as input */ + char32 = (toUBytesArray[0] = source.get(sourceIndex++)) & 0xff; + bytesExpected = BYTES_FROM_UTF8[char32]; + char32 &= BITMASK_FROM_UTF8[bytesExpected]; + bytesSoFar = 1; + } else { + /* a partially or fully built code point is stored in toUnicodeStatus */ + char32 = toUnicodeStatus; + bytesExpected = mode; + bytesSoFar = toULength; + + toUnicodeStatus = 0; + mode = 0; + toULength = 0; + } + + outer: while (true) { + if (bytesSoFar < bytesExpected) { + /* read a trail byte and insert its relevant bits into char32 */ + if (sourceIndex >= sourceLimit) { + /* no source left, save the state for later and break out of the loop */ + toUnicodeStatus = char32; + mode = bytesExpected; + toULength = bytesSoFar; + cr = CoderResult.UNDERFLOW; + break; + } + if (((ch = toUBytesArray[bytesSoFar] = source.get(sourceIndex++)) & 0xc0) != 0x80) { + /* not a trail byte (is not of the form 10xxxxxx) */ + sourceIndex--; + toULength = bytesSoFar; + cr = CoderResult.malformedForLength(bytesSoFar); + break; + } + char32 = (char32 << 6) | (ch & 0x3f); + bytesSoFar++; + } + /* + * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: + * - use only trail bytes after a lead byte (checked above) + * - use the right number of trail bytes for a given lead byte + * - encode a code point <= U+10ffff + * - use the fewest possible number of bytes for their code points + * - use at most 4 bytes (for i>=5 it is 0x10ffff>> UConverterConstants.HALF_SHIFT) + UConverterConstants.SURROGATE_HIGH_START)); + + if (targetIndex >= targetLimit) { + /* put in overflow buffer (not handled here) */ + charErrorBufferArray[charErrorBufferBegin++] = (char) char32; + cr = CoderResult.OVERFLOW; + break; + } + target.put( 
/**
 * Encoder that turns UTF-16 code units into UTF-8 bytes.
 *
 * <p>Code units up to 0x7f produce one byte, up to 0x7ff two bytes, and any other
 * non-surrogate code unit three bytes. A surrogate is combined with the following
 * code unit into a four-byte sequence, unless {@code isCESU8} is set, in which case
 * every surrogate is encoded on its own as three bytes.
 *
 * <p>When the target fills up in the middle of a sequence, the bytes that did not fit
 * are appended to {@code errorBuffer} (advancing {@code errorBufferLength}) and
 * {@code OVERFLOW} is returned.
 * NOTE(review): presumably the base class drains {@code errorBuffer} on the next
 * call - confirm in CharsetEncoderICU.
 */
class CharsetEncoderUTF8 extends CharsetEncoderICU {

    public CharsetEncoderUTF8(CharsetICU cs) {
        super(cs, fromUSubstitution);
        implReset();
    }

    /** Delegates to the base class; this encoder adds no reset logic of its own. */
    protected void implReset() {
        super.implReset();
    }

    /**
     * Encodes as many chars of {@code source} as possible into {@code target}.
     * Uses the backing arrays when both buffers have one, otherwise relative
     * {@code get}/{@code put} calls.
     *
     * @param source  UTF-16 code units to encode
     * @param target  receives the UTF-8 bytes
     * @param offsets not used by this method
     * @param flush   not used by this method
     * @return {@code UNDERFLOW} when the source is used up, {@code OVERFLOW} when the
     *         target is full, or whatever non-null result {@code handleSurrogates}
     *         reported for a surrogate
     */
    protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
            boolean flush) {
        if (!source.hasRemaining()) {
            /* no input, nothing to do */
            return CoderResult.UNDERFLOW;
        }
        if (!target.hasRemaining()) {
            /* no output available, can't do anything */
            return CoderResult.OVERFLOW;
        }

        if (source.hasArray() && target.hasArray()) {
            /* source and target are backed by arrays, so use the arrays for optimal performance */
            char[] sourceArray = source.array();
            int srcIdx = source.arrayOffset() + source.position();
            int sourceLimit = source.arrayOffset() + source.limit();
            byte[] targetArray = target.array();
            int tgtIdx = target.arrayOffset() + target.position();
            int targetLimit = target.arrayOffset() + target.limit();

            int char32;
            CoderResult cr;

            /* take care of the special condition of fromUChar32 not being 0 (it is a surrogate) */
            if (fromUChar32 != 0) {
                /* 4 bytes to encode from char32 and a following char in source */

                /* the array cursors travel to and from encodeFourBytes through the instance fields */
                sourceIndex = srcIdx;
                targetIndex = tgtIdx;
                cr = encodeFourBytes(sourceArray, targetArray, sourceLimit, targetLimit,
                        fromUChar32);
                srcIdx = sourceIndex;
                tgtIdx = targetIndex;
                if (cr != null) {
                    source.position(srcIdx - source.arrayOffset());
                    target.position(tgtIdx - target.arrayOffset());
                    return cr;
                }
            }

            while (true) {
                if (srcIdx >= sourceLimit) {
                    /* nothing left to read */
                    cr = CoderResult.UNDERFLOW;
                    break;
                }
                if (tgtIdx >= targetLimit) {
                    /* no space left to write */
                    cr = CoderResult.OVERFLOW;
                    break;
                }

                /* read the next char into char32 */
                char32 = sourceArray[srcIdx++];

                if (char32 <= 0x7f) {
                    /* 1 byte to encode from char32 */

                    targetArray[tgtIdx++] = encodeHeadOf1(char32);

                } else if (char32 <= 0x7ff) {
                    /* 2 bytes to encode from char32 */

                    targetArray[tgtIdx++] = encodeHeadOf2(char32);

                    if (tgtIdx >= targetLimit) {
                        /* head byte written, the tail goes to the overflow buffer */
                        errorBuffer[errorBufferLength++] = encodeLastTail(char32);
                        cr = CoderResult.OVERFLOW;
                        break;
                    }
                    targetArray[tgtIdx++] = encodeLastTail(char32);

                } else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
                    /* 3 bytes to encode from char32 */

                    targetArray[tgtIdx++] = encodeHeadOf3(char32);

                    if (tgtIdx >= targetLimit) {
                        errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
                        errorBuffer[errorBufferLength++] = encodeLastTail(char32);
                        cr = CoderResult.OVERFLOW;
                        break;
                    }
                    targetArray[tgtIdx++] = encodeSecondToLastTail(char32);

                    if (tgtIdx >= targetLimit) {
                        errorBuffer[errorBufferLength++] = encodeLastTail(char32);
                        cr = CoderResult.OVERFLOW;
                        break;
                    }
                    targetArray[tgtIdx++] = encodeLastTail(char32);

                } else {
                    /* 4 bytes to encode from char32 and a following char in source */

                    sourceIndex = srcIdx;
                    targetIndex = tgtIdx;
                    cr = encodeFourBytes(sourceArray, targetArray, sourceLimit, targetLimit,
                            char32);
                    srcIdx = sourceIndex;
                    tgtIdx = targetIndex;
                    if (cr != null)
                        break;
                }
            }

            /* set the new source and target positions and return the CoderResult stored in cr */
            source.position(srcIdx - source.arrayOffset());
            target.position(tgtIdx - target.arrayOffset());
            return cr;

        } else {
            int char32;
            CoderResult cr;

            /* take care of the special condition of fromUChar32 not being 0 (it is a surrogate) */
            if (fromUChar32 != 0) {
                /* 4 bytes to encode from char32 and a following char in source */

                cr = encodeFourBytes(source, target, fromUChar32);
                if (cr != null)
                    return cr;
            }

            while (true) {
                if (!source.hasRemaining()) {
                    /* nothing left to read */
                    cr = CoderResult.UNDERFLOW;
                    break;
                }
                if (!target.hasRemaining()) {
                    /* no space left to write */
                    cr = CoderResult.OVERFLOW;
                    break;
                }

                /* read the next char into char32 */
                char32 = source.get();

                if (char32 <= 0x7f) {
                    /* 1 byte to encode from char32 */

                    target.put(encodeHeadOf1(char32));

                } else if (char32 <= 0x7ff) {
                    /* 2 bytes to encode from char32 */

                    target.put(encodeHeadOf2(char32));

                    if (!target.hasRemaining()) {
                        errorBuffer[errorBufferLength++] = encodeLastTail(char32);
                        cr = CoderResult.OVERFLOW;
                        break;
                    }
                    target.put(encodeLastTail(char32));

                } else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
                    /* 3 bytes to encode from char32 */

                    target.put(encodeHeadOf3(char32));

                    if (!target.hasRemaining()) {
                        errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
                        errorBuffer[errorBufferLength++] = encodeLastTail(char32);
                        cr = CoderResult.OVERFLOW;
                        break;
                    }
                    target.put(encodeSecondToLastTail(char32));

                    if (!target.hasRemaining()) {
                        errorBuffer[errorBufferLength++] = encodeLastTail(char32);
                        cr = CoderResult.OVERFLOW;
                        break;
                    }
                    target.put(encodeLastTail(char32));

                } else {
                    /* 4 bytes to encode from char32 and a following char in source */

                    cr = encodeFourBytes(source, target, char32);
                    if (cr != null)
                        break;
                }
            }

            /*
             * nothing to reposition here: this path only uses relative get/put calls;
             * just return the CoderResult stored in cr
             */
            return cr;
        }
    }

    /**
     * Array variant: encodes the surrogate in {@code char32} together with the next
     * code unit of {@code sourceArray} as four bytes. Reads and advances the
     * {@code sourceIndex} and {@code targetIndex} fields, which the caller loads
     * before the call and reads back afterwards.
     *
     * @return null on success, otherwise the result to hand back to the caller: a
     *         non-null result from {@code handleSurrogates}, or {@code OVERFLOW}
     *         after the remaining bytes were appended to {@code errorBuffer}
     */
    private final CoderResult encodeFourBytes(char[] sourceArray, byte[] targetArray,
            int sourceLimit, int targetLimit, int char32) {

        /* we need to read another char to match up the surrogate stored in char32 */
        /* handle the surrogate stuff, returning on a non-null CoderResult */
        CoderResult cr = handleSurrogates(sourceArray, sourceIndex, sourceLimit, (char)char32);
        if (cr != null)
            return cr;

        /* NOTE(review): presumably handleSurrogates left the combined code point in fromUChar32 - confirm */
        sourceIndex++;
        char32 = fromUChar32;
        fromUChar32 = 0;

        /* the rest is routine -- encode four bytes, stopping on overflow */

        targetArray[targetIndex++] = encodeHeadOf4(char32);

        if (targetIndex >= targetLimit) {
            errorBuffer[errorBufferLength++] = encodeThirdToLastTail(char32);
            errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
            errorBuffer[errorBufferLength++] = encodeLastTail(char32);
            return CoderResult.OVERFLOW;
        }
        targetArray[targetIndex++] = encodeThirdToLastTail(char32);

        if (targetIndex >= targetLimit) {
            errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
            errorBuffer[errorBufferLength++] = encodeLastTail(char32);
            return CoderResult.OVERFLOW;
        }
        targetArray[targetIndex++] = encodeSecondToLastTail(char32);

        if (targetIndex >= targetLimit) {
            errorBuffer[errorBufferLength++] = encodeLastTail(char32);
            return CoderResult.OVERFLOW;
        }
        targetArray[targetIndex++] = encodeLastTail(char32);

        /* return null for success */
        return null;
    }

    /**
     * Buffer variant of the four-byte encoding; writes with relative puts on
     * {@code target}.
     * NOTE(review): unlike the array variant there is no explicit source advance
     * here, so handleSurrogates(source, ...) presumably consumes the trail code
     * unit itself - confirm.
     *
     * @return null on success, otherwise the result to hand back to the caller
     */
    private final CoderResult encodeFourBytes(CharBuffer source, ByteBuffer target, int char32) {

        /* handle the surrogate stuff, returning on a non-null CoderResult */
        CoderResult cr = handleSurrogates(source, (char)char32);
        if (cr != null)
            return cr;

        char32 = fromUChar32;
        fromUChar32 = 0;

        /* the rest is routine -- encode four bytes, stopping on overflow */

        target.put(encodeHeadOf4(char32));

        if (!target.hasRemaining()) {
            errorBuffer[errorBufferLength++] = encodeThirdToLastTail(char32);
            errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
            errorBuffer[errorBufferLength++] = encodeLastTail(char32);
            return CoderResult.OVERFLOW;
        }
        target.put(encodeThirdToLastTail(char32));

        if (!target.hasRemaining()) {
            errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
            errorBuffer[errorBufferLength++] = encodeLastTail(char32);
            return CoderResult.OVERFLOW;
        }
        target.put(encodeSecondToLastTail(char32));

        if (!target.hasRemaining()) {
            errorBuffer[errorBufferLength++] = encodeLastTail(char32);
            return CoderResult.OVERFLOW;
        }
        target.put(encodeLastTail(char32));

        /* return null for success */
        return null;
    }

    /* read cursor into the source array, shared between encodeLoop and the array encodeFourBytes */
    private int sourceIndex;

    /* write cursor into the target array, shared between encodeLoop and the array encodeFourBytes */
    private int targetIndex;

}
+ /* single-code point definitions -------------------------------------------- */ + + /* + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + */ + // static final boolean isSingle(byte c) {return (((c)&0x80)==0);} + /* + * Is this code unit (byte) a UTF-8 lead byte? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + */ + // static final boolean isLead(byte c) {return ((((c)-0xc0) & + // UConverterConstants.UNSIGNED_BYTE_MASK)<0x3e);} + /* + * Is this code unit (byte) a UTF-8 trail byte? + * + * @param c + * 8-bit code unit (byte) + * @return TRUE or FALSE + */ + /*private static final boolean isTrail(byte c) { + return (((c) & 0xc0) == 0x80); + }*/ + + public CharsetDecoder newDecoder() { + return new CharsetDecoderUTF8(this); + } + + public CharsetEncoder newEncoder() { + return new CharsetEncoderUTF8(this); + } + + + void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ + getNonSurrogateUnicodeSet(setFillIn); + } +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java new file mode 100644 index 00000000000..ad55260af73 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java @@ -0,0 +1,831 @@ +/** +******************************************************************************* +* Copyright (C) 2006-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +******************************************************************************* +*/ +package com.ibm.icu.charset; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; + +final class UConverterAlias { + static final int UNNORMALIZED = 0; + + static final int STD_NORMALIZED = 1; + + static final int AMBIGUOUS_ALIAS_MAP_BIT = 0x8000; + + static final int CONTAINS_OPTION_BIT = 0x4000; + + static final int CONVERTER_INDEX_MASK = 0xFFF; + + static final int NUM_RESERVED_TAGS = 2; + + static final int NUM_HIDDEN_TAGS = 1; + + static int[] gConverterList = null; + + static int[] gTagList = null; + + static int[] gAliasList = null; + + static int[] gUntaggedConvArray = null; + + static int[] gTaggedAliasArray = null; + + static int[] gTaggedAliasLists = null; + + static int[] gOptionTable = null; + + static byte[] gStringTable = null; + + static byte[] gNormalizedStringTable = null; + + static final String GET_STRING(int idx) { + return new String(gStringTable, 2 * idx, strlen(gStringTable, 2 * idx)); + } + + private static final String GET_NORMALIZED_STRING(int idx) { + return new String(gNormalizedStringTable, 2 * idx, strlen(gNormalizedStringTable, 2 * idx)); + } + + public static final int strlen(byte[] sArray, int sBegin) + { + int i = sBegin; + while(i < sArray.length && sArray[i++] != 0) {} + return i - sBegin - 1; + } + + /*private*/ static final int tocLengthIndex = 0; + + private static final int converterListIndex = 1; + + private static final int tagListIndex = 2; + + private static final int aliasListIndex = 3; + + private static final int untaggedConvArrayIndex = 4; + + private static final int taggedAliasArrayIndex = 5; + + private static final int taggedAliasListsIndex = 6; + + private static final int optionTableIndex = 
7; + + private static final int stringTableIndex = 8; + + private static final int normalizedStringTableIndex = 9; + + private static final int minTocLength = 9; /* + * min. tocLength in the file, + * does not count the + * tocLengthIndex! + */ + + private static final int offsetsCount = minTocLength + 1; /* + * length of the + * swapper's + * temporary + * offsets[] + */ + + static ByteBuffer gAliasData = null; + + private static final boolean isAlias(String alias) { + if (alias == null) { + throw new IllegalArgumentException("Alias param is null!"); + } + return (alias.length() != 0); + } + + private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu"; + + /** + * Default buffer size of datafile + */ + private static final int CNVALIAS_DATA_BUFFER_SIZE = 25000; + + private static final synchronized boolean haveAliasData() + throws IOException{ + boolean needInit; + + // agljport:todo umtx_lock(NULL); + needInit = gAliasData == null; + + /* load converter alias data from file if necessary */ + if (needInit) { + ByteBuffer data = null; + int[] tableArray = null; + int tableStart; + //byte[] reservedBytes = null; + + InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME); + BufferedInputStream b = new BufferedInputStream(i, CNVALIAS_DATA_BUFFER_SIZE); + UConverterAliasDataReader reader = new UConverterAliasDataReader(b); + tableArray = reader.readToc(offsetsCount); + + tableStart = tableArray[0]; + if (tableStart < minTocLength) { + throw new IOException("Invalid data format."); + } + gConverterList = new int[tableArray[converterListIndex]]; + gTagList= new int[tableArray[tagListIndex]]; + gAliasList = new int[tableArray[aliasListIndex]]; + gUntaggedConvArray = new int[tableArray[untaggedConvArrayIndex]]; + gTaggedAliasArray = new int[tableArray[taggedAliasArrayIndex]]; + gTaggedAliasLists = new int[tableArray[taggedAliasListsIndex]]; + gOptionTable = new int[tableArray[optionTableIndex]]; + gStringTable = new 
byte[tableArray[stringTableIndex]*2]; + gNormalizedStringTable = new byte[tableArray[normalizedStringTableIndex]*2]; + + reader.read(gConverterList, gTagList, + gAliasList, gUntaggedConvArray, + gTaggedAliasArray, gTaggedAliasLists, + gOptionTable, gStringTable, gNormalizedStringTable); + data = ByteBuffer.allocate(0); // dummy UDataMemory object in absence + // of memory mapping + + if (gOptionTable[0] != STD_NORMALIZED) { + throw new IOException("Unsupported alias normalization"); + } + + // agljport:todo umtx_lock(NULL); + if (gAliasData == null) { + gAliasData = data; + data = null; + + // agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO, + // io_cleanup); + } + // agljport:todo umtx_unlock(NULL); + + /* if a different thread set it first, then close the extra data */ + if (data != null) { + // agljport:fix udata_close(data); /* NULL if it was set + // correctly */ + } + } + + return true; + } + + // U_CFUNC const char * io_getConverterName(const char *alias, UErrorCode + // *pErrorCode) +// public static final String io_getConverterName(String alias) +// throws IOException{ +// if (haveAliasData() && isAlias(alias)) { +// boolean[] isAmbigous = new boolean[1]; +// int convNum = findConverter(alias, isAmbigous); +// if (convNum < gConverterList.length) { +// return GET_STRING(gConverterList[(int) convNum]); +// } +// /* else converter not found */ +// } +// return null; +// } + + /* + * search for an alias return the converter number index for gConverterList + */ + // static U_INLINE uint32_t findConverter(const char *alias, UErrorCode + // *pErrorCode) + private static final int findConverter(String alias, boolean[] isAmbigous) { + int mid, start, limit; + int lastMid; + int result; + StringBuilder strippedName = new StringBuilder(); + String aliasToCompare; + + stripForCompare(strippedName, alias); + alias = strippedName.toString(); + + /* do a binary search for the alias */ + start = 0; + limit = gUntaggedConvArray.length; + mid = limit; + lastMid = 
Integer.MAX_VALUE; + + for (;;) { + mid = (start + limit) / 2; + if (lastMid == mid) { /* Have we moved? */ + break; /* We haven't moved, and it wasn't found. */ + } + lastMid = mid; + aliasToCompare = GET_NORMALIZED_STRING(gAliasList[mid]); + result = alias.compareTo(aliasToCompare); + + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid; + } else { + /* + * Since the gencnval tool folds duplicates into one entry, this + * alias in gAliasList is unique, but different standards may + * map an alias to different converters. + */ + if ((gUntaggedConvArray[mid] & AMBIGUOUS_ALIAS_MAP_BIT) != 0) { + isAmbigous[0]=true; + } + /* State whether the canonical converter name contains an option. + This information is contained in this list in order to maintain backward & forward compatibility. */ + /*if (containsOption) { + UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo; + *containsOption = (UBool)((containsCnvOptionInfo + && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) + || !containsCnvOptionInfo); + }*/ + return gUntaggedConvArray[mid] & CONVERTER_INDEX_MASK; + } + } + return Integer.MAX_VALUE; + } + + /** + * stripForCompare Remove the underscores, dashes and spaces from + * the name, and convert the name to lower case. + * + * @param dst The destination buffer, which is <= the buffer of name. + * @param name The alias to strip + * @return the destination buffer. 
+ */ + public static final StringBuilder stripForCompare(StringBuilder dst, String name) { + return io_stripASCIIForCompare(dst, name); + } + + // enum { + private static final byte IGNORE = 0; + private static final byte ZERO = 1; + private static final byte NONZERO = 2; + static final byte MINLETTER = 3; /* any values from here on are lowercase letter mappings */ + // } + + /* character types for ASCII 00..7F */ + static final byte asciiTypes[] = new byte[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, + 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, + 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 + }; + + private static final char GET_CHAR_TYPE(char c) { + return (char)((c < asciiTypes.length) ? 
asciiTypes[c] : (char)IGNORE); + } + + /** @see UConverterAlias#compareNames */ + private static final StringBuilder io_stripASCIIForCompare(StringBuilder dst, String name) { + int nameIndex = 0; + char type, nextType; + char c1; + boolean afterDigit = false; + + while (nameIndex < name.length()) { + c1 = name.charAt(nameIndex++); + type = GET_CHAR_TYPE(c1); + switch (type) { + case IGNORE: + afterDigit = false; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit && nameIndex < name.length()) { + nextType = GET_CHAR_TYPE(name.charAt(nameIndex)); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit = true; + break; + default: + c1 = type; /* lowercased letter */ + afterDigit = false; + break; + } + dst.append(c1); + } + return dst; + } + + /** + * Do a fuzzy compare of a two converter/alias names. The comparison is + * case-insensitive. It also ignores the characters '-', '_', and ' ' (dash, + * underscore, and space). Thus the strings "UTF-8", "utf_8", and "Utf 8" + * are exactly equivalent. + * + * This is a symmetrical (commutative) operation; order of arguments is + * insignificant. This is an important property for sorting the list (when + * the list is preprocessed into binary form) and for performing binary + * searches on it at run time. + * + * @param name1 + * a converter name or alias, zero-terminated + * @param name2 + * a converter name or alias, zero-terminated + * @return 0 if the names match, or a negative value if the name1 lexically + * precedes name2, or a positive value if the name1 lexically + * follows name2. 
+ * + * @see UConverterAlias#stripForCompare + */ + static int compareNames(String name1, String name2){ + int rc, name1Index = 0, name2Index = 0; + char type, nextType; + char c1 = 0, c2 = 0; + boolean afterDigit1 = false, afterDigit2 = false; + + for (;;) { + while (name1Index < name1.length()) { + c1 = name1.charAt(name1Index++); + type = GET_CHAR_TYPE(c1); + switch (type) { + case IGNORE: + afterDigit1 = false; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit1 && name1Index < name1.length()) { + nextType = GET_CHAR_TYPE(name1.charAt(name1Index)); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit1 = true; + break; + default: + c1 = type; /* lowercased letter */ + afterDigit1 = false; + break; + } + break; /* deliver c1 */ + } + while (name2Index < name2.length()) { + c2 = name2.charAt(name2Index++); + type = GET_CHAR_TYPE(c2); + switch (type) { + case IGNORE: + afterDigit2 = false; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit2 && name1Index < name1.length()) { + nextType = GET_CHAR_TYPE(name2.charAt(name2Index)); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit2 = true; + break; + default: + c2 = type; /* lowercased letter */ + afterDigit2 = false; + break; + } + break; /* deliver c2 */ + } + + /* If we reach the ends of both strings then they match */ + if (name1Index >= name1.length() && name2Index >= name2.length()) { + return 0; + } + + /* Case-insensitive comparison */ + rc = (int)c1 - (int)c2; + if (rc != 0) { + return rc; + } + } + } + + static int io_countAliases(String alias) + throws IOException{ + if (haveAliasData() && isAlias(alias)) { + boolean[] isAmbigous = new boolean[1]; + int convNum = findConverter(alias, isAmbigous); + if (convNum < gConverterList.length) { + /* 
tagListNum - 1 is the ALL tag */ + int listOffset = gTaggedAliasArray[(gTagList.length - 1) + * gConverterList.length + convNum]; + + if (listOffset != 0) { + return gTaggedAliasLists[listOffset]; + } + /* else this shouldn't happen. internal program error */ + } + /* else converter not found */ + } + return 0; + } + + /** + * Return the number of all aliases (and converter names). + * + * @return the number of all aliases + */ + // U_CFUNC uint16_t io_countTotalAliases(UErrorCode *pErrorCode); +// static int io_countTotalAliases() throws IOException{ +// if (haveAliasData()) { +// return (int) gAliasList.length; +// } +// return 0; +// } + + // U_CFUNC const char * io_getAlias(const char *alias, uint16_t n, + // UErrorCode *pErrorCode) + static String io_getAlias(String alias, int n) throws IOException{ + if (haveAliasData() && isAlias(alias)) { + boolean[] isAmbigous = new boolean[1]; + int convNum = findConverter(alias,isAmbigous); + if (convNum < gConverterList.length) { + /* tagListNum - 1 is the ALL tag */ + int listOffset = gTaggedAliasArray[(gTagList.length - 1) + * gConverterList.length + convNum]; + + if (listOffset != 0) { + //int listCount = gTaggedAliasListsArray[listOffset]; + /* +1 to skip listCount */ + int[] currListArray = gTaggedAliasLists; + int currListArrayIndex = listOffset + 1; + + return GET_STRING(currListArray[currListArrayIndex + n]); + + } + /* else this shouldn't happen. 
internal program error */ + } + /* else converter not found */ + } + return null; + } + + // U_CFUNC uint16_t io_countStandards(UErrorCode *pErrorCode) { +// static int io_countStandards() throws IOException{ +// if (haveAliasData()) { +// return (int) (gTagList.length - NUM_HIDDEN_TAGS); +// } +// return 0; +// } + + // U_CAPI const char * U_EXPORT2getStandard(uint16_t n, UErrorCode + // *pErrorCode) +// static String getStandard(int n) throws IOException{ +// if (haveAliasData()) { +// return GET_STRING(gTagList[n]); +// } +// return null; +// } + + // U_CAPI const char * U_EXPORT2 getStandardName(const char *alias, const + // char *standard, UErrorCode *pErrorCode) + static final String getStandardName(String alias, String standard)throws IOException { + if (haveAliasData() && isAlias(alias)) { + int listOffset = findTaggedAliasListsOffset(alias, standard); + + if (0 < listOffset && listOffset < gTaggedAliasLists.length) { + int[] currListArray = gTaggedAliasLists; + int currListArrayIndex = listOffset + 1; + if (currListArray[0] != 0) { + return GET_STRING(currListArray[currListArrayIndex]); + } + } + } + return null; + } + + // U_CAPI uint16_t U_EXPORT2 countAliases(const char *alias, UErrorCode + // *pErrorCode) + static int countAliases(String alias) throws IOException{ + return io_countAliases(alias); + } + + // U_CAPI const char* U_EXPORT2 getAlias(const char *alias, uint16_t n, + // UErrorCode *pErrorCode) + static String getAlias(String alias, int n) throws IOException{ + return io_getAlias(alias, n); + } + + // U_CFUNC uint16_t countStandards(void) +// static int countStandards()throws IOException{ +// return io_countStandards(); +// } + + /*returns a single Name from the list, will return NULL if out of bounds + */ + static String getAvailableName (int n){ + try{ + if (0 <= n && n <= 0xffff) { + String name = bld_getAvailableConverter(n); + return name; + } + }catch(IOException ex){ + //throw away exception + } + return null; + } + // U_CAPI const char 
* U_EXPORT2 getCanonicalName(const char *alias, const + // char *standard, UErrorCode *pErrorCode) { + static String getCanonicalName(String alias, String standard) throws IOException{ + if (haveAliasData() && isAlias(alias)) { + int convNum = findTaggedConverterNum(alias, standard); + + if (convNum < gConverterList.length) { + return GET_STRING(gConverterList[convNum]); + } + } + + return null; + } + static int countAvailable (){ + try{ + return bld_countAvailableConverters(); + }catch(IOException ex){ + //throw away exception + } + return -1; + } + + // U_CAPI UEnumeration * U_EXPORT2 openStandardNames(const char *convName, + // const char *standard, UErrorCode *pErrorCode) +/* static final UConverterAliasesEnumeration openStandardNames(String convName, String standard)throws IOException { + UConverterAliasesEnumeration aliasEnum = null; + if (haveAliasData() && isAlias(convName)) { + int listOffset = findTaggedAliasListsOffset(convName, standard); + + + * When listOffset == 0, we want to acknowledge that the converter + * name and standard are okay, but there is nothing to enumerate. 
+ + if (listOffset < gTaggedAliasLists.length) { + + UConverterAliasesEnumeration.UAliasContext context = new UConverterAliasesEnumeration.UAliasContext(listOffset, 0); + aliasEnum = new UConverterAliasesEnumeration(); + aliasEnum.setContext(context); + } + else converter or tag not found + } + return aliasEnum; + }*/ + + // static uint32_t getTagNumber(const char *tagname) + private static int getTagNumber(String tagName) { + if (gTagList != null) { + int tagNum; + for (tagNum = 0; tagNum < gTagList.length; tagNum++) { + if (tagName.equals(GET_STRING(gTagList[tagNum]))) { + return tagNum; + } + } + } + + return Integer.MAX_VALUE; + } + + // static uint32_t findTaggedAliasListsOffset(const char *alias, const char + // *standard, UErrorCode *pErrorCode) + private static int findTaggedAliasListsOffset(String alias, String standard) { + int idx; + int listOffset; + int convNum; + int tagNum = getTagNumber(standard); + boolean[] isAmbigous = new boolean[1]; + /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ + convNum = findConverter(alias, isAmbigous); + + if (tagNum < (gTagList.length - NUM_HIDDEN_TAGS) + && convNum < gConverterList.length) { + listOffset = gTaggedAliasArray[tagNum + * gConverterList.length + convNum]; + if (listOffset != 0 + && gTaggedAliasLists[listOffset + 1] != 0) { + return listOffset; + } + if (isAmbigous[0]==true) { + /* + * Uh Oh! They used an ambiguous alias. We have to search the + * whole swiss cheese starting at the highest standard affinity. + * This may take a while. 
+ */ + + for (idx = 0; idx < gTaggedAliasArray.length; idx++) { + listOffset = gTaggedAliasArray[idx]; + if (listOffset != 0 && isAliasInList(alias, listOffset)) { + int currTagNum = idx / gConverterList.length; + int currConvNum = (idx - currTagNum + * gConverterList.length); + int tempListOffset = gTaggedAliasArray[tagNum + * gConverterList.length + currConvNum]; + if (tempListOffset != 0 + && gTaggedAliasLists[tempListOffset + 1] != 0) { + return tempListOffset; + } + /* + * else keep on looking We could speed this up by + * starting on the next row because an alias is unique + * per row, right now. This would change if alias + * versioning appears. + */ + } + } + /* The standard doesn't know about the alias */ + } + /* else no default name */ + return 0; + } + /* else converter or tag not found */ + + return Integer.MAX_VALUE; + } + + /* Return the canonical name */ + // static uint32_t findTaggedConverterNum(const char *alias, const char + // *standard, UErrorCode *pErrorCode) + private static int findTaggedConverterNum(String alias, String standard) { + int idx; + int listOffset; + int convNum; + int tagNum = getTagNumber(standard); + boolean[] isAmbigous = new boolean[1]; + + /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ + convNum = findConverter(alias, isAmbigous); + + if (tagNum < (gTagList.length - NUM_HIDDEN_TAGS) + && convNum < gConverterList.length) { + listOffset = gTaggedAliasArray[tagNum + * gConverterList.length + convNum]; + if (listOffset != 0 && isAliasInList(alias, listOffset)) { + return convNum; + } + if (isAmbigous[0] == true) { + /* + * Uh Oh! They used an ambiguous alias. We have to search one + * slice of the swiss cheese. We search only in the requested + * tag, not the whole thing. This may take a while. 
+ */ + int convStart = (tagNum) * gConverterList.length; + int convLimit = (tagNum + 1) * gConverterList.length; + for (idx = convStart; idx < convLimit; idx++) { + listOffset = gTaggedAliasArray[idx]; + if (listOffset != 0 && isAliasInList(alias, listOffset)) { + return idx - convStart; + } + } + /* The standard doesn't know about the alias */ + } + /* else no canonical name */ + } + /* else converter or tag not found */ + + return Integer.MAX_VALUE; + } + + // static U_INLINE UBool isAliasInList(const char *alias, uint32_t + // listOffset) + private static boolean isAliasInList(String alias, int listOffset) { + if (listOffset != 0) { + int currAlias; + int listCount = gTaggedAliasLists[listOffset]; + /* +1 to skip listCount */ + int[] currList = gTaggedAliasLists; + int currListArrayIndex = listOffset + 1; + for (currAlias = 0; currAlias < listCount; currAlias++) { + if (currList[currAlias + currListArrayIndex] != 0 + && compareNames( + alias, + GET_STRING(currList[currAlias + currListArrayIndex])) == 0) { + return true; + } + } + } + return false; + } + + // begin bld.c + static String[] gAvailableConverters = null; + + static int gAvailableConverterCount = 0; + + static byte[] gDefaultConverterNameBuffer; // [MAX_CONVERTER_NAME_LENGTH + + // 1]; /* +1 for NULL */ + + static String gDefaultConverterName = null; + + // static UBool haveAvailableConverterList(UErrorCode *pErrorCode) + static boolean haveAvailableConverterList() throws IOException{ + if (gAvailableConverters == null) { + int idx; + int localConverterCount; + String converterName; + String[] localConverterList; + + if (!haveAliasData()) { + return false; + } + + /* We can't have more than "*converterTable" converters to open */ + localConverterList = new String[gConverterList.length]; + + localConverterCount = 0; + + for (idx = 0; idx < gConverterList.length; idx++) { + converterName = GET_STRING(gConverterList[idx]); + //UConverter cnv = UConverter.open(converterName); + //TODO: Fix me + 
localConverterList[localConverterCount++] = converterName; + + } + + // agljport:todo umtx_lock(NULL); + if (gAvailableConverters == null) { + gAvailableConverters = localConverterList; + gAvailableConverterCount = localConverterCount; + /* haveData should have already registered the cleanup function */ + } else { + // agljport:todo free((char **)localConverterList); + } + // agljport:todo umtx_unlock(NULL); + } + return true; + } + + // U_CFUNC uint16_t bld_countAvailableConverters(UErrorCode *pErrorCode) + static int bld_countAvailableConverters() throws IOException{ + if (haveAvailableConverterList()) { + return gAvailableConverterCount; + } + return 0; + } + + // U_CFUNC const char * bld_getAvailableConverter(uint16_t n, UErrorCode + // *pErrorCode) + static String bld_getAvailableConverter(int n) throws IOException{ + if (haveAvailableConverterList()) { + if (n < gAvailableConverterCount) { + return gAvailableConverters[n]; + } + } + return null; + } + + /* default converter name --------------------------------------------------- */ + + /* + * In order to be really thread-safe, the get function would have to take + * a buffer parameter and copy the current string inside a mutex block. + * This implementation only tries to be really thread-safe while + * setting the name. + * It assumes that setting a pointer is atomic. 
+ */ + + // U_CFUNC const char * getDefaultName() +// static final synchronized String getDefaultName() { +// /* local variable to be thread-safe */ +// String name; +// +// //agljport:todo umtx_lock(null); +// name = gDefaultConverterName; +// //agljport:todo umtx_unlock(null); +// +// if (name == null) { +// //UConverter cnv = null; +// int length = 0; +// +// name = CharsetICU.getDefaultCharsetName(); +// +// /* if the name is there, test it out and get the canonical name with options */ +// if (name != null) { +// // cnv = UConverter.open(name); +// // name = cnv.getName(cnv); +// // TODO: fix me +// } +// +// if (name == null || name.length() == 0 ||/* cnv == null ||*/ +// length >= gDefaultConverterNameBuffer.length) { +// /* Panic time, let's use a fallback. */ +// name = new String("US-ASCII"); +// } +// +// //length=(int32_t)(strlen(name)); +// +// /* Copy the name before we close the converter. */ +// name = gDefaultConverterName; +// } +// +// return name; +// } + + //end bld.c +} \ No newline at end of file diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java new file mode 100644 index 00000000000..c4134b8c6ad --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java @@ -0,0 +1,221 @@ +/* +******************************************************************************* +* Copyright (C) 2006-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.charset; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.ibm.icu.impl.ICUBinary; + + +/* Format of cnvalias.icu ----------------------------------------------------- + * + * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. 
+ * This binary form contains several tables. All indexes are to uint16_t + * units, and not to the bytes (uint8_t units). Addressing everything on + * 16-bit boundaries allows us to store more information with small index + * numbers, which are also 16-bit in size. The majority of the table (except + * the string table) are 16-bit numbers. + * + * First there is the size of the Table of Contents (TOC). The TOC + * entries contain the size of each section. In order to find the offset + * you just need to sum up the previous offsets. + * The TOC length and entries are an array of uint32_t values. + * The first section after the TOC starts immediately after the TOC. + * + * 1) This section contains a list of converters. This list contains indexes + * into the string table for the converter name. The index of this list is + * also used by other sections, which are mentioned later on. + * This list is not sorted. + * + * 2) This section contains a list of tags. This list contains indexes + * into the string table for the tag name. The index of this list is + * also used by other sections, which are mentioned later on. + * This list is in priority order of standards. + * + * 3) This section contains a list of sorted unique aliases. This + * list contains indexes into the string table for the alias name. The + * index of this list is also used by other sections, like the 4th section. + * The index for the 3rd and 4th section is used to get the + * alias -> converter name mapping. Section 3 and 4 form a two column table. + * + * 4) This section contains a list of mapped converter names. Consider this + * as a table that maps the 3rd section to the 1st section. This list contains + * indexes into the 1st section. The index of this list is the same index in + * the 3rd section. There is also some extra information in the high bits of + * each converter index in this table. Currently it's only used to say that + * an alias mapped to this converter is ambiguous. 
See UCNV_CONVERTER_INDEX_MASK + * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is + * the predigested form of the 5th section so that an alias lookup can be fast. + * + * 5) This section contains a 2D array with indexes to the 6th section. This + * section is the full form of all alias mappings. The column index is the + * index into the converter list (column header). The row index is the index + * to the tag list (row header). This 2D array is the top part of a 3D array. The + * third dimension is in the 6th section. + * + * 6) This is a blob of variable length arrays. Each array starts with a size, + * and is followed by indexes to alias names in the string table. This is + * the third dimension of section 5. No other section should be referencing + * this section. + * + * 7) Reserved at this time (There is no information). This _usually_ has a + * size of 0. Future versions may add more information here. + * + * 8) This is the string table. All strings are indexed on an even address. + * There are two reasons for this. First, many chip architectures locate strings + * faster on even address boundaries. Second, since all indexes are 16-bit + * numbers, this string table can be 128KB in size instead of 64KB when we + * only have strings starting on an even address. + * + * + * Here is the concept of section 5 and 6. It's a 3D cube. Each tag + * has a unique alias among all converters. That same alias can + * be mentioned in other standards on different converters, + * but only one alias per tag can be unique. + * + * + * Converter Names (Usually in TR22 form) + * -------------------------------------------. + * T / /| + * a / / | + * g / / | + * s / / | + * / / | + * ------------------------------------------/ | + * A | | | + * l | | | + * i | | / + * a | | / + * s | | / + * e | | / + * s | |/ + * ------------------------------------------- + * + * + * + * Here is what it really looks like. It's like swiss cheese. + * There are holes.
Some converters aren't recognized by + * a standard, or they are really old converters that the + * standard doesn't recognize anymore. + * + * Converter Names (Usually in TR22 form) + * -------------------------------------------. + * T /##########################################/| + * a / # # /# + * g / # ## ## ### # ### ### ### #/ + * s / # ##### #### ## ## #/# + * / ### # # ## # # # ### # # #/## + * ------------------------------------------/# # + * A |### # # ## # # # ### # # #|# # + * l |# # # # # ## # #|# # + * i |# # # # # # #|# + * a |# #|# + * s | #|# + * e + * s + * + */ + +final class UConverterAliasDataReader implements ICUBinary.Authenticate { +// private final static boolean debug = ICUDebug.enabled("UConverterAliasDataReader"); + + /** + *

    Protected constructor.

    + * @param inputStream ICU uprop.dat file input stream + * @exception IOException throw if data file fails authentication + */ + protected UConverterAliasDataReader(InputStream inputStream) + throws IOException{ + //if(debug) System.out.println("Bytes in inputStream " + inputStream.available()); + + /*unicodeVersion = */ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this); + + //if(debug) System.out.println("Bytes left in inputStream " +inputStream.available()); + + dataInputStream = new DataInputStream(inputStream); + + //if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available()); + } + + // protected methods ------------------------------------------------- + + protected int[] readToc(int n)throws IOException + { + int[] toc = new int[n]; + //Read the toc + for (int i = 0; i < n ; ++i) { + toc[i] = dataInputStream.readInt() & UNSIGNED_INT_MASK; + } + return toc; + } + + protected void read(int[] convList, int[] tagList, int[] aliasList, int[]untaggedConvArray, int[] taggedAliasArray, int[] taggedAliasLists, int[] optionTable, byte[] stringTable, byte[] normalizedStringTable) throws IOException{ + int i; + //int listnum = 1; + //long listsize; + + for(i = 0; i < convList.length; ++i) + convList[i] = dataInputStream.readUnsignedShort(); + + for(i = 0; i < tagList.length; ++i) + tagList[i] = dataInputStream.readUnsignedShort(); + + for(i = 0; i < aliasList.length; ++i) + aliasList[i] = dataInputStream.readUnsignedShort(); + + for(i = 0; i < untaggedConvArray.length; ++i) + untaggedConvArray[i] = dataInputStream.readUnsignedShort(); + + for(i = 0; i < taggedAliasArray.length; ++i) + taggedAliasArray[i] = dataInputStream.readUnsignedShort(); + + for(i = 0; i < taggedAliasLists.length; ++i) + taggedAliasLists[i] = dataInputStream.readUnsignedShort(); + + for(i = 0; i < optionTable.length; ++i) + optionTable[i] = dataInputStream.readUnsignedShort(); + + dataInputStream.readFully(stringTable); + 
dataInputStream.readFully(normalizedStringTable); + } + + public boolean isDataVersionAcceptable(byte version[]) + { + return version.length >= DATA_FORMAT_VERSION.length + && version[0] == DATA_FORMAT_VERSION[0] + && version[1] == DATA_FORMAT_VERSION[1] + && version[2] == DATA_FORMAT_VERSION[2]; + } + + /*byte[] getUnicodeVersion(){ + return unicodeVersion; + }*/ + // private data members ------------------------------------------------- + + + /** + * ICU data file input stream + */ + private DataInputStream dataInputStream; + +// private byte[] unicodeVersion; + + /** + * File format version that this class understands. + * No guarantees are made if a older version is used + * see store.c of gennorm for more information and values + */ + // DATA_FORMAT_ID_ values taken from icu4c isAcceptable (ucnv_io.c) + private static final byte DATA_FORMAT_ID[] = {(byte)0x43, (byte)0x76, (byte)0x41, (byte)0x6c}; // dataFormat="CvAl" + private static final byte DATA_FORMAT_VERSION[] = {3, 0, 1}; + + //private static final int UNSIGNED_SHORT_MASK = 0xffff; + private static final int UNSIGNED_INT_MASK = 0xffffffff; + +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java new file mode 100644 index 00000000000..2fdf5070e47 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java @@ -0,0 +1,169 @@ +/* +******************************************************************************* +* Copyright (C) 2006-2008, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ +package com.ibm.icu.charset; + +interface UConverterConstants { + + static final short UNSIGNED_BYTE_MASK = 0xff; + static final int UNSIGNED_SHORT_MASK = 0xffff; + static final long UNSIGNED_INT_MASK = 0xffffffffL; + + static final int U_IS_BIG_ENDIAN = 0; + + /** + * Useful constant for the maximum size of the whole locale ID + * (including the terminating NULL). + */ + static final int ULOC_FULLNAME_CAPACITY = 56; + + /** + * This value is intended for sentinel values for APIs that + * (take or) return single code points (UChar32). + * It is outside of the Unicode code point range 0..0x10ffff. + * + * For example, a "done" or "error" value in a new API + * could be indicated with U_SENTINEL. + * + * ICU APIs designed before ICU 2.4 usually define service-specific "done" + * values, mostly 0xffff. + * Those may need to be distinguished from + * actual U+ffff text contents by calling functions like + * CharacterIterator::hasNext() or UnicodeString::length(). + */ + static final int U_SENTINEL = -1; + + //end utf.h + + //begin ucnv.h + /** + * Character that separates converter names from options and options from each other. 
+ * @see CharsetICU#forNameICU(String) + */ + static final byte OPTION_SEP_CHAR = ','; + + /** Maximum length of a converter name including the terminating NULL */ + static final int MAX_CONVERTER_NAME_LENGTH = 60; + /** Maximum length of a converter name including path and terminating NULL */ + static final int MAX_FULL_FILE_NAME_LENGTH = (600+MAX_CONVERTER_NAME_LENGTH); + + /** Shift in for EBCDIC_STATEFUL and iso2022 states */ + static final int SI = 0x0F; + /** Shift out for EBCDIC_STATEFUL and iso2022 states */ + static final int SO = 0x0E; + + //end ucnv.h + + // begin bld.h + /* size of the overflow buffers in UConverter, enough for escaping callbacks */ + //#define ERROR_BUFFER_LENGTH 32 + static final int ERROR_BUFFER_LENGTH = 32; + + /* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */ + static final int MAX_SUBCHAR_LEN = 4; + + /* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */ + static final int MAX_CHAR_LEN = 8; + + /* converter options bits */ + static final int OPTION_VERSION = 0xf; + static final int OPTION_SWAP_LFNL = 0x10; + static final int OPTION_MAC = 0x20; //agljport:comment added for Mac ISCII encodings + + static final String OPTION_SWAP_LFNL_STRING = ",swaplfnl"; + + /** values for the unicodeMask */ + static final int HAS_SUPPLEMENTARY = 1; + static final int HAS_SURROGATES = 2; + // end bld.h + + // begin cnv.h + /* this is used in fromUnicode DBCS tables as an "unassigned" marker */ + static final int missingCharMarker = 0xFFFF; + /** + * + * @author ram + */ + static interface UConverterResetChoice { + static final int RESET_BOTH = 0; + static final int RESET_TO_UNICODE = RESET_BOTH + 1; + static final int RESET_FROM_UNICODE = RESET_TO_UNICODE + 1; + } + + // begin utf16.h + /** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). 
+ */ + static final int U16_MAX_LENGTH = 2; + // end utf16.h + + // begin err.h + /** + * FROM_U, TO_U context options for sub callback + */ + static byte[] SUB_STOP_ON_ILLEGAL = {'i'}; + + /** + * FROM_U, TO_U context options for skip callback + */ + static byte[] SKIP_STOP_ON_ILLEGAL = {'i'}; + + /** + * The process condition code to be used with the callbacks. + * Codes which are greater than IRREGULAR should be + * passed on to any chained callbacks. + */ + static interface UConverterCallbackReason { + static final int UNASSIGNED = 0; /**< The code point is unassigned. + The error code U_INVALID_CHAR_FOUND will be set. */ + static final int ILLEGAL = 1; /**< The code point is illegal. For example, + \\x81\\x2E is illegal in SJIS because \\x2E + is not a valid trail byte for the \\x81 + lead byte. + Also, starting with Unicode 3.0.1, non-shortest byte sequences + in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) + are also illegal, not just irregular. + The error code U_ILLEGAL_CHAR_FOUND will be set. */ + static final int IRREGULAR = 2; /**< The codepoint is not a regular sequence in + the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF + are irregular UTF-8 byte sequences for single surrogate + code points. + The error code U_INVALID_CHAR_FOUND will be set. */ + static final int RESET = 3; /**< The callback is called with this reason when a + 'reset' has occurred. Callback should reset all + state. */ + static final int CLOSE = 4; /**< Called when the converter is closed. The + callback should release any allocated memory.*/ + static final int CLONE = 5; /**< Called when safeClone() is called on the + converter. The pointer available as the + 'context' is an alias to the original converter's + context pointer. If the context must be owned + by the new converter, the callback must clone + the data and call setFromUCallback + (or setToUCallback) with the correct pointer. 
+ */ + } + //end err.h + + + static final String DATA_TYPE = "cnv"; + static final int CNV_DATA_BUFFER_SIZE = 25000; + static final int SIZE_OF_UCONVERTER_SHARED_DATA = 100; + + static final int MAXIMUM_UCS2 = 0x0000FFFF; + static final int MAXIMUM_UTF = 0x0010FFFF; + //static final int MAXIMUM_UCS4 = 0x7FFFFFFF; + static final int HALF_SHIFT = 10; + static final int HALF_BASE = 0x0010000; + static final int HALF_MASK = 0x3FF; + static final int SURROGATE_HIGH_START = 0xD800; + static final int SURROGATE_HIGH_END = 0xDBFF; + static final int SURROGATE_LOW_START = 0xDC00; + static final int SURROGATE_LOW_END = 0xDFFF; + + /* -SURROGATE_LOW_START + HALF_BASE */ + static final int SURROGATE_LOW_BASE = 9216; +} diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java new file mode 100644 index 00000000000..e37671a9d4f --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java @@ -0,0 +1,612 @@ +/* +******************************************************************************* +* Copyright (C) 2006-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.charset; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +import com.ibm.icu.impl.ICUBinary; + +/** + * ucnvmbcs.h + * + * ICU conversion (.cnv) data file structure, following the usual UDataInfo + * header. + * + * Format version: 6.2 + * + * struct UConverterStaticData -- struct containing the converter name, IBM CCSID, + * min/max bytes per character, etc. + * see ucnv_bld.h + * + * -------------------- + * + * The static data is followed by conversionType-specific data structures. + * At the moment, there are only variations of MBCS converters. 
They all have + * the same toUnicode structures, while the fromUnicode structures for SBCS + * differ from those for other MBCS-style converters. + * + * _MBCSHeader.version 4.2 adds an optional conversion extension data structure. + * If it is present, then an ICU version reading header versions 4.0 or 4.1 + * will be able to use the base table and ignore the extension. + * + * The unicodeMask in the static data is part of the base table data structure. + * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the + * fromUnicode stage 1 array. + * The static data unicodeMask refers only to the base table's properties if + * a base table is included. + * In an extension-only file, the static data unicodeMask is 0. + * The extension data indexes have a separate field with the unicodeMask flags. + * + * MBCS-style data structure following the static data. + * Offsets are counted in bytes from the beginning of the MBCS header structure. + * Details about usage in comments in ucnvmbcs.c. + * + * struct _MBCSHeader (see the definition in this header file below) + * contains 32-bit fields as follows: + * 8 values: + * 0 uint8_t[4] MBCS version in UVersionInfo format (currently 4.2.0.0) + * 1 uint32_t countStates + * 2 uint32_t countToUFallbacks + * 3 uint32_t offsetToUCodeUnits + * 4 uint32_t offsetFromUTable + * 5 uint32_t offsetFromUBytes + * 6 uint32_t flags, bits: + * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher + * 0 for older versions and if + * there is not extension structure + * 7.. 
0 outputType + * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher + * counts bytes in fromUBytes[] + * + * if(outputType==MBCS_OUTPUT_EXT_ONLY) { + * -- base table name for extension-only table + * char baseTableName[variable]; -- with NUL plus padding for 4-alignment + * + * -- all _MBCSHeader fields except for version and flags are 0 + * } else { + * -- normal base table with optional extension + * + * int32_t stateTable[countStates][256]; + * + * struct _MBCSToUFallback { (fallbacks are sorted by offset) + * uint32_t offset; + * UChar32 codePoint; + * } toUFallbacks[countToUFallbacks]; + * + * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2]; + * (padded to an even number of units) + * + * -- stage 1 tables + * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) { + * -- stage 1 table for all of Unicode + * uint16_t fromUTable[0x440]; (32-bit-aligned) + * } else { + * -- BMP-only tables have a smaller stage 1 table + * uint16_t fromUTable[0x40]; (32-bit-aligned) + * } + * + * -- stage 2 tables + * length determined by top of stage 1 and bottom of stage 3 tables + * if(outputType==MBCS_OUTPUT_1) { + * -- SBCS: pure indexes + * uint16_t stage 2 indexes[?]; + * } else { + * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes + * uint32_t stage 2 flags and indexes[?]; + * } + * + * -- stage 3 tables with byte results + * if(outputType==MBCS_OUTPUT_1) { + * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c + * uint16_t fromUBytes[fromUBytesLength/2]; + * } else { + * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c + * uint8_t fromUBytes[fromUBytesLength]; or + * uint16_t fromUBytes[fromUBytesLength/2]; or + * uint32_t fromUBytes[fromUBytesLength/4]; + * } + * } + * + * -- extension table, details see ucnv_ext.h + * int32_t indexes[>=32]; ... 
+ */ +/* + * ucnv_ext.h + * + * See icuhtml/design/conversion/conversion_extensions.html + * + * Conversion extensions serve two purposes: + * 1. They support m:n mappings. + * 2. They support extension-only conversion files that are used together + * with the regular conversion data in base files. + * + * A base file may contain an extension table (explicitly requested or + * implicitly generated for m:n mappings), but its extension table is not + * used when an extension-only file is used. + * + * It is an error if a base file contains any regular (not extension) mapping + * from the same sequence as a mapping in the extension file + * because the base mapping would hide the extension mapping. + * + * + * Data for conversion extensions: + * + * One set of data structures per conversion direction (to/from Unicode). + * The data structures are sorted by input units to allow for binary search. + * Input sequences of more than one unit are handled like contraction tables + * in collation: + * The lookup value of a unit points to another table that is to be searched + * for the next unit, recursively. + * + * For conversion from Unicode, the initial code point is looked up in + * a 3-stage trie for speed, + * with an additional table of unique results to save space. + * + * Long output strings are stored in separate arrays, with length and index + * in the lookup tables. + * Output results also include a flag distinguishing roundtrip from + * (reverse) fallback mappings. + * + * Input Unicode strings must not begin or end with unpaired surrogates + * to avoid problems with matches on parts of surrogate pairs. + * + * Mappings from multiple characters (code points or codepage state + * table sequences) must be searched preferring the longest match. + * For this to work and be efficient, the variable-width table must contain + * all mappings that contain prefixes of the multiple characters. 
+ * If an extension table is built on top of a base table in another file + * and a base table entry is a prefix of a multi-character mapping, then + * this is an error. + * + * + * Implementation note: + * + * Currently, the parser and several checks in the code limit the number + * of UChars or bytes in a mapping to + * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively, + * which are output value limits in the data structure. + * + * For input, this is not strictly necessary - it is a hard limit only for the + * buffers in UConverter that are used to store partial matches. + * + * Input sequences could otherwise be arbitrarily long if partial matches + * need not be stored (i.e., if a sequence does not span several buffers with too + * many units before the last buffer), although then results would differ + * depending on whether partial matches exceed the limits or not, + * which depends on the pattern of buffer sizes. + * + * + * Data structure: + * + * int32_t indexes[>=32]; + * + * Array of indexes and lengths etc. The length of the array is at least 32. + * The actual length is stored in indexes[0] to be forward compatible. + * + * Each index to another array is the number of bytes from indexes[]. + * Each length of an array is the number of array base units in that array. + * + * Some of the structures may not be present, in which case their indexes + * and lengths are 0. 
+ * + * Usage of indexes[i]: + * [0] length of indexes[] + * + * // to Unicode table + * [1] index of toUTable[] (array of uint32_t) + * [2] length of toUTable[] + * [3] index of toUUChars[] (array of UChar) + * [4] length of toUUChars[] + * + * // from Unicode table, not for the initial code point + * [5] index of fromUTableUChars[] (array of UChar) + * [6] index of fromUTableValues[] (array of uint32_t) + * [7] length of fromUTableUChars[] and fromUTableValues[] + * [8] index of fromUBytes[] (array of char) + * [9] length of fromUBytes[] + * + * // from Unicode trie for initial-code point lookup + * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2) + * [11] length of stage 1 portion of fromUStage12[] + * [12] length of fromUStage12[] + * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[]) + * [14] length of fromUStage3[] + * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[]) + * [16] length of fromUStage3b[] + * + * [17] Bit field containing numbers of bytes: + * 31..24 reserved, 0 + * 23..16 maximum input bytes + * 15.. 8 maximum output bytes + * 7.. 0 maximum bytes per UChar + * + * [18] Bit field containing numbers of UChars: + * 31..24 reserved, 0 + * 23..16 maximum input UChars + * 15.. 8 maximum output UChars + * 7.. 0 maximum UChars per byte + * + * [19] Bit field containing flags: + * (extension table unicodeMask) + * 1 UCNV_HAS_SURROGATES flag for the extension table + * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table + * + * [20]..[30] reserved, 0 + * [31] number of bytes for the entire extension structure + * [>31] reserved; there are indexes[0] indexes + * + * + * uint32_t toUTable[]; + * + * Array of byte/value pairs for lookups for toUnicode conversion. + * The array is partitioned into sections like collation contraction tables. + * Each section contains one word with the number of following words and + * a default value for when the lookup in this section yields no match. 
+ * + * A section is sorted in ascending order of input bytes, + * allowing for fast linear or binary searches. + * The builder may store entries for a contiguous range of byte values + * (compare difference between the first and last one with count), + * which then allows for direct array access. + * The builder should always do this for the initial table section. + * + * Entries may have 0 values, see below. + * No two entries in a section have the same byte values. + * + * Each uint32_t contains an input byte value in bits 31..24 and the + * corresponding lookup value in bits 23..0. + * Interpret the value as follows: + * if(value==0) { + * no match, see below + * } else if(value<0x1f0000) { + * partial match - use value as index to the next toUTable section + * and match the next unit; (value indexes toUTable[value]) + * } else { + * if(bit 23 set) { + * roundtrip; + * } else { + * fallback; + * } + * unset value bit 23; + * if(value<=0x2fffff) { + * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff) + * } else { + * bits 17..0 (value&0x3ffff) is an index to + * the result UChars in toUUChars[]; (0 indexes toUUChars[0]) + * length of the result=((value>>18)-12); (length=0..19) + * } + * } + * + * The first word in a section contains the number of following words in the + * input byte position (bits 31..24, number=1..0xff). + * The value of the initial word is used when the current byte is not found + * in this section. + * If the value is not 0, then it represents a result as above. + * If the value is 0, then the search has to return a shorter match with an + * earlier default value as the result, or result in "unmappable" even for the + * initial bytes. + * If the value is 0 for the initial toUTable entry, then the initial byte + * does not start any mapping input. + * + * + * UChar toUUChars[]; + * + * Contains toUnicode mapping results, stored as sequences of UChars. + * Indexes and lengths stored in the toUTable[]. 
+ * + * + * UChar fromUTableUChars[]; + * uint32_t fromUTableValues[]; + * + * The fromUTable is split into two arrays, but works otherwise much like + * the toUTable. The array is partitioned into sections like collation + * contraction tables and toUTable. + * A row in the table consists of same-index entries in fromUTableUChars[] + * and fromUTableValues[]. + * + * Interpret a value as follows: + * if(value==0) { + * no match, see below + * } else if(value<=0xffffff) { (bits 31..24 are 0) + * partial match - use value as index to the next fromUTable section + * and match the next unit; (value indexes fromUTable[value]) + * } else { + * if(value==0x80000001) { + * return no mapping, but request for <subchar1>; + * } + * if(bit 31 set) { + * roundtrip; + * } else { + * fallback; + * } + * // bits 30..29 reserved, 0 + * length=(value>>24)&0x1f; (bits 28..24) + * if(length==1..3) { + * bits 23..0 contain 1..3 bytes, padded with 00s on the left; + * } else { + * bits 23..0 (value&0xffffff) is an index to + * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0]) + * } + * } + * + * The first pair in a section contains the number of following pairs in the + * UChar position (16 bits, number=1..0xffff). + * The value of the initial pair is used when the current UChar is not found + * in this section. + * If the value is not 0, then it represents a result as above. + * If the value is 0, then the search has to return a shorter match with an + * earlier default value as the result, or result in "unmappable" even for the + * initial UChars. + * + * If the from Unicode trie is present, then the from Unicode search tables + * are not used for initial code points. + * In this case, the first entries (index 0) in the tables are not used + * (reserved, set to 0) because a value of 0 is used in trie results + * to indicate no mapping. + * + * + * uint16_t fromUStage12[]; + * + * Stages 1 & 2 of a trie that maps an initial code point. 
+ * Indexes in stage 1 are all offset by the length of stage 1 so that the + * same array pointer can be used for both stages. + * If (c>>10)>=(length of stage 1) then c does not start any mapping. + * Same bit distribution as for regular conversion tries. + * + * + * uint16_t fromUStage3[]; + * uint32_t fromUStage3b[]; + * + * Stage 3 of the trie. The first array simply contains indexes to the second, + * which contains words in the same format as fromUTableValues[]. + * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries, + * and 16-bit entries in stage 3 allow for 64k stage 3b entries. + * The stage 3 granularity means that the stage 2 entry needs to be left-shifted. + * + * Two arrays are used because it is expected that more than half of the stage 3 + * entries will be zero. The 16-bit index stage 3 array saves space even + * considering storing a total of 6 bytes per non-zero entry in both arrays + * together. + * Using a stage 3 granularity of >1 diminishes the compactability in that stage + * but provides a larger effective addressing space in stage 2. + * All but the final result stage use 16-bit entries to save space. + * + * fromUStage3b[] contains a zero for "no mapping" at its index 0, + * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping" + * (i.e., "no mapping" with preference for <subchar1> rather than <subchar>), + * and all other items are unique non-zero results. + * + * The default value of a fromUTableValues[] section that is referenced + * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1, + * but this value must not occur anywhere else in fromUTableValues[] + * because "no mapping" is always a property of a single code point, + * never of multiple. + * + * + * char fromUBytes[]; + * + * Contains fromUnicode mapping results, stored as sequences of chars. + * Indexes and lengths stored in the fromUTableValues[]. 
+ */ + +final class UConverterDataReader implements ICUBinary.Authenticate { + //private final static boolean debug = ICUDebug.enabled("UConverterDataReader"); + + /* + * UConverterDataReader(UConverterDataReader r) + { + dataInputStream = new DataInputStream(r.dataInputStream); + unicodeVersion = r.unicodeVersion; + } + */ + /* the number bytes read from the stream */ + int bytesRead = 0; + /* the number of bytes read for static data */ + int staticDataBytesRead = 0; + /** + *

    Protected constructor.

    + * @param inputStream ICU uprop.dat file input stream + * @exception IOException throw if data file fails authentication + */ + protected UConverterDataReader(InputStream inputStream) + throws IOException{ + //if(debug) System.out.println("Bytes in inputStream " + inputStream.available()); + + /*unicodeVersion = */ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this); + + //if(debug) System.out.println("Bytes left in inputStream " +inputStream.available()); + + dataInputStream = new DataInputStream(inputStream); + + //if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available()); + } + + // protected methods ------------------------------------------------- + + protected void readStaticData(UConverterStaticData sd) throws IOException + { + int bRead = 0; + sd.structSize = dataInputStream.readInt(); + bRead +=4; + byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH]; + dataInputStream.readFully(name); + bRead +=name.length; + sd.name = new String(name, 0, name.length); + sd.codepage = dataInputStream.readInt(); + bRead +=4; + sd.platform = dataInputStream.readByte(); + bRead++; + sd.conversionType = dataInputStream.readByte(); + bRead++; + sd.minBytesPerChar = dataInputStream.readByte(); + bRead++; + sd.maxBytesPerChar = dataInputStream.readByte(); + bRead++; + dataInputStream.readFully(sd.subChar); + bRead += sd.subChar.length; + sd.subCharLen = dataInputStream.readByte(); + bRead++; + sd.hasToUnicodeFallback = dataInputStream.readByte(); + bRead++; + sd.hasFromUnicodeFallback = dataInputStream.readByte(); + bRead++; + sd.unicodeMask = (short)dataInputStream.readUnsignedByte(); + bRead++; + sd.subChar1 = dataInputStream.readByte(); + bRead++; + dataInputStream.readFully(sd.reserved); + bRead += sd.reserved.length; + staticDataBytesRead = bRead; + bytesRead += bRead; + } + + protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException + { + dataInputStream.readFully(h.version); + bytesRead += 
h.version.length; + h.countStates = dataInputStream.readInt(); + bytesRead+=4; + h.countToUFallbacks = dataInputStream.readInt(); + bytesRead+=4; + h.offsetToUCodeUnits = dataInputStream.readInt(); + bytesRead+=4; + h.offsetFromUTable = dataInputStream.readInt(); + bytesRead+=4; + h.offsetFromUBytes = dataInputStream.readInt(); + bytesRead+=4; + h.flags = dataInputStream.readInt(); + bytesRead+=4; + h.fromUBytesLength = dataInputStream.readInt(); + bytesRead+=4; + if (h.version[0] == 5 && h.version[1] >= 3) { + h.options = dataInputStream.readInt(); + bytesRead+=4; + if ((h.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0) { + h.fullStage2Length = dataInputStream.readInt(); + bytesRead+=4; + } + } + } + + protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException + { + int i, j; + for(i = 0; i < stateTableArray.length; ++i){ + for(j = 0; j < stateTableArray[i].length; ++j){ + stateTableArray[i][j] = dataInputStream.readInt(); + bytesRead+=4; + } + } + for(i = 0; i < toUFallbacksArray.length; ++i) { + toUFallbacksArray[i].offset = dataInputStream.readInt(); + bytesRead+=4; + toUFallbacksArray[i].codePoint = dataInputStream.readInt(); + bytesRead+=4; + } + for(i = 0; i < unicodeCodeUnitsArray.length; ++i){ + unicodeCodeUnitsArray[i] = dataInputStream.readChar(); + bytesRead+=2; + } + for(i = 0; i < fromUnicodeTableArray.length; ++i){ + fromUnicodeTableArray[i] = dataInputStream.readChar(); + bytesRead+=2; + } + for(i = 0; i < fromUnicodeBytesArray.length; ++i){ + fromUnicodeBytesArray[i] = dataInputStream.readByte(); + bytesRead++; + } + } + + protected String readBaseTableName() throws IOException + { + char c; + StringBuilder name = new StringBuilder(); + while((c = (char)dataInputStream.readByte()) != 0){ + name.append(c); + bytesRead++; + } + bytesRead++/*for null terminator*/; + return name.toString(); + } + + 
//protected int[] readExtIndexes(int skip) throws IOException + protected ByteBuffer readExtIndexes(int skip) throws IOException + { + int skipped = dataInputStream.skipBytes(skip); + if(skipped != skip){ + throw new IOException("could not skip "+ skip +" bytes"); + } + int n = dataInputStream.readInt(); + bytesRead+=4; + int[] indexes = new int[n]; + indexes[0] = n; + for(int i = 1; i < n; ++i) { + indexes[i] = dataInputStream.readInt(); + bytesRead+=4; + } + //return indexes; + + ByteBuffer b = ByteBuffer.allocate(indexes[31]); + for(int i = 0; i < n; ++i) { + b.putInt(indexes[i]); + } + int len = dataInputStream.read(b.array(), b.position(), b.remaining()); + if(len==-1){ + throw new IOException("Read failed"); + } + bytesRead += len; + return b; + } + + /*protected byte[] readExtTables(int n) throws IOException + { + byte[] tables = new byte[n]; + int len =dataInputStream.read(tables); + if(len==-1){ + throw new IOException("Read failed"); + } + bytesRead += len; + return tables; + }*/ + + byte[] getDataFormatVersion(){ + return DATA_FORMAT_VERSION; + } + /** + * Inherited method + */ + public boolean isDataVersionAcceptable(byte version[]){ + return version[0] == DATA_FORMAT_VERSION[0]; + } + +/* byte[] getUnicodeVersion(){ + return unicodeVersion; + }*/ + // private data members ------------------------------------------------- + + /** + * ICU data file input stream + */ + DataInputStream dataInputStream; + +// private byte[] unicodeVersion; + + /** + * File format version that this class understands. 
+     * No guarantees are made if an older version is used
+     * see store.c of gennorm for more information and values
+     */
+    // DATA_FORMAT_ID_ values taken from icu4c isCnvAcceptable (ucnv_bld.c)
+    private static final byte DATA_FORMAT_ID[] = {(byte)0x63, (byte)0x6e, (byte)0x76, (byte)0x74}; // dataFormat="cnvt"
+    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6};
+
+}
+
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java
new file mode 100644
index 00000000000..e69f40a490c
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java
@@ -0,0 +1,448 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Defines the UConverterSharedData struct, the immutable, shared part of
+ * UConverter.
+ */
+final class UConverterSharedData {
+    // uint32_t structSize; /* Size of this structure */
+    // int structSize; /* Size of this structure */
+    /**
+     * used to count number of clients, 0xffffffff for static SharedData
+     */
+    int referenceCounter;
+
+    // agljport:todo const void *dataMemory; /* from udata_openChoice() - for cleanup */
+    // agljport:todo void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
+
+    // const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
+    /**
+     * pointer to the static (non changing)
+     * data.
+     */
+    UConverterStaticData staticData;
+
+    // UBool sharedDataCached; /* TRUE: shared data is in cache, don't destroy
+    // on close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to
+    // clean it up if the ref is 0 */
+
+    /**
+     * TRUE: shared data is in cache, don't destroy
+     * on close() if 0 ref. FALSE: shared data isn't
+     * in the cache, do attempt to clean it up if
+     * the ref is 0
+     */
+    boolean sharedDataCached;
+
+    /*
+     * UBool staticDataOwned; TRUE if static data owned by shared data & should
+     * be freed with it, NEVER true for udata() loaded statics. This ignored
+     * variable was removed to make space for sharedDataCached.
+     */
+
+    // const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
+    // UConverterImpl impl; /* vtable-style struct of mostly function pointers */
+    /** initial values of some members of the mutable part of object */
+    long toUnicodeStatus;
+
+    /**
+     * Shared data structures currently come in two flavors:
+     * - readonly for built-in algorithmic converters
+     * - allocated for MBCS, with a pointer to an allocated UConverterTable
+     *   which always has a UConverterMBCSTable
+     *
+     * To eliminate one allocation, I am making the UConverterMBCSTable a member
+     * of the shared data. It is the last member so that static definitions of
+     * UConverterSharedData work as before. The table field above also remains
+     * to avoid updating all static definitions, but is now unused.
+     *
+     */
+    CharsetMBCS.UConverterMBCSTable mbcs;
+
+    /**
+     * Creates shared data with a fresh, empty MBCS table; all other fields keep
+     * their default values.
+     */
+    UConverterSharedData() {
+        mbcs = new CharsetMBCS.UConverterMBCSTable();
+    }
+
+    /**
+     * Creates shared data with a fresh, empty MBCS table and the given field values.
+     * @param referenceCounter_ initial value of {@link #referenceCounter}
+     * @param staticData_       value of {@link #staticData}
+     * @param sharedDataCached_ value of {@link #sharedDataCached}
+     * @param toUnicodeStatus_  value of {@link #toUnicodeStatus}
+     */
+    UConverterSharedData(int referenceCounter_, UConverterStaticData staticData_, boolean sharedDataCached_, long toUnicodeStatus_)
+    {
+        this();
+        referenceCounter = referenceCounter_;
+        staticData = staticData_;
+        sharedDataCached = sharedDataCached_;
+        // impl = impl_;
+        toUnicodeStatus = toUnicodeStatus_;
+    }
+
+    /**
+     * UConverterImpl contains all the data and functions for a converter type.
+     * Its function pointers work much like a C++ vtable. Many converter types
+     * need to define only a subset of the functions; when a function pointer is
+     * NULL, then a default action will be performed.
+     *
+     * Every converter type must implement toUnicode, fromUnicode, and
+     * getNextUChar, otherwise the converter may crash. Every converter type
+     * that has variable-length codepage sequences should also implement
+     * toUnicodeWithOffsets and fromUnicodeWithOffsets for correct offset
+     * handling. All other functions may or may not be implemented - it depends
+     * only on whether the converter type needs them.
+     *
+     * When open() fails, then close() will be called, if present.
+     */
+/*    class UConverterImpl {
+        UConverterType type;
+        UConverterToUnicode toUnicode;
+    protected void doToUnicode(UConverterToUnicodeArgs args, int[] pErrorCode)
+    {
+    }
+
+    final void toUnicode(UConverterToUnicodeArgs args, int[] pErrorCode)
+    {
+        doToUnicode(args, pErrorCode);
+    }
+
+    //UConverterFromUnicode fromUnicode;
+    protected void doFromUnicode(UConverterFromUnicodeArgs args, int[] pErrorCode)
+    {
+    }
+
+    final void fromUnicode(UConverterFromUnicodeArgs args, int[] pErrorCode)
+    {
+        doFromUnicode(args, pErrorCode);
+    }
+
+    protected int doGetNextUChar(UConverterToUnicodeArgs args, int[] pErrorCode)
+    {
+        return 0;
+    }
+
+    //UConverterGetNextUChar getNextUChar;
+    final int getNextUChar(UConverterToUnicodeArgs args, int[] pErrorCode)
+    {
+        return doGetNextUChar(args, pErrorCode);
+    }
+
+    // interface UConverterImplLoadable extends UConverterImpl
+    protected void doLoad(UConverterLoadArgs pArgs, short[] raw, int[] pErrorCode)
+    {
+    }
+
+    protected void doUnload()
+    {
+    }
+
+    // interface UConverterImplOpenable extends UConverterImpl
+    protected void doOpen(UConverter cnv, String name, String locale, long options, int[] pErrorCode)
+    {
+    }
+
+    //UConverterOpen open;
+    final void open(UConverter cnv, String name, String locale, long options, int[] pErrorCode)
+    {
+        doOpen(cnv, name, locale, options, pErrorCode);
+    }
+
+    protected void doClose(UConverter cnv)
+    {
+    }
+
+    //UConverterClose close;
+    final void close(UConverter cnv)
+    {
+        doClose(cnv);
+    }
+
+    protected void doReset(UConverter cnv, int choice)
+    {
+    }
+
+    //typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
+    //UConverterReset reset;
+    final void reset(UConverter cnv, int choice)
+    {
+        doReset(cnv, choice);
+    }
+
+    // interface UConverterImplVariableLength extends UConverterImpl
+    protected void doToUnicodeWithOffsets(UConverterToUnicodeArgs args, int[] pErrorCode)
+    {
+    }
+
+    //UConverterToUnicode toUnicodeWithOffsets;
+    final void toUnicodeWithOffsets(UConverterToUnicodeArgs args, int[] pErrorCode)
+    {
+        doToUnicodeWithOffsets(args, pErrorCode);
+    }
+
+    protected void doFromUnicodeWithOffsets(UConverterFromUnicodeArgs args, int[] pErrorCode)
+    {
+    }
+
+    //UConverterFromUnicode fromUnicodeWithOffsets;
+    final void fromUnicodeWithOffsets(UConverterFromUnicodeArgs args, int[] pErrorCode)
+    {
+        doFromUnicodeWithOffsets(args, pErrorCode);
+    }
+
+    // interface UConverterImplMisc extends UConverterImpl
+    protected void doGetStarters(UConverter converter, boolean starters[], int[] pErrorCode)
+    {
+    }
+
+    //UConverterGetStarters getStarters;
+    final void getStarters(UConverter converter, boolean starters[], int[] pErrorCode)
+    {
+        doGetStarters(converter, starters, pErrorCode);
+    }
+
+    protected String doGetName(UConverter cnv)
+    {
+        return "";
+    }
+
+    //UConverterGetName getName;
+    final String getName(UConverter cnv)
+    {
+        return doGetName(cnv);
+    }
+
+    protected void doWriteSub(UConverterFromUnicodeArgs pArgs, long offsetIndex, int[] pErrorCode)
+    {
+    }
+
+    //UConverterWriteSub writeSub;
+    final void writeSub(UConverterFromUnicodeArgs pArgs, long offsetIndex, int[] pErrorCode)
+    {
+        doWriteSub(pArgs, offsetIndex, pErrorCode);
+    }
+
+    protected UConverter doSafeClone(UConverter cnv, byte[] stackBuffer, int[] pBufferSize, int[] status)
+    {
+        return new UConverter();
+    }
+
+    //UConverterSafeClone safeClone;
+    final UConverter safeClone(UConverter cnv, byte[] stackBuffer, int[] pBufferSize, int[] status)
+    {
+        return doSafeClone(cnv, stackBuffer, pBufferSize, status);
+    }
+
+    protected void doGetUnicodeSet(UConverter cnv, UnicodeSet /*USetAdder* / sa, int /*UConverterUnicodeSet* / which, int[] pErrorCode)
+    {
+    }
+
+    //UConverterGetUnicodeSet getUnicodeSet;
+    // final void getUnicodeSet(UConverter cnv, UnicodeSet /*USetAdder* / sa, int /*UConverterUnicodeSet* / which, int[] pErrorCode)
+    //{
+    //  doGetUnicodeSet(cnv, sa, which, pErrorCode);
+    //}
+
+    //}
+
+    static final String DATA_TYPE = "cnv";
+    private static final int CNV_DATA_BUFFER_SIZE = 25000;
+    static final int sizeofUConverterSharedData = 100;
+
+    //static UDataMemoryIsAcceptable isCnvAcceptable;
+
+    /**
+     * Load a non-algorithmic converter.
+     * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
+
+    // UConverterSharedData * load(UConverterLoadArgs *pArgs, UErrorCode *err)
+    static final UConverterSharedData load(UConverterLoadArgs pArgs, int[] err)
+    {
+        UConverterSharedData mySharedConverterData = null;
+
+        if(err == null || ErrorCode.isFailure(err[0])) {
+            return null;
+        }
+
+        if(pArgs.pkg != null && pArgs.pkg.length() != 0) {
+             application-provided converters are not currently cached
+            return UConverterSharedData.createConverterFromFile(pArgs, err);
+        }
+
+        //agljport:fix mySharedConverterData = getSharedConverterData(pArgs.name);
+        if (mySharedConverterData == null)
+        {
+            Not cached, we need to stream it in from file
+            mySharedConverterData = UConverterSharedData.createConverterFromFile(pArgs, err);
+            if (ErrorCode.isFailure(err[0]) || (mySharedConverterData == null))
+            {
+                return null;
+            }
+            else
+            {
+                 share it with other library clients
+                //agljport:fix shareConverterData(mySharedConverterData);
+            }
+        }
+        else
+        {
+             The data for this converter was already in the cache.
+             Update the reference counter on the shared data: one more client
+            mySharedConverterData.referenceCounter++;
+        }
+
+        return mySharedConverterData;
+    }
+
+    Takes an alias name gets an actual converter file name
+     *goes to disk and opens it.
+     *allocates the memory and returns a new UConverter object
+
+    //static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
+    static final UConverterSharedData createConverterFromFile(UConverterLoadArgs pArgs, int[] err)
+    {
+        UDataMemory data = null;
+        UConverterSharedData sharedData = null;
+
+        //agljport:todo UTRACE_ENTRY_OC(UTRACE_LOAD);
+
+        if (err == null || ErrorCode.isFailure(err[0])) {
+            //agljport:todo UTRACE_EXIT_STATUS(*err);
+            return null;
+        }
+
+        //agljport:todo UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
+
+        //agljport:fix data = udata_openChoice(pArgs.pkgArray, DATA_TYPE.getBytes(), pArgs.name, isCnvAcceptable, null, err);
+        if(ErrorCode.isFailure(err[0]))
+        {
+            //agljport:todo UTRACE_EXIT_STATUS(*err);
+            return null;
+        }
+
+        sharedData = data_unFlattenClone(pArgs, data, err);
+        if(ErrorCode.isFailure(err[0]))
+        {
+            //agljport:fix udata_close(data);
+            //agljport:todo UTRACE_EXIT_STATUS(*err);
+            return null;
+        }
+
+
+         * TODO Store pkg in a field in the shared data so that delta-only converters
+         * can load base converters from the same package.
+         * If the pkg name is longer than the field, then either do not load the converter
+         * in the first place, or just set the pkg field to "".
+
+
+        return sharedData;
+    }
+*/
+    /** Data reader associated with this shared data; initially null. */
+    UConverterDataReader dataReader = null;
+
+    /*
+     * returns a converter type from a string
+     */
+    /* static final UConverterSharedData getAlgorithmicTypeFromName(String realName)
+    {
+        long mid, start, limit;
+        long lastMid;
+        int result;
+        StringBuffer strippedName = new StringBuffer(UConverterConstants.MAX_CONVERTER_NAME_LENGTH);
+
+        // Lower case and remove ignorable characters.
+        UConverterAlias.stripForCompare(strippedName, realName);
+
+        // do a binary search for the alias
+        start = 0;
+        limit = cnvNameType.length;
+        mid = limit;
+        lastMid = -1;
+
+        for (;;) {
+            mid = (long)((start + limit) / 2);
+            if (lastMid == mid) { // Have we moved?
+                break;  // We haven't moved, and it wasn't found.
+            }
+            lastMid = mid;
+            result = strippedName.substring(0).compareTo(cnvNameType[(int)mid].name);
+
+            if (result < 0) {
+                limit = mid;
+            } else if (result > 0) {
+                start = mid;
+            } else {
+                return converterData[cnvNameType[(int)mid].type];
+            }
+        }
+
+        return null;
+    }*/
+
+    /*
+     * Enum for specifying basic types of converters.
+     * Implemented as int constants; each LMBCS_* and later value is defined
+     * relative to the previous constant, with the resulting number noted alongside.
+     */
+    static final class UConverterType {
+        static final int UNSUPPORTED_CONVERTER = -1;
+        static final int SBCS = 0;
+        static final int DBCS = 1;
+        static final int MBCS = 2;
+        static final int LATIN_1 = 3;
+        static final int UTF8 = 4;
+        static final int UTF16_BigEndian = 5;
+        static final int UTF16_LittleEndian = 6;
+        static final int UTF32_BigEndian = 7;
+        static final int UTF32_LittleEndian = 8;
+        static final int EBCDIC_STATEFUL = 9;
+        static final int ISO_2022 = 10;
+        static final int LMBCS_1 = 11;
+        static final int LMBCS_2 = LMBCS_1 + 1; // 12
+        static final int LMBCS_3 = LMBCS_2 + 1; // 13
+        static final int LMBCS_4 = LMBCS_3 + 1; // 14
+        static final int LMBCS_5 = LMBCS_4 + 1; // 15
+        static final int LMBCS_6 = LMBCS_5 + 1; // 16
+        static final int LMBCS_8 = LMBCS_6 + 1; // 17
+        static final int LMBCS_11 = LMBCS_8 + 1; // 18
+        static final int LMBCS_16 = LMBCS_11 + 1; // 19
+        static final int LMBCS_17 = LMBCS_16 + 1; // 20
+        static final int LMBCS_18 = LMBCS_17 + 1; // 21
+        static final int LMBCS_19 = LMBCS_18 + 1; // 22
+        static final int LMBCS_LAST = LMBCS_19; // 22
+        static final int HZ = LMBCS_LAST + 1; // 23
+        static final int SCSU = HZ + 1; // 24
+        static final int ISCII = SCSU + 1; // 25
+        static final int US_ASCII = ISCII + 1; // 26
+        static final int UTF7 = US_ASCII + 1; // 27
+        static final int BOCU1 = UTF7 + 1; // 28
+        static final int UTF16 = BOCU1 + 1; // 29
+        static final int UTF32 = UTF16 + 1; // 30
+        static final int CESU8 = UTF32 + 1; // 31
+        static final int IMAP_MAILBOX = CESU8 + 1; // 32
+
+        // Number of converter types for which we have conversion routines.
+        static final int NUMBER_OF_SUPPORTED_CONVERTER_TYPES = IMAP_MAILBOX + 1;
+    }
+
+    /**
+     * Enum for specifying which platform a converter ID refers to. The use of
+     * platform/CCSID is not recommended. See openCCSID().
+     */
+    static final class UConverterPlatform {
+        static final int UNKNOWN = -1;
+        static final int IBM = 0;
+    }
+
+    // static UConverterSharedData[] converterData;
+    /* static class cnvNameTypeClass {
+        String name;
+        int type;
+        cnvNameTypeClass(String name_, int type_) { name = name_; type = type_; }
+    }
+
+    static cnvNameTypeClass cnvNameType[];*/
+
+
+    static final String DATA_TYPE = "cnv";
+    //static final int CNV_DATA_BUFFER_SIZE = 25000;
+    //static final int SIZE_OF_UCONVERTER_SHARED_DATA = 228;
+
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java
new file mode 100644
index 00000000000..0ccd49b0c7b
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java
@@ -0,0 +1,61 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2007, International Business Machines Corporation and    *
+* others. All Rights Reserved. 
* +******************************************************************************* +* +******************************************************************************* +*/ +package com.ibm.icu.charset; + +final class UConverterStaticData { /* +offset: size - each field comment below gives the field's offset and size in the structure layout */ + int structSize; /* +0: 4 Size of this structure */ + + String name; /* +4: 60 internal name of the converter- invariant chars */ + + int codepage; /* +64: 4 codepage # (now IBM-$codepage) */ + + byte platform; /* +68: 1 platform of the converter (only IBM now) */ + byte conversionType; /* +69: 1 conversion type */ + + byte minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ + byte maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ + + byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */ + byte subCharLen; /* +76: 1 */ + + byte hasToUnicodeFallback; /* +77: 1 one-byte flag; the original note says it should be a UBool to be consistent across platforms */ + byte hasFromUnicodeFallback; /* +78: 1 */ + short unicodeMask; /* +79: 1 bit 0: has supplementary; bit 1: has single surrogates (layout size is 1, but the field is declared short here) */ + byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ + byte reserved[/*19*/]; /* +81: 19 to round out the structure */ + /* total size: 100 (same value as SIZE_OF_UCONVERTER_STATIC_DATA below) */ + public UConverterStaticData() + { /* allocates the subChar and reserved arrays; every other field keeps its Java default value */ + subChar = new byte[UConverterConstants.MAX_SUBCHAR_LEN]; + reserved = new byte[19]; + } + +/* public UConverterStaticData(int structSize_, String name_, int codepage_, byte platform_, byte conversionType_, byte minBytesPerChar_, byte maxBytesPerChar_, byte[] subChar_, byte subCharLen_, byte hasToUnicodeFallback_, byte hasFromUnicodeFallback_, short unicodeMask_, byte subChar1_, byte[] reserved_) + { + structSize = structSize_; + name = name_; + codepage = codepage_; + platform = platform_; + conversionType = conversionType_; + minBytesPerChar = minBytesPerChar_; + maxBytesPerChar = maxBytesPerChar_; + subChar = new byte[UConverterConstants.MAX_SUBCHAR_LEN]; + 
System.arraycopy(subChar_, 0, subChar, 0, (subChar.length < subChar_.length? subChar.length : subChar_.length)); + subCharLen = subCharLen_; + hasToUnicodeFallback = hasToUnicodeFallback_; + hasFromUnicodeFallback = hasFromUnicodeFallback_; + unicodeMask = unicodeMask_; + subChar1 = subChar1_; + reserved = new byte[19]; + System.arraycopy(reserved_, 0, reserved, 0, (reserved.length < reserved_.length? reserved.length : reserved_.length)); + }*/ + + public static final int SIZE_OF_UCONVERTER_STATIC_DATA = 100; /* equals the sum of the per-field sizes documented above (4+60+4+1+1+1+1+4+1+1+1+1+1+19) */ +} + diff --git a/main/classes/charset/src/com/ibm/icu/charset/package.html b/main/classes/charset/src/com/ibm/icu/charset/package.html new file mode 100644 index 00000000000..a9e87ef9e28 --- /dev/null +++ b/main/classes/charset/src/com/ibm/icu/charset/package.html @@ -0,0 +1,15 @@ + + + + +C:ICU4J .charset Package Overview + + + + +

    Enhanced charset conversion support.

    +CharsetICU, CharsetProviderICU, CharsetEncoderICU and CharsetDecoderICU provide conversion services for many charsets. + + diff --git a/main/classes/collate/.classpath b/main/classes/collate/.classpath new file mode 100644 index 00000000000..b0d608f2d91 --- /dev/null +++ b/main/classes/collate/.classpath @@ -0,0 +1,7 @@ + + + + + + + diff --git a/main/classes/collate/.externalToolBuilders/copy-data-collate.launch b/main/classes/collate/.externalToolBuilders/copy-data-collate.launch new file mode 100644 index 00000000000..17542f20e95 --- /dev/null +++ b/main/classes/collate/.externalToolBuilders/copy-data-collate.launch @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/collate/.project b/main/classes/collate/.project new file mode 100644 index 00000000000..72983f29537 --- /dev/null +++ b/main/classes/collate/.project @@ -0,0 +1,29 @@ + + + icu4j-collate + + + icu4j-core + icu4j-shared + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.ui.externaltools.ExternalToolBuilder + full,incremental, + + + LaunchConfigHandle + <project>/.externalToolBuilders/copy-data-collate.launch + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/main/classes/collate/.settings/org.eclipse.jdt.core.prefs b/main/classes/collate/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000000..d11b39f392e --- /dev/null +++ b/main/classes/collate/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,345 @@ +#Thu Aug 27 17:46:56 EDT 2009 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.doc.comment.support=enabled 
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.autoboxing=ignore +org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning +org.eclipse.jdt.core.compiler.problem.deadCode=warning +org.eclipse.jdt.core.compiler.problem.deprecation=ignore +org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled +org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled +org.eclipse.jdt.core.compiler.problem.discouragedReference=warning +org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning +org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled +org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore +org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning +org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning +org.eclipse.jdt.core.compiler.problem.forbiddenReference=error +org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning +org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning +org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore +org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore +org.eclipse.jdt.core.compiler.problem.invalidJavadoc=warning +org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=enabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=public +org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore +org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning +org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore 
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled +org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public +org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=all_standard_tags +org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled +org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public +org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning +org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore +org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning +org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning +org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore +org.eclipse.jdt.core.compiler.problem.nullReference=warning +org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning +org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore +org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore +org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore +org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning +org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore +org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore +org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled +org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning +org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled +org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore +org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning 
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning +org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore +org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning +org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore +org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore +org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled +org.eclipse.jdt.core.compiler.problem.unusedImport=warning +org.eclipse.jdt.core.compiler.problem.unusedLabel=warning +org.eclipse.jdt.core.compiler.problem.unusedLocal=warning +org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore +org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled +org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning +org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning +org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning +org.eclipse.jdt.core.compiler.source=1.5 +org.eclipse.jdt.core.formatter.align_type_members_on_columns=false +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_assignment=0 +org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 +org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 +org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 +org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 +org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 +org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_after_package=1 +org.eclipse.jdt.core.formatter.blank_lines_before_field=0 +org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 +org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 +org.eclipse.jdt.core.formatter.blank_lines_before_method=1 +org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 +org.eclipse.jdt.core.formatter.blank_lines_before_package=0 +org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 +org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 
+org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false +org.eclipse.jdt.core.formatter.comment.format_block_comments=true +org.eclipse.jdt.core.formatter.comment.format_header=false +org.eclipse.jdt.core.formatter.comment.format_html=true +org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true +org.eclipse.jdt.core.formatter.comment.format_line_comments=true +org.eclipse.jdt.core.formatter.comment.format_source_code=true +org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true +org.eclipse.jdt.core.formatter.comment.indent_root_tags=true +org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert +org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert +org.eclipse.jdt.core.formatter.comment.line_length=120 +org.eclipse.jdt.core.formatter.compact_else_if=true +org.eclipse.jdt.core.formatter.continuation_indentation=2 +org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 
+org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true +org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_empty_lines=false +org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true +org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false +org.eclipse.jdt.core.formatter.indentation.size=4 +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert 
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert 
+org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert +org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert +org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert +org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.join_lines_in_comments=true +org.eclipse.jdt.core.formatter.join_wrapped_lines=true +org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false +org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false +org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false +org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false +org.eclipse.jdt.core.formatter.lineSplit=120 +org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false +org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false +org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 +org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 +org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true +org.eclipse.jdt.core.formatter.tabulation.char=space 
+org.eclipse.jdt.core.formatter.tabulation.size=4 +org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false +org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true diff --git a/main/classes/collate/.settings/org.eclipse.jdt.ui.prefs b/main/classes/collate/.settings/org.eclipse.jdt.ui.prefs new file mode 100644 index 00000000000..977a256edd2 --- /dev/null +++ b/main/classes/collate/.settings/org.eclipse.jdt.ui.prefs @@ -0,0 +1,10 @@ +#Wed Jul 08 12:14:39 EDT 2009 +eclipse.preferences.version=1 +formatter_profile=_ICU4J Standard +formatter_settings_version=11 +org.eclipse.jdt.ui.ignorelowercasenames=true +org.eclipse.jdt.ui.importorder=java;javax;org;com; +org.eclipse.jdt.ui.javadoc=true +org.eclipse.jdt.ui.ondemandthreshold=99 +org.eclipse.jdt.ui.staticondemandthreshold=99 +org.eclipse.jdt.ui.text.custom_code_templates= diff --git a/main/classes/collate/build.properties b/main/classes/collate/build.properties new file mode 100644 index 00000000000..a21fb196196 --- /dev/null +++ b/main/classes/collate/build.properties @@ -0,0 +1,6 @@ +#******************************************************************************* +#* Copyright (C) 2009, International Business Machines Corporation and * +#* others. All Rights Reserved. 
* +#******************************************************************************* +shared.dir = ../../shared +javac.compilerarg = -Xlint:all,-deprecation,-dep-ann diff --git a/main/classes/collate/build.xml b/main/classes/collate/build.xml new file mode 100644 index 00000000000..6d78df99d6a --- /dev/null +++ b/main/classes/collate/build.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/collate/collate-build.launch b/main/classes/collate/collate-build.launch new file mode 100644 index 00000000000..5b723b1f894 --- /dev/null +++ b/main/classes/collate/collate-build.launch @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/collate/manifest.stub b/main/classes/collate/manifest.stub new file mode 100644 index 00000000000..2a404305ea8 --- /dev/null +++ b/main/classes/collate/manifest.stub @@ -0,0 +1,11 @@ +Manifest-Version: 1.0 + +Name: com/ibm/icu/text +Specification-Title: ICU for Java Collation +Specification-Version: @SPECVERSION@ +Specification-Vendor: ICU +Implementation-Title: ICU for Java Collation +Implementation-Version: @IMPLVERSION@ +Implementation-Vendor: IBM Corporation +Implementation-Vendor-Id: com.ibm +Copyright-Info: @COPYRIGHT@ \ No newline at end of file diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java b/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java new file mode 100644 index 00000000000..c331af7de8b --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java @@ -0,0 +1,2856 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +/*** + * import java.text.StringCharacterIterator; + * import java.text.CharacterIterator; + */ +import java.text.CharacterIterator; +import java.util.MissingResourceException; + +import com.ibm.icu.impl.CharacterIteratorWrapper; +import com.ibm.icu.impl.ICUDebug; +import com.ibm.icu.impl.Norm2AllModes; +import com.ibm.icu.impl.Normalizer2Impl; +import com.ibm.icu.impl.StringUCharacterIterator; +import com.ibm.icu.impl.UCharacterProperty; +import com.ibm.icu.lang.UCharacter; + +/** + *

    CollationElementIterator is an iterator created by + * a RuleBasedCollator to walk through a string. The return result of + * each iteration is a 32-bit collation element that defines the + * ordering priority of the next character or sequence of characters + * in the source string.

    + * + *

    For illustration, consider the following in Spanish: + *

    + *
    + * "ca" -> the first collation element is collation_element('c') and second
    + *         collation element is collation_element('a').
    + *
    + * Since "ch" in Spanish sorts as one entity, the below example returns one
    + * collation element for the two characters 'c' and 'h'
    + *
    + * "cha" -> the first collation element is collation_element('ch') and second
    + *          collation element is collation_element('a').
    + * 
    + *
    + * And in German, + *
    + *
    + * Since the character 'æ' is a composed character of 'a' and 'e', the
    + * iterator returns two collation elements for the single character 'æ'
    + *
    + * "æb" -> the first collation element is collation_element('a'), the
    + *              second collation element is collation_element('e'), and the
    + *              third collation element is collation_element('b').
    + * 
    + *
    + *

    + * + *

    For collation ordering comparison, the collation element results + * can not be compared simply by using basic arithmetic operators, + * e.g. <, == or >, further processing has to be done. Details + * can be found in the ICU + * + * user guide. An example of using the CollationElementIterator + * for collation ordering comparison is the class + * com.ibm.icu.text.StringSearch.

    + * + *

    To construct a CollationElementIterator object, users + * call the method getCollationElementIterator() on a + * RuleBasedCollator that defines the desired sorting order.

    + * + *

    Example: + *

    + *
    + *  String testString = "This is a test";
    + *  RuleBasedCollator rbc = new RuleBasedCollator("&a<b");
    + *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
    + *  int primaryOrder = iterator.IGNORABLE;
    + *  while (primaryOrder != iterator.NULLORDER) {
    + *      int order = iterator.next();
    + *      if (order != iterator.IGNORABLE &&
    + *          order != iterator.NULLORDER) {
    + *          // order is valid, not ignorable and we have not passed the end
    + *          // of the iteration, we do something
    + *          primaryOrder = CollationElementIterator.primaryOrder(order);
    + *          System.out.println("Next primary order 0x" +
    + *                             Integer.toHexString(primaryOrder));
    + *      }
    + *  }
    + * 
    + *
    + *

    + *

    + * This class is not subclassable + *

    + * @see Collator + * @see RuleBasedCollator + * @see StringSearch + * @author Syn Wee Quek + * @stable ICU 2.8 + */ +public final class CollationElementIterator +{ + + + // public data members -------------------------------------------------- + + /** + *

    This constant is returned by the iterator in the methods + * next() and previous() when the end or the beginning of the + * source string has been reached, and there are no more valid + * collation elements to return.

    + * + *

    See class documentation for an example of use.

    + * @stable ICU 2.8 + * @see #next + * @see #previous */ + public final static int NULLORDER = 0xffffffff; + + /** + *

    This constant is returned by the iterator in the methods + * next() and previous() when a collation element result is to be + * ignored.

    + * + *

    See class documentation for an example of use.

    + * @stable ICU 2.8 + * @see #next + * @see #previous */ + public static final int IGNORABLE = 0; + + // public methods ------------------------------------------------------- + + // public getters ------------------------------------------------------- + + /** + *

    Returns the character offset in the source string + * corresponding to the next collation element. I.e., getOffset() + * returns the position in the source string corresponding to the + * collation element that will be returned by the next call to + * next(). This value could be any of: + *

      + *
    • The index of the first character corresponding to + * the next collation element. (This means that if + * setOffset(offset) sets the index in the middle of + * a contraction, getOffset() returns the index of + * the first character in the contraction, which may not be equal + * to the original offset that was set. Hence calling getOffset() + * immediately after setOffset(offset) does not guarantee that the + * original offset set will be returned.) + *
    • If normalization is on, the index of the immediate + * subsequent character, or composite character with the first + * character, having a combining class of 0. + *
    • The length of the source string, if iteration has reached + * the end. + *
    + *

    + * @return The character offset in the source string corresponding to the + * collation element that will be returned by the next call to + * next(). + * @stable ICU 2.8 + */ + public int getOffset() + { + if (m_bufferOffset_ != -1) { + if (m_isForwards_) { + return m_FCDLimit_; + } + return m_FCDStart_; + } + return m_source_.getIndex(); + } + + + /** + *

    Returns the maximum length of any expansion sequence that ends with + * the specified collation element. If there is no expansion with this + * collation element as the last element, returns 1. + *

    + * @param ce a collation element returned by previous() or next(). + * @return the maximum length of any expansion sequence ending + * with the specified collation element. + * @stable ICU 2.8 + */ + public int getMaxExpansion(int ce) + { + int start = 0; + int limit = m_collator_.m_expansionEndCE_.length; + long unsignedce = ce & 0xFFFFFFFFl; + while (start < limit - 1) { + int mid = start + ((limit - start) >> 1); + long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl; + if (unsignedce <= midce) { + limit = mid; + } + else { + start = mid; + } + } + int result = 1; + if (m_collator_.m_expansionEndCE_[start] == ce) { + result = m_collator_.m_expansionEndCEMaxSize_[start]; + } + else if (limit < m_collator_.m_expansionEndCE_.length && + m_collator_.m_expansionEndCE_[limit] == ce) { + result = m_collator_.m_expansionEndCEMaxSize_[limit]; + } + else if ((ce & 0xFFFF) == 0x00C0) { + result = 2; + } + return result; + } + + // public other methods ------------------------------------------------- + + /** + *

    Resets the cursor to the beginning of the string. The next + * call to next() or previous() will return the first and last + * collation element in the string, respectively.

    + * + *

    If the RuleBasedCollator used by this iterator has had its + * attributes changed, calling reset() will reinitialize the + * iterator to use the new attributes.

    + * + * @stable ICU 2.8 + */ + public void reset() + { + m_source_.setToStart(); + updateInternalState(); + } + + /** + *

    Get the next collation element in the source string.

    + * + *

    This iterator iterates over a sequence of collation elements + * that were built from the string. Because there isn't + * necessarily a one-to-one mapping from characters to collation + * elements, this doesn't mean the same thing as "return the + * collation element [or ordering priority] of the next character + * in the string".

    + * + *

    This function returns the collation element that the + * iterator is currently pointing to, and then updates the + * internal pointer to point to the next element. In contrast, previous() + * updates the pointer first, and then returns the element. This + * means that when you change direction while iterating (i.e., + * call next() and then call previous(), or call previous() and + * then call next()), you'll get back the same element twice.

    + * + * @return the next collation element or NULLORDER if the end of the + * iteration has been reached. + * @stable ICU 2.8 + */ + public int next() + { + m_isForwards_ = true; + if (m_CEBufferSize_ > 0) { + if (m_CEBufferOffset_ < m_CEBufferSize_) { + // if there are expansions left in the buffer, we return it + return m_CEBuffer_[m_CEBufferOffset_ ++]; + } + m_CEBufferSize_ = 0; + m_CEBufferOffset_ = 0; + } + + int ch_int = nextChar(); + + if (ch_int == UCharacterIterator.DONE) { + return NULLORDER; + } + char ch = (char)ch_int; + if (m_collator_.m_isHiragana4_) { + /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag + * based on whether the previous codepoint was Hiragana or Katakana. + */ + m_isCodePointHiragana_ = (m_isCodePointHiragana_ && (ch >= 0x3099 && ch <= 0x309C)) || + ((ch >= 0x3040 && ch <= 0x309e) && !(ch > 0x3094 && ch < 0x309d)); + } + + int result = NULLORDER; + if (ch <= 0xFF) { + // For latin-1 characters we never need to fall back to the UCA + // table because all of the UCA data is replicated in the + // latinOneMapping array + result = m_collator_.m_trie_.getLatin1LinearValue(ch); + if (RuleBasedCollator.isSpecial(result)) { + result = nextSpecial(m_collator_, result, ch); + } + } + else { + result = m_collator_.m_trie_.getLeadValue(ch); + //System.out.println(Integer.toHexString(result)); + if (RuleBasedCollator.isSpecial(result)) { + // surrogate leads are handled as special ces + result = nextSpecial(m_collator_, result, ch); + } + if (result == CE_NOT_FOUND_ && RuleBasedCollator.UCA_ != null) { + // couldn't find a good CE in the tailoring + // if we got here, the codepoint MUST be over 0xFF - so we look + // directly in the UCA + result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch); + if (RuleBasedCollator.isSpecial(result)) { + // UCA also gives us a special CE + result = nextSpecial(RuleBasedCollator.UCA_, result, ch); + } + } + } + if(result == CE_NOT_FOUND_) { + // maybe there is no UCA, unlikely in 
Java, but ported for consistency + result = nextImplicit(ch); + } + return result; + } + + /** + *

    Get the previous collation element in the source string.

    + * + *

    This iterator iterates over a sequence of collation elements + * that were built from the string. Because there isn't + * necessarily a one-to-one mapping from characters to collation + * elements, this doesn't mean the same thing as "return the + * collation element [or ordering priority] of the previous + * character in the string".

    + * + *

    This function updates the iterator's internal pointer to + * point to the collation element preceding the one it's currently + * pointing to and then returns that element, while next() returns + * the current element and then updates the pointer. This means + * that when you change direction while iterating (i.e., call + * next() and then call previous(), or call previous() and then + * call next()), you'll get back the same element twice.

    + * + * @return the previous collation element, or NULLORDER when the start of + * the iteration has been reached. + * @stable ICU 2.8 + */ + public int previous() + { + if (m_source_.getIndex() <= 0 && m_isForwards_) { + // if iterator is new or reset, we can immediate perform backwards + // iteration even when the offset is not right. + m_source_.setToLimit(); + updateInternalState(); + } + m_isForwards_ = false; + int result = NULLORDER; + if (m_CEBufferSize_ > 0) { + if (m_CEBufferOffset_ > 0) { + return m_CEBuffer_[-- m_CEBufferOffset_]; + } + m_CEBufferSize_ = 0; + m_CEBufferOffset_ = 0; + } + int ch_int = previousChar(); + if (ch_int == UCharacterIterator.DONE) { + return NULLORDER; + } + char ch = (char)ch_int; + if (m_collator_.m_isHiragana4_) { + m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f); + } + if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) { + result = previousSpecial(m_collator_, CE_CONTRACTION_, ch); + } + else { + if (ch <= 0xFF) { + result = m_collator_.m_trie_.getLatin1LinearValue(ch); + } + else { + result = m_collator_.m_trie_.getLeadValue(ch); + } + if (RuleBasedCollator.isSpecial(result)) { + result = previousSpecial(m_collator_, result, ch); + } + if (result == CE_NOT_FOUND_) { + if (!isBackwardsStart() + && m_collator_.isContractionEnd(ch)) { + result = CE_CONTRACTION_; + } + else { + if(RuleBasedCollator.UCA_ != null) { + result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch); + } + } + + if (RuleBasedCollator.isSpecial(result)) { + if(RuleBasedCollator.UCA_ != null) { + result = previousSpecial(RuleBasedCollator.UCA_, result, ch); + } + } + } + } + if(result == CE_NOT_FOUND_) { + result = previousImplicit(ch); + } + return result; + } + + /** + * Return the primary order of the specified collation element, + * i.e. the first 16 bits. This value is unsigned. + * @param ce the collation element + * @return the element's 16 bits primary order. 
+ * @stable ICU 2.8 + */ + public final static int primaryOrder(int ce) + { + return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) + >>> RuleBasedCollator.CE_PRIMARY_SHIFT_; + } + /** + * Return the secondary order of the specified collation element, + * i.e. the 16th to 23th bits, inclusive. This value is unsigned. + * @param ce the collation element + * @return the element's 8 bits secondary order + * @stable ICU 2.8 + */ + public final static int secondaryOrder(int ce) + { + return (ce & RuleBasedCollator.CE_SECONDARY_MASK_) + >> RuleBasedCollator.CE_SECONDARY_SHIFT_; + } + + /** + * Return the tertiary order of the specified collation element, i.e. the last + * 8 bits. This value is unsigned. + * @param ce the collation element + * @return the element's 8 bits tertiary order + * @stable ICU 2.8 + */ + public final static int tertiaryOrder(int ce) + { + return ce & RuleBasedCollator.CE_TERTIARY_MASK_; + } + + /** + *

    Sets the iterator to point to the collation element + * corresponding to the character at the specified offset. The + * value returned by the next call to next() will be the collation + * element corresponding to the characters at offset.

    + * + *

    If offset is in the middle of a contracting character + * sequence, the iterator is adjusted to the start of the + * contracting sequence. This means that getOffset() is not + * guaranteed to return the same value set by this method.

    + * + *

    If the decomposition mode is on, and offset is in the middle + * of a decomposable range of source text, the iterator may not + * return a correct result for the next forwards or backwards + * iteration. The user must ensure that the offset is not in the + * middle of a decomposable range.

    + * + * @param offset the character offset into the original source string to + * set. Note that this is not an offset into the corresponding + * sequence of collation elements. + * @stable ICU 2.8 + */ + public void setOffset(int offset) + { + m_source_.setIndex(offset); + int ch_int = m_source_.current(); + char ch = (char)ch_int; + if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) { + // if it is unsafe we need to check if it is part of a contraction + // or a surrogate character + if (UTF16.isTrailSurrogate(ch)) { + // if it is a surrogate pair we move up one character + char prevch = (char)m_source_.previous(); + if (!UTF16.isLeadSurrogate(prevch)) { + m_source_.setIndex(offset); // go back to the same index + } + } + else { + // could be part of a contraction + // backup to a safe point and iterate till we pass offset + while (m_source_.getIndex() > 0) { + if (!m_collator_.isUnsafe(ch)) { + break; + } + ch = (char)m_source_.previous(); + } + updateInternalState(); + int prevoffset = 0; + while (m_source_.getIndex() <= offset) { + prevoffset = m_source_.getIndex(); + next(); + } + m_source_.setIndex(prevoffset); + } + } + updateInternalState(); + // direction code to prevent next and previous from returning a + // character if we are already at the ends + offset = m_source_.getIndex(); + if (offset == 0/* m_source_.getBeginIndex() */) { + // preventing previous() from returning characters from the end of + // the string again if we are at the beginning + m_isForwards_ = false; + } + else if (offset == m_source_.getLength()) { + // preventing next() from returning characters from the start of + // the string again if we are at the end + m_isForwards_ = true; + } + } + + /** + *

    Set a new source string for iteration, and reset the offset + * to the beginning of the text.

    + * + * @param source the new source string for iteration. + * @stable ICU 2.8 + */ + public void setText(String source) + { + m_srcUtilIter_.setText(source); + m_source_ = m_srcUtilIter_; + updateInternalState(); + } + + /** + *

    Set a new source string iterator for iteration, and reset the + * offset to the beginning of the text. + *

    + *

    The source iterator's integrity will be preserved since a new copy + * will be created for use.

    + * @param source the new source string iterator for iteration. + * @stable ICU 2.8 + */ + public void setText(UCharacterIterator source) + { + m_srcUtilIter_.setText(source.getText()); + m_source_ = m_srcUtilIter_; + updateInternalState(); + } + + /** + *

    Set a new source string iterator for iteration, and reset the + * offset to the beginning of the text. + *

    + * @param source the new source string iterator for iteration. + * @stable ICU 2.8 + */ + public void setText(CharacterIterator source) + { + m_source_ = new CharacterIteratorWrapper(source); + m_source_.setToStart(); + updateInternalState(); + } + + // public miscellaneous methods ----------------------------------------- + + /** + * Tests that argument object is equals to this CollationElementIterator. + * Iterators are equal if the objects uses the same RuleBasedCollator, + * the same source text and have the same current position in iteration. + * @param that object to test if it is equals to this + * CollationElementIterator + * @stable ICU 2.8 + */ + public boolean equals(Object that) + { + if (that == this) { + return true; + } + if (that instanceof CollationElementIterator) { + CollationElementIterator thatceiter + = (CollationElementIterator)that; + if (!m_collator_.equals(thatceiter.m_collator_)) { + return false; + } + // checks the text + return m_source_.getIndex() == thatceiter.m_source_.getIndex() + && m_source_.getText().equals( + thatceiter.m_source_.getText()); + } + return false; + } + + // package private constructors ------------------------------------------ + + private CollationElementIterator(RuleBasedCollator collator) { + m_utilStringBuffer_ = new StringBuilder(); + m_collator_ = collator; + m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; + m_buffer_ = new StringBuilder(); + m_utilSpecialBackUp_ = new Backup(); + m_nfcImpl_.getFCDTrie(); // ensure the FCD data is initialized + } + + /** + *

    CollationElementIterator constructor. This takes a source + * string and a RuleBasedCollator. The iterator will walk through + * the source string based on the rules defined by the + * collator. If the source string is empty, NULLORDER will be + * returned on the first call to next().

    + * + * @param source the source string. + * @param collator the RuleBasedCollator + * @stable ICU 2.8 + */ + CollationElementIterator(String source, RuleBasedCollator collator) + { + this(collator); + m_source_ = m_srcUtilIter_ = new StringUCharacterIterator(source); + updateInternalState(); + } + + /** + *

    CollationElementIterator constructor. This takes a source + * character iterator and a RuleBasedCollator. The iterator will + * walk through the source string based on the rules defined by + * the collator. If the source string is empty, NULLORDER will be + * returned on the first call to next().

    + * + * @param source the source string iterator. + * @param collator the RuleBasedCollator + * @stable ICU 2.8 + */ + CollationElementIterator(CharacterIterator source, + RuleBasedCollator collator) + { + this(collator); + m_srcUtilIter_ = new StringUCharacterIterator(); + m_source_ = new CharacterIteratorWrapper(source); + updateInternalState(); + } + + /** + *

    CollationElementIterator constructor. This takes a source + * character iterator and a RuleBasedCollator. The iterator will + * walk through the source string based on the rules defined by + * the collator. If the source string is empty, NULLORDER will be + * returned on the first call to next().

    + * + * @param source the source string iterator. + * @param collator the RuleBasedCollator + * @stable ICU 2.8 + */ + CollationElementIterator(UCharacterIterator source, + RuleBasedCollator collator) + { + this(collator); + m_srcUtilIter_ = new StringUCharacterIterator(); + m_srcUtilIter_.setText(source.getText()); + m_source_ = m_srcUtilIter_; + updateInternalState(); + } + + // package private data members ----------------------------------------- + + /** + * true if current codepoint was Hiragana + */ + boolean m_isCodePointHiragana_; + /** + * Position in the original string that starts with a non-FCD sequence + */ + int m_FCDStart_; + /** + * This is the CE from CEs buffer that should be returned. + * Initial value is 0. + * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_, + * backwards will end with m_CEBufferOffset_ == 0. + * The next/previous after we reach the end/beginning of the m_CEBuffer_ + * will cause this value to be reset to 0. + */ + int m_CEBufferOffset_; + + /** + * This is the position to which we have stored processed CEs. + * Initial value is 0. + * The next/previous after we reach the end/beginning of the m_CEBuffer_ + * will cause this value to be reset to 0. + */ + int m_CEBufferSize_; + static final int CE_NOT_FOUND_ = 0xF0000000; + static final int CE_EXPANSION_TAG_ = 1; + static final int CE_CONTRACTION_TAG_ = 2; + /** + * Collate Digits As Numbers (CODAN) implementation + */ + static final int CE_DIGIT_TAG_ = 13; + + // package private methods ---------------------------------------------- + + /** + * Sets the collator used. + * Internal use, all data members will be reset to the default values + * @param collator to set + */ + void setCollator(RuleBasedCollator collator) + { + m_collator_ = collator; + updateInternalState(); + } + + /** + *

    Sets the iterator to point to the collation element corresponding to + * the specified character (the parameter is a CHARACTER offset in the + * original string, not an offset into its corresponding sequence of + * collation elements). The value returned by the next call to next() + * will be the collation element corresponding to the specified position + * in the text. Unlike the public method setOffset(int), this method does + * not try to readjust the offset to the start of a contracting sequence. + * getOffset() is guaranteed to return the same value as was passed to a + * preceding call to this method.

    + * @param offset new character offset into the original text to set. + */ + void setExactOffset(int offset) + { + m_source_.setIndex(offset); + updateInternalState(); + } + + /** + * Checks if iterator is in the buffer zone + * @return true if iterator is in buffer zone, false otherwise + */ + boolean isInBuffer() + { + return m_bufferOffset_ > 0; + } + + + /** + *

    Sets the iterator to point to the collation element corresponding to + * the specified character (the parameter is a CHARACTER offset in the + * original string, not an offset into its corresponding sequence of + * collation elements). The value returned by the next call to next() + * will be the collation element corresponding to the specified position + * in the text. Unlike the public method setOffset(int), this method does + * not try to readjust the offset to the start of a contracting sequence. + * getOffset() is guaranteed to return the same offset as was passed to a + * preceding call to this method.

    + *

    + * @param source the new source string iterator for iteration. + * @param offset to the source + */ + void setText(UCharacterIterator source, int offset) + { + m_srcUtilIter_.setText(source.getText()); + m_source_ = m_srcUtilIter_; + m_source_.setIndex(offset); + updateInternalState(); + } + + // private inner class -------------------------------------------------- + + /** + * Backup data class + */ + private static final class Backup + { + // protected data members ------------------------------------------- + + /** + * Backup non FCD sequence limit + */ + protected int m_FCDLimit_; + /** + * Backup non FCD sequence start + */ + protected int m_FCDStart_; + /** + * Backup if previous Codepoint is Hiragana quatenary + */ + protected boolean m_isCodePointHiragana_; + /** + * Backup buffer position + */ + protected int m_bufferOffset_; + /** + * Backup source iterator offset + */ + protected int m_offset_; + /** + * Backup buffer contents + */ + protected StringBuffer m_buffer_; + + // protected constructor -------------------------------------------- + + /** + * Empty constructor + */ + protected Backup() + { + m_buffer_ = new StringBuffer(); + } + } + // end inner class ------------------------------------------------------ + + /** + * Direction of travel + */ + private boolean m_isForwards_; + /** + * Source string iterator + */ + private UCharacterIterator m_source_; + /** + * This is position to the m_buffer_, -1 if iterator is not in m_buffer_ + */ + private int m_bufferOffset_; + /** + * Buffer for temporary storage of normalized characters, discontiguous + * characters and Thai characters + */ + private StringBuilder m_buffer_; + /** + * Position in the original string to continue forward FCD check from. 
+ */ + private int m_FCDLimit_; + /** + * The collator this iterator is based on + */ + private RuleBasedCollator m_collator_; + /** + * true if Hiragana quatenary is on + */ + //private boolean m_isHiragana4_; + /** + * CE buffer + */ + private int m_CEBuffer_[]; + /** + * In reality we should not have to deal with expansion sequences longer + * then 16. However this value can be change if a bigger buffer is needed. + * Note, if the size is change to too small a number, BIG trouble. + * Reasonable small value is around 10, if there's no Arabic or other + * funky collations that have long expansion sequence. This is the longest + * expansion sequence this can handle without bombing out. + */ + private static final int CE_BUFFER_INIT_SIZE_ = 512; + /** + * Backup storage for special processing inner cases + */ + private Backup m_utilSpecialBackUp_; + /** + * Backup storage in special processing entry state + */ + private Backup m_utilSpecialEntryBackUp_; + /** + * Backup storage in special processing discontiguous state + */ + private Backup m_utilSpecialDiscontiguousBackUp_; + /** + * Utility + */ + private StringUCharacterIterator m_srcUtilIter_; + private StringBuilder m_utilStringBuffer_; + private StringBuilder m_utilSkippedBuffer_; + private CollationElementIterator m_utilColEIter_; + private static final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl; + private StringBuilder m_unnormalized_; + private Normalizer2Impl.ReorderingBuffer m_n2Buffer_; + /** + * The first non-zero combining class character + */ + private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0; + /** + * One character before the first character with leading non-zero combining + * class + */ + private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300; + /** + * Mask for the last byte + */ + private static final int LAST_BYTE_MASK_ = 0xFF; + /** + * Shift value for the second last byte + */ + private static final int SECOND_LAST_BYTE_SHIFT_ = 8; + 
+ // special ce values and tags ------------------------------------------- + +// private static final int CE_EXPANSION_ = 0xF1000000; + private static final int CE_CONTRACTION_ = 0xF2000000; + /** + * Indicates the last ce has been consumed. Compare with NULLORDER. + * NULLORDER is returned if error occurs. + */ +/* private static final int CE_NO_MORE_CES_ = 0x00010101; + private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000; + private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100; + private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001; +*/ + private static final int CE_NOT_FOUND_TAG_ = 0; + /** + * Charset processing, not yet implemented + */ + private static final int CE_CHARSET_TAG_ = 4; + /** + * AC00-D7AF + */ + private static final int CE_HANGUL_SYLLABLE_TAG_ = 6; + /** + * D800-DBFF + */ + private static final int CE_LEAD_SURROGATE_TAG_ = 7; + /** + * DC00-DFFF + */ + private static final int CE_TRAIL_SURROGATE_TAG_ = 8; + /** + * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D + */ + private static final int CE_CJK_IMPLICIT_TAG_ = 9; + private static final int CE_IMPLICIT_TAG_ = 10; + static final int CE_SPEC_PROC_TAG_ = 11; + /** + * This is a 3 byte primary with starting secondaries and tertiaries. + * It fits in a single 32 bit CE and is used instead of expansion to save + * space without affecting the performance (hopefully). 
+ */ + private static final int CE_LONG_PRIMARY_TAG_ = 12; + +// private static final int CE_CE_TAGS_COUNT = 14; + private static final int CE_BYTE_COMMON_ = 0x05; + + // end special ce values and tags --------------------------------------- + + // Hangul syllable decomposition constants, used by nextHangul() + private static final int HANGUL_SBASE_ = 0xAC00; + private static final int HANGUL_LBASE_ = 0x1100; + private static final int HANGUL_VBASE_ = 0x1161; + private static final int HANGUL_TBASE_ = 0x11A7; + private static final int HANGUL_VCOUNT_ = 21; + private static final int HANGUL_TCOUNT_ = 28; + + // CJK stuff ------------------------------------------------------------ + +/* private static final int CJK_BASE_ = 0x4E00; + private static final int CJK_LIMIT_ = 0x9FFF+1; + private static final int CJK_COMPAT_USED_BASE_ = 0xFA0E; + private static final int CJK_COMPAT_USED_LIMIT_ = 0xFA2F + 1; + private static final int CJK_A_BASE_ = 0x3400; + private static final int CJK_A_LIMIT_ = 0x4DBF + 1; + private static final int CJK_B_BASE_ = 0x20000; + private static final int CJK_B_LIMIT_ = 0x2A6DF + 1; + private static final int NON_CJK_OFFSET_ = 0x110000; +*/ + private static final boolean DEBUG = ICUDebug.enabled("collator"); + + // private methods ------------------------------------------------------ + + /** + * Resets the iterator's internal state: clears the Hiragana flag, empties + * the normalization buffer, resets the CE buffer offset and size, resets + * the FCD range (m_FCDLimit_ = -1, m_FCDStart_ = source length) and sets + * the direction to forwards. The source index itself is not changed here. + */ + private void updateInternalState() + { + m_isCodePointHiragana_ = false; + m_buffer_.setLength(0); + m_bufferOffset_ = -1; + m_CEBufferOffset_ = 0; + m_CEBufferSize_ = 0; + m_FCDLimit_ = -1; + m_FCDStart_ = m_source_.getLength(); + //m_isHiragana4_ = m_collator_.m_isHiragana4_; + m_isForwards_ = true; + } + + /** + * Backs up the current internal state: source index, FCD range, Hiragana + * flag and buffer offset. The buffer contents are copied only when the + * iterator is currently reading from the buffer (m_bufferOffset_ >= 0). + * @param backup object to store the data + */ + private void backupInternalState(Backup backup) + { + backup.m_offset_ = m_source_.getIndex(); + backup.m_FCDLimit_ = m_FCDLimit_; + backup.m_FCDStart_ = m_FCDStart_; + backup.m_isCodePointHiragana_ = m_isCodePointHiragana_; + backup.m_bufferOffset_ = m_bufferOffset_; + 
backup.m_buffer_.setLength(0); + if (m_bufferOffset_ >= 0) { + backup.m_buffer_.append(m_buffer_); + } + } + + /** + * Restores the iterator's internal state from a backup: source index, + * Hiragana flag, buffer offset and FCD range. The buffer is refilled from + * the backup only if the restored buffer offset is non-negative. + * @param backup object that stored the data + */ + private void updateInternalState(Backup backup) + { + m_source_.setIndex(backup.m_offset_); + m_isCodePointHiragana_ = backup.m_isCodePointHiragana_; + m_bufferOffset_ = backup.m_bufferOffset_; + m_FCDLimit_ = backup.m_FCDLimit_; + m_FCDStart_ = backup.m_FCDStart_; + m_buffer_.setLength(0); + if (m_bufferOffset_ >= 0) { + m_buffer_.append(backup.m_buffer_); + } + } + + /** + * A fast combining class retrieval system. Only supplementary code points, + * and characters at or above LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ that the + * collator reports as unsafe, are looked up; everything else yields 0 + * without a lookup. + * @param ch UTF16 character or supplementary code point + * @return combining class of ch + */ + private int getCombiningClass(int ch) + { + // note: && binds tighter than ||, so ch > 0xFFFF alone selects the lookup + if (ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ && + m_collator_.isUnsafe((char)ch) || ch > 0xFFFF + ) { + return m_nfcImpl_.getCC(m_nfcImpl_.getNorm16(ch)); + } + return 0; + } + + /** + *

    Incremental normalization; this is an essential optimization. + * Assuming the FCD check has been done, normalizes the non-FCD characters into + * the buffer. + * The source offset points to the current processing character. + *

    + */ + private void normalize() + { + // the scratch text and the reordering buffer over m_buffer_ are created + // on first use; later calls reset them with setLength(0) and remove() + if (m_unnormalized_ == null) { + m_unnormalized_ = new StringBuilder(); + m_n2Buffer_ = new Normalizer2Impl.ReorderingBuffer(m_nfcImpl_, m_buffer_, 10); + } else { + m_unnormalized_.setLength(0); + m_n2Buffer_.remove(); + } + // copy the code units in [m_FCDStart_, m_FCDLimit_) out of the source + int size = m_FCDLimit_ - m_FCDStart_; + m_source_.setIndex(m_FCDStart_); + for (int i = 0; i < size; i ++) { + m_unnormalized_.append((char)m_source_.next()); + } + // decompose the copied segment through the reordering buffer + m_nfcImpl_.decomposeShort(m_unnormalized_, 0, size, m_n2Buffer_); + } + + /** + *

    Incremental FCD check and normalization. Gets the next base character + * position and determines if the in-between characters need normalization. + *

    + *

    When entering, the state is known to be this: + *

      + *
    • We are working on source string, not the buffer. + *
    • The leading combining class from the current character is 0 or the + * trailing combining class of the previous char was zero. + *
    + * On entry, the source offset points to the current processing character. + * On return, the source offset points to the current processing character. + *

    + * @param ch current character (lead unit) + * @param offset offset just past ch (offset of ch + 1) + * @return true if FCDCheck passes, false otherwise + */ + private boolean FCDCheck(int ch, int offset) + { + boolean result = true; + + // Get the trailing combining class of the current character. + // If it's zero, we are OK. + m_FCDStart_ = offset - 1; + m_source_.setIndex(offset); + // trie access + int fcd = m_nfcImpl_.getFCD16FromSingleLead((char)ch); + if (fcd != 0 && Character.isHighSurrogate((char)ch)) { + // ch is a lead surrogate: use the FCD value of the whole code point + // when a trail surrogate follows, otherwise treat it as 0 + int c2 = m_source_.next(); + if (c2 < 0) { + fcd = 0; // end of input + } else if (Character.isLowSurrogate((char)c2)) { + fcd = m_nfcImpl_.getFCD16(Character.toCodePoint((char)ch, (char)c2)); + } else { + m_source_.moveIndex(-1); + fcd = 0; + } + } + + // the low byte of the FCD value is used as the trailing combining + // class, the next byte as the leading combining class + int prevTrailCC = fcd & LAST_BYTE_MASK_; + + if (prevTrailCC == 0) { + offset = m_source_.getIndex(); + } else { + // The current char has a non-zero trailing CC. Scan forward until + // we find a char with a leading cc of zero. + while (true) { + ch = m_source_.nextCodePoint(); + if (ch < 0) { + offset = m_source_.getIndex(); + break; + } + // trie access + fcd = m_nfcImpl_.getFCD16(ch); + int leadCC = fcd >> SECOND_LAST_BYTE_SHIFT_; + if (leadCC == 0) { + // this is a base character, we stop the FCD checks + offset = m_source_.getIndex() - Character.charCount(ch); + break; + } + + // combining classes out of order: the segment fails the check, + // but keep scanning so that the segment limit is still found + if (leadCC < prevTrailCC) { + result = false; + } + + prevTrailCC = fcd & LAST_BYTE_MASK_; + } + } + // remember the end of the checked segment and put the source index + // back to just after the character we were called with + m_FCDLimit_ = offset; + m_source_.setIndex(m_FCDStart_ + 1); + return result; + } + + /** + *

    Method tries to fetch the next character that is in fcd form.

    + *

    Normalization is done if required.

    + *

    Offsets are returned at the next character.

    + * @return next fcd character + */ + private int nextChar() + { + int result; + + // handles the next character whether it is in the buffer or not. + if (m_bufferOffset_ < 0) { + // we're working on the source and not normalizing. fast path. + // note Thai pre-vowel reordering uses buffer too + result = m_source_.next(); + } + else { + // we are in the buffer, buffer offset will never be 0 here + if (m_bufferOffset_ >= m_buffer_.length()) { + // End of buffer reached: revert to the source string at + // m_FCDLimit_ and recurse to get a character from there. + m_source_.setIndex(m_FCDLimit_); + m_bufferOffset_ = -1; + m_buffer_.setLength(0); + return nextChar(); + } + return m_buffer_.charAt(m_bufferOffset_ ++); + } + int startoffset = m_source_.getIndex(); + if (result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ + // Fast fcd safe path. trail combining class == 0. + || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION + || m_bufferOffset_ >= 0 || m_FCDLimit_ >= startoffset) { + // skip the fcd checks + return result; + } + + if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { + // We need to peek at the next character in order to tell if we are + // FCD + int next = m_source_.current(); + if (next == UCharacterIterator.DONE + || next < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { + return result; // end of source string and if next character + // starts with a base character is always fcd. + } + } + + // Need a more complete FCD check and possible normalization. + if (!FCDCheck(result, startoffset)) { + // the segment is not FCD: normalize it into the buffer, return the + // first normalized character and continue from buffer index 1 + normalize(); + result = m_buffer_.charAt(0); + m_bufferOffset_ = 1; + } + return result; + } + + /** + *

    Incremental normalization; this is an essential optimization. + * Assuming the FCD check has been done, normalizes the non-FCD characters into + * the buffer. + * The source offset points to the current processing character.

    + */ + private void normalizeBackwards() + { + normalize(); + // position at the end of the buffer; previousChar() decrements the + // offset before reading, so backwards reading starts at the last character + m_bufferOffset_ = m_buffer_.length(); + } + + /** + *

    Incremental backwards FCD check and normalization. Gets the previous + * base character position and determines if the in-between characters + * need normalization. + *

    + *

    When entering, the state is known to be this: + *

      + *
    • We are working on source string, not the buffer. + *
    • The trailing combining class from the current character is 0 or the + * leading combining class of the next char was zero. + *
    + * On entry, the source offset points to the previous character. + * On return, the source offset points to the current processing character. + *

    + * @param ch current character + * @param offset current character offset + * @return true if the backwards FCD check passes, false otherwise + */ + private boolean FCDCheckBackwards(int ch, int offset) + { + int fcd; + m_FCDLimit_ = offset + 1; + m_source_.setIndex(offset); + if (!UTF16.isSurrogate((char)ch)) { + fcd = m_nfcImpl_.getFCD16FromSingleLead((char)ch); + } else { + fcd = 0; + if (!Normalizer2Impl.UTF16Plus.isSurrogateLead(ch)) { + // ch is a trail surrogate: try to combine it with the code unit + // before it; an unpaired surrogate keeps fcd == 0 + int c2 = m_source_.previous(); + if (c2 < 0) { + // start of input + } else if (Character.isHighSurrogate((char)c2)) { + ch = Character.toCodePoint((char)c2, (char)ch); + fcd = m_nfcImpl_.getFCD16(ch); + --offset; + } else { + m_source_.moveIndex(1); + } + } + } + + // Scan backward until we find a char with a leading cc of zero. + boolean result = true; + if (fcd != 0) { + int leadCC; + for (;;) { + leadCC = fcd >> SECOND_LAST_BYTE_SHIFT_; + if (leadCC == 0 || (ch = m_source_.previousCodePoint()) < 0) { + offset = m_source_.getIndex(); + break; + } + fcd = m_nfcImpl_.getFCD16(ch); + int prevTrailCC = fcd & LAST_BYTE_MASK_; + if (leadCC < prevTrailCC) { + result = false; + } else if (fcd == 0) { + offset = m_source_.getIndex() + Character.charCount(ch); + break; + } + } + } + + // storing character with 0 lead fcd or the 1st accent with a base + // character before it + m_FCDStart_ = offset; + // leave the source index at the limit of the checked segment + m_source_.setIndex(m_FCDLimit_); + return result; + } + + /** + *

    Method tries to fetch the previous character that is in fcd form.

    + *

    Normalization is done if required.

    + *

    Offsets are returned at the current character.

    + * @return previous fcd character + */ + private int previousChar() + { + if (m_bufferOffset_ >= 0) { + m_bufferOffset_ --; + if (m_bufferOffset_ >= 0) { + return m_buffer_.charAt(m_bufferOffset_); + } + else { + // At the start of buffer, route back to string. + m_buffer_.setLength(0); + if (m_FCDStart_ == 0) { + m_FCDStart_ = -1; + m_source_.setIndex(0); + return UCharacterIterator.DONE; + } + else { + m_FCDLimit_ = m_FCDStart_; + m_source_.setIndex(m_FCDStart_); + return previousChar(); + } + } + } + // reading from the source string + int result = m_source_.previous(); + int startoffset = m_source_.getIndex(); + // fast exits that skip the FCD check + if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ + || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION + || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) { + return result; + } + // peek at the character before result + int ch = m_source_.previous(); + if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) { + // if previous character is FCD + m_source_.next(); // undo the peek + return result; + } + // Need a more complete FCD check and possible normalization. + if (!FCDCheckBackwards(result, startoffset)) { + normalizeBackwards(); + m_bufferOffset_ --; + result = m_buffer_.charAt(m_bufferOffset_); + } + else { + // fcd checks always reset m_source_ to the limit of the FCD + m_source_.setIndex(startoffset); + } + return result; + } + + /** + * Determines if it is at the start of source iteration: either reading + * from the source at index 0, or at offset 0 of the buffer with + * m_FCDStart_ <= 0. + * @return true if iterator at the start, false otherwise + */ + private final boolean isBackwardsStart() + { + return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0) + || (m_bufferOffset_ == 0 && m_FCDStart_ <= 0); + } + + /** + * Checks if iterator is at the end of its source string. + * @return true if it is at the end, false otherwise + */ + private final boolean isEnd() + { + if (m_bufferOffset_ >= 0) { + if (m_bufferOffset_ != m_buffer_.length()) { + return false; + } + else { + // at end of buffer. 
check if fcd is at the end + return m_FCDLimit_ == m_source_.getLength(); + } + } + // not in the buffer: at the end when the source index equals its length + return m_source_.getLength() == m_source_.getIndex(); + } + + /** + *

    Special CE management for surrogates

    + *

    Lead surrogate is encountered. CE to be retrieved by using the + * following code unit. If next character is a trail surrogate, both + * characters will be combined to retrieve the CE, otherwise completely + * ignorable (UCA specification) is returned.

    + * @param collator collator to use + * @param ce current CE + * @param trail code unit following the lead surrogate + * @return next CE for the surrogate characters + */ + private final int nextSurrogate(RuleBasedCollator collator, int ce, + char trail) + { + if (!UTF16.isTrailSurrogate(trail)) { + // unpaired lead surrogate: restore the state saved in + // m_utilSpecialBackUp_ and ignore it + updateInternalState(m_utilSpecialBackUp_); + return IGNORABLE; + } + // TODO: CE contain the data from the previous CE + the mask. + // It should at least be unmasked + int result = collator.m_trie_.getTrailValue(ce, trail); + if (result == CE_NOT_FOUND_) { + updateInternalState(m_utilSpecialBackUp_); + } + return result; + } + + /** + * Gets the CE expansion offset: bits 4 to 23 of the ce, relative to + * collator.m_expansionOffset_ + * @param collator current collator + * @param ce ce to test + * @return expansion offset + */ + private int getExpansionOffset(RuleBasedCollator collator, int ce) + { + return ((ce & 0xFFFFF0) >> 4) - collator.m_expansionOffset_; + } + + + /** + * Gets the contraction ce offset: the low 24 bits of the ce, relative to + * collator.m_contractionOffset_ + * @param collator current collator + * @param ce current ce + * @return contraction offset + */ + private int getContractionOffset(RuleBasedCollator collator, int ce) + { + return (ce & 0xFFFFFF) - collator.m_contractionOffset_; + } + + /** + * Checks if CE is a special CE carrying the special processing (prefix) + * tag CE_SPEC_PROC_TAG_ + * @param ce CE to check + * @return true if CE is a special prefix tag CE, false otherwise + */ + private boolean isSpecialPrefixTag(int ce) + { + return RuleBasedCollator.isSpecial(ce) && + RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_; + } + + /** + *

    Special processing getting a CE that is preceded by a certain + * prefix.

    + *

    Used for optimizing Japanese length and iteration marks. When a + * special processing tag is encountered, iterate backwards to see if + * there's a match.

    + *

    Contraction tables are used, prefix data is stored backwards in the + * table.

    + * @param collator collator to use + * @param ce current ce + * @param entrybackup entry backup iterator status + * @return next collation element + */ + private int nextSpecialPrefix(RuleBasedCollator collator, int ce, + Backup entrybackup) + { + // remember where we are now, then rewind to the entry state + backupInternalState(m_utilSpecialBackUp_); + updateInternalState(entrybackup); + previousChar(); + // We want to look at the character where we entered + + while (true) { + // This loop will run once per source string character, for as + // long as we are matching a potential contraction sequence + // First we position ourselves at the beginning of contraction + // sequence + int entryoffset = getContractionOffset(collator, ce); + int offset = entryoffset; + if (isBackwardsStart()) { + ce = collator.m_contractionCE_[offset]; + break; + } + char previous = (char)previousChar(); + while (previous > collator.m_contractionIndex_[offset]) { + // contraction characters are ordered, skip smaller characters + offset ++; + } + + if (previous == collator.m_contractionIndex_[offset]) { + // Found the source string char in the table. + // Pick up the corresponding CE from the table. + ce = collator.m_contractionCE_[offset]; + } + else { + // Source string char was not in the table, prefix not found + ce = collator.m_contractionCE_[entryoffset]; + } + + if (!isSpecialPrefixTag(ce)) { + // The source string char was in the contraction table, and + // the corresponding CE is not a prefix CE. We found the + // prefix, break out of loop, this CE will end up being + // returned. This is the normal way out of prefix handling + // when the source actually contained the prefix. 
+ break; + } + } + if (ce != CE_NOT_FOUND_) { + // we found something and we can merrily continue + updateInternalState(m_utilSpecialBackUp_); + } + else { // prefix search was a failure, we have to backup all the way to + // the start + updateInternalState(entrybackup); + } + return ce; + } + + /** + * Checks if the ce is a contraction tag + * @param ce ce to check + * @return true if ce is a contraction tag, false otherwise + */ + private boolean isContractionTag(int ce) + { + return RuleBasedCollator.isSpecial(ce) && + RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_; + } + + /** + * Method to copy skipped characters into the buffer and sets the fcd + * position. To ensure that the skipped characters are considered later, + * we need to place them in the appropriate position in the buffer and + * reassign the source index. Simple case: if the index resides in the string, + * simply copy to buffer and fcdposition = pos, pos = start of buffer. + * If pos is in the normalization buffer, we'll insert the copy in front of pos + * and point pos to the start of the buffer. Why am I doing these copies? + * Well, so that the whole chunk of codes in the getNextCE, + * ucol_prv_getSpecialCE does not require any changes, which will be + * really painful. 
+ * @param skipped character buffer + */ + private void setDiscontiguous(StringBuilder skipped) + { + if (m_bufferOffset_ >= 0) { + // already reading from the buffer: replace the part before the + // current offset, [0, m_bufferOffset_), with the skipped characters + m_buffer_.replace(0, m_bufferOffset_, skipped.toString()); + } + else { + // reading from the source: record the current source index as the + // FCD limit and load the skipped characters into the buffer + m_FCDLimit_ = m_source_.getIndex(); + m_buffer_.setLength(0); + m_buffer_.append(skipped.toString()); + } + + // continue reading from the start of the buffer + m_bufferOffset_ = 0; + } + + /** + * Returns the current character for forward iteration + * @return current character + */ + private int currentChar() + { + if (m_bufferOffset_ < 0) { + // step back one and read forward again, so the source index ends + // up where it started + m_source_.previous(); + return m_source_.next(); + } + + // m_bufferOffset_ is never 0 in normal circumstances except after a + // discontiguous contraction since it is always returned and moved + // by 1 when we do nextChar() + return m_buffer_.charAt(m_bufferOffset_ - 1); + } + + /** + * Method to get the discontiguous collation element within the source. + * Note this function will set the position to the appropriate places. + * Passed in character offset points to the second combining character + * after the start character. 
+ * @param collator current collator used + * @param entryoffset index to the start character in the contraction table + * @return collation element of the discontiguous contraction, or the entry + * collation element collator.m_contractionCE_[entryoffset] if no + * discontiguous match was found + */ + private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset) + { + int offset = entryoffset; + boolean multicontraction = false; + // since it will be stuffed into this iterator and ran over again + if (m_utilSkippedBuffer_ == null) { + m_utilSkippedBuffer_ = new StringBuilder(); + } + else { + m_utilSkippedBuffer_.setLength(0); + } + char ch = (char)currentChar(); + m_utilSkippedBuffer_.append((char)currentChar()); + // accent after the first character + if (m_utilSpecialDiscontiguousBackUp_ == null) { + m_utilSpecialDiscontiguousBackUp_ = new Backup(); + } + backupInternalState(m_utilSpecialDiscontiguousBackUp_); + char nextch = ch; + while (true) { + ch = nextch; + int ch_int = nextChar(); + nextch = (char)ch_int; + if (ch_int == UCharacterIterator.DONE + || getCombiningClass(nextch) == 0) { + // if there are no more accents to move around + // we don't have to shift previousChar, since we are resetting + // the offset later + if (multicontraction) { + if (ch_int != UCharacterIterator.DONE) { + previousChar(); // backtrack + } + setDiscontiguous(m_utilSkippedBuffer_); + return collator.m_contractionCE_[offset]; + } + break; + } + + offset ++; // skip the combining class offset + while ((offset < collator.m_contractionIndex_.length) && + (nextch > collator.m_contractionIndex_[offset])) { + offset ++; + } + + int ce = CE_NOT_FOUND_; + if ( offset >= collator.m_contractionIndex_.length) { + break; + } + if ( nextch != collator.m_contractionIndex_[offset] + || getCombiningClass(nextch) == getCombiningClass(ch)) { + // unmatched or blocked character + if ( (m_utilSkippedBuffer_.length()!= 1) || + ((m_utilSkippedBuffer_.charAt(0)!= nextch) && + (m_bufferOffset_<0) )) { // avoid push to skipped buffer twice + m_utilSkippedBuffer_.append(nextch); + } + offset = entryoffset; // 
Restore the offset before checking next character. + continue; + } + else { + ce = collator.m_contractionCE_[offset]; + } + + if (ce == CE_NOT_FOUND_) { + break; + } + else if (isContractionTag(ce)) { + // this is a multi-contraction + offset = getContractionOffset(collator, ce); + if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) { + multicontraction = true; + backupInternalState(m_utilSpecialDiscontiguousBackUp_); + } + } + else { + setDiscontiguous(m_utilSkippedBuffer_); + return ce; + } + } + + updateInternalState(m_utilSpecialDiscontiguousBackUp_); + // backup is one forward of the base character, we need to move back + // one more + previousChar(); + return collator.m_contractionCE_[entryoffset]; + } + + /** + * Gets the next contraction ce + * @param collator collator to use + * @param ce current ce + * @return ce of the next contraction + */ + private int nextContraction(RuleBasedCollator collator, int ce) + { + backupInternalState(m_utilSpecialBackUp_); + int entryce = collator.m_contractionCE_[getContractionOffset(collator, ce)]; //CE_NOT_FOUND_; + while (true) { + int entryoffset = getContractionOffset(collator, ce); + int offset = entryoffset; + + if (isEnd()) { + ce = collator.m_contractionCE_[offset]; + if (ce == CE_NOT_FOUND_) { + // back up the source over all the chars we scanned going + // into this contraction. + ce = entryce; + updateInternalState(m_utilSpecialBackUp_); + } + break; + } + + // get the discontiguous maximum combining class + int maxCC = (collator.m_contractionIndex_[offset] & 0xFF); + // checks if all characters have the same combining class + byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8); + char ch = (char)nextChar(); + offset ++; + while (ch > collator.m_contractionIndex_[offset]) { + // contraction characters are ordered, skip all smaller + offset ++; + } + + if (ch == collator.m_contractionIndex_[offset]) { + // Found the source string char in the contraction table. 
+ // Pick up the corresponding CE from the table. + ce = collator.m_contractionCE_[offset]; + } + else { + // Source string char was not in contraction table. + // Unless it is a discontiguous contraction, we are done + int miss = ch; + if(UTF16.isLeadSurrogate(ch)) { // in order to do the proper detection, we + // need to see if we're dealing with a supplementary + miss = UCharacterProperty.getRawSupplementary(ch, (char) nextChar()); + } + int sCC; + if (maxCC == 0 || (sCC = getCombiningClass(miss)) == 0 + || sCC > maxCC || (allSame != 0 && sCC == maxCC) || + isEnd()) { + // Contraction can not be discontiguous, back up by one + previousChar(); + if(miss > 0xFFFF) { + previousChar(); + } + ce = collator.m_contractionCE_[entryoffset]; + } + else { + // Contraction is possibly discontiguous. + // find the next character if ch is not a base character + int ch_int = nextChar(); + if (ch_int != UCharacterIterator.DONE) { + previousChar(); + } + char nextch = (char)ch_int; + if (getCombiningClass(nextch) == 0) { + previousChar(); + if(miss > 0xFFFF) { + previousChar(); + } + // base character not part of discontiguous contraction + ce = collator.m_contractionCE_[entryoffset]; + } + else { + ce = nextDiscontiguous(collator, entryoffset); + } + } + } + + if (ce == CE_NOT_FOUND_) { + // source did not match the contraction, revert back original + updateInternalState(m_utilSpecialBackUp_); + ce = entryce; + break; + } + + // source was a contraction + if (!isContractionTag(ce)) { + break; + } + + // continue looping to check for the remaining contraction. + if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) { + // there are further contractions to be performed, so we store + // the so-far completed ce, so that if we fail in the next + // round we just return this one. 
+ entryce = collator.m_contractionCE_[entryoffset]; + backupInternalState(m_utilSpecialBackUp_); + // the backup should point one character back, whether it refers to + // the buffer or to the source + if (m_utilSpecialBackUp_.m_bufferOffset_ >= 0) { + m_utilSpecialBackUp_.m_bufferOffset_ --; + } + else { + m_utilSpecialBackUp_.m_offset_ --; + } + } + } + return ce; + } + + /** + * Gets the next ce for long primaries, stuffs the rest of the collation + * elements into the ce buffer + * @param ce current ce + * @return next ce + */ + private int nextLongPrimary(int ce) + { + // second CE: the lowest byte of ce moved into the top byte, flagged as + // a continuation + m_CEBuffer_[1] = ((ce & 0xFF) << 24) + | RuleBasedCollator.CE_CONTINUATION_MARKER_; + m_CEBufferOffset_ = 1; + m_CEBufferSize_ = 2; + // first CE: bits 8 to 23 of ce moved into the top two bytes, with + // CE_BYTE_COMMON_ in each of the two low bytes + m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | + CE_BYTE_COMMON_; + return m_CEBuffer_[0]; + } + + /** + * Gets the number of expansion CEs, stored in the low 4 bits of the ce + * @param ce current ce + * @return number of expansion CEs + */ + private int getExpansionCount(int ce) + { + return ce & 0xF; + } + + /** + * Gets the next expansion ce and stuffs the rest of the collation elements + * into the ce buffer + * @param collator current collator + * @param ce current ce + * @return next expansion ce + */ + private int nextExpansion(RuleBasedCollator collator, int ce) + { + // NOTE: we can encounter both continuations and expansions in an + // expansion! 
+ // I have to decide where continuations are going to be dealt with + int offset = getExpansionOffset(collator, ce); + m_CEBufferSize_ = getExpansionCount(ce); + m_CEBufferOffset_ = 1; + m_CEBuffer_[0] = collator.m_expansion_[offset]; + if (m_CEBufferSize_ != 0) { + // if there are less than 16 elements in expansion + for (int i = 1; i < m_CEBufferSize_; i ++) { + m_CEBuffer_[i] = collator.m_expansion_[offset + i]; + } + } + else { + // a count of 0 means the ces are zero-terminated in the table + // NOTE(review): the terminating 0 is copied into the buffer and + // counted as well - confirm that this is intended + m_CEBufferSize_ = 1; + while (collator.m_expansion_[offset] != 0) { + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.m_expansion_[++ offset]; + } + } + // in case of one element expansion, we + // want to immediately return CEpos + if (m_CEBufferSize_ == 1) { + m_CEBufferSize_ = 0; + m_CEBufferOffset_ = 0; + } + return m_CEBuffer_[0]; + } + + /** + * Gets the next digit ce + * @param collator current collator + * @param ce current collation element + * @param cp current codepoint + * @return next digit ce + */ + private int nextDigit(RuleBasedCollator collator, int ce, int cp) + { + // We do a check to see if we want to collate digits as numbers; + // if so we generate a custom collation key. Otherwise we pull out + // the value stored in the expansion table. + + if (m_collator_.m_isNumericCollation_){ + int collateVal = 0; + int trailingZeroIndex = 0; + boolean nonZeroValReached = false; + + // I just need a temporary place to store my generated CEs. + // icu4c uses a unsigned byte array, i'll use a stringbuffer here + // to avoid dealing with the sign problems and array allocation + // clear and set initial string buffer length + m_utilStringBuffer_.setLength(3); + + // We parse the source string until we hit a char that's NOT a + // digit. + // Use this u_charDigitValue. This might be slow because we have + // to handle surrogates... 
+ int digVal = UCharacter.digit(cp); + // if we have arrived here, we have already processed possible + // supplementaries that triggered the digit tag - + // all supplementaries are marked in the UCA. + // We pad a zero in front of the first element anyways. + // This takes care of the (probably) most common case where + // people are sorting things followed by a single digit + int digIndx = 1; + for (;;) { + // Make sure we have enough space. + if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) { + m_utilStringBuffer_.setLength(m_utilStringBuffer_.length() + << 1); + } + // Skipping over leading zeroes. + if (digVal != 0 || nonZeroValReached) { + if (digVal != 0 && !nonZeroValReached) { + nonZeroValReached = true; + } + // We parse the digit string into base 100 numbers + // (this fits into a byte). + // We only add to the buffer in twos, thus if we are + // parsing an odd character, that serves as the + // 'tens' digit while the if we are parsing an even + // one, that is the 'ones' digit. We dumped the + // parsed base 100 value (collateVal) into a buffer. + // We multiply each collateVal by 2 (to give us room) + // and add 5 (to avoid overlapping magic CE byte + // values). The last byte we subtract 1 to ensure it is + // less than all the other bytes. + // NOTE(review): the code below adds 6, not 5, and 1 is + // subtracted from the last byte later on. + if (digIndx % 2 == 1) { + collateVal += digVal; + // This removes trailing zeroes. + if (collateVal == 0 && trailingZeroIndex == 0) { + trailingZeroIndex = ((digIndx - 1) >>> 1) + 2; + } + else if (trailingZeroIndex != 0) { + trailingZeroIndex = 0; + } + m_utilStringBuffer_.setCharAt( + ((digIndx - 1) >>> 1) + 2, + (char)((collateVal << 1) + 6)); + collateVal = 0; + } + else { + // We drop the collation value into the buffer so if + // we need to do a "front patch" we don't have to + // check to see if we're hitting the last element. + collateVal = digVal * 10; + m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2, + (char)((collateVal << 1) + 6)); + } + digIndx ++; + } + + // Get next character. 
+ if (!isEnd()){ + backupInternalState(m_utilSpecialBackUp_); + int char32 = nextChar(); + char ch = (char)char32; + if (UTF16.isLeadSurrogate(ch)){ + if (!isEnd()) { + char trail = (char)nextChar(); + if (UTF16.isTrailSurrogate(trail)) { + char32 = UCharacterProperty.getRawSupplementary( + ch, trail); + } + else { + goBackOne(); + } + } + } + + digVal = UCharacter.digit(char32); + if (digVal == -1) { + // Resetting position to point to the next unprocessed + // char. We overshot it when doing our test/set for + // numbers. + updateInternalState(m_utilSpecialBackUp_); + break; + } + } + else { + break; + } + } + + // only zeroes were seen: encode a single zero value + if (nonZeroValReached == false){ + digIndx = 2; + m_utilStringBuffer_.setCharAt(2, (char)6); + } + + int endIndex = trailingZeroIndex != 0 ? trailingZeroIndex + : (digIndx >>> 1) + 2; + if (digIndx % 2 != 0){ + // We missed a value. Since digIndx isn't even, we stuck too many + // values into the buffer (this is what we get for padding the + // first byte with a zero). "Front-patch" now by pushing all + // nybbles forward. + // Doing it this way ensures that at least 50% of the time + // (statistically speaking) we'll only be doing a single pass + // and optimizes for strings with single digits. I'm just + // assuming that's the more common case. + for (int i = 2; i < endIndex; i ++){ + m_utilStringBuffer_.setCharAt(i, + (char)((((((m_utilStringBuffer_.charAt(i) - 6) >>> 1) + % 10) * 10) + + (((m_utilStringBuffer_.charAt(i + 1) - 6) + >>> 1) / 10) << 1) + 6)); + } + -- digIndx; + } + + // Subtract one off of the last byte. + m_utilStringBuffer_.setCharAt(endIndex - 1, + (char)(m_utilStringBuffer_.charAt(endIndex - 1) - 1)); + + // We want to skip over the first two slots in the buffer. + // The first slot is reserved for the header byte CODAN_PLACEHOLDER. + // The second slot is for the sign/exponent byte: + // 0x80 + (decimalPos/2) & 7f. 
+ m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER); + m_utilStringBuffer_.setCharAt(1, + (char)(0x80 + ((digIndx >>> 1) & 0x7F))); + + // Now transfer the collation key to our collIterate struct. + // The total size for our collation key is endIndx bumped up to the next largest even value divided by two. + ce = (((m_utilStringBuffer_.charAt(0) << 8) + // Primary weight + | m_utilStringBuffer_.charAt(1)) + << RuleBasedCollator.CE_PRIMARY_SHIFT_) + // Secondary weight + | (RuleBasedCollator.BYTE_COMMON_ + << RuleBasedCollator.CE_SECONDARY_SHIFT_) + | RuleBasedCollator.BYTE_COMMON_; // Tertiary weight. + int i = 2; // Reset the index into the buffer. + + m_CEBuffer_[0] = ce; + m_CEBufferSize_ = 1; + m_CEBufferOffset_ = 1; + // pack the remaining buffer values two at a time into continuation ces + while (i < endIndex) + { + int primWeight = m_utilStringBuffer_.charAt(i ++) << 8; + if (i < endIndex) { + primWeight |= m_utilStringBuffer_.charAt(i ++); + } + m_CEBuffer_[m_CEBufferSize_ ++] + = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_) + | RuleBasedCollator.CE_CONTINUATION_MARKER_; + } + return ce; + } + + // no numeric mode, we'll just switch to whatever we stashed and + // continue + // find the offset to expansion table + return collator.m_expansion_[getExpansionOffset(collator, ce)]; + } + + /** + * Gets the next implicit ce for codepoints + * @param codepoint current codepoint + * @return implicit ce + */ + private int nextImplicit(int codepoint) + { + if (!UCharacter.isLegal(codepoint)) { + // synwee to check with vladimir on the range of isNonChar() + // illegal code value, use completely ignorable! 
+ return IGNORABLE; + } + int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint); + m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_) + | 0x00000505; + m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0; + m_CEBufferOffset_ = 1; + m_CEBufferSize_ = 2; + return m_CEBuffer_[0]; + } + + /** + * Returns the next ce associated with the following surrogate characters. + * If the next code unit is a trail surrogate, the implicit ce of the + * resulting supplementary code point is returned; otherwise the code unit + * that was read (if any) is pushed back and IGNORABLE is returned. + * @param ch current character (lead surrogate) + * @return ce + */ + private int nextSurrogate(char ch) + { + int ch_int = nextChar(); + char nextch = (char)ch_int; + // nextChar() results are tested against UCharacterIterator.DONE + // everywhere else in this class; testing the narrowed char against + // CharacterIterator.DONE (U+FFFF) would mistake a real U+FFFF in the + // source for the end of input and fail to push it back + if (ch_int != UCharacterIterator.DONE && + UTF16.isTrailSurrogate(nextch)) { + int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch); + return nextImplicit(codepoint); + } + if (ch_int != UCharacterIterator.DONE) { + previousChar(); // reverts back to the original position + } + return IGNORABLE; // completely ignorable + } + + /** + * Returns the next ce for a hangul character, this is an implicit + * calculation + * @param collator current collator + * @param ch current character + * @return hangul ce + */ + private int nextHangul(RuleBasedCollator collator, char ch) + { + char L = (char)(ch - HANGUL_SBASE_); + + // divide into pieces + // do it in this order since some compilers can do % and / in one + // operation + char T = (char)(L % HANGUL_TCOUNT_); + L /= HANGUL_TCOUNT_; + char V = (char)(L % HANGUL_VCOUNT_); + L /= HANGUL_VCOUNT_; + + // offset them + L += HANGUL_LBASE_; + V += HANGUL_VBASE_; + T += HANGUL_TBASE_; + + // return the first CE, but first put the rest into the expansion + // buffer + m_CEBufferSize_ = 0; + if (!collator.m_isJamoSpecial_) { // FAST PATH + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.m_trie_.getLeadValue(L); + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.m_trie_.getLeadValue(V); + + if (T != HANGUL_TBASE_) { + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.m_trie_.getLeadValue(T); + } + m_CEBufferOffset_ = 1; + return m_CEBuffer_[0]; + } + else { + // Jamo is Special + 
// Since Hanguls pass the FCD check, it is guaranteed that we + // won't be in the normalization buffer if something like this + // happens + // Move Jamos into normalization buffer + m_buffer_.append(L); + m_buffer_.append(V); + if (T != HANGUL_TBASE_) { + m_buffer_.append(T); + } + m_FCDLimit_ = m_source_.getIndex(); + m_FCDStart_ = m_FCDLimit_ - 1; + // Indicate where to continue in main input string after + // exhausting the buffer + return IGNORABLE; + } + } + + /** + *

    Special CE management. Expansions, contractions etc...

    + * @param collator can be plain UCA + * @param ce current ce + * @param ch current character + * @return next special ce + */ + private int nextSpecial(RuleBasedCollator collator, int ce, char ch) + { + int codepoint = ch; + Backup entrybackup = m_utilSpecialEntryBackUp_; + // this is to handle recursive looping + if (entrybackup != null) { + m_utilSpecialEntryBackUp_ = null; + } + else { + entrybackup = new Backup(); + } + backupInternalState(entrybackup); + try { // forces it to assign m_utilSpecialEntryBackup_ + while (true) { + // This loop will repeat only in the case of contractions, + // surrogate + switch(RuleBasedCollator.getTag(ce)) { + case CE_NOT_FOUND_TAG_: + // impossible case for icu4j + return ce; + case RuleBasedCollator.CE_SURROGATE_TAG_: + if (isEnd()) { + return IGNORABLE; + } + backupInternalState(m_utilSpecialBackUp_); + char trail = (char)nextChar(); + ce = nextSurrogate(collator, ce, trail); + // calculate the supplementary code point value, + // if surrogate was not tailored we go one more round + codepoint = + UCharacterProperty.getRawSupplementary(ch, trail); + break; + case CE_SPEC_PROC_TAG_: + ce = nextSpecialPrefix(collator, ce, entrybackup); + break; + case CE_CONTRACTION_TAG_: + ce = nextContraction(collator, ce); + break; + case CE_LONG_PRIMARY_TAG_: + return nextLongPrimary(ce); + case CE_EXPANSION_TAG_: + return nextExpansion(collator, ce); + case CE_DIGIT_TAG_: + ce = nextDigit(collator, ce, codepoint); + break; + // various implicits optimization + case CE_CJK_IMPLICIT_TAG_: + // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D + return nextImplicit(codepoint); + case CE_IMPLICIT_TAG_: // everything that is not defined + return nextImplicit(codepoint); + case CE_TRAIL_SURROGATE_TAG_: + return IGNORABLE; // DC00-DFFF broken surrogate + case CE_LEAD_SURROGATE_TAG_: // D800-DBFF + return nextSurrogate(ch); + case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF + return nextHangul(collator, ch); + case CE_CHARSET_TAG_: + // not yet 
implemented probably after 1.8 + return CE_NOT_FOUND_; + default: + ce = IGNORABLE; + // synwee todo, throw exception or something here. + } + if (!RuleBasedCollator.isSpecial(ce)) { + break; + } + } + } + finally { + m_utilSpecialEntryBackUp_ = entrybackup; + } + return ce; + } + + /** + * Special processing is getting a CE that is preceded by a certain prefix. + * Currently this is only needed for optimizing Japanese length and + * iteration marks. When we encouter a special processing tag, we go + * backwards and try to see if we have a match. Contraction tables are used + * - so the whole process is not unlike contraction. prefix data is stored + * backwards in the table. + * @param collator current collator + * @param ce current ce + * @return previous ce + */ + private int previousSpecialPrefix(RuleBasedCollator collator, int ce) + { + backupInternalState(m_utilSpecialBackUp_); + while (true) { + // position ourselves at the begining of contraction sequence + int offset = getContractionOffset(collator, ce); + int entryoffset = offset; + if (isBackwardsStart()) { + ce = collator.m_contractionCE_[offset]; + break; + } + char prevch = (char)previousChar(); + while (prevch > collator.m_contractionIndex_[offset]) { + // since contraction codepoints are ordered, we skip all that + // are smaller + offset ++; + } + if (prevch == collator.m_contractionIndex_[offset]) { + ce = collator.m_contractionCE_[offset]; + } + else { + // if there is a completely ignorable code point in the middle + // of a prefix, we need to act as if it's not there assumption: + // 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to + // zero) + // lone surrogates cannot be set to zero as it would break + // other processing + int isZeroCE = collator.m_trie_.getLeadValue(prevch); + // it's easy for BMP code points + if (isZeroCE == 0) { + continue; + } + else if (UTF16.isTrailSurrogate(prevch) + || UTF16.isLeadSurrogate(prevch)) { + // for supplementary code points, we have to check the 
next one + // situations where we are going to ignore + // 1. beginning of the string: schar is a lone surrogate + // 2. schar is a lone surrogate + // 3. schar is a trail surrogate in a valid surrogate + // sequence that is explicitly set to zero. + if (!isBackwardsStart()) { + char lead = (char)previousChar(); + if (UTF16.isLeadSurrogate(lead)) { + isZeroCE = collator.m_trie_.getLeadValue(lead); + if (RuleBasedCollator.getTag(isZeroCE) + == RuleBasedCollator.CE_SURROGATE_TAG_) { + int finalCE = collator.m_trie_.getTrailValue( + isZeroCE, + prevch); + if (finalCE == 0) { + // this is a real, assigned completely + // ignorable code point + continue; + } + } + } + else { + nextChar(); // revert to original offset + // lone surrogate, completely ignorable + continue; + } + nextChar(); // revert to original offset + } + else { + // lone surrogate at the beggining, completely ignorable + continue; + } + } + + // char was not in the table. prefix not found + ce = collator.m_contractionCE_[entryoffset]; + } + + if (!isSpecialPrefixTag(ce)) { + // char was in the contraction table, and the corresponding ce + // is not a prefix ce. We found the prefix, break out of loop, + // this ce will end up being returned. + break; + } + } + updateInternalState(m_utilSpecialBackUp_); + return ce; + } + + /** + * Retrieves the previous contraction ce. To ensure that the backwards and + * forwards iteration matches, we take the current region of most possible + * match and pass it through the forward iteration. This will ensure that + * the obstinate problem of overlapping contractions will not occur. + * @param collator current collator + * @param ce current ce + * @param ch current character + * @return previous contraction ce + */ + private int previousContraction(RuleBasedCollator collator, int ce, char ch) + { + m_utilStringBuffer_.setLength(0); + // since we might encounter normalized characters (from the thai + // processing) we can't use peekCharacter() here. 
+ char prevch = (char)previousChar(); + boolean atStart = false; + // TODO: address the comment above - maybe now we *can* use peekCharacter + //while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) { + while (collator.isUnsafe(ch)) { + m_utilStringBuffer_.insert(0, ch); + ch = prevch; + if (isBackwardsStart()) { + atStart = true; + break; + } + prevch = (char)previousChar(); + } + if (!atStart) { + // undo the previousChar() if we didn't reach the beginning + nextChar(); + } + // adds the initial base character to the string + m_utilStringBuffer_.insert(0, ch); + + // a new collation element iterator is used to simply things, since + // using the current collation element iterator will mean that the + // forward and backwards iteration will share and change the same + // buffers. it is going to be painful. + int originaldecomp = collator.getDecomposition(); + // for faster access, since string would have been normalized above + collator.setDecomposition(Collator.NO_DECOMPOSITION); + if (m_utilColEIter_ == null) { + m_utilColEIter_ = new CollationElementIterator( + m_utilStringBuffer_.toString(), + collator); + } + else { + m_utilColEIter_.m_collator_ = collator; + m_utilColEIter_.setText(m_utilStringBuffer_.toString()); + } + ce = m_utilColEIter_.next(); + m_CEBufferSize_ = 0; + while (ce != NULLORDER) { + if (m_CEBufferSize_ == m_CEBuffer_.length) { + try { + // increasing cebuffer size + int tempbuffer[] = new int[m_CEBuffer_.length + 50]; + System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0, + m_CEBuffer_.length); + m_CEBuffer_ = tempbuffer; + } + catch( MissingResourceException e) + { + throw e; + } + catch (Exception e) { + if(DEBUG){ + e.printStackTrace(); + } + return NULLORDER; + } + } + m_CEBuffer_[m_CEBufferSize_ ++] = ce; + ce = m_utilColEIter_.next(); + } + collator.setDecomposition(originaldecomp); + m_CEBufferOffset_ = m_CEBufferSize_ - 1; + return m_CEBuffer_[m_CEBufferOffset_]; + } + + /** + * Returns the previous long primary ces + * @param ce long 
primary ce
+     * @return previous long primary ces
+     */
+    private int previousLongPrimary(int ce)
+    {
+        // a long primary expands into two elements; since we iterate
+        // backwards the last one stored is the one returned first
+        m_CEBufferSize_ = 0;
+        m_CEBuffer_[m_CEBufferSize_ ++] =
+            ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | CE_BYTE_COMMON_;
+        m_CEBuffer_[m_CEBufferSize_ ++] = ((ce & 0xFF) << 24)
+            | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+        m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+        return m_CEBuffer_[m_CEBufferOffset_];
+    }
+
+    /**
+     * Returns the previous expansion ces.
+     * Copies the expansion from the collator's expansion table into the CE
+     * buffer and returns the last element copied.
+     * @param collator current collator
+     * @param ce current ce
+     * @return previous expansion ce
+     */
+    private int previousExpansion(RuleBasedCollator collator, int ce)
+    {
+        // find the offset to expansion table
+        int offset = getExpansionOffset(collator, ce);
+        m_CEBufferSize_ = getExpansionCount(ce);
+        if (m_CEBufferSize_ != 0) {
+            // less than 16 elements in expansion
+            for (int i = 0; i < m_CEBufferSize_; i ++) {
+                m_CEBuffer_[i] = collator.m_expansion_[offset + i];
+            }
+
+        }
+        else {
+            // null terminated ces
+            while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) {
+                m_CEBuffer_[m_CEBufferSize_] =
+                    collator.m_expansion_[offset + m_CEBufferSize_];
+                m_CEBufferSize_ ++;
+            }
+        }
+        m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+        return m_CEBuffer_[m_CEBufferOffset_];
+    }
+
+    /**
+     * Getting the digit collation elements while iterating backwards.
+     * When numeric collation is enabled on m_collator_, the run of digits
+     * ending at ch is scanned backwards and packed, two decimal digits per
+     * buffer slot, into a sequence of ces stored in the CE buffer. When it is
+     * disabled the ce stored in the expansion table is returned.
+     * @param collator collator whose expansion table is consulted when
+     *        numeric collation is off
+     * @param ce current collation element
+     * @param ch current code point
+     * @return digit collation element
+     */
+    private int previousDigit(RuleBasedCollator collator, int ce, char ch)
+    {
+        // We do a check to see if we want to collate digits as numbers; if so we generate
+        //  a custom collation key. Otherwise we pull out the value stored in the expansion table.
+        if (m_collator_.m_isNumericCollation_){
+            int leadingZeroIndex = 0;
+            int collateVal = 0;
+            boolean nonZeroValReached = false;
+
+            // clear and set initial string buffer length
+            m_utilStringBuffer_.setLength(3);
+
+            // We parse the source string until we hit a char that's NOT a digit
+            // Use this u_charDigitValue. This might be slow because we have to
+            // handle surrogates...
+            int char32 = ch;
+            if (UTF16.isTrailSurrogate(ch)) {
+                if (!isBackwardsStart()){
+                    char lead = (char)previousChar();
+                    if (UTF16.isLeadSurrogate(lead)) {
+                        char32 = UCharacterProperty.getRawSupplementary(lead,
+                                                                        ch);
+                    }
+                    else {
+                        goForwardOne();
+                    }
+                }
+            }
+            int digVal = UCharacter.digit(char32);
+            int digIndx = 0;
+            for (;;) {
+                // Make sure we have enough space.
+                if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
+                    m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
+                                                  << 1);
+                }
+                // Skipping over "trailing" zeroes but we still add to digIndx.
+                if (digVal != 0 || nonZeroValReached) {
+                    if (digVal != 0 && !nonZeroValReached) {
+                        nonZeroValReached = true;
+                    }
+
+                    // We parse the digit string into base 100 numbers (this
+                    // fits into a byte).
+                    // We only add to the buffer in twos, thus if we are
+                    // parsing an odd character, that serves as the 'tens'
+                    // digit while the if we are parsing an even one, that is
+                    // the 'ones' digit. We dumped the parsed base 100 value
+                    // (collateVal) into a buffer. We multiply each collateVal
+                    // by 2 (to give us room) and add 5 (to avoid overlapping
+                    // magic CE byte values). The last byte we subtract 1 to
+                    // ensure it is less than all the other bytes.
+                    // Since we're doing in this reverse we want to put the
+                    // first digit encountered into the ones place and the
+                    // second digit encountered into the tens place.
+
+                    if (digIndx % 2 == 1){
+                        collateVal += digVal * 10;
+
+                        // This removes leading zeroes.
+                        if (collateVal == 0 && leadingZeroIndex == 0) {
+                           leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
+                        }
+                        else if (leadingZeroIndex != 0) {
+                            leadingZeroIndex = 0;
+                        }
+
+                        m_utilStringBuffer_.setCharAt(((digIndx - 1) >>> 1) + 2,
+                                                (char)((collateVal << 1) + 6));
+                        collateVal = 0;
+                    }
+                    else {
+                        collateVal = digVal;
+                    }
+                }
+                digIndx ++;
+
+                if (!isBackwardsStart()){
+                    backupInternalState(m_utilSpecialBackUp_);
+                    char32 = previousChar();
+                    // The surrogate test and pairing must use the unit that
+                    // was just read. Testing the ch parameter instead pairs
+                    // a new lead with a stale trail and, when ch itself is a
+                    // trail surrogate, restores the position while still
+                    // consuming the digit, so the same digit is read again
+                    // and again.
+                    char prevch = (char)char32;
+                    if (UTF16.isTrailSurrogate(prevch)){
+                        if (!isBackwardsStart()) {
+                            char lead = (char)previousChar();
+                            if (UTF16.isLeadSurrogate(lead)) {
+                                char32
+                                    = UCharacterProperty.getRawSupplementary(
+                                                                lead, prevch);
+                            }
+                            else {
+                                // lone trail surrogate: char32 stays the
+                                // surrogate, which is not a digit, so the
+                                // check below ends the scan
+                                updateInternalState(m_utilSpecialBackUp_);
+                            }
+                        }
+                    }
+
+                    digVal = UCharacter.digit(char32);
+                    if (digVal == -1) {
+                        // not a digit: un-read it and stop
+                        updateInternalState(m_utilSpecialBackUp_);
+                        break;
+                    }
+                }
+                else {
+                    break;
+                }
+            }
+
+            if (nonZeroValReached == false) {
+                digIndx = 2;
+                m_utilStringBuffer_.setCharAt(2, (char)6);
+            }
+
+            if (digIndx % 2 != 0) {
+                if (collateVal == 0 && leadingZeroIndex == 0) {
+                    // This removes the leading 0 in a odd number sequence of
+                    // numbers e.g. avery001
+                    leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
+                }
+                else {
+                    // this is not a leading 0, we add it in
+                    m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
+                                                (char)((collateVal << 1) + 6));
+                    digIndx ++;
+                }
+            }
+
+            int endIndex = leadingZeroIndex != 0 ? leadingZeroIndex
+                                               : ((digIndx >>> 1) + 2) ;
+            digIndx = ((endIndex - 2) << 1) + 1; // removing initial zeros
+            // Subtract one off of the last byte.
+            // Really the first byte here, but it's reversed...
+            m_utilStringBuffer_.setCharAt(2,
+                                    (char)(m_utilStringBuffer_.charAt(2) - 1));
+            // We want to skip over the first two slots in the buffer.
+            // The first slot is reserved for the header byte CODAN_PLACEHOLDER.
+            // The second slot is for the sign/exponent byte:
+            // 0x80 + (decimalPos/2) & 7f.
+            m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER);
+            m_utilStringBuffer_.setCharAt(1,
+                                    (char)(0x80 + ((digIndx >>> 1) & 0x7F)));
+
+            // Now transfer the collation key to our collIterate struct.
+            // The total size for our collation key is endIndx bumped up to the
+            // next largest even value divided by two.
+            m_CEBufferSize_ = 0;
+            m_CEBuffer_[m_CEBufferSize_ ++]
+                        = (((m_utilStringBuffer_.charAt(0) << 8)
+                            // Primary weight
+                            | m_utilStringBuffer_.charAt(1))
+                              << RuleBasedCollator.CE_PRIMARY_SHIFT_)
+                            // Secondary weight
+                            | (RuleBasedCollator.BYTE_COMMON_
+                               << RuleBasedCollator.CE_SECONDARY_SHIFT_)
+                            // Tertiary weight.
+                            | RuleBasedCollator.BYTE_COMMON_;
+            int i = endIndex - 1; // Reset the index into the buffer.
+            while (i >= 2) {
+                int primWeight = m_utilStringBuffer_.charAt(i --) << 8;
+                if (i >= 2) {
+                    primWeight |= m_utilStringBuffer_.charAt(i --);
+                }
+                m_CEBuffer_[m_CEBufferSize_ ++]
+                    = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
+                      | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+            }
+            m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+            return m_CEBuffer_[m_CEBufferOffset_];
+        }
+        else {
+            return collator.m_expansion_[getExpansionOffset(collator, ce)];
+        }
+    }
+
+    /**
+     * Returns previous hangul ces.
+     * The syllable is split arithmetically into its L, V and T parts. When
+     * the collator does not flag jamo as special, the trie values of the
+     * parts are stored in the CE buffer and the last one stored is returned.
+     * Otherwise the parts are appended to m_buffer_, the FCD start/limit are
+     * set around the current source index and IGNORABLE is returned.
+     * @param collator current collator
+     * @param ch current character
+     * @return previous hangul ce
+     */
+    private int previousHangul(RuleBasedCollator collator, char ch)
+    {
+        char L = (char)(ch - HANGUL_SBASE_);
+        // we do it in this order since some compilers can do % and / in one
+        // operation
+        char T = (char)(L % HANGUL_TCOUNT_);
+        L /= HANGUL_TCOUNT_;
+        char V = (char)(L % HANGUL_VCOUNT_);
+        L /= HANGUL_VCOUNT_;
+
+        // offset them
+        L += HANGUL_LBASE_;
+        V += HANGUL_VBASE_;
+        T += HANGUL_TBASE_;
+
+        m_CEBufferSize_ = 0;
+        if (!collator.m_isJamoSpecial_) {
+            m_CEBuffer_[m_CEBufferSize_ ++] =
+                collator.m_trie_.getLeadValue(L);
+            m_CEBuffer_[m_CEBufferSize_ ++] =
+                collator.m_trie_.getLeadValue(V);
+            if (T != HANGUL_TBASE_) {
+                m_CEBuffer_[m_CEBufferSize_ ++] =
+                    collator.m_trie_.getLeadValue(T);
+            }
+            m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+            return m_CEBuffer_[m_CEBufferOffset_];
+        }
+        else {
+            // Since Hanguls pass the FCD check, it is guaranteed that we won't
+            // be in the normalization buffer if something like this happens
+            // Move Jamos into normalization buffer
+            m_buffer_.append(L);
+            m_buffer_.append(V);
+            if (T != HANGUL_TBASE_) {
+                m_buffer_.append(T);
+            }
+
+            m_FCDStart_ = m_source_.getIndex();
+            m_FCDLimit_ = m_FCDStart_ + 1;
+            return IGNORABLE;
+        }
+    }
+
+    /**
+     * Gets implicit codepoint ces.
+     * Stores the same two elements as the forward direction in the CE buffer
+     * but returns the second one, m_CEBuffer_[1], with the buffer offset
+     * pointing at it.
+     * @param codepoint current codepoint
+     * @return implicit codepoint ces, or IGNORABLE when the code point is
+     *         not legal
+     */
+    private int previousImplicit(int codepoint)
+    {
+        if (!UCharacter.isLegal(codepoint)) {
+            return IGNORABLE; // illegal code value, completely ignorable!
+        }
+        int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
+        m_CEBufferSize_ = 2;
+        m_CEBufferOffset_ = 1;
+        m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
+                         | 0x00000505;
+        m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
+        return m_CEBuffer_[1];
+    }
+
+    /**
+     * Gets the previous surrogate ce.
+     * Reads the unit before the trail surrogate ch; if it is a lead surrogate
+     * the pair is combined and handed to previousImplicit(). Otherwise the
+     * unit is pushed back with nextChar() (unless it equals
+     * CharacterIterator.DONE) and IGNORABLE is returned.
+     * @param ch current character
+     * @return previous surrogate ce
+     */
+    private int previousSurrogate(char ch)
+    {
+        if (isBackwardsStart()) {
+            // we are at the start of the string, wrong place to be at
+            return IGNORABLE;
+        }
+        char prevch = (char)previousChar();
+        // Handles Han and Supplementary characters here.
+        if (UTF16.isLeadSurrogate(prevch)) {
+            return previousImplicit(
+                          UCharacterProperty.getRawSupplementary(prevch, ch));
+        }
+        if (prevch != CharacterIterator.DONE) {
+            nextChar();
+        }
+        return IGNORABLE; // completely ignorable
+    }
+
+    /**
+     *

    Special CE management. Expansions, contractions etc...

    +     * @param collator can be plain UCA
+     * @param ce current ce
+     * @param ch current character
+     * @return previous special ce
+     */
+    private int previousSpecial(RuleBasedCollator collator, int ce, char ch)
+    {
+        // Tags fall into two groups: those that produce the final answer and
+        // return at once, and those that only replace ce (special prefix,
+        // contraction at the start of the text, digit). For the second group
+        // we go round again for as long as the new ce is itself special.
+        do {
+            switch (RuleBasedCollator.getTag(ce)) {
+            // ---- tags that always return ----
+            case CE_NOT_FOUND_TAG_:
+                return ce;
+            case RuleBasedCollator.CE_SURROGATE_TAG_:
+                // essentially a disengaged lead surrogate. a broken
+                // sequence was encountered and this is an error
+            case CE_LEAD_SURROGATE_TAG_:  // D800-DBFF
+                // broken surrogate sequence
+                return IGNORABLE;
+            case CE_TRAIL_SURROGATE_TAG_: // DC00-DFFF
+                return previousSurrogate(ch);
+            case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
+                return previousHangul(collator, ch);
+            case CE_CJK_IMPLICIT_TAG_:
+                // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
+            case CE_IMPLICIT_TAG_: // everything that is not defined
+                // UCA is filled with these. Tailorings are NOT_FOUND
+                return previousImplicit(ch);
+            case CE_LONG_PRIMARY_TAG_:
+                return previousLongPrimary(ce);
+            case CE_EXPANSION_TAG_:
+                return previousExpansion(collator, ce);
+            case CE_CHARSET_TAG_:
+                return CE_NOT_FOUND_;
+            // ---- tags that may loop ----
+            case CE_CONTRACTION_TAG_:
+                // may loop for first character e.g. "0x0f71" for english
+                if (!isBackwardsStart()) {
+                    return previousContraction(collator, ce, ch);
+                }
+                // start of string or this is not the end of any contraction
+                ce = collator.m_contractionCE_[
+                                        getContractionOffset(collator, ce)];
+                break;
+            case CE_SPEC_PROC_TAG_:
+                ce = previousSpecialPrefix(collator, ce);
+                break;
+            case CE_DIGIT_TAG_:
+                ce = previousDigit(collator, ce, ch);
+                break;
+            default:
+                ce = IGNORABLE;
+            }
+        } while (RuleBasedCollator.isSpecial(ce));
+        return ce;
+    }
+
+    /**
+     * GET IMPLICIT PRIMARY WEIGHTS
+     * @param cp codepoint
+     * @param value is left justified primary key
+     */
+//    private static final int getImplicitPrimary(int cp)
+//    {
+//        cp = swapCJK(cp);
+//
+//        //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
+//        // we now have a range of numbers from 0 to 21FFFF.
+//        // we must skip all 00, 01, 02 bytes, so most bytes have 253 values
+//        // we must leave a gap of 01 between all values of the last byte, so
+//        // the last byte has 126 values (3 byte case)
+//        // we shift so that HAN all has the same first primary, for
+//        // compression.
+//        // for the 4 byte case, we make the gap as large as we can fit.
+//        // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
+//        // Four byte forms (most supplementaries) are EF xx xx xx (with a gap
+//        // of LAST2_MULTIPLIER == 14)
+//
+//        int last0 = cp - RuleBasedCollator.IMPLICIT_4BYTE_BOUNDARY_;
+//        if (last0 < 0) {
+//            int last1 = cp / RuleBasedCollator.LAST_COUNT_;
+//            last0 = cp % RuleBasedCollator.LAST_COUNT_;
+//
+//            int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
+//            last1 %= RuleBasedCollator.OTHER_COUNT_;
+//            return RuleBasedCollator.IMPLICIT_BASE_3BYTE_ + (last2 << 24)
+//                   + (last1 << 16)
+//                   + ((last0 * RuleBasedCollator.LAST_MULTIPLIER_) << 8);
+//        }
+//        else {
+//            int last1 = last0 / RuleBasedCollator.LAST_COUNT2_;
+//            last0 %= RuleBasedCollator.LAST_COUNT2_;
+//
+//            int last2 = last1 / RuleBasedCollator.OTHER_COUNT_;
+//            last1 %= RuleBasedCollator.OTHER_COUNT_;
+//
+//            int last3 = last2 / RuleBasedCollator.OTHER_COUNT_;
+//            last2 %= RuleBasedCollator.OTHER_COUNT_;
+//            return RuleBasedCollator.IMPLICIT_BASE_4BYTE_ + (last3 << 24)
+//                   + (last2 << 16) + (last1 << 8)
+// + (last0 * RuleBasedCollator.LAST2_MULTIPLIER_); +// } +// } + +// /** +// * Swapping CJK characters for implicit ces +// * @param cp codepoint CJK +// * @return swapped result +// */ +// private static final int swapCJK(int cp) +// { +// if (cp >= CJK_BASE_) { +// if (cp < CJK_LIMIT_) { +// return cp - CJK_BASE_; +// } +// if (cp < CJK_COMPAT_USED_BASE_) { +// return cp + NON_CJK_OFFSET_; +// } +// if (cp < CJK_COMPAT_USED_LIMIT_) { +// return cp - CJK_COMPAT_USED_BASE_ + (CJK_LIMIT_ - CJK_BASE_); +// } +// if (cp < CJK_B_BASE_) { +// return cp + NON_CJK_OFFSET_; +// } +// if (cp < CJK_B_LIMIT_) { +// return cp; // non-BMP-CJK +// } +// return cp + NON_CJK_OFFSET_; // non-CJK +// } +// if (cp < CJK_A_BASE_) { +// return cp + NON_CJK_OFFSET_; +// } +// if (cp < CJK_A_LIMIT_) { +// return cp - CJK_A_BASE_ + (CJK_LIMIT_ - CJK_BASE_) +// + (CJK_COMPAT_USED_LIMIT_ - CJK_COMPAT_USED_BASE_); +// } +// return cp + NON_CJK_OFFSET_; // non-CJK +// } + +// /** +// * Gets a character from the source string at a given offset. +// * Handles both normal and iterative cases. +// * No error checking and does not access the normalization buffer +// * - caller beware! +// * @param offset offset from current position which character is to be +// * retrieved +// * @return character at current position + offset +// */ +// private char peekCharacter(int offset) +// { +// if (offset != 0) { +// int currentoffset = m_source_.getIndex(); +// m_source_.setIndex(currentoffset + offset); +// char result = (char)m_source_.current(); +// m_source_.setIndex(currentoffset); +// return result; +// } +// else { +// return (char)m_source_.current(); +// } +// } + + /** + * Moves back 1 position in the source string. This is slightly less + * complicated than previousChar in that it doesn't normalize while + * moving back. Boundary checks are not performed. + * This method is to be used with caution, with the assumption that + * moving back one position will not exceed the source limits. 
+ * Use only with nextChar() and never call this API twice in a row without + * nextChar() in the middle. + */ + private void goBackOne() + { + if (m_bufferOffset_ >= 0) { + m_bufferOffset_ --; + } + else { + m_source_.setIndex(m_source_.getIndex() - 1); + } + } + + /** + * Moves forward 1 position in the source string. This is slightly less + * complicated than nextChar in that it doesn't normalize while + * moving back. Boundary checks are not performed. + * This method is to be used with caution, with the assumption that + * moving back one position will not exceed the source limits. + * Use only with previousChar() and never call this API twice in a row + * without previousChar() in the middle. + */ + private void goForwardOne() + { + if (m_bufferOffset_ < 0) { + // we're working on the source and not normalizing. fast path. + // note Thai pre-vowel reordering uses buffer too + m_source_.setIndex(m_source_.getIndex() + 1); + } + else { + // we are in the buffer, buffer offset will never be 0 here + m_bufferOffset_ ++; + } + } +} diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationKey.java b/main/classes/collate/src/com/ibm/icu/text/CollationKey.java new file mode 100644 index 00000000000..fddd61cb0a1 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/CollationKey.java @@ -0,0 +1,624 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +package com.ibm.icu.text; + +/** + *

    A CollationKey represents a String + * under the rules of a specific Collator + * object. Comparing two CollationKeys returns the + * relative order of the Strings they represent.

    + * + *

    Since the rule set of Collators can differ, the + * sort orders of the same string under two different + * Collators might differ. Hence comparing + * CollationKeys generated from different + * Collators can give incorrect results.

    + + *

    Both the method + * CollationKey.compareTo(CollationKey) and the method + * Collator.compare(String, String) compare two strings + * and return their relative order. The performance characteristics + * of these two approaches can differ.

    + * + *

    During the construction of a CollationKey, the + * entire source string is examined and processed into a series of + * bits terminated by a null, that are stored in the CollationKey. + * When CollationKey.compareTo(CollationKey) executes, it + * performs bitwise comparison on the bit sequences. This can incur + * startup cost when creating the CollationKey, but once + * the key is created, binary comparisons are fast. This approach is + * recommended when the same strings are to be compared over and over + * again.

    + * + *

    On the other hand, implementations of + * Collator.compare(String, String) can examine and + * process the strings only until the first characters differing in + * order. This approach is recommended if the strings are to be + * compared only once.

    + * + *

    More information about the composition of the bit sequence can + * be found in the + * + * user guide.

    + * + *

    The following example shows how CollationKeys can be used + * to sort a list of Strings.

    + *
    + *
    + * // Create an array of CollationKeys for the Strings to be sorted.
    + * Collator myCollator = Collator.getInstance();
    + * CollationKey[] keys = new CollationKey[3];
    + * keys[0] = myCollator.getCollationKey("Tom");
    + * keys[1] = myCollator.getCollationKey("Dick");
    + * keys[2] = myCollator.getCollationKey("Harry");
    + * sort( keys );
    + * 
    + * //... + *
    + * // Inside body of sort routine, compare keys this way + * if( keys[i].compareTo( keys[j] ) > 0 ) + * // swap keys[i] and keys[j] + *
    + * //... + *
    + * // Finally, when we've returned from sort. + * System.out.println( keys[0].getSourceString() ); + * System.out.println( keys[1].getSourceString() ); + * System.out.println( keys[2].getSourceString() ); + *
    + *
    + *

    + *

    + * This class is not subclassable + *

    + * @see Collator
+ * @see RuleBasedCollator
+ * @author Syn Wee Quek
+ * @stable ICU 2.8
+ */
+public final class CollationKey implements Comparable
+{
+    // NOTE(review): Comparable appears here without a type argument while
+    // compareTo below takes a CollationKey - confirm against the original
+    // source whether a type argument was lost from this text.
+
+    // public inner classes -------------------------------------------------
+
+    /**
+     * Options that are used in the API CollationKey.getBound() for getting a
+     * CollationKey based on the bound mode requested.
+     * This is a holder of int constants only; it cannot be instantiated.
+     * @stable ICU 2.6
+     */
+    public static final class BoundMode
+    {
+        /*
+         * do not change the values assigned to the members of this enum.
+         * Underlying code depends on them having these numbers
+         */
+
+        /**
+         * Lower bound
+         * @stable ICU 2.6
+         */
+        public static final int LOWER = 0;
+
+        /**
+         * Upper bound that will match strings of exact size
+         * @stable ICU 2.6
+         */
+        public static final int UPPER = 1;
+
+        /**
+         * Upper bound that will match all the strings that have the same
+         * initial substring as the given string
+         * @stable ICU 2.6
+         */
+        public static final int UPPER_LONG = 2;
+
+        /**
+         * Number of bound modes
+         * @stable ICU 2.6
+         */
+        public static final int COUNT = 3;
+
+        /**
+         * Private constructor, prevents instantiation
+         */
+        ///CLOVER:OFF
+        private BoundMode(){}
+        ///CLOVER:ON
+    }
+
+    // public constructor ---------------------------------------------------
+
+    /**
+     * CollationKey constructor.
+     * This constructor is given public access, unlike the JDK version, to
+     * allow access to users extending the Collator class. See
+     * {@link Collator#getCollationKey(String)}.
+     * The key array is stored as passed, without copying, so later changes
+     * to the caller's array are visible through this object.
+     * @param source string this CollationKey is to represent
+     * @param key array of bytes that represent the collation order of argument
+     *            source terminated by a null
+     * @see Collator
+     * @stable ICU 2.8
+     */
+    public CollationKey(String source, byte key[])
+    {
+        m_source_ = source;
+        m_key_ = key;
+        // presumably 0 and -1 mark hash code and length as not yet
+        // computed - confirm against the accessors further down the class
+        m_hashCode_ = 0;
+        m_length_ = -1;
+    }
+
+    /**
+     * CollationKey constructor that forces key to release its internal byte
+     * array for adoption. key will have a null byte array after this
+     * construction.
+     * @param source string this CollationKey is to represent
+     * @param key RawCollationKey object that represents the collation order of
+     *            argument source.
+     * @see Collator
+     * @see RawCollationKey
+     * @stable ICU 2.8
+     */
+    public CollationKey(String source, RawCollationKey key)
+    {
+        m_source_ = source;
+        // adopt whatever byte array releaseBytes() hands over
+        m_key_ = key.releaseBytes();
+        m_hashCode_ = 0;
+        m_length_ = -1;
+    }
+
+    // public getters -------------------------------------------------------
+
+    /**
+     * Return the source string that this CollationKey represents.
+     * @return source string that this CollationKey represents
+     * @stable ICU 2.8
+     */
+    public String getSourceString()
+    {
+        return m_source_;
+    }
+
+    /**
+     *

    Duplicates and returns the value of this CollationKey as a sequence + * of big-endian bytes terminated by a null.

    + * + *

    If two CollationKeys can be legitimately compared, then one can + * compare the byte arrays of each to obtain the same result, e.g. + *

    +     * byte key1[] = collationkey1.toByteArray();
    +     * byte key2[] = collationkey2.toByteArray();
    +     * int key, targetkey;
    +     * int i = 0;
    +     * do {
    +     *       key = key1[i] & 0xFF;
    +     *     targetkey = key2[i] & 0xFF;
    +     *     if (key < targetkey) {
    +     *         System.out.println("String 1 is less than string 2");
    +     *         return;
    +     *     }
    +     *     if (targetkey < key) {
    +     *         System.out.println("String 1 is more than string 2");
    +     *         return;
    +     *     }
    +     *     i ++;
    +     * } while (key != 0 && targetkey != 0);
    +     *
    +     * System.out.println("Strings are equal.");
    +     * 
    + *

    + * @return CollationKey value in a sequence of big-endian byte bytes + * terminated by a null. + * @stable ICU 2.8 + */ + public byte[] toByteArray() + { + int length = 0; + while (true) { + if (m_key_[length] == 0) { + break; + } + length ++; + } + length ++; + byte result[] = new byte[length]; + System.arraycopy(m_key_, 0, result, 0, length); + return result; + } + + // public other methods ------------------------------------------------- + + /** + *

    Compare this CollationKey to another CollationKey. The + * collation rules of the Collator that created this key are + * applied.

    + * + *

    Note: Comparison between CollationKeys + * created by different Collators might return incorrect + * results. See class documentation.

    + * + * @param target target CollationKey + * @return an integer value. If the value is less than zero this CollationKey + * is less than target, if the value is zero they are equal, and + * if the value is greater than zero this CollationKey is greater + * than target. + * @exception NullPointerException is thrown if argument is null. + * @see Collator#compare(String, String) + * @stable ICU 2.8 + */ + public int compareTo(CollationKey target) + { + // walk both keys in lock step; bytes are masked so that they compare + // as unsigned values, and a 0 byte found in both keys at the same + // position ends the comparison as "equal" + int pos = 0; + while (true) { + int left = m_key_[pos] & 0xff; + int right = target.m_key_[pos] & 0xff; + if (left != right) { + return (left < right) ? -1 : 1; + } + if (left == 0) { + return 0; + } + pos ++; + } + } + + /** + *

    Compare this CollationKey and the specified Object for + * equality. The collation rules of the Collator that created + * this key are applied.

    + * + *

    See note in compareTo(CollationKey) for warnings about + * possible incorrect results.

    + * + * @param target the object to compare to. + * @return true if the two keys compare as equal, false otherwise. + * @see #compareTo(CollationKey) + * @exception ClassCastException is thrown when the argument is not + * a CollationKey. NullPointerException is thrown when the argument + * is null. + * @stable ICU 2.8 + */ + public boolean equals(Object target) + { + if (!(target instanceof CollationKey)) { + return false; + } + + return equals((CollationKey)target); + } + + /** + *

    + * Compare this CollationKey and the argument target CollationKey for + * equality. + * The collation + * rules of the Collator object which created these objects are applied. + *

    + *

    + * See note in compareTo(CollationKey) for warnings of incorrect results + *

    + * @param target the CollationKey to compare to. + * @return true if two objects are equal, false otherwise. + * @exception NullPointerException is thrown when the argument is null. + * @stable ICU 2.8 + */ + public boolean equals(CollationKey target) + { + if (this == target) { + return true; + } + if (target == null) { + return false; + } + CollationKey other = target; + int i = 0; + while (true) { + if (m_key_[i] != other.m_key_[i]) { + return false; + } + if (m_key_[i] == 0) { + break; + } + i ++; + } + return true; + } + + /** + *

    Returns a hash code for this CollationKey. The hash value is calculated + * on the key itself, not the String from which the key was created. Thus + * if x and y are CollationKeys, then x.hashCode() == y.hashCode() + * if x.equals(y) is true. This allows language-sensitive comparison in a + * hash table. + *

    + * @return the hash value. + * @stable ICU 2.8 + */ + public int hashCode() + { + if (m_hashCode_ == 0) { + if (m_key_ == null) { + m_hashCode_ = 1; + } + else { + int size = m_key_.length >> 1; + StringBuilder key = new StringBuilder(size); + int i = 0; + while (m_key_[i] != 0 && m_key_[i + 1] != 0) { + key.append((char)((m_key_[i] << 8) | m_key_[i + 1])); + i += 2; + } + if (m_key_[i] != 0) { + key.append((char)(m_key_[i] << 8)); + } + m_hashCode_ = key.toString().hashCode(); + } + } + return m_hashCode_; + } + + /** + *

    + * Produce a bound for the sort order of a given collation key and a + * strength level. This API does not attempt to find a bound for the + * CollationKey String representation, hence null will be returned in its + * place. + *

    + *

    + * Resulting bounds can be used to produce a range of strings that are + * between upper and lower bounds. For example, if bounds are produced + * for a sortkey of string "smith", strings between upper and lower + * bounds with primary strength would include "Smith", "SMITH", "sMiTh". + *

    + *

    + * There are two upper bounds that can be produced. If BoundMode.UPPER + * is produced, strings matched would be as above. However, if a bound + * is produced using BoundMode.UPPER_LONG is used, the above example will + * also match "Smithsonian" and similar. + *

    + *

    + * For more on usage, see example in test procedure + * + * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds. + * + *

    + *

    + * Collation keys produced may be compared using the compare API. + *

    + * @param boundType Mode of bound required. It can be BoundMode.LOWER, which + * produces a lower inclusive bound, BoundMode.UPPER, that + * produces upper bound that matches strings of the same + * length or BoundMode.UPPER_LONG that matches strings that + * have the same starting substring as the source string. + * @param noOfLevels Strength levels required in the resulting bound + * (for most uses, the recommended value is PRIMARY). This + * strength should be less than the maximum strength of + * this CollationKey. + * See users guide for explanation on the strength levels a + * collation key can have. + * @return the result bounded CollationKey with a valid sort order but + * a null String representation. + * @exception IllegalArgumentException thrown when the strength level + * requested is higher than or equal to the strength in this + * CollationKey. + * In the case of an Exception, information + * about the maximum strength to use will be returned in the + * Exception. The user can then call getBound() again with the + * appropriate strength. + * @see CollationKey + * @see CollationKey.BoundMode + * @see Collator#PRIMARY + * @see Collator#SECONDARY + * @see Collator#TERTIARY + * @see Collator#QUATERNARY + * @see Collator#IDENTICAL + * @stable ICU 2.6 + */ + public CollationKey getBound(int boundType, int noOfLevels) + { + // Scan the string until we skip enough of the key OR reach the end of + // the key + int offset = 0; + int keystrength = Collator.PRIMARY; + + if (noOfLevels > Collator.PRIMARY) { + while (offset < m_key_.length && m_key_[offset] != 0) { + if (m_key_[offset ++] + == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) { + keystrength ++; + noOfLevels --; + if (noOfLevels == Collator.PRIMARY + || offset == m_key_.length || m_key_[offset] == 0) { + offset --; + break; + } + } + } + } + + if (noOfLevels > 0) { + throw new IllegalArgumentException( + "Source collation key has only " + + keystrength + + " strength level. 
Call getBound() again " + + " with noOfLevels < " + keystrength); + } + + // READ ME: this code assumes that the values for BoundMode variables + // will not changes. They are set so that the enum value corresponds to + // the number of extra bytes each bound type needs. + byte resultkey[] = new byte[offset + boundType + 1]; + System.arraycopy(m_key_, 0, resultkey, 0, offset); + switch (boundType) { + case BoundMode.LOWER: // = 0 + // Lower bound just gets terminated. No extra bytes + break; + case BoundMode.UPPER: // = 1 + // Upper bound needs one extra byte + resultkey[offset ++] = 2; + break; + case BoundMode.UPPER_LONG: // = 2 + // Upper long bound needs two extra bytes + resultkey[offset ++] = (byte)0xFF; + resultkey[offset ++] = (byte)0xFF; + break; + default: + throw new IllegalArgumentException( + "Illegal boundType argument"); + } + resultkey[offset ++] = 0; + return new CollationKey(null, resultkey); + } + + + + /** + *

    + * Merges this CollationKey with another. Only the sorting order of the + * CollationKeys will be merged. This API does not attempt to merge the + * String representations of the CollationKeys, hence null will be returned + * as the String representation. + *

    + *

    + * The strength levels are merged with their corresponding counterparts + * (PRIMARIES with PRIMARIES, SECONDARIES with SECONDARIES etc.). + *

    + *

    + * The merged String representation of the result CollationKey will be a + * concatenation of the String representations of the 2 source + * CollationKeys. + *

    + *

    + * Between the values from the same level a separator is inserted. + * example (uncompressed): + *

     
    +     * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
    +     * will be merged as 
    +     * 191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
    +     * 
    + *

    + *

    + * This allows for concatenating of first and last names for sorting, among + * other things. + *

    + *

    + * @param source CollationKey to merge with + * @return a CollationKey that contains the valid merged sorting order + * with a null String representation, + * i.e. new CollationKey(null, merge_sort_order) + * @exception IllegalArgumentException thrown if source CollationKey + * argument is null or of 0 length. + * @stable ICU 2.6 + */ + public CollationKey merge(CollationKey source) + { + // check arguments + if (source == null || source.getLength() == 0) { + throw new IllegalArgumentException( + "CollationKey argument can not be null or of 0 length"); + } + + getLength(); // gets the length of this sort key + int sourcelength = source.getLength(); + // 1 extra for the last strength that has no separators + byte result[] = new byte[m_length_ + sourcelength + 2]; + + // merge the sort keys with the same number of levels + int rindex = 0; + int index = 0; + int sourceindex = 0; + while (true) { + // while both have another level + // copy level from src1 not including 00 or 01 + // unsigned issues: negative bytes are values >= 0x80 and belong + // to the level content, hence the "< 0" test + while (m_key_[index] < 0 || m_key_[index] >= MERGE_SEPERATOR_) { + result[rindex ++] = m_key_[index ++]; + } + + // add a 02 merge separator + result[rindex ++] = MERGE_SEPERATOR_; + + // copy level from src2 not including 00 or 01 + while (source.m_key_[sourceindex] < 0 + || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) { + result[rindex ++] = source.m_key_[sourceindex ++]; + } + + // if both sort keys have another level, then add a 01 level + // separator and continue + if (m_key_[index] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_ + && source.m_key_[sourceindex] + == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) { + ++ index; + ++ sourceindex; + result[rindex ++] = RuleBasedCollator.SORT_LEVEL_TERMINATOR_; + } + else { + break; + } + } + + // here, at least one sort key is finished now, but the other one + // might have some contents left from containing more levels; + // that contents is just appended to the result + if (m_key_[index] != 0) { +
System.arraycopy(m_key_, index, result, rindex, + m_length_ - index); + } + else if (source.m_key_[sourceindex] != 0) { + System.arraycopy(source.m_key_, sourceindex, result, rindex, + source.m_length_ - sourceindex); + } + // the last slot always holds the terminating null + result[result.length - 1] = 0; + + // trust that neither sort key contained illegally embedded zero bytes + return new CollationKey(null, result); + } + + // private data members ------------------------------------------------- + + /** + * Sequence of bytes that represents the sort key + */ + private byte m_key_[]; + + /** + * Source string this CollationKey represents + */ + private String m_source_; + + /** + * Hash code for the key + */ + private int m_hashCode_; + /** + * Cached length of the key in bytes, not counting the terminating null; + * negative until getLength() has computed it + */ + private int m_length_; + /** + * Collation key merge separator + */ + private static final int MERGE_SEPERATOR_ = 2; + + // private methods ------------------------------------------------------ + + /** + * Gets the length of the CollationKey, i.e. the index of the first null + * byte in the key, or the length of the byte array if it holds no null + * byte. The value is computed once and cached. + * @return length of the CollationKey + */ + private int getLength() + { + if (m_length_ >= 0) { + return m_length_; + } + int length = m_key_.length; + for (int index = 0; index < length; index ++) { + if (m_key_[index] == 0) { + length = index; + break; + } + } + m_length_ = length; + return m_length_; + } +} diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java b/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java new file mode 100644 index 00000000000..5a7eb53eefa --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java @@ -0,0 +1,4234 @@ +/** + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved.
* + ******************************************************************************* + */ +package com.ibm.icu.text; + +import java.io.IOException; +import java.text.ParseException; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Vector; + +import com.ibm.icu.impl.IntTrieBuilder; +import com.ibm.icu.impl.Norm2AllModes; +import com.ibm.icu.impl.Normalizer2Impl; +import com.ibm.icu.impl.TrieBuilder; +import com.ibm.icu.impl.TrieIterator; +import com.ibm.icu.impl.UCharacterProperty; +import com.ibm.icu.impl.Utility; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UCharacterCategory; +import com.ibm.icu.util.RangeValueIterator; +import com.ibm.icu.util.VersionInfo; + +/** + * Class for building a collator from a list of collation rules. This class is + * uses CollationRuleParser + * + * @author Syn Wee Quek + * @since release 2.2, June 11 2002 + */ +final class CollationParsedRuleBuilder { + // package private constructors ------------------------------------------ + + /** + * Constructor + * + * @param rules + * collation rules + * @exception ParseException + * thrown when argument rules have an invalid syntax + */ + CollationParsedRuleBuilder(String rules) throws ParseException { + m_parser_ = new CollationRuleParser(rules); + m_parser_.assembleTokenList(); + m_utilColEIter_ = RuleBasedCollator.UCA_ + .getCollationElementIterator(""); + } + + // package private inner classes ----------------------------------------- + + /** + * Inverse UCA wrapper + */ + static class InverseUCA { + // package private constructor --------------------------------------- + + InverseUCA() { + } + + // package private data member --------------------------------------- + + /** + * Array list of characters + */ + int m_table_[]; + /** + * Array list of continuation characters + */ + char m_continuations_[]; + + /** + * UCA version of inverse UCA table + */ + VersionInfo m_UCA_version_; + + // package private method 
-------------------------------------------- + + /** + * Returns the previous inverse ces of the argument ces + * + * @param ce + * ce to test + * @param contce + * continuation ce to test + * @param strength + * collation strength + * @param prevresult + * an array to store the return results previous inverse ce + * and previous inverse continuation ce + * @return result of the inverse ce + */ + final int getInversePrevCE(int ce, int contce, int strength, + int prevresult[]) { + int result = findInverseCE(ce, contce); + + if (result < 0) { + prevresult[0] = CollationElementIterator.NULLORDER; + return -1; + } + + ce &= STRENGTH_MASK_[strength]; + contce &= STRENGTH_MASK_[strength]; + + prevresult[0] = ce; + prevresult[1] = contce; + + while ((prevresult[0] & STRENGTH_MASK_[strength]) == ce + && (prevresult[1] & STRENGTH_MASK_[strength]) == contce + && result > 0) { + // this condition should prevent falling off the edge of the + // world + // here, we end up in a singularity - zero + prevresult[0] = m_table_[3 * (--result)]; + prevresult[1] = m_table_[3 * result + 1]; + } + return result; + } + + final int getCEStrengthDifference(int CE, int contCE, int prevCE, + int prevContCE) { + int strength = Collator.TERTIARY; + while (((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) || (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength])) + && (strength != 0)) { + strength--; + } + return strength; + } + + private int compareCEs(int source0, int source1, int target0, + int target1) { + int s1 = source0, s2, t1 = target0, t2; + if (RuleBasedCollator.isContinuation(source1)) { + s2 = source1; + } else { + s2 = 0; + } + if (RuleBasedCollator.isContinuation(target1)) { + t2 = target1; + } else { + t2 = 0; + } + + int s = 0, t = 0; + if (s1 == t1 && s2 == t2) { + return 0; + } + s = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >>> 16); + t = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >>> 16); + if (s == t) { + s = (s1 & 0x0000FF00) | (s2 & 
0x0000FF00) >> 8; + t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00) >> 8; + if (s == t) { + s = (s1 & 0x000000FF) << 8 | (s2 & 0x000000FF); + t = (t1 & 0x000000FF) << 8 | (t2 & 0x000000FF); + return Utility.compareUnsigned(s, t); + } else { + return Utility.compareUnsigned(s, t); + } + } else { + return Utility.compareUnsigned(s, t); + } + } + + /** + * Finding the inverse CE of the argument CEs + * + * @param ce + * CE to be tested + * @param contce + * continuation CE + * @return inverse CE + */ + int findInverseCE(int ce, int contce) { + int bottom = 0; + int top = m_table_.length / 3; + int result = 0; + + while (bottom < top - 1) { + result = (top + bottom) >> 1; + int first = m_table_[3 * result]; + int second = m_table_[3 * result + 1]; + int comparison = compareCEs(first, second, ce, contce); + if (comparison > 0) { + top = result; + } else if (comparison < 0) { + bottom = result; + } else { + break; + } + } + + return result; + } + + /** + * Getting gap offsets in the inverse UCA + * + * @param listheader + * parsed token lists + * @exception Exception + * thrown when error occurs while finding the collation + * gaps + */ + void getInverseGapPositions( + CollationRuleParser.TokenListHeader listheader) + throws Exception { + // reset all the gaps + CollationRuleParser.Token token = listheader.m_first_; + int tokenstrength = token.m_strength_; + + for (int i = 0; i < 3; i++) { + listheader.m_gapsHi_[3 * i] = 0; + listheader.m_gapsHi_[3 * i + 1] = 0; + listheader.m_gapsHi_[3 * i + 2] = 0; + listheader.m_gapsLo_[3 * i] = 0; + listheader.m_gapsLo_[3 * i + 1] = 0; + listheader.m_gapsLo_[3 * i + 2] = 0; + listheader.m_numStr_[i] = 0; + listheader.m_fStrToken_[i] = null; + listheader.m_lStrToken_[i] = null; + listheader.m_pos_[i] = -1; + } + + if ((listheader.m_baseCE_ >>> 24) >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_ + && (listheader.m_baseCE_ >>> 24) <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) { + // implicits - + listheader.m_pos_[0] = 
0; + int t1 = listheader.m_baseCE_; + int t2 = listheader.m_baseContCE_; + listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY); + listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY); + listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY); + int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_ + | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16; + primaryCE = RuleBasedCollator.impCEGen_ + .getImplicitFromRaw(RuleBasedCollator.impCEGen_ + .getRawFromImplicit(primaryCE) + 1); + + t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505; + t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ + | RuleBasedCollator.CE_CONTINUATION_MARKER_; + + // if (listheader.m_baseCE_ < 0xEF000000) { + // // first implicits have three byte primaries, with a gap of + // // one so we esentially need to add 2 to the top byte in + // // listheader.m_baseContCE_ + // t2 += 0x02000000; + // } + // else { + // // second implicits have four byte primaries, with a gap of + // // IMPLICIT_LAST2_MULTIPLIER_ + // // Now, this guy is not really accessible here, so until we + // // find a better way to pass it around, assume that the gap + // is 1 + // t2 += 0x00020000; + // } + listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY); + listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY); + listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY); + } else if (listheader.m_indirect_ == true + && listheader.m_nextCE_ != 0) { + listheader.m_pos_[0] = 0; + int t1 = listheader.m_baseCE_; + int t2 = listheader.m_baseContCE_; + listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY); + listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY); + listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY); + t1 = listheader.m_nextCE_; + t2 = listheader.m_nextContCE_; + listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY); + listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY); + listheader.m_gapsHi_[2] = mergeCE(t1, t2, 
Collator.TERTIARY); + } else { + while (true) { + if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) { + listheader.m_pos_[tokenstrength] = getInverseNext( + listheader, tokenstrength); + if (listheader.m_pos_[tokenstrength] >= 0) { + listheader.m_fStrToken_[tokenstrength] = token; + } else { + // The CE must be implicit, since it's not in the + // table + // Error + throw new Exception("Internal program error"); + } + } + + while (token != null && token.m_strength_ >= tokenstrength) { + if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) { + listheader.m_lStrToken_[tokenstrength] = token; + } + token = token.m_next_; + } + if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) { + // check if previous interval is the same and merge the + // intervals if it is so + if (listheader.m_pos_[tokenstrength] == listheader.m_pos_[tokenstrength + 1]) { + listheader.m_fStrToken_[tokenstrength] = listheader.m_fStrToken_[tokenstrength + 1]; + listheader.m_fStrToken_[tokenstrength + 1] = null; + listheader.m_lStrToken_[tokenstrength + 1] = null; + listheader.m_pos_[tokenstrength + 1] = -1; + } + } + if (token != null) { + tokenstrength = token.m_strength_; + } else { + break; + } + } + for (int st = 0; st < 3; st++) { + int pos = listheader.m_pos_[st]; + if (pos >= 0) { + int t1 = m_table_[3 * pos]; + int t2 = m_table_[3 * pos + 1]; + listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24 + | (t2 & 0x3f) << 16; + // pos --; + // t1 = m_table_[3 * pos]; + // t2 = m_table_[3 * pos + 1]; + t1 = listheader.m_baseCE_; + t2 = listheader.m_baseContCE_; + + listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24 + | (t2 & 0x3f) << 16; + } + } + } + } + + /** + * Gets the next CE in the inverse table + * + * @param 
listheader + * token list header + * @param strength + * collation strength + * @return next ce + */ + private final int getInverseNext( + CollationRuleParser.TokenListHeader listheader, int strength) { + int ce = listheader.m_baseCE_; + int secondce = listheader.m_baseContCE_; + int result = findInverseCE(ce, secondce); + + if (result < 0) { + return -1; + } + + ce &= STRENGTH_MASK_[strength]; + secondce &= STRENGTH_MASK_[strength]; + + int nextce = ce; + int nextcontce = secondce; + + while ((nextce & STRENGTH_MASK_[strength]) == ce + && (nextcontce & STRENGTH_MASK_[strength]) == secondce) { + nextce = m_table_[3 * (++result)]; + nextcontce = m_table_[3 * result + 1]; + } + + listheader.m_nextCE_ = nextce; + listheader.m_nextContCE_ = nextcontce; + + return result; + } + } + + // package private data members ------------------------------------------ + + /** + * Inverse UCA, instantiate only when required + */ + static final InverseUCA INVERSE_UCA_; + + /** + * UCA and Inverse UCA version do not match + */ + private static final String INV_UCA_VERSION_MISMATCH_ = "UCA versions of UCA and inverse UCA should match"; + + /** + * UCA and Inverse UCA version do not match + */ + private static final String UCA_NOT_INSTANTIATED_ = "UCA is not instantiated!"; + + /** + * Initializing the inverse UCA + */ + static { + InverseUCA temp = null; + try { + temp = CollatorReader.getInverseUCA(); + } catch (IOException e) { + } + /* + * try { String invdat = "/com/ibm/icu/impl/data/invuca.icu"; + * InputStream i = + * CollationParsedRuleBuilder.class.getResourceAsStream(invdat); + * BufferedInputStream b = new BufferedInputStream(i, 110000); + * INVERSE_UCA_ = CollatorReader.readInverseUCA(b); b.close(); + * i.close(); } catch (Exception e) { e.printStackTrace(); throw new + * RuntimeException(e.getMessage()); } + */ + + if (temp != null && RuleBasedCollator.UCA_ != null) { + if (!temp.m_UCA_version_ + .equals(RuleBasedCollator.UCA_.m_UCA_version_)) { + throw new 
RuntimeException(INV_UCA_VERSION_MISMATCH_); + } + } else { + throw new RuntimeException(UCA_NOT_INSTANTIATED_); + } + + INVERSE_UCA_ = temp; + } + + // package private methods ----------------------------------------------- + + /** + * Parse and sets the collation rules in the argument collator + * + * @param collator + * to set + * @exception Exception + * thrown when internal program error occurs + */ + void setRules(RuleBasedCollator collator) throws Exception { + if (m_parser_.m_resultLength_ > 0 || m_parser_.m_removeSet_ != null) { + // we have a set of rules, let's make something of it + assembleTailoringTable(collator); + } else { // no rules, but no error either must be only options + // We will init the collator from UCA + collator.setWithUCATables(); + } + // And set only the options + m_parser_.setDefaultOptionsInCollator(collator); + } + + private void copyRangeFromUCA(BuildTable t, int start, int end) { + int u = 0; + for (u = start; u <= end; u++) { + // if ((CE = ucmpe32_get(t.m_mapping, u)) == UCOL_NOT_FOUND + int CE = t.m_mapping_.getValue(u); + if (CE == CE_NOT_FOUND_ + // this test is for contractions that are missing the starting + // element. Looks like latin-1 should be done before + // assembling the table, even if it results in more false + // closure elements + || (isContractionTableElement(CE) && getCE( + t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) { + // m_utilElement_.m_uchars_ = str.toString(); + m_utilElement_.m_uchars_ = UCharacter.toString(u); + m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; + m_utilElement_.m_prefix_ = 0; + m_utilElement_.m_CELength_ = 0; + m_utilElement_.m_prefixChars_ = null; + m_utilColEIter_.setText(m_utilElement_.m_uchars_); + while (CE != CollationElementIterator.NULLORDER) { + CE = m_utilColEIter_.next(); + if (CE != CollationElementIterator.NULLORDER) { + m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE; + } + } + addAnElement(t, m_utilElement_); + } + } + } + + /** + * 2. 
Eliminate the negative lists by doing the following for each non-null + * negative list: o if previousCE(baseCE, strongestN) != some ListHeader X's + * baseCE, create new ListHeader X o reverse the list, add to the end of X's + * positive list. Reset the strength of the first item you add, based on the + * stronger strength levels of the two lists. + * + * 3. For each ListHeader with a non-null positive list: o Find all + * character strings with CEs between the baseCE and the next/previous CE, + * at the strength of the first token. Add these to the tailoring. ? That + * is, if UCA has ... x <<< X << x' <<< X' < y ..., and the tailoring has & + * x < z... ? Then we change the tailoring to & x <<< X << x' <<< X' < z ... + * + * It is possible that this part should be done even while constructing list + * The problem is that it is unknown what is going to be the strongest + * weight. So we might as well do it here o Allocate CEs for each token in + * the list, based on the total number N of the largest level difference, + * and the gap G between baseCE and nextCE at that level. The relation * + * between the last item and nextCE is the same as the strongest strength. o + * Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) ? There are 3 + * primary items: a, d, e. Fit them into the primary gap. Then fit b and c + * into the secondary gap between a and d, then fit q into the tertiary gap + * between b and c. o Example: baseCE << b <<< q << c * nextCE(X,2) ? There + * are 2 secondary items: b, c. Fit them into the secondary gap. Then fit q + * into the tertiary gap between b and c. o When incrementing primary + * values, we will not cross high byte boundaries except where there is only + * a single-byte primary. That is to ensure that the script reordering will + * continue to work. 
+ * + * @param collator + * the rule based collator to update + * @exception Exception + * thrown when internal program error occurs + */ + void assembleTailoringTable(RuleBasedCollator collator) throws Exception { + + for (int i = 0; i < m_parser_.m_resultLength_; i++) { + // now we need to generate the CEs + // We stuff the initial value in the buffers, and increase the + // appropriate buffer according to strength + if (m_parser_.m_listHeader_[i].m_first_ != null) { + // if there are any elements + // due to the way parser works, subsequent tailorings + // may remove all the elements from a sequence, therefore + // leaving an empty tailoring sequence. + initBuffers(m_parser_.m_listHeader_[i]); + } + } + + if (m_parser_.m_variableTop_ != null) { + // stuff the variable top value + m_parser_.m_options_.m_variableTopValue_ = m_parser_.m_variableTop_.m_CE_[0] >>> 16; + // remove it from the list + if (m_parser_.m_variableTop_.m_listHeader_.m_first_ == m_parser_.m_variableTop_) { // first + // in + // list + m_parser_.m_variableTop_.m_listHeader_.m_first_ = m_parser_.m_variableTop_.m_next_; + } + if (m_parser_.m_variableTop_.m_listHeader_.m_last_ == m_parser_.m_variableTop_) { + // first in list + m_parser_.m_variableTop_.m_listHeader_.m_last_ = m_parser_.m_variableTop_.m_previous_; + } + if (m_parser_.m_variableTop_.m_next_ != null) { + m_parser_.m_variableTop_.m_next_.m_previous_ = m_parser_.m_variableTop_.m_previous_; + } + if (m_parser_.m_variableTop_.m_previous_ != null) { + m_parser_.m_variableTop_.m_previous_.m_next_ = m_parser_.m_variableTop_.m_next_; + } + } + + BuildTable t = new BuildTable(m_parser_); + + // After this, we have assigned CE values to all regular CEs now we + // will go through list once more and resolve expansions, make + // UCAElements structs and add them to table + for (int i = 0; i < m_parser_.m_resultLength_; i++) { + // now we need to generate the CEs + // We stuff the initial value in the buffers, and increase the + // appropriate 
buffer according to strength */ + createElements(t, m_parser_.m_listHeader_[i]); + } + + m_utilElement_.clear(); + StringBuilder str = new StringBuilder(); + + // add latin-1 stuff + copyRangeFromUCA(t, 0, 0xFF); + + // add stuff for copying + if (m_parser_.m_copySet_ != null) { + int i = 0; + for (i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) { + copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i), + m_parser_.m_copySet_.getRangeEnd(i)); + } + } + + // copy contractions from the UCA - this is felt mostly for cyrillic + char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_; + int offset = 0; + while (conts[offset] != 0) { + // tailoredCE = ucmpe32_get(t.m_mapping, *conts); + int tailoredCE = t.m_mapping_.getValue(conts[offset]); + Elements prefixElm = null; + if (tailoredCE != CE_NOT_FOUND_) { + boolean needToAdd = true; + if (isContractionTableElement(tailoredCE)) { + if (isTailored(t.m_contractions_, tailoredCE, conts, + offset + 1) == true) { + needToAdd = false; + } + } + if (!needToAdd && isPrefix(tailoredCE) + && conts[offset + 1] == 0) { + // pre-context character in UCA + // The format for pre-context character is + // conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP + Elements elm = new Elements(); + elm.m_cPoints_ = m_utilElement_.m_uchars_; + elm.m_CELength_ = 0; + elm.m_uchars_ = UCharacter.toString(conts[offset]); + elm.m_prefixChars_ = UCharacter.toString(conts[offset + 2]); + elm.m_prefix_ = 0; // TODO(claireho) : confirm! + prefixElm = t.m_prefixLookup_.get(elm); + if ((prefixElm == null) + || (prefixElm.m_prefixChars_.charAt(0) != conts[offset + 2])) { + needToAdd = true; + } + } + if (m_parser_.m_removeSet_ != null + && m_parser_.m_removeSet_.contains(conts[offset])) { + needToAdd = false; + } + + if (needToAdd == true) { + // we need to add if this contraction is not tailored. 
+ if (conts[offset + 1] != 0) { // not precontext + m_utilElement_.m_prefix_ = 0; + m_utilElement_.m_prefixChars_ = null; + m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; + str.delete(0, str.length()); + str.append(conts[offset]); + str.append(conts[offset + 1]); + if (conts[offset + 2] != 0) { + str.append(conts[offset + 2]); + } + m_utilElement_.m_uchars_ = str.toString(); + m_utilElement_.m_CELength_ = 0; + m_utilColEIter_.setText(m_utilElement_.m_uchars_); + } else { // add a pre-context element + int preKeyLen = 0; + str.delete(0, str.length()); // clean up + m_utilElement_.m_cPoints_ = UCharacter + .toString(conts[offset]); + m_utilElement_.m_CELength_ = 0; + m_utilElement_.m_uchars_ = UCharacter + .toString(conts[offset]); + m_utilElement_.m_prefixChars_ = UCharacter + .toString(conts[offset + 2]); + if (prefixElm == null) { + m_utilElement_.m_prefix_ = 0; + } else { // TODO (claireho): confirm! + m_utilElement_.m_prefix_ = m_utilElement_.m_prefix_; + // m_utilElement_.m_prefix_= prefixElm.m_prefix_; + } + m_utilColEIter_.setText(m_utilElement_.m_prefixChars_); + while (m_utilColEIter_.next() != CollationElementIterator.NULLORDER) { + // count number of keys for pre-context char. + preKeyLen++; + } + str.append(conts[offset + 2]); + str.append(conts[offset]); + m_utilColEIter_.setText(str.toString()); + // Skip the keys for prefix character, then copy the + // rest to el. 
+ while ((preKeyLen-- > 0) + && m_utilColEIter_.next() != CollationElementIterator.NULLORDER) { + continue; + } + + } + while (true) { + int CE = m_utilColEIter_.next(); + if (CE != CollationElementIterator.NULLORDER) { + m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE; + } else { + break; + } + } + addAnElement(t, m_utilElement_); + } + } else if (m_parser_.m_removeSet_ != null + && m_parser_.m_removeSet_.contains(conts[offset])) { + copyRangeFromUCA(t, conts[offset], conts[offset]); + } + + offset += 3; + } + + // Add completely ignorable elements + processUCACompleteIgnorables(t); + + // canonical closure + canonicalClosure(t); + + // still need to produce compatibility closure + assembleTable(t, collator); + } + + // private inner classes ------------------------------------------------- + + @SuppressWarnings("unused") + private static class CEGenerator { + // package private data members -------------------------------------- + + WeightRange m_ranges_[]; + int m_rangesLength_; + int m_byteSize_; + int m_start_; + int m_limit_; + int m_maxCount_; + int m_count_; + int m_current_; + int m_fLow_; // forbidden Low + int m_fHigh_; // forbidden High + + // package private constructor --------------------------------------- + + CEGenerator() { + m_ranges_ = new WeightRange[7]; + for (int i = 6; i >= 0; i--) { + m_ranges_[i] = new WeightRange(); + } + } + } + + private static class WeightRange implements Comparable { + // public methods ---------------------------------------------------- + + /** + * Compares this object with target + * + * @param target object to compare with + * @return 0 if equals, 1 if this is > target, -1 otherwise + */ + public int compareTo(WeightRange target) { + if (this == target) { + return 0; + } + int tstart = target.m_start_; + if (m_start_ == tstart) { + return 0; + } + if (m_start_ > tstart) { + return 1; + } + return -1; + } + + /** + * Initialize + */ + public void clear() { + m_start_ = 0; + m_end_ = 0; + m_length_ = 0; + 
m_count_ = 0; + m_length2_ = 0; + m_count2_ = 0; + } + + // package private data members -------------------------------------- + + int m_start_; + int m_end_; + int m_length_; + int m_count_; + int m_length2_; + int m_count2_; + + // package private constructor --------------------------------------- + + WeightRange() { + clear(); + } + + /** + * Copy constructor. Cloneable is troublesome, needs to check for + * exception + * + * @param source + * to clone + */ + WeightRange(WeightRange source) { + m_start_ = source.m_start_; + m_end_ = source.m_end_; + m_length_ = source.m_length_; + m_count_ = source.m_count_; + m_length2_ = source.m_length2_; + m_count2_ = source.m_count2_; + } + } + + private static class MaxJamoExpansionTable { + // package private data members -------------------------------------- + + Vector m_endExpansionCE_; + // vector of booleans + Vector m_isV_; + byte m_maxLSize_; + byte m_maxVSize_; + byte m_maxTSize_; + + // package private constructor --------------------------------------- + + MaxJamoExpansionTable() { + m_endExpansionCE_ = new Vector(); + m_isV_ = new Vector(); + m_endExpansionCE_.add(new Integer(0)); + m_isV_.add(Boolean.FALSE); + m_maxLSize_ = 1; + m_maxVSize_ = 1; + m_maxTSize_ = 1; + } + + MaxJamoExpansionTable(MaxJamoExpansionTable table) { + m_endExpansionCE_ = new Vector(table.m_endExpansionCE_); + m_isV_ = new Vector(table.m_isV_); + m_maxLSize_ = table.m_maxLSize_; + m_maxVSize_ = table.m_maxVSize_; + m_maxTSize_ = table.m_maxTSize_; + } + } + + private static class MaxExpansionTable { + // package private constructor -------------------------------------- + + MaxExpansionTable() { + m_endExpansionCE_ = new Vector(); + m_expansionCESize_ = new Vector(); + m_endExpansionCE_.add(new Integer(0)); + m_expansionCESize_.add(new Byte((byte) 0)); + } + + MaxExpansionTable(MaxExpansionTable table) { + m_endExpansionCE_ = new Vector(table.m_endExpansionCE_); + m_expansionCESize_ = new Vector(table.m_expansionCESize_); + } + + // 
package private data member -------------------------------------- + + Vector m_endExpansionCE_; + Vector m_expansionCESize_; + } + + private static class BasicContractionTable { + // package private constructors ------------------------------------- + + BasicContractionTable() { + m_CEs_ = new Vector(); + m_codePoints_ = new StringBuilder(); + } + + // package private data members ------------------------------------- + + StringBuilder m_codePoints_; + Vector m_CEs_; + } + + private static class ContractionTable { + // package private constructor -------------------------------------- + + /** + * Builds a contraction table + * + * @param mapping + */ + ContractionTable(IntTrieBuilder mapping) { + m_mapping_ = mapping; + m_elements_ = new Vector(); + m_CEs_ = new Vector(); + m_codePoints_ = new StringBuilder(); + m_offsets_ = new Vector(); + m_currentTag_ = CE_NOT_FOUND_TAG_; + } + + /** + * Copies a contraction table. Not all data will be copied into their + * own object. + * + * @param table + */ + ContractionTable(ContractionTable table) { + m_mapping_ = table.m_mapping_; + m_elements_ = new Vector(table.m_elements_); + m_codePoints_ = new StringBuilder(table.m_codePoints_); + m_CEs_ = new Vector(table.m_CEs_); + m_offsets_ = new Vector(table.m_offsets_); + m_currentTag_ = table.m_currentTag_; + } + + // package private data members ------------------------------------ + + /** + * Vector of BasicContractionTable + */ + Vector m_elements_; + IntTrieBuilder m_mapping_; + StringBuilder m_codePoints_; + Vector m_CEs_; + Vector m_offsets_; + int m_currentTag_; + } + + /** + * Private class for combining mark table. The table is indexed by the class + * value(0-255). + */ + @SuppressWarnings("unused") + private static class CombinClassTable { + /** + * accumulated numbers of combining marks. + */ + int[] index = new int[256]; + + /** + * code point array for combining marks. + */ + char[] cPoints; + + /** + * size of cPoints. 
+ */ + int size; + + // constructor + CombinClassTable() { + cPoints = null; + size = 0; + pos = 0; + curClass = 1; + } + + /** + * Copy the combining mark table from ccc and index in compact way. + * + * @param cps + * : code point array + * @param size + * : size of ccc + * @param index + * : index of combining classes(0-255) + */ + void generate(char[] cps, int numOfCM, int[] ccIndex) { + int count = 0; + + cPoints = new char[numOfCM]; + for (int i = 0; i < 256; i++) { + for (int j = 0; j < ccIndex[i]; j++) { + cPoints[count++] = cps[(i << 8) + j]; + } + index[i] = count; + } + size = count; + } + + /** + * Get first CM(combining mark) with the combining class value cClass. + * + * @param cClass + * : combining class value. + * @return combining mark codepoint or 0 if no combining make with class + * value cClass + */ + char GetFirstCM(int cClass) { + curClass = cClass; + if (cPoints == null || cClass == 0 + || index[cClass] == index[cClass - 1]) { + return 0; + } + pos = 1; + return cPoints[index[cClass - 1]]; + } + + /** + * Get next CM(combining mark) with the combining class value cClass. + * Return combining mark codepoint or 0 if no next CM. + */ + char GetNextCM() { + if (cPoints == null + || index[curClass] == (index[curClass - 1] + pos)) { + return 0; + } + return cPoints[index[curClass - 1] + (pos++)]; + } + + // private data members + int pos; + int curClass; + } + + private static final class BuildTable implements TrieBuilder.DataManipulate { + // package private methods ------------------------------------------ + + /** + * For construction of the Trie tables. Has to be labeled public + * + * @param cp The value of the code point. + * @param offset The value of the offset. 
+ * @return data offset or 0 + */ + public int getFoldedValue(int cp, int offset) { + int limit = cp + 0x400; + while (cp < limit) { + int value = m_mapping_.getValue(cp); + boolean inBlockZero = m_mapping_.isInZeroBlock(cp); + int tag = getCETag(value); + if (inBlockZero == true) { + cp += TrieBuilder.DATA_BLOCK_LENGTH; + } else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_ || tag == CE_NOT_FOUND_TAG_))) { + // These are values that are starting in either UCA + // (IMPLICIT_TAG) or in the tailorings (NOT_FOUND_TAG). + // Presence of these tags means that there is nothing in + // this position and that it should be skipped. + return RuleBasedCollator.CE_SPECIAL_FLAG_ + | (CE_SURROGATE_TAG_ << 24) | offset; + } else { + ++cp; + } + } + return 0; + } + + // package private constructor -------------------------------------- + + /** + * Returns a table + */ + BuildTable(CollationRuleParser parser) { + m_collator_ = new RuleBasedCollator(); + m_collator_.setWithUCAData(); + MaxExpansionTable maxet = new MaxExpansionTable(); + MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable(); + m_options_ = parser.m_options_; + m_expansions_ = new Vector(); + // Do your own mallocs for the structure, array and have linear + // Latin 1 + int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_ + | (CE_NOT_FOUND_TAG_ << 24); + // temporary fix for jb3822, 0x100000 -> 30000 + m_mapping_ = new IntTrieBuilder(null, 0x30000, trieinitialvalue, + trieinitialvalue, true); + m_prefixLookup_ = new Hashtable(); + // uhash_open(prefixLookupHash, prefixLookupComp); + m_contractions_ = new ContractionTable(m_mapping_); + // copy UCA's maxexpansion and merge as we go along + m_maxExpansions_ = maxet; + // adding an extra initial value for easier manipulation + for (int i = 0; i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i++) { + maxet.m_endExpansionCE_.add(new Integer( + RuleBasedCollator.UCA_.m_expansionEndCE_[i])); + maxet.m_expansionCESize_.add(new Byte( + 
RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i])); + } + m_maxJamoExpansions_ = maxjet; + + m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_]; + m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_]; + Arrays.fill(m_unsafeCP_, (byte) 0); + Arrays.fill(m_contrEndCP_, (byte) 0); + } + + /** + * Duplicating a BuildTable. Not all data will be duplicated into their + * own object. + * + * @param table + * to clone + */ + BuildTable(BuildTable table) { + m_collator_ = table.m_collator_; + m_mapping_ = new IntTrieBuilder(table.m_mapping_); + m_expansions_ = new Vector(table.m_expansions_); + m_contractions_ = new ContractionTable(table.m_contractions_); + m_contractions_.m_mapping_ = m_mapping_; + m_options_ = table.m_options_; + m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_); + m_maxJamoExpansions_ = new MaxJamoExpansionTable( + table.m_maxJamoExpansions_); + m_unsafeCP_ = new byte[table.m_unsafeCP_.length]; + System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0, + m_unsafeCP_.length); + m_contrEndCP_ = new byte[table.m_contrEndCP_.length]; + System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0, + m_contrEndCP_.length); + } + + // package private data members ------------------------------------- + + RuleBasedCollator m_collator_; + IntTrieBuilder m_mapping_; + Vector m_expansions_; + ContractionTable m_contractions_; + // UCATableHeader image; + CollationRuleParser.OptionSet m_options_; + MaxExpansionTable m_maxExpansions_; + MaxJamoExpansionTable m_maxJamoExpansions_; + byte m_unsafeCP_[]; + byte m_contrEndCP_[]; + Hashtable m_prefixLookup_; + CombinClassTable cmLookup = null; + } + + private static class Elements { + // package private data members ------------------------------------- + + String m_prefixChars_; + int m_prefix_; + String m_uchars_; + /** + * Working string + */ + String m_cPoints_; + /** + * Offset to the working string + */ + int m_cPointsOffset_; + /** + * These are collation elements - there could be more than one - in case + * of 
expansion + */ + int m_CEs_[]; + int m_CELength_; + /** + * This is the value element maps in original table + */ + int m_mapCE_; + int m_sizePrim_[]; + int m_sizeSec_[]; + int m_sizeTer_[]; + boolean m_variableTop_; + boolean m_caseBit_; + + // package private constructors ------------------------------------- + + /** + * Package private constructor + */ + Elements() { + m_sizePrim_ = new int[128]; + m_sizeSec_ = new int[128]; + m_sizeTer_ = new int[128]; + m_CEs_ = new int[256]; + m_CELength_ = 0; + } + + /** + * Package private constructor + */ + Elements(Elements element) { + m_prefixChars_ = element.m_prefixChars_; + m_prefix_ = element.m_prefix_; + m_uchars_ = element.m_uchars_; + m_cPoints_ = element.m_cPoints_; + m_cPointsOffset_ = element.m_cPointsOffset_; + m_CEs_ = element.m_CEs_; + m_CELength_ = element.m_CELength_; + m_mapCE_ = element.m_mapCE_; + m_sizePrim_ = element.m_sizePrim_; + m_sizeSec_ = element.m_sizeSec_; + m_sizeTer_ = element.m_sizeTer_; + m_variableTop_ = element.m_variableTop_; + m_caseBit_ = element.m_caseBit_; + } + + // package private methods ------------------------------------------- + + /** + * Initializing the elements + */ + public void clear() { + m_prefixChars_ = null; + m_prefix_ = 0; + m_uchars_ = null; + m_cPoints_ = null; + m_cPointsOffset_ = 0; + m_CELength_ = 0; + m_mapCE_ = 0; + Arrays.fill(m_sizePrim_, 0); + Arrays.fill(m_sizeSec_, 0); + Arrays.fill(m_sizeTer_, 0); + m_variableTop_ = false; + m_caseBit_ = false; + } + + /** + * Hashcode calculation for token + * + * @return the hashcode + */ + public int hashCode() { + String str = m_cPoints_.substring(m_cPointsOffset_); + return str.hashCode(); + } + + /** + * Equals calculation + * + * @param target Object to compare + * @return true if target is the same as this object + */ + public boolean equals(Object target) { + if (target == this) { + return true; + } + if (target instanceof Elements) { + Elements t = (Elements) target; + int size = m_cPoints_.length() - 
m_cPointsOffset_; + if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) { + return t.m_cPoints_.regionMatches(t.m_cPointsOffset_, + m_cPoints_, m_cPointsOffset_, size); + } + } + return false; + } + } + + // private data member --------------------------------------------------- + + /** + * Maximum strength used in CE building + */ + private static final int CE_BASIC_STRENGTH_LIMIT_ = 3; + /** + * Maximum collation strength + */ + private static final int CE_STRENGTH_LIMIT_ = 16; + /** + * Strength mask array, used in inverse UCA + */ + private static final int STRENGTH_MASK_[] = { 0xFFFF0000, 0xFFFFFF00, + 0xFFFFFFFF }; + /** + * CE tag for not found + */ + private static final int CE_NOT_FOUND_ = 0xF0000000; + /** + * CE tag for not found + */ + private static final int CE_NOT_FOUND_TAG_ = 0; + /** + * This code point results in an expansion + */ + private static final int CE_EXPANSION_TAG_ = 1; + /** + * Start of a contraction + */ + private static final int CE_CONTRACTION_TAG_ = 2; + /* + * Thai character - do the reordering + */ + // private static final int CE_THAI_TAG_ = 3; + /* + * Charset processing, not yet implemented + */ + // private static final int CE_CHARSET_TAG_ = 4; + /** + * Lead surrogate that is tailored and doesn't start a contraction + */ + private static final int CE_SURROGATE_TAG_ = 5; + /* + * AC00-D7AF + */ + // private static final int CE_HANGUL_SYLLABLE_TAG_ = 6; + /* + * D800-DBFF + */ + // private static final int CE_LEAD_SURROGATE_TAG_ = 7; + /* + * DC00-DFFF + */ + // private static final int CE_TRAIL_SURROGATE_TAG_ = 8; + /* + * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D + */ + // private static final int CE_CJK_IMPLICIT_TAG_ = 9; + private static final int CE_IMPLICIT_TAG_ = 10; + private static final int CE_SPEC_PROC_TAG_ = 11; + /** + * This is a three byte primary with starting secondaries and tertiaries. 
It + * fits in a single 32 bit CE and is used instead of expansion to save space + * without affecting the performance (hopefully) + */ + private static final int CE_LONG_PRIMARY_TAG_ = 12; + /** + * Unsafe UChar hash table table size. Size is 32 bytes for 1 bit for each + * latin 1 char + some power of two for hashing the rest of the chars. Size + * in bytes + */ + private static final int UNSAFECP_TABLE_SIZE_ = 1056; + /** + * Mask value down to "some power of two" -1. Number of bits, not num of + * bytes. + */ + private static final int UNSAFECP_TABLE_MASK_ = 0x1fff; + /** + * Case values + */ + private static final int UPPER_CASE_ = 0x80; + private static final int MIXED_CASE_ = 0x40; + private static final int LOWER_CASE_ = 0x00; + /* + * Initial table size + */ + // private static final int INIT_TABLE_SIZE_ = 1028; + /* + * Header size, copied from ICU4C, to be changed when that value changes + */ + // private static final int HEADER_SIZE_ = 0xC4; + /** + * Contraction table new element indicator + */ + private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF; + /** + * Parser for the rules + */ + private CollationRuleParser m_parser_; + /** + * Utility UCA collation element iterator + */ + private CollationElementIterator m_utilColEIter_; + /** + * Utility data members + */ + private CEGenerator m_utilGens_[] = { new CEGenerator(), new CEGenerator(), + new CEGenerator() }; + private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_]; + private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_]; + private Elements m_utilElement_ = new Elements(); + private Elements m_utilElement2_ = new Elements(); + private CollationRuleParser.Token m_utilToken_ = new CollationRuleParser.Token(); + private int m_utilCountBuffer_[] = new int[6]; + private long m_utilLongBuffer_[] = new long[5]; + private WeightRange m_utilLowerWeightRange_[] = { new WeightRange(), + new WeightRange(), new WeightRange(), new WeightRange(), + new WeightRange() }; + private 
WeightRange m_utilUpperWeightRange_[] = { new WeightRange(), + new WeightRange(), new WeightRange(), new WeightRange(), + new WeightRange() }; + private WeightRange m_utilWeightRange_ = new WeightRange(); + private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl; + private CanonicalIterator m_utilCanIter_ = new CanonicalIterator(""); + private StringBuilder m_utilStringBuffer_ = new StringBuilder(""); + // Flag indicating a combining marks table is required or not. + private static boolean buildCMTabFlag = false; + + // private methods ------------------------------------------------------- + + /** + * @param listheader + * parsed rule tokens + * @exception Exception + * thrown when internal error occurs + */ + private void initBuffers(CollationRuleParser.TokenListHeader listheader) + throws Exception { + CollationRuleParser.Token token = listheader.m_last_; + Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0); + + token.m_toInsert_ = 1; + m_utilIntBuffer_[token.m_strength_] = 1; + while (token.m_previous_ != null) { + if (token.m_previous_.m_strength_ < token.m_strength_) { + // going up + m_utilIntBuffer_[token.m_strength_] = 0; + m_utilIntBuffer_[token.m_previous_.m_strength_]++; + } else if (token.m_previous_.m_strength_ > token.m_strength_) { + // going down + m_utilIntBuffer_[token.m_previous_.m_strength_] = 1; + } else { + m_utilIntBuffer_[token.m_strength_]++; + } + token = token.m_previous_; + token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_]; + } + + token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_]; + INVERSE_UCA_.getInverseGapPositions(listheader); + + token = listheader.m_first_; + int fstrength = Collator.IDENTICAL; + int initstrength = Collator.IDENTICAL; + + m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_, + listheader.m_baseContCE_, Collator.PRIMARY); + m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_, + listheader.m_baseContCE_, Collator.SECONDARY); + 
m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_, + listheader.m_baseContCE_, Collator.TERTIARY); + while (token != null) { + fstrength = token.m_strength_; + if (fstrength < initstrength) { + initstrength = fstrength; + if (listheader.m_pos_[fstrength] == -1) { + while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) { + fstrength--; + } + if (listheader.m_pos_[fstrength] == -1) { + throw new Exception("Internal program error"); + } + } + if (initstrength == Collator.TERTIARY) { + // starting with tertiary + m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3]; + m_utilCEBuffer_[Collator.SECONDARY] = listheader.m_gapsLo_[fstrength * 3 + 1]; + m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator( + m_utilGens_[Collator.TERTIARY], + listheader.m_gapsLo_, listheader.m_gapsHi_, token, + fstrength); + } else if (initstrength == Collator.SECONDARY) { + // secondaries + m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3]; + m_utilCEBuffer_[Collator.SECONDARY] = getCEGenerator( + m_utilGens_[Collator.SECONDARY], + listheader.m_gapsLo_, listheader.m_gapsHi_, token, + fstrength); + m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator( + m_utilGens_[Collator.TERTIARY], token, + Collator.TERTIARY); + } else { + // primaries + m_utilCEBuffer_[Collator.PRIMARY] = getCEGenerator( + m_utilGens_[Collator.PRIMARY], + listheader.m_gapsLo_, listheader.m_gapsHi_, token, + fstrength); + m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator( + m_utilGens_[Collator.SECONDARY], token, + Collator.SECONDARY); + m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator( + m_utilGens_[Collator.TERTIARY], token, + Collator.TERTIARY); + } + } else { + if (token.m_strength_ == Collator.TERTIARY) { + m_utilCEBuffer_[Collator.TERTIARY] = getNextGenerated(m_utilGens_[Collator.TERTIARY]); + } else if (token.m_strength_ == Collator.SECONDARY) { + m_utilCEBuffer_[Collator.SECONDARY] = 
getNextGenerated(m_utilGens_[Collator.SECONDARY]); + m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator( + m_utilGens_[Collator.TERTIARY], token, + Collator.TERTIARY); + } else if (token.m_strength_ == Collator.PRIMARY) { + m_utilCEBuffer_[Collator.PRIMARY] = getNextGenerated(m_utilGens_[Collator.PRIMARY]); + m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator( + m_utilGens_[Collator.SECONDARY], token, + Collator.SECONDARY); + m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator( + m_utilGens_[Collator.TERTIARY], token, + Collator.TERTIARY); + } + } + doCE(m_utilCEBuffer_, token); + token = token.m_next_; + } + } + + /** + * Get the next generated ce + * + * @param g + * ce generator + * @return next generated ce + */ + private int getNextGenerated(CEGenerator g) { + g.m_current_ = nextWeight(g); + return g.m_current_; + } + + /** + * @param g + * CEGenerator + * @param token + * rule token + * @param strength + * @return ce generator + * @exception Exception + * thrown when internal error occurs + */ + private int getSimpleCEGenerator(CEGenerator g, + CollationRuleParser.Token token, int strength) throws Exception { + int high, low, count = 1; + int maxbyte = (strength == Collator.TERTIARY) ? 
0x3F : 0xFF; + + if (strength == Collator.SECONDARY) { + low = RuleBasedCollator.COMMON_TOP_2_ << 24; + high = 0xFFFFFFFF; + count = 0xFF - RuleBasedCollator.COMMON_TOP_2_; + } else { + low = RuleBasedCollator.BYTE_COMMON_ << 24; // 0x05000000; + high = 0x40000000; + count = 0x40 - RuleBasedCollator.BYTE_COMMON_; + } + + if (token.m_next_ != null && token.m_next_.m_strength_ == strength) { + count = token.m_next_.m_toInsert_; + } + + g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, + g.m_ranges_); + g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24; + + if (g.m_rangesLength_ == 0) { + throw new Exception("Internal program error"); + } + return g.m_current_; + } + + /** + * Combines 2 ce into one with respect to the argument strength + * + * @param ce1 + * first ce + * @param ce2 + * second ce + * @param strength + * strength to use + * @return combined ce + */ + private static int mergeCE(int ce1, int ce2, int strength) { + int mask = RuleBasedCollator.CE_TERTIARY_MASK_; + if (strength == Collator.SECONDARY) { + mask = RuleBasedCollator.CE_SECONDARY_MASK_; + } else if (strength == Collator.PRIMARY) { + mask = RuleBasedCollator.CE_PRIMARY_MASK_; + } + ce1 &= mask; + ce2 &= mask; + switch (strength) { + case Collator.PRIMARY: + return ce1 | ce2 >>> 16; + case Collator.SECONDARY: + return ce1 << 16 | ce2 << 8; + default: + return ce1 << 24 | ce2 << 16; + } + } + + /** + * @param g + * CEGenerator + * @param lows + * low gap array + * @param highs + * high gap array + * @param token + * rule token + * @param fstrength + * @exception Exception + * thrown when internal error occurs + */ + private int getCEGenerator(CEGenerator g, int lows[], int highs[], + CollationRuleParser.Token token, int fstrength) throws Exception { + int strength = token.m_strength_; + int low = lows[fstrength * 3 + strength]; + int high = highs[fstrength * 3 + strength]; + int maxbyte = 0; + if (strength == Collator.TERTIARY) { + maxbyte = 0x3F; + } else if (strength == 
Collator.PRIMARY) { + maxbyte = 0xFE; + } else { + maxbyte = 0xFF; + } + + int count = token.m_toInsert_; + + if (Utility.compareUnsigned(low, high) >= 0 + && strength > Collator.PRIMARY) { + int s = strength; + while (true) { + s--; + if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) { + if (strength == Collator.SECONDARY) { + if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) { + // Override if low range is less than + // UCOL_COMMON_TOP2. + low = RuleBasedCollator.COMMON_TOP_2_ << 24; + } + high = 0xFFFFFFFF; + } else { + if (low < RuleBasedCollator.COMMON_BOTTOM_3 << 24) { + // Override if low range is less than + // UCOL_COMMON_BOT3. + low = RuleBasedCollator.COMMON_BOTTOM_3 << 24; + } + high = 0x40000000; + } + break; + } + if (s < 0) { + throw new Exception("Internal program error"); + } + } + } + if (low == 0) { + low = 0x01000000; + } + if (strength == Collator.SECONDARY) { // similar as simple + if (Utility.compareUnsigned(low, + RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0 + && Utility.compareUnsigned(low, + RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) { + low = RuleBasedCollator.COMMON_TOP_2_ << 24; + } + if (Utility.compareUnsigned(high, + RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0 + && Utility.compareUnsigned(high, + RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) { + high = RuleBasedCollator.COMMON_TOP_2_ << 24; + } + if (Utility.compareUnsigned(low, + RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) { + g.m_rangesLength_ = allocateWeights( + RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, high, + count, maxbyte, g.m_ranges_); + g.m_current_ = nextWeight(g); + // g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24; + return g.m_current_; + } + } + + g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, + g.m_ranges_); + if (g.m_rangesLength_ == 0) { + throw new Exception("Internal program error"); + } + g.m_current_ = nextWeight(g); + return g.m_current_; + } + + /** + * @param ceparts + * list of collation elements parts + * @param 
token + * rule token + * @exception Exception + * thrown when forming case bits for expansions fails + */ + private void doCE(int ceparts[], CollationRuleParser.Token token) + throws Exception { + // this one makes the table and stuff + // int noofbytes[] = new int[3]; + for (int i = 0; i < 3; i++) { + // noofbytes[i] = countBytes(ceparts[i]); + m_utilIntBuffer_[i] = countBytes(ceparts[i]); + } + + // Here we have to pack CEs from parts + int cei = 0; + int value = 0; + + while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1] + || cei < m_utilIntBuffer_[2]) { + if (cei > 0) { + value = RuleBasedCollator.CE_CONTINUATION_MARKER_; + } else { + value = 0; + } + + if ((cei << 1) < m_utilIntBuffer_[0]) { + value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) << 16; + } + if (cei < m_utilIntBuffer_[1]) { + value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8; + } + + if (cei < m_utilIntBuffer_[2]) { + value |= ((ceparts[2] >> (32 - ((cei + 1) << 3))) & 0x3F); + } + token.m_CE_[cei] = value; + cei++; + } + if (cei == 0) { // totally ignorable + token.m_CELength_ = 1; + token.m_CE_[0] = 0; + } else { // there is at least something + token.m_CELength_ = cei; + } + + // Case bits handling for expansion + if (token.m_CE_[0] != 0) { // case bits should be set only for + // non-ignorables + int startoftokenrule = token.m_source_ & 0xFF; + if ((token.m_source_ >>> 24) > 1) { + // Do it manually + int length = token.m_source_ >>> 24; + String tokenstr = token.m_rules_.substring(startoftokenrule, + startoftokenrule + length); + token.m_CE_[0] |= getCaseBits(tokenstr); + } else { + // Copy it from the UCA + int caseCE = getFirstCE(token.m_rules_.charAt(startoftokenrule)); + token.m_CE_[0] |= (caseCE & 0xC0); + } + } + } + + /** + * Count the number of non-zero bytes used in the ce + * + * @param ce + * @return number of non-zero bytes used in ce + */ + private static final int countBytes(int ce) { + int mask = 0xFFFFFFFF; + int result = 0; + while 
(mask != 0) { + if ((ce & mask) != 0) { + result++; + } + mask >>>= 8; + } + return result; + } + + /** + * We are ready to create collation elements + * + * @param t + * build table to insert + * @param lh + * rule token list header + */ + private void createElements(BuildTable t, + CollationRuleParser.TokenListHeader lh) { + CollationRuleParser.Token tok = lh.m_first_; + m_utilElement_.clear(); + while (tok != null) { + // first, check if there are any expansions + // if there are expansions, we need to do a little bit more + // processing since parts of expansion can be tailored, while + // others are not + if (tok.m_expansion_ != 0) { + int len = tok.m_expansion_ >>> 24; + int currentSequenceLen = len; + int expOffset = tok.m_expansion_ & 0x00FFFFFF; + m_utilToken_.m_source_ = currentSequenceLen | expOffset; + m_utilToken_.m_rules_ = m_parser_.m_source_; + + while (len > 0) { + currentSequenceLen = len; + while (currentSequenceLen > 0) { + m_utilToken_.m_source_ = (currentSequenceLen << 24) + | expOffset; + CollationRuleParser.Token expt = m_parser_.m_hashTable_.get(m_utilToken_); + if (expt != null + && expt.m_strength_ != CollationRuleParser.TOKEN_RESET_) { + // expansion is tailored + int noOfCEsToCopy = expt.m_CELength_; + for (int j = 0; j < noOfCEsToCopy; j++) { + tok.m_expCE_[tok.m_expCELength_ + j] = expt.m_CE_[j]; + } + tok.m_expCELength_ += noOfCEsToCopy; + // never try to add codepoints and CEs. + // For some odd reason, it won't work. + expOffset += currentSequenceLen; // noOfCEsToCopy; + len -= currentSequenceLen; // noOfCEsToCopy; + break; + } else { + currentSequenceLen--; + } + } + if (currentSequenceLen == 0) { + // couldn't find any tailored subsequence, will have to + // get one from UCA. 
first, get the UChars from the + // rules then pick CEs out until there is no more and + // stuff them into expansion + m_utilColEIter_.setText(m_parser_.m_source_.substring( + expOffset, expOffset + 1)); + while (true) { + int order = m_utilColEIter_.next(); + if (order == CollationElementIterator.NULLORDER) { + break; + } + tok.m_expCE_[tok.m_expCELength_++] = order; + } + expOffset++; + len--; + } + } + } else { + tok.m_expCELength_ = 0; + } + + // set the ucaelement with obtained values + m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_; + + // copy CEs + System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0, + tok.m_CELength_); + System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_, + tok.m_CELength_, tok.m_expCELength_); + + // copy UChars + // We kept prefix and source kind of together, as it is a kind of a + // contraction. + // However, now we have to slice the prefix off the main thing - + m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_; + m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_; + if (tok.m_prefix_ != 0) { + // we will just copy the prefix here, and adjust accordingly in + // the addPrefix function in ucol_elm. The reason is that we + // need to add both composed AND decomposed elements to the + // unsafe table. 
+ int size = tok.m_prefix_ >> 24; + int offset = tok.m_prefix_ & 0x00FFFFFF; + m_utilElement_.m_prefixChars_ = m_parser_.m_source_.substring( + offset, offset + size); + size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24); + offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24); + m_utilElement_.m_uchars_ = m_parser_.m_source_.substring( + offset, offset + size); + } else { + m_utilElement_.m_prefixChars_ = null; + int offset = tok.m_source_ & 0x00FFFFFF; + int size = tok.m_source_ >>> 24; + m_utilElement_.m_uchars_ = m_parser_.m_source_.substring( + offset, offset + size); + } + m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; + + boolean containCombinMarks = false; + for (int i = 0; i < m_utilElement_.m_cPoints_.length() + - m_utilElement_.m_cPointsOffset_; i++) { + if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) { + t.m_collator_.m_isJamoSpecial_ = true; + break; + } + if (!buildCMTabFlag) { + // check combining class + int fcd = m_nfcImpl_.getFCD16FromSingleLead(m_utilElement_.m_cPoints_.charAt(i)); // TODO: review for handling supplementary characters + if ((fcd & 0xff) == 0) { + // reset flag when current char is not combining mark. 
+ containCombinMarks = false; + } else { + containCombinMarks = true; + } + } + } + + if (!buildCMTabFlag && containCombinMarks) { + buildCMTabFlag = true; + } + + /*** + * // Case bits handling m_utilElement_.m_CEs_[0] &= 0xFFFFFF3F; // + * Clean the case bits field if (m_utilElement_.m_cPoints_.length() + * - m_utilElement_.m_cPointsOffset_ > 1) { // Do it manually + * m_utilElement_.m_CEs_[0] |= + * getCaseBits(m_utilElement_.m_cPoints_); } else { // Copy it from + * the UCA int caseCE = + * getFirstCE(m_utilElement_.m_cPoints_.charAt(0)); + * m_utilElement_.m_CEs_[0] |= (caseCE & 0xC0); } + ***/ + // and then, add it + addAnElement(t, m_utilElement_); + tok = tok.m_next_; + } + } + + /** + * Testing if the string argument has case + * + * @param src + * string + * @return the case for this char array + * @exception Exception + * thrown when internal program error occurs + */ + private final int getCaseBits(String src) throws Exception { + int uCount = 0; + int lCount = 0; + src = Normalizer.decompose(src, true); + m_utilColEIter_.setText(src); + for (int i = 0; i < src.length(); i++) { + m_utilColEIter_.setText(src.substring(i, i + 1)); + int order = m_utilColEIter_.next(); + if (RuleBasedCollator.isContinuation(order)) { + throw new Exception("Internal program error"); + } + if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_) == UPPER_CASE_) { + uCount++; + } else { + char ch = src.charAt(i); + if (UCharacter.isLowerCase(ch)) { + lCount++; + } else { + if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) { + lCount++; + } + } + } + } + + if (uCount != 0 && lCount != 0) { + return MIXED_CASE_; + } else if (uCount != 0) { + return UPPER_CASE_; + } else { + return LOWER_CASE_; + } + } + + /** + * Converts a char to the uppercase Kana + * + * @param ch + * character to convert + * @return the converted Kana character + */ + private static final char toLargeKana(char ch) { + if (0x3042 < ch && ch < 0x30ef) { // Kana range + switch (ch - 0x3000) { + case 0x41: + case 
0x43: + case 0x45: + case 0x47: + case 0x49: + case 0x63: + case 0x83: + case 0x85: + case 0x8E: + case 0xA1: + case 0xA3: + case 0xA5: + case 0xA7: + case 0xA9: + case 0xC3: + case 0xE3: + case 0xE5: + case 0xEE: + ch++; + break; + case 0xF5: + ch = 0x30AB; + break; + case 0xF6: + ch = 0x30B1; + break; + } + } + return ch; + } + + /** + * Converts a char to the lowercase Kana + * + * @param ch + * character to convert + * @return the converted Kana character + */ + private static final char toSmallKana(char ch) { + if (0x3042 < ch && ch < 0x30ef) { // Kana range + switch (ch - 0x3000) { + case 0x42: + case 0x44: + case 0x46: + case 0x48: + case 0x4A: + case 0x64: + case 0x84: + case 0x86: + case 0x8F: + case 0xA2: + case 0xA4: + case 0xA6: + case 0xA8: + case 0xAA: + case 0xC4: + case 0xE4: + case 0xE6: + case 0xEF: + ch--; + break; + case 0xAB: + ch = 0x30F5; + break; + case 0xB1: + ch = 0x30F6; + break; + } + } + return ch; + } + + /** + * This should be connected to special Jamo handling. + */ + private int getFirstCE(char ch) { + m_utilColEIter_.setText(UCharacter.toString(ch)); + return m_utilColEIter_.next(); + } + + /** + * This adds a read element, while testing for existence + * + * @param t + * build table + * @param element + * @return ce + */ + private int addAnElement(BuildTable t, Elements element) { + Vector expansions = t.m_expansions_; + element.m_mapCE_ = 0; + + if (element.m_CELength_ == 1) { + element.m_mapCE_ = element.m_CEs_[0]; + + } else { + // unfortunately, it looks like we have to look for a long primary + // here since in canonical closure we are going to hit some long + // primaries from the first phase, and they will come back as + // continuations/expansions destroying the effect of the previous + // opitimization. A long primary is a three byte primary with + // starting secondaries and tertiaries. 
It can appear in long runs + // of only primary differences (like east Asian tailorings) also, + // it should not be an expansion, as expansions would break with + // this + if (element.m_CELength_ == 2 // a two CE expansion + && RuleBasedCollator.isContinuation(element.m_CEs_[1]) + && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // that + // has + // only + // primaries + // in + // continuation + && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_) + // a common secondary + && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_) // and + // a + // common + // tertiary + ) { + element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_ + // a long primary special + | (CE_LONG_PRIMARY_TAG_ << 24) + // first and second byte of primary + | ((element.m_CEs_[0] >> 8) & 0xFFFF00) + // third byte of primary + | ((element.m_CEs_[1] >> 24) & 0xFF); + } else { + // omitting expansion offset in builder + // (HEADER_SIZE_ >> 2) + int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ + | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_) + | (addExpansion(expansions, element.m_CEs_[0]) << 4) + & 0xFFFFF0; + + for (int i = 1; i < element.m_CELength_; i++) { + addExpansion(expansions, element.m_CEs_[i]); + } + if (element.m_CELength_ <= 0xF) { + expansion |= element.m_CELength_; + } else { + addExpansion(expansions, 0); + } + element.m_mapCE_ = expansion; + setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1], + (byte) element.m_CELength_, t.m_maxExpansions_); + if (isJamo(element.m_cPoints_.charAt(0))) { + t.m_collator_.m_isJamoSpecial_ = true; + setMaxJamoExpansion(element.m_cPoints_.charAt(0), + element.m_CEs_[element.m_CELength_ - 1], + (byte) element.m_CELength_, t.m_maxJamoExpansions_); + } + } + } + + // We treat digits differently - they are "uber special" and should be + // processed differently if numeric collation is on. 
+ int uniChar = 0; + if ((element.m_uchars_.length() == 2) + && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) { + uniChar = UCharacterProperty.getRawSupplementary(element.m_uchars_ + .charAt(0), element.m_uchars_.charAt(1)); + } else if (element.m_uchars_.length() == 1) { + uniChar = element.m_uchars_.charAt(0); + } + + // Here, we either have one normal CE OR mapCE is set. Therefore, we + // stuff only one element to the expansion buffer. When we encounter a + // digit and we don't do numeric collation, we will just pick the CE + // we have and break out of case (see ucol.cpp ucol_prv_getSpecialCE + // && ucol_prv_getSpecialPrevCE). If we picked a special, further + // processing will occur. If it's a simple CE, we'll return due + // to how the loop is constructed. + if (uniChar != 0 && UCharacter.isDigit(uniChar)) { + // prepare the element + int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ + | (CollationElementIterator.CE_DIGIT_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_) + | 1; + if (element.m_mapCE_ != 0) { + // if there is an expansion, we'll pick it here + expansion |= (addExpansion(expansions, element.m_mapCE_) << 4); + } else { + expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4); + } + element.m_mapCE_ = expansion; + } + + // here we want to add the prefix structure. + // I will try to process it as a reverse contraction, if possible. + // prefix buffer is already reversed. + + if (element.m_prefixChars_ != null + && element.m_prefixChars_.length() - element.m_prefix_ > 0) { + // We keep the seen prefix starter elements in a hashtable we need + // it to be able to distinguish between the simple codepoints and + // prefix starters. Also, we need to use it for canonical closure. 
+ m_utilElement2_.m_caseBit_ = element.m_caseBit_; + m_utilElement2_.m_CELength_ = element.m_CELength_; + m_utilElement2_.m_CEs_ = element.m_CEs_; + m_utilElement2_.m_mapCE_ = element.m_mapCE_; + // m_utilElement2_.m_prefixChars_ = element.m_prefixChars_; + m_utilElement2_.m_sizePrim_ = element.m_sizePrim_; + m_utilElement2_.m_sizeSec_ = element.m_sizeSec_; + m_utilElement2_.m_sizeTer_ = element.m_sizeTer_; + m_utilElement2_.m_variableTop_ = element.m_variableTop_; + m_utilElement2_.m_prefix_ = element.m_prefix_; + m_utilElement2_.m_prefixChars_ = Normalizer.compose( + element.m_prefixChars_, false); + m_utilElement2_.m_uchars_ = element.m_uchars_; + m_utilElement2_.m_cPoints_ = element.m_cPoints_; + m_utilElement2_.m_cPointsOffset_ = 0; + + if (t.m_prefixLookup_ != null) { + Elements uCE = t.m_prefixLookup_.get(element); + if (uCE != null) { + // there is already a set of code points here + element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element); + } else { // no code points, so this spot is clean + element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element); + uCE = new Elements(element); + uCE.m_cPoints_ = uCE.m_uchars_; + t.m_prefixLookup_.put(uCE, uCE); + } + if (m_utilElement2_.m_prefixChars_.length() != element.m_prefixChars_ + .length() + - element.m_prefix_ + || !m_utilElement2_.m_prefixChars_.regionMatches(0, + element.m_prefixChars_, element.m_prefix_, + m_utilElement2_.m_prefixChars_.length())) { + // do it! 
+ m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_, + m_utilElement2_); + } + } + } + + // We need to use the canonical iterator here + // the way we do it is to generate the canonically equivalent strings + // for the contraction and then add the sequences that pass FCD check + if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1 + && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2 + && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) && UTF16 + .isTrailSurrogate(element.m_cPoints_.charAt(1)))) { + // this is a contraction, we should check whether a composed form + // should also be included + m_utilCanIter_.setSource(element.m_cPoints_); + String source = m_utilCanIter_.next(); + while (source != null && source.length() > 0) { + if (Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.NO) { + element.m_uchars_ = source; + element.m_cPoints_ = element.m_uchars_; + finalizeAddition(t, element); + } + source = m_utilCanIter_.next(); + } + + return element.m_mapCE_; + } else { + return finalizeAddition(t, element); + } + } + + /** + * Adds an expansion ce to the expansion vector + * + * @param expansions + * vector to add to + * @param value + * of the expansion + * @return the current position of the new element + */ + private static final int addExpansion(Vector expansions, int value) { + expansions.add(new Integer(value)); + return expansions.size() - 1; + } + + /** + * Looks for the maximum length of all expansion sequences ending with the + * same collation element. The size required for maxexpansion and maxsize is + * returned if the arrays are too small. + * + * @param endexpansion + * the last expansion collation element to be added + * @param expansionsize + * size of the expansion + * @param maxexpansion + * data structure to store the maximum expansion data. + * @returns size of the maxexpansion and maxsize used. 
+ */ + private static int setMaxExpansion(int endexpansion, byte expansionsize, + MaxExpansionTable maxexpansion) { + int start = 0; + int limit = maxexpansion.m_endExpansionCE_.size(); + long unsigned = (long) endexpansion; + unsigned &= 0xFFFFFFFFl; + + // using binary search to determine if last expansion element is + // already in the array + int result = -1; + while (start < limit - 1) { + int mid = start + ((limit - start) >> 1); + long unsignedce = (maxexpansion.m_endExpansionCE_ + .get(mid)).intValue(); + unsignedce &= 0xFFFFFFFFl; + if (unsigned <= unsignedce) { + limit = mid; + } else { + start = mid; + } + } + + if ((maxexpansion.m_endExpansionCE_.get(start)).intValue() == endexpansion) { + result = start; + } else if ((maxexpansion.m_endExpansionCE_.get(limit)) + .intValue() == endexpansion) { + result = limit; + } + if (result > -1) { + // found the ce in expansion, we'll just modify the size if it + // is smaller + Object currentsize = maxexpansion.m_expansionCESize_.get(result); + if (((Byte) currentsize).byteValue() < expansionsize) { + maxexpansion.m_expansionCESize_.set(result, new Byte( + expansionsize)); + } + } else { + // we'll need to squeeze the value into the array. initial + // implementation. shifting the subarray down by 1 + maxexpansion.m_endExpansionCE_.insertElementAt(new Integer( + endexpansion), start + 1); + maxexpansion.m_expansionCESize_.insertElementAt(new Byte( + expansionsize), start + 1); + } + return maxexpansion.m_endExpansionCE_.size(); + } + + /** + * Sets the maximum length of all jamo expansion sequences ending with the + * same collation element. The size required for maxexpansion and maxsize is + * returned if the arrays are too small. + * + * @param ch + * the jamo codepoint + * @param endexpansion + * the last expansion collation element to be added + * @param expansionsize + * size of the expansion + * @param maxexpansion + * data structure to store the maximum expansion data. 
+ * @returns size of the maxexpansion and maxsize used. + */ + private static int setMaxJamoExpansion(char ch, int endexpansion, + byte expansionsize, MaxJamoExpansionTable maxexpansion) { + boolean isV = true; + if (ch >= 0x1100 && ch <= 0x1112) { + // determines L for Jamo, doesn't need to store this since it is + // never at the end of a expansion + if (maxexpansion.m_maxLSize_ < expansionsize) { + maxexpansion.m_maxLSize_ = expansionsize; + } + return maxexpansion.m_endExpansionCE_.size(); + } + + if (ch >= 0x1161 && ch <= 0x1175) { + // determines V for Jamo + if (maxexpansion.m_maxVSize_ < expansionsize) { + maxexpansion.m_maxVSize_ = expansionsize; + } + } + + if (ch >= 0x11A8 && ch <= 0x11C2) { + isV = false; + // determines T for Jamo + if (maxexpansion.m_maxTSize_ < expansionsize) { + maxexpansion.m_maxTSize_ = expansionsize; + } + } + + int pos = maxexpansion.m_endExpansionCE_.size(); + while (pos > 0) { + pos--; + if ((maxexpansion.m_endExpansionCE_.get(pos)).intValue() == endexpansion) { + return maxexpansion.m_endExpansionCE_.size(); + } + } + maxexpansion.m_endExpansionCE_.add(new Integer(endexpansion)); + maxexpansion.m_isV_.add(isV ? Boolean.TRUE : Boolean.FALSE); + + return maxexpansion.m_endExpansionCE_.size(); + } + + /** + * Adds a prefix to the table + * + * @param t + * build table to update + * @param CE + * collation element to add + * @param element + * rule element to add + * @return modified ce + */ + private int addPrefix(BuildTable t, int CE, Elements element) { + // currently the longest prefix we're supporting in Japanese is two + // characters long. Although this table could quite easily mimic + // complete contraction stuff there is no good reason to make a general + // solution, as it would require some error prone messing. 
+ ContractionTable contractions = t.m_contractions_; + String oldCP = element.m_cPoints_; + int oldCPOffset = element.m_cPointsOffset_; + + contractions.m_currentTag_ = CE_SPEC_PROC_TAG_; + // here, we will normalize & add prefix to the table. + int size = element.m_prefixChars_.length() - element.m_prefix_; + for (int j = 1; j < size; j++) { + // First add NFD prefix chars to unsafe CP hash table + // Unless it is a trail surrogate, which is handled algoritmically + // and shouldn't take up space in the table. + char ch = element.m_prefixChars_.charAt(j + element.m_prefix_); + if (!UTF16.isTrailSurrogate(ch)) { + unsafeCPSet(t.m_unsafeCP_, ch); + } + } + + // StringBuffer reversed = new StringBuffer(); + m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); + for (int j = 0; j < size; j++) { + // prefixes are going to be looked up backwards + // therefore, we will promptly reverse the prefix buffer... + int offset = element.m_prefixChars_.length() - j - 1; + m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset)); + } + element.m_prefixChars_ = m_utilStringBuffer_.toString(); + element.m_prefix_ = 0; + + // the first codepoint is also unsafe, as it forms a 'contraction' with + // the prefix + if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) { + unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0)); + } + + element.m_cPoints_ = element.m_prefixChars_; + element.m_cPointsOffset_ = element.m_prefix_; + + // Add the last char of the contraction to the contraction-end hash + // table. 
unless it is a trail surrogate, which is handled + // algorithmically and shouldn't be in the table + if (!UTF16.isTrailSurrogate(element.m_cPoints_ + .charAt(element.m_cPoints_.length() - 1))) { + ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_ + .charAt(element.m_cPoints_.length() - 1)); + } + // First we need to check if contractions starts with a surrogate + // int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_); + + // If there are any Jamos in the contraction, we should turn on special + // processing for Jamos + if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) { + t.m_collator_.m_isJamoSpecial_ = true; + } + // then we need to deal with it + // we could aready have something in table - or we might not + if (!isPrefix(CE)) { + // if it wasn't contraction, we wouldn't end up here + int firstContractionOffset = addContraction(contractions, + CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE); + int newCE = processContraction(contractions, element, CE_NOT_FOUND_); + addContraction(contractions, firstContractionOffset, + element.m_prefixChars_.charAt(element.m_prefix_), newCE); + addContraction(contractions, firstContractionOffset, (char) 0xFFFF, + CE); + CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset); + } else { + // we are adding to existing contraction + // there were already some elements in the table, so we need to add + // a new contraction + // Two things can happen here: either the codepoint is already in + // the table, or it is not + char ch = element.m_prefixChars_.charAt(element.m_prefix_); + int position = findCP(contractions, CE, ch); + if (position > 0) { + // if it is we just continue down the chain + int eCE = getCE(contractions, CE, position); + int newCE = processContraction(contractions, element, eCE); + setContraction(contractions, CE, position, ch, newCE); + } else { + // if it isn't, we will have to create a new sequence + processContraction(contractions, element, CE_NOT_FOUND_); + 
insertContraction(contractions, CE, ch, element.m_mapCE_); + } + } + + element.m_cPoints_ = oldCP; + element.m_cPointsOffset_ = oldCPOffset; + + return CE; + } + + /** + * Checks if the argument ce is a contraction + * + * @param CE + * collation element + * @return true if argument ce is a contraction + */ + private static final boolean isContraction(int CE) { + return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_); + } + + /** + * Checks if the argument ce has a prefix + * + * @param CE + * collation element + * @return true if argument ce has a prefix + */ + private static final boolean isPrefix(int CE) { + return isSpecial(CE) && (getCETag(CE) == CE_SPEC_PROC_TAG_); + } + + /** + * Checks if the argument ce is special + * + * @param CE + * collation element + * @return true if argument ce is special + */ + private static final boolean isSpecial(int CE) { + return (CE & RuleBasedCollator.CE_SPECIAL_FLAG_) == 0xF0000000; + } + + /** + * Checks if the argument ce has a prefix + * + * @param CE + * collation element + * @return true if argument ce has a prefix + */ + private static final int getCETag(int CE) { + return (CE & RuleBasedCollator.CE_TAG_MASK_) >>> RuleBasedCollator.CE_TAG_SHIFT_; + } + + /** + * Gets the ce at position in contraction table + * + * @param table + * contraction table + * @param position + * offset to the contraction table + * @return ce + */ + private static final int getCE(ContractionTable table, int element, + int position) { + element &= 0xFFFFFF; + BasicContractionTable tbl = getBasicContractionTable(table, element); + + if (tbl == null) { + return CE_NOT_FOUND_; + } + if (position > tbl.m_CEs_.size() || position == -1) { + return CE_NOT_FOUND_; + } else { + return tbl.m_CEs_.get(position).intValue(); + } + } + + /** + * Sets the unsafe character + * + * @param table + * unsafe table + * @param c + * character to be added + */ + private static final void unsafeCPSet(byte table[], char c) { + int hash = c; + if (hash >= 
(UNSAFECP_TABLE_SIZE_ << 3)) { + if (hash >= 0xd800 && hash <= 0xf8ff) { + // Part of a surrogate, or in private use area. + // These don't go in the table + return; + } + hash = (hash & UNSAFECP_TABLE_MASK_) + 256; + } + table[hash >> 3] |= (1 << (hash & 7)); + } + + /** + * Sets the contraction end character + * + * @param table + * contraction end table + * @param c + * character to be added + */ + private static final void ContrEndCPSet(byte table[], char c) { + int hash = c; + if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) { + hash = (hash & UNSAFECP_TABLE_MASK_) + 256; + } + table[hash >> 3] |= (1 << (hash & 7)); + } + + /** + * Adds more contractions in table. If element is non existant, it creates + * on. Returns element handle + * + * @param table + * contraction table + * @param element + * offset to the contraction table + * @param codePoint + * codepoint to add + * @param value + * @return collation element + */ + private static int addContraction(ContractionTable table, int element, + char codePoint, int value) { + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + tbl = addAContractionElement(table); + element = table.m_elements_.size() - 1; + } + + tbl.m_CEs_.add(new Integer(value)); + tbl.m_codePoints_.append(codePoint); + return constructSpecialCE(table.m_currentTag_, element); + } + + /** + * Adds a contraction element to the table + * + * @param table + * contraction table to update + * @return contraction + */ + private static BasicContractionTable addAContractionElement( + ContractionTable table) { + BasicContractionTable result = new BasicContractionTable(); + table.m_elements_.add(result); + return result; + } + + /** + * Constructs a special ce + * + * @param tag + * special tag + * @param CE + * collation element + * @return a contraction ce + */ + private static final int constructSpecialCE(int tag, int CE) { + return RuleBasedCollator.CE_SPECIAL_FLAG_ + | (tag << RuleBasedCollator.CE_TAG_SHIFT_) | (CE & 
0xFFFFFF); + } + + /** + * Sets and inserts the element that has a contraction + * + * @param contractions + * contraction table + * @param element + * contracting element + * @param existingCE + * @return contraction ce + */ + private static int processContraction(ContractionTable contractions, + Elements element, int existingCE) { + int firstContractionOffset = 0; + // end of recursion + if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) { + if (isContractionTableElement(existingCE) + && getCETag(existingCE) == contractions.m_currentTag_) { + changeContraction(contractions, existingCE, (char) 0, + element.m_mapCE_); + changeContraction(contractions, existingCE, (char) 0xFFFF, + element.m_mapCE_); + return existingCE; + } else { + // can't do just that. existingCe might be a contraction, + // meaning that we need to do another step + return element.m_mapCE_; + } + } + + // this recursion currently feeds on the only element we have... + // We will have to copy it in order to accomodate for both backward + // and forward cycles + // we encountered either an empty space or a non-contraction element + // this means we are constructing a new contraction sequence + element.m_cPointsOffset_++; + if (!isContractionTableElement(existingCE)) { + // if it wasn't contraction, we wouldn't end up here + firstContractionOffset = addContraction(contractions, + CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, existingCE); + int newCE = processContraction(contractions, element, CE_NOT_FOUND_); + addContraction(contractions, firstContractionOffset, + element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE); + addContraction(contractions, firstContractionOffset, (char) 0xFFFF, + existingCE); + existingCE = constructSpecialCE(contractions.m_currentTag_, + firstContractionOffset); + } else { + // we are adding to existing contraction + // there were already some elements in the table, so we need to add + // a new contraction + // Two things can happen here: either the 
codepoint is already in + // the table, or it is not + int position = findCP(contractions, existingCE, element.m_cPoints_ + .charAt(element.m_cPointsOffset_)); + if (position > 0) { + // if it is we just continue down the chain + int eCE = getCE(contractions, existingCE, position); + int newCE = processContraction(contractions, element, eCE); + setContraction(contractions, existingCE, position, + element.m_cPoints_.charAt(element.m_cPointsOffset_), + newCE); + } else { + // if it isn't, we will have to create a new sequence + int newCE = processContraction(contractions, element, + CE_NOT_FOUND_); + insertContraction(contractions, existingCE, element.m_cPoints_ + .charAt(element.m_cPointsOffset_), newCE); + } + } + element.m_cPointsOffset_--; + return existingCE; + } + + /** + * Checks if CE belongs to the contraction table + * + * @param CE + * collation element to test + * @return true if CE belongs to the contraction table + */ + private static final boolean isContractionTableElement(int CE) { + return isSpecial(CE) + && (getCETag(CE) == CE_CONTRACTION_TAG_ || getCETag(CE) == CE_SPEC_PROC_TAG_); + } + + /** + * Gets the codepoint + * + * @param table + * contraction table + * @param element + * offset to the contraction element in the table + * @param codePoint + * code point to look for + * @return the offset to the code point + */ + private static int findCP(ContractionTable table, int element, + char codePoint) { + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + return -1; + } + + int position = 0; + while (codePoint > tbl.m_codePoints_.charAt(position)) { + position++; + if (position > tbl.m_codePoints_.length()) { + return -1; + } + } + if (codePoint == tbl.m_codePoints_.charAt(position)) { + return position; + } else { + return -1; + } + } + + /** + * Gets the contraction element out of the contraction table + * + * @param table + * contraction table + * @param offset + * to the element in the contraction table + 
* @return basic contraction element at offset in the contraction table + */ + private static final BasicContractionTable getBasicContractionTable( + ContractionTable table, int offset) { + offset &= 0xFFFFFF; + if (offset == 0xFFFFFF) { + return null; + } + return table.m_elements_.get(offset); + } + + /** + * Changes the contraction element + * + * @param table + * contraction table + * @param element + * offset to the element in the contraction table + * @param codePoint + * codepoint + * @param newCE + * new collation element + * @return basic contraction element at offset in the contraction table + */ + private static final int changeContraction(ContractionTable table, + int element, char codePoint, int newCE) { + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + return 0; + } + int position = 0; + while (codePoint > tbl.m_codePoints_.charAt(position)) { + position++; + if (position > tbl.m_codePoints_.length()) { + return CE_NOT_FOUND_; + } + } + if (codePoint == tbl.m_codePoints_.charAt(position)) { + tbl.m_CEs_.set(position, new Integer(newCE)); + return element & 0xFFFFFF; + } else { + return CE_NOT_FOUND_; + } + } + + /** + * Sets a part of contraction sequence in table. If element is non existant, + * it creates on. Returns element handle. 
+ * + * @param table + * contraction table + * @param element + * offset to the contraction table + * @param offset + * @param codePoint + * contraction character + * @param value + * ce value + * @return new contraction ce + */ + private static final int setContraction(ContractionTable table, + int element, int offset, char codePoint, int value) { + element &= 0xFFFFFF; + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + tbl = addAContractionElement(table); + element = table.m_elements_.size() - 1; + } + + tbl.m_CEs_.set(offset, new Integer(value)); + tbl.m_codePoints_.setCharAt(offset, codePoint); + return constructSpecialCE(table.m_currentTag_, element); + } + + /** + * Inserts a part of contraction sequence in table. Sequences behind the + * offset are moved back. If element is non existent, it creates on. + * + * @param table + * contraction + * @param element + * offset to the table contraction + * @param codePoint + * code point + * @param value + * collation element value + * @return contraction collation element + */ + private static final int insertContraction(ContractionTable table, + int element, char codePoint, int value) { + element &= 0xFFFFFF; + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + tbl = addAContractionElement(table); + element = table.m_elements_.size() - 1; + } + + int offset = 0; + while (tbl.m_codePoints_.charAt(offset) < codePoint + && offset < tbl.m_codePoints_.length()) { + offset++; + } + + tbl.m_CEs_.insertElementAt(new Integer(value), offset); + tbl.m_codePoints_.insert(offset, codePoint); + + return constructSpecialCE(table.m_currentTag_, element); + } + + /** + * Finalize addition + * + * @param t + * build table + * @param element + * to add + */ + private final static int finalizeAddition(BuildTable t, Elements element) { + int CE = CE_NOT_FOUND_; + // This should add a completely ignorable element to the + // unsafe table, so that 
backward iteration will skip + // over it when treating contractions. + if (element.m_mapCE_ == 0) { + for (int i = 0; i < element.m_cPoints_.length(); i++) { + char ch = element.m_cPoints_.charAt(i); + if (!UTF16.isTrailSurrogate(ch)) { + unsafeCPSet(t.m_unsafeCP_, ch); + } + } + } + + if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) { + // we're adding a contraction + int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_); + CE = t.m_mapping_.getValue(cp); + CE = addContraction(t, CE, element); + } else { + // easy case + CE = t.m_mapping_.getValue(element.m_cPoints_ + .charAt(element.m_cPointsOffset_)); + + if (CE != CE_NOT_FOUND_) { + if (isContractionTableElement(CE)) { + // adding a non contraction element (thai, expansion, + // single) to already existing contraction + if (!isPrefix(element.m_mapCE_)) { + // we cannot reenter prefix elements - as we are going + // to create a dead loop + // Only expansions and regular CEs can go here... + // Contractions will never happen in this place + setContraction(t.m_contractions_, CE, 0, (char) 0, + element.m_mapCE_); + // This loop has to change the CE at the end of + // contraction REDO! + changeLastCE(t.m_contractions_, CE, element.m_mapCE_); + } + } else { + t.m_mapping_ + .setValue(element.m_cPoints_ + .charAt(element.m_cPointsOffset_), + element.m_mapCE_); + if (element.m_prefixChars_ != null + && element.m_prefixChars_.length() > 0 + && getCETag(CE) != CE_IMPLICIT_TAG_) { + // Add CE for standalone precontext char. 
+ Elements origElem = new Elements(); + origElem.m_prefixChars_ = null; + origElem.m_uchars_ = element.m_cPoints_; + origElem.m_cPoints_ = origElem.m_uchars_; + origElem.m_CEs_[0] = CE; + origElem.m_mapCE_ = CE; + origElem.m_CELength_ = 1; + finalizeAddition(t, origElem); + } + } + } else { + t.m_mapping_.setValue(element.m_cPoints_ + .charAt(element.m_cPointsOffset_), element.m_mapCE_); + } + } + return CE; + } + + /** + * Note regarding surrogate handling: We are interested only in the single + * or leading surrogates in a contraction. If a surrogate is somewhere else + * in the contraction, it is going to be handled as a pair of code units, as + * it doesn't affect the performance AND handling surrogates specially would + * complicate code way too much. + */ + private static int addContraction(BuildTable t, int CE, Elements element) { + ContractionTable contractions = t.m_contractions_; + contractions.m_currentTag_ = CE_CONTRACTION_TAG_; + + // First we need to check if contractions starts with a surrogate + int cp = UTF16.charAt(element.m_cPoints_, 0); + int cpsize = 1; + if (UCharacter.isSupplementary(cp)) { + cpsize = 2; + } + if (cpsize < element.m_cPoints_.length()) { + // This is a real contraction, if there are other characters after + // the first + int size = element.m_cPoints_.length() - element.m_cPointsOffset_; + for (int j = 1; j < size; j++) { + // First add contraction chars to unsafe CP hash table + // Unless it is a trail surrogate, which is handled + // algoritmically and shouldn't take up space in the table. + if (!UTF16.isTrailSurrogate(element.m_cPoints_ + .charAt(element.m_cPointsOffset_ + j))) { + unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_ + .charAt(element.m_cPointsOffset_ + j)); + } + } + // Add the last char of the contraction to the contraction-end + // hash table. 
unless it is a trail surrogate, which is handled + // algorithmically and shouldn't be in the table + if (!UTF16.isTrailSurrogate(element.m_cPoints_ + .charAt(element.m_cPoints_.length() - 1))) { + ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_ + .charAt(element.m_cPoints_.length() - 1)); + } + + // If there are any Jamos in the contraction, we should turn on + // special processing for Jamos + if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) { + t.m_collator_.m_isJamoSpecial_ = true; + } + // then we need to deal with it + // we could aready have something in table - or we might not + element.m_cPointsOffset_ += cpsize; + if (!isContraction(CE)) { + // if it wasn't contraction, we wouldn't end up here + int firstContractionOffset = addContraction(contractions, + CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE); + int newCE = processContraction(contractions, element, + CE_NOT_FOUND_); + addContraction(contractions, firstContractionOffset, + element.m_cPoints_.charAt(element.m_cPointsOffset_), + newCE); + addContraction(contractions, firstContractionOffset, + (char) 0xFFFF, CE); + CE = constructSpecialCE(CE_CONTRACTION_TAG_, + firstContractionOffset); + } else { + // we are adding to existing contraction + // there were already some elements in the table, so we need to + // add a new contraction + // Two things can happen here: either the codepoint is already + // in the table, or it is not + int position = findCP(contractions, CE, element.m_cPoints_ + .charAt(element.m_cPointsOffset_)); + if (position > 0) { + // if it is we just continue down the chain + int eCE = getCE(contractions, CE, position); + int newCE = processContraction(contractions, element, eCE); + setContraction( + contractions, + CE, + position, + element.m_cPoints_.charAt(element.m_cPointsOffset_), + newCE); + } else { + // if it isn't, we will have to create a new sequence + int newCE = processContraction(contractions, element, + CE_NOT_FOUND_); + insertContraction(contractions, 
CE, element.m_cPoints_ + .charAt(element.m_cPointsOffset_), newCE); + } + } + element.m_cPointsOffset_ -= cpsize; + t.m_mapping_.setValue(cp, CE); + } else if (!isContraction(CE)) { + // this is just a surrogate, and there is no contraction + t.m_mapping_.setValue(cp, element.m_mapCE_); + } else { + // fill out the first stage of the contraction with the surrogate + // CE + changeContraction(contractions, CE, (char) 0, element.m_mapCE_); + changeContraction(contractions, CE, (char) 0xFFFF, element.m_mapCE_); + } + return CE; + } + + /** + * this is for adding non contractions + * + * @param table + * contraction table + * @param element + * offset to the contraction table + * @param value + * collation element value + * @return new collation element + */ + private static final int changeLastCE(ContractionTable table, int element, + int value) { + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + return 0; + } + + tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, new Integer(value)); + return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF); + } + + /** + * Given a set of ranges calculated by allocWeights(), iterate through the + * weights. Sets the next weight in cegenerator.m_current_. 
+ * + * @param cegenerator + * object that contains ranges weight range array and its + * rangeCount + * @return the next weight + */ + private static int nextWeight(CEGenerator cegenerator) { + if (cegenerator.m_rangesLength_ > 0) { + // get maxByte from the .count field + int maxByte = cegenerator.m_ranges_[0].m_count_; + // get the next weight + int weight = cegenerator.m_ranges_[0].m_start_; + if (weight == cegenerator.m_ranges_[0].m_end_) { + // this range is finished, remove it and move the following + // ones up + cegenerator.m_rangesLength_--; + if (cegenerator.m_rangesLength_ > 0) { + System.arraycopy(cegenerator.m_ranges_, 1, + cegenerator.m_ranges_, 0, + cegenerator.m_rangesLength_); + cegenerator.m_ranges_[0].m_count_ = maxByte; + // keep maxByte in ranges[0] + } + } else { + // increment the weight for the next value + cegenerator.m_ranges_[0].m_start_ = incWeight(weight, + cegenerator.m_ranges_[0].m_length2_, maxByte); + } + return weight; + } + return -1; + } + + /** + * Increment the collation weight + * + * @param weight + * to increment + * @param length + * @param maxByte + * @return new incremented weight + */ + private static final int incWeight(int weight, int length, int maxByte) { + while (true) { + int b = getWeightByte(weight, length); + if (b < maxByte) { + return setWeightByte(weight, length, b + 1); + } else { + // roll over, set this byte to BYTE_FIRST_TAILORED_ and + // increment the previous one + weight = setWeightByte(weight, length, + RuleBasedCollator.BYTE_FIRST_TAILORED_); + --length; + } + } + } + + /** + * Gets the weight byte + * + * @param weight + * @param index + * @return byte + */ + private static final int getWeightByte(int weight, int index) { + return (weight >> ((4 - index) << 3)) & 0xff; + } + + /** + * Set the weight byte in table + * + * @param weight + * @param index + * @param b + * byte + */ + private static final int setWeightByte(int weight, int index, int b) { + index <<= 3; + // 0xffffffff except a 00 
"hole" for the index-th byte + int mask = 0xffffffff >>> index; + index = 32 - index; + mask |= 0xffffff00 << index; + return (weight & mask) | (b << index); + } + + /** + * Call getWeightRanges and then determine heuristically which ranges to use + * for a given number of weights between (excluding) two limits + * + * @param lowerLimit + * @param upperLimit + * @param n + * @param maxByte + * @param ranges + * @return + */ + private int allocateWeights(int lowerLimit, int upperLimit, int n, + int maxByte, WeightRange ranges[]) { + // number of usable byte values 3..maxByte + int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1; + // [0] unused, [5] to make index checks unnecessary, m_utilCountBuffer_ + // countBytes to the power of index, m_utilLongBuffer_ for unsignedness + // gcc requires explicit initialization + m_utilLongBuffer_[0] = 1; + m_utilLongBuffer_[1] = countBytes; + m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes; + m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes; + m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes; + int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte, + countBytes, ranges); + if (rangeCount <= 0) { + return 0; + } + // what is the maximum number of weights with these ranges? 
+ long maxCount = 0; + for (int i = 0; i < rangeCount; ++i) { + maxCount += (long) ranges[i].m_count_ + * m_utilLongBuffer_[4 - ranges[i].m_length_]; + } + if (maxCount < n) { + return 0; + } + // set the length2 and count2 fields + for (int i = 0; i < rangeCount; ++i) { + ranges[i].m_length2_ = ranges[i].m_length_; + ranges[i].m_count2_ = ranges[i].m_count_; + } + // try until we find suitably large ranges + while (true) { + // get the smallest number of bytes in a range + int minLength = ranges[0].m_length2_; + // sum up the number of elements that fit into ranges of each byte + // length + Arrays.fill(m_utilCountBuffer_, 0); + for (int i = 0; i < rangeCount; ++i) { + m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_; + } + // now try to allocate n elements in the available short ranges + if (n <= m_utilCountBuffer_[minLength] + + m_utilCountBuffer_[minLength + 1]) { + // trivial cases, use the first few ranges + maxCount = 0; + rangeCount = 0; + do { + maxCount += ranges[rangeCount].m_count2_; + ++rangeCount; + } while (n > maxCount); + break; + } else if (n <= ranges[0].m_count2_ * countBytes) { + // easy case, just make this one range large enough by + // lengthening it once more, possibly split it + rangeCount = 1; + // calculate how to split the range between maxLength-1 + // (count1) and maxLength (count2) + long power_1 = m_utilLongBuffer_[minLength + - ranges[0].m_length_]; + long power = power_1 * countBytes; + int count2 = (int) ((n + power - 1) / power); + int count1 = ranges[0].m_count_ - count2; + // split the range + if (count1 < 1) { + // lengthen the entire range to maxLength + lengthenRange(ranges, 0, maxByte, countBytes); + } else { + // really split the range + // create a new range with the end and initial and current + // length of the old one + rangeCount = 2; + ranges[1].m_end_ = ranges[0].m_end_; + ranges[1].m_length_ = ranges[0].m_length_; + ranges[1].m_length2_ = minLength; + // set the end of the first range according to 
count1 + int i = ranges[0].m_length_; + int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1; + // ranges[0].count and count1 may be >countBytes from + // merging adjacent ranges; b > maxByte is possible + if (b <= maxByte) { + ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i, + b); + } else { + ranges[0].m_end_ = setWeightByte(incWeight( + ranges[0].m_start_, i - 1, maxByte), i, b + - countBytes); + } + // set the bytes in the end weight at length + 1..length2 + // to maxByte + b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8) + | maxByte; // this used to be 0xffffffff + ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i) + | (b >>> (i << 3)) & (b << ((4 - minLength) << 3)); + // set the start of the second range to immediately follow + // the end of the first one + ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength, + maxByte); + // set the count values (informational) + ranges[0].m_count_ = count1; + ranges[1].m_count_ = count2; + + ranges[0].m_count2_ = (int) (count1 * power_1); + // will be *countBytes when lengthened + ranges[1].m_count2_ = (int) (count2 * power_1); + + // lengthen the second range to maxLength + lengthenRange(ranges, 1, maxByte, countBytes); + } + break; + } + // no good match, lengthen all minLength ranges and iterate + for (int i = 0; ranges[i].m_length2_ == minLength; ++i) { + lengthenRange(ranges, i, maxByte, countBytes); + } + } + + if (rangeCount > 1) { + // sort the ranges by weight values + Arrays.sort(ranges, 0, rangeCount); + } + + // set maxByte in ranges[0] for ucol_nextWeight() + ranges[0].m_count_ = maxByte; + + return rangeCount; + } + + /** + * Updates the range length + * + * @param range + * weight range array + * @param offset + * to weight range array + * @param maxByte + * @param countBytes + * @return new length + */ + private static final int lengthenRange(WeightRange range[], int offset, + int maxByte, int countBytes) { + int length = range[offset].m_length2_ + 1; + range[offset].m_start_ = 
setWeightTrail(range[offset].m_start_, length, + RuleBasedCollator.BYTE_FIRST_TAILORED_); + range[offset].m_end_ = setWeightTrail(range[offset].m_end_, length, + maxByte); + range[offset].m_count2_ *= countBytes; + range[offset].m_length2_ = length; + return length; + } + + /** + * Gets the weight + * + * @param weight + * @param length + * @param trail + * @return new weight + */ + private static final int setWeightTrail(int weight, int length, int trail) { + length = (4 - length) << 3; + return (weight & (0xffffff00 << length)) | (trail << length); + } + + /** + * take two CE weights and calculate the possible ranges of weights between + * the two limits, excluding them for weights with up to 4 bytes there are + * up to 2*4-1=7 ranges + * + * @param lowerLimit + * @param upperLimit + * @param maxByte + * @param countBytes + * @param ranges + * @return weight ranges + */ + private int getWeightRanges(int lowerLimit, int upperLimit, int maxByte, + int countBytes, WeightRange ranges[]) { + // assume that both lowerLimit & upperLimit are not 0 + // get the lengths of the limits + int lowerLength = lengthOfWeight(lowerLimit); + int upperLength = lengthOfWeight(upperLimit); + if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) { + return 0; + } + // check that neither is a prefix of the other + if (lowerLength < upperLength) { + if (lowerLimit == truncateWeight(upperLimit, lowerLength)) { + return 0; + } + } + // if the upper limit is a prefix of the lower limit then the earlier + // test lowerLimit >= upperLimit has caught it + // reset local variables + // With the limit lengths of 1..4, there are up to 7 ranges for + // allocation: + // range minimum length + // lower[4] 4 + // lower[3] 3 + // lower[2] 2 + // middle 1 + // upper[2] 2 + // upper[3] 3 + // upper[4] 4 + // We are now going to calculate up to 7 ranges. + // Some of them will typically overlap, so we will then have to merge + // and eliminate ranges. 
+ + // We have to clean cruft from previous invocations + // before doing anything. C++ already does that + for (int length = 0; length < 5; length++) { + m_utilLowerWeightRange_[length].clear(); + m_utilUpperWeightRange_[length].clear(); + } + m_utilWeightRange_.clear(); + + int weight = lowerLimit; + for (int length = lowerLength; length >= 2; --length) { + m_utilLowerWeightRange_[length].clear(); + int trail = getWeightByte(weight, length); + if (trail < maxByte) { + m_utilLowerWeightRange_[length].m_start_ = incWeightTrail( + weight, length); + m_utilLowerWeightRange_[length].m_end_ = setWeightTrail(weight, + length, maxByte); + m_utilLowerWeightRange_[length].m_length_ = length; + m_utilLowerWeightRange_[length].m_count_ = maxByte - trail; + } + weight = truncateWeight(weight, length - 1); + } + m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1); + + weight = upperLimit; + // [0] and [1] are not used - this simplifies indexing, + // m_utilUpperWeightRange_ + + for (int length = upperLength; length >= 2; length--) { + int trail = getWeightByte(weight, length); + if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) { + m_utilUpperWeightRange_[length].m_start_ = setWeightTrail( + weight, length, RuleBasedCollator.BYTE_FIRST_TAILORED_); + m_utilUpperWeightRange_[length].m_end_ = decWeightTrail(weight, + length); + m_utilUpperWeightRange_[length].m_length_ = length; + m_utilUpperWeightRange_[length].m_count_ = trail + - RuleBasedCollator.BYTE_FIRST_TAILORED_; + } + weight = truncateWeight(weight, length - 1); + } + m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1); + + // set the middle range + m_utilWeightRange_.m_length_ = 1; + if (Utility.compareUnsigned(m_utilWeightRange_.m_end_, + m_utilWeightRange_.m_start_) >= 0) { + // if (m_utilWeightRange_.m_end_ >= m_utilWeightRange_.m_start_) { + m_utilWeightRange_.m_count_ = ((m_utilWeightRange_.m_end_ - m_utilWeightRange_.m_start_) >>> 24) + 1; + } else { + // eliminate overlaps + // remove the middle range + 
m_utilWeightRange_.m_count_ = 0; + // reduce or remove the lower ranges that go beyond upperLimit + for (int length = 4; length >= 2; --length) { + if (m_utilLowerWeightRange_[length].m_count_ > 0 + && m_utilUpperWeightRange_[length].m_count_ > 0) { + int start = m_utilUpperWeightRange_[length].m_start_; + int end = m_utilLowerWeightRange_[length].m_end_; + if (end >= start + || incWeight(end, length, maxByte) == start) { + // lower and upper ranges collide or are directly + // adjacent: merge these two and remove all shorter + // ranges + start = m_utilLowerWeightRange_[length].m_start_; + end = m_utilLowerWeightRange_[length].m_end_ = m_utilUpperWeightRange_[length].m_end_; + // merging directly adjacent ranges needs to subtract + // the 0/1 gaps in between; + // it may result in a range with count>countBytes + m_utilLowerWeightRange_[length].m_count_ = getWeightByte( + end, length) + - getWeightByte(start, length) + + 1 + + countBytes + * (getWeightByte(end, length - 1) - getWeightByte( + start, length - 1)); + m_utilUpperWeightRange_[length].m_count_ = 0; + while (--length >= 2) { + m_utilLowerWeightRange_[length].m_count_ = m_utilUpperWeightRange_[length].m_count_ = 0; + } + break; + } + } + } + } + + // copy the ranges, shortest first, into the result array + int rangeCount = 0; + if (m_utilWeightRange_.m_count_ > 0) { + ranges[0] = new WeightRange(m_utilWeightRange_); + rangeCount = 1; + } + for (int length = 2; length <= 4; ++length) { + // copy upper first so that later the middle range is more likely + // the first one to use + if (m_utilUpperWeightRange_[length].m_count_ > 0) { + ranges[rangeCount] = new WeightRange( + m_utilUpperWeightRange_[length]); + ++rangeCount; + } + if (m_utilLowerWeightRange_[length].m_count_ > 0) { + ranges[rangeCount] = new WeightRange( + m_utilLowerWeightRange_[length]); + ++rangeCount; + } + } + return rangeCount; + } + + /** + * Truncates the weight with length + * + * @param weight + * @param length + * @return truncated 
weight + */ + private static final int truncateWeight(int weight, int length) { + return weight & (0xffffffff << ((4 - length) << 3)); + } + + /** + * Length of the weight + * + * @param weight + * @return length of the weight + */ + private static final int lengthOfWeight(int weight) { + if ((weight & 0xffffff) == 0) { + return 1; + } else if ((weight & 0xffff) == 0) { + return 2; + } else if ((weight & 0xff) == 0) { + return 3; + } + return 4; + } + + /** + * Increment the weight trail + * + * @param weight + * @param length + * @return new weight + */ + private static final int incWeightTrail(int weight, int length) { + return weight + (1 << ((4 - length) << 3)); + } + + /** + * Decrement the weight trail + * + * @param weight + * @param length + * @return new weight + */ + private static int decWeightTrail(int weight, int length) { + return weight - (1 << ((4 - length) << 3)); + } + + /** + * Gets the codepoint + * + * @param tbl + * contraction table + * @param codePoint + * code point to look for + * @return the offset to the code point + */ + private static int findCP(BasicContractionTable tbl, char codePoint) { + int position = 0; + while (codePoint > tbl.m_codePoints_.charAt(position)) { + position++; + if (position > tbl.m_codePoints_.length()) { + return -1; + } + } + if (codePoint == tbl.m_codePoints_.charAt(position)) { + return position; + } else { + return -1; + } + } + + /** + * Finds a contraction ce + * + * @param table + * @param element + * @param ch + * @return ce + */ + private static int findCE(ContractionTable table, int element, char ch) { + if (table == null) { + return CE_NOT_FOUND_; + } + BasicContractionTable tbl = getBasicContractionTable(table, element); + if (tbl == null) { + return CE_NOT_FOUND_; + } + int position = findCP(tbl, ch); + if (position > tbl.m_CEs_.size() || position < 0) { + return CE_NOT_FOUND_; + } + return tbl.m_CEs_.get(position).intValue(); + } + + /** + * Checks if the string is tailored in the contraction + * + 
* @param table + * contraction table + * @param element + * @param array + * character array to check + * @param offset + * array offset + * @return true if it is tailored + */ + private static boolean isTailored(ContractionTable table, int element, + char array[], int offset) { + while (array[offset] != 0) { + element = findCE(table, element, array[offset]); + if (element == CE_NOT_FOUND_) { + return false; + } + if (!isContractionTableElement(element)) { + return true; + } + offset++; + } + if (getCE(table, element, 0) != CE_NOT_FOUND_) { + return true; + } else { + return false; + } + } + + /** + * Assemble RuleBasedCollator + * + * @param t + * build table + * @param collator + * to update + */ + private void assembleTable(BuildTable t, RuleBasedCollator collator) { + IntTrieBuilder mapping = t.m_mapping_; + Vector expansions = t.m_expansions_; + ContractionTable contractions = t.m_contractions_; + MaxExpansionTable maxexpansion = t.m_maxExpansions_; + + // contraction offset has to be in since we are building on the + // UCA contractions + // int beforeContractions = (HEADER_SIZE_ + // + paddedsize(expansions.size() << 2)) >>> 1; + collator.m_contractionOffset_ = 0; + int contractionsSize = constructTable(contractions); + + // the following operation depends on the trie data. 
Therefore, we have + // to do it before the trie is compacted + // sets jamo expansions + getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_, + collator.m_isJamoSpecial_); + + // TODO: LATIN1 array is now in the utrie - it should be removed from + // the calculation + setAttributes(collator, t.m_options_); + // copy expansions + int size = expansions.size(); + collator.m_expansion_ = new int[size]; + for (int i = 0; i < size; i++) { + collator.m_expansion_[i] = expansions.get(i).intValue(); + } + // contractions block + if (contractionsSize != 0) { + // copy contraction index + collator.m_contractionIndex_ = new char[contractionsSize]; + contractions.m_codePoints_.getChars(0, contractionsSize, + collator.m_contractionIndex_, 0); + // copy contraction collation elements + collator.m_contractionCE_ = new int[contractionsSize]; + for (int i = 0; i < contractionsSize; i++) { + collator.m_contractionCE_[i] = contractions.m_CEs_.get(i).intValue(); + } + } + // copy mapping table + collator.m_trie_ = mapping.serialize(t, + RuleBasedCollator.DataManipulate.getInstance()); + // copy max expansion table + // not copying the first element which is a dummy + // to be in synch with icu4c's builder, we continue to use the + // expansion offset + // omitting expansion offset in builder + collator.m_expansionOffset_ = 0; + size = maxexpansion.m_endExpansionCE_.size(); + collator.m_expansionEndCE_ = new int[size - 1]; + for (int i = 1; i < size; i++) { + collator.m_expansionEndCE_[i - 1] = maxexpansion.m_endExpansionCE_ + .get(i).intValue(); + } + collator.m_expansionEndCEMaxSize_ = new byte[size - 1]; + for (int i = 1; i < size; i++) { + collator.m_expansionEndCEMaxSize_[i - 1] = maxexpansion.m_expansionCESize_ + .get(i).byteValue(); + } + // Unsafe chars table. Finish it off, then copy it. + unsafeCPAddCCNZ(t); + // Or in unsafebits from UCA, making a combined table. 
+ for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) { + t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i]; + } + collator.m_unsafe_ = t.m_unsafeCP_; + + // Finish building Contraction Ending chars hash table and then copy it + // out. + // Or in unsafebits from UCA, making a combined table + for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) { + t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i]; + } + collator.m_contractionEnd_ = t.m_contrEndCP_; + } + + /** + * Sets this collator to use the all options and tables in UCA. + * + * @param collator + * which attribute is to be set + * @param option + * to set with + */ + private static final void setAttributes(RuleBasedCollator collator, + CollationRuleParser.OptionSet option) { + collator.latinOneFailed_ = true; + collator.m_caseFirst_ = option.m_caseFirst_; + collator.setDecomposition(option.m_decomposition_); + collator + .setAlternateHandlingShifted(option.m_isAlternateHandlingShifted_); + collator.setCaseLevel(option.m_isCaseLevel_); + collator.setFrenchCollation(option.m_isFrenchCollation_); + collator.m_isHiragana4_ = option.m_isHiragana4_; + collator.setStrength(option.m_strength_); + collator.m_variableTopValue_ = option.m_variableTopValue_; + collator.latinOneFailed_ = false; + } + + /** + * Constructing the contraction table + * + * @param table + * contraction table + * @return + */ + private int constructTable(ContractionTable table) { + // See how much memory we need + int tsize = table.m_elements_.size(); + if (tsize == 0) { + return 0; + } + table.m_offsets_.clear(); + int position = 0; + for (int i = 0; i < tsize; i++) { + table.m_offsets_.add(new Integer(position)); + position += table.m_elements_.get(i).m_CEs_ + .size(); + } + table.m_CEs_.clear(); + table.m_codePoints_.delete(0, table.m_codePoints_.length()); + // Now stuff the things in + StringBuilder cpPointer = table.m_codePoints_; + Vector CEPointer = table.m_CEs_; + for (int i = 0; i < tsize; i++) { + BasicContractionTable bct 
= table.m_elements_.get(i); + int size = bct.m_CEs_.size(); + char ccMax = 0; + char ccMin = 255; + int offset = CEPointer.size(); + CEPointer.add(bct.m_CEs_.get(0)); + for (int j = 1; j < size; j++) { + char ch = bct.m_codePoints_.charAt(j); + char cc = (char) (UCharacter.getCombiningClass(ch) & 0xFF); + if (cc > ccMax) { + ccMax = cc; + } + if (cc < ccMin) { + ccMin = cc; + } + cpPointer.append(ch); + CEPointer.add(bct.m_CEs_.get(j)); + } + cpPointer.insert(offset, + (char) (((ccMin == ccMax) ? 1 : 0 << 8) | ccMax)); + for (int j = 0; j < size; j++) { + if (isContractionTableElement(CEPointer.get(offset + j).intValue())) { + int ce = CEPointer.get(offset + j).intValue(); + CEPointer.set(offset + j, + new Integer(constructSpecialCE(getCETag(ce), + table.m_offsets_.get(getContractionOffset(ce)) + .intValue()))); + } + } + } + + for (int i = 0; i <= 0x10FFFF; i++) { + int CE = table.m_mapping_.getValue(i); + if (isContractionTableElement(CE)) { + CE = constructSpecialCE(getCETag(CE), + table.m_offsets_.get(getContractionOffset(CE)).intValue()); + table.m_mapping_.setValue(i, CE); + } + } + return position; + } + + /** + * Get contraction offset + * + * @param ce + * collation element + * @return contraction offset + */ + private static final int getContractionOffset(int ce) { + return ce & 0xFFFFFF; + } + + /** + * Gets the maximum Jamo expansion + * + * @param mapping + * trie table + * @param maxexpansion + * maximum expansion table + * @param maxjamoexpansion + * maximum jamo expansion table + * @param jamospecial + * is jamo special? 
+ */ + private static void getMaxExpansionJamo(IntTrieBuilder mapping, + MaxExpansionTable maxexpansion, + MaxJamoExpansionTable maxjamoexpansion, boolean jamospecial) { + int VBASE = 0x1161; + int TBASE = 0x11A8; + int VCOUNT = 21; + int TCOUNT = 28; + int v = VBASE + VCOUNT - 1; + int t = TBASE + TCOUNT - 1; + + while (v >= VBASE) { + int ce = mapping.getValue(v); + if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) { + setMaxExpansion(ce, (byte) 2, maxexpansion); + } + v--; + } + + while (t >= TBASE) { + int ce = mapping.getValue(t); + if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) { + setMaxExpansion(ce, (byte) 3, maxexpansion); + } + t--; + } + // According to the docs, 99% of the time, the Jamo will not be special + if (jamospecial) { + // gets the max expansion in all unicode characters + int count = maxjamoexpansion.m_endExpansionCE_.size(); + byte maxTSize = (byte) (maxjamoexpansion.m_maxLSize_ + + maxjamoexpansion.m_maxVSize_ + maxjamoexpansion.m_maxTSize_); + byte maxVSize = (byte) (maxjamoexpansion.m_maxLSize_ + maxjamoexpansion.m_maxVSize_); + + while (count > 0) { + count--; + if ((maxjamoexpansion.m_isV_.get(count)) + .booleanValue() == true) { + setMaxExpansion( + (maxjamoexpansion.m_endExpansionCE_ + .get(count)).intValue(), maxVSize, + maxexpansion); + } else { + setMaxExpansion( + (maxjamoexpansion.m_endExpansionCE_ + .get(count)).intValue(), maxTSize, + maxexpansion); + } + } + } + } + + /** + * To the UnsafeCP hash table, add all chars with combining class != 0 + * + * @param t + * build table + */ + private final void unsafeCPAddCCNZ(BuildTable t) { + boolean buildCMTable = (buildCMTabFlag & (t.cmLookup == null)); + char[] cm = null; // combining mark array + int[] index = new int[256]; + int count = 0; + + if (buildCMTable) { + cm = new char[0x10000]; + } + for (char c = 0; c < 0xffff; c++) { + int fcd = m_nfcImpl_.getFCD16FromSingleLead(c); // TODO: review for handling 
supplementary characters + if (fcd >= 0x100 || // if the leading combining class(c) > 0 || + (UTF16.isLeadSurrogate(c) && fcd != 0)) { + // c is a leading surrogate with some FCD data + unsafeCPSet(t.m_unsafeCP_, c); + if (buildCMTable && (fcd != 0)) { + int cc = (fcd & 0xff); + int pos = (cc << 8) + index[cc]; + cm[pos] = c; + index[cc]++; + count++; + } + } + } + + if (t.m_prefixLookup_ != null) { + Enumeration els = t.m_prefixLookup_.elements(); + while (els.hasMoreElements()) { + Elements e = els.nextElement(); + // codepoints here are in the NFD form. We need to add the + // first code point of the NFC form to unsafe, because + // strcoll needs to backup over them. + // weiv: This is wrong! See the comment above. + // String decomp = Normalizer.decompose(e.m_cPoints_, true); + // unsafeCPSet(t.m_unsafeCP_, decomp.charAt(0)); + // it should be: + String comp = Normalizer.compose(e.m_cPoints_, false); + unsafeCPSet(t.m_unsafeCP_, comp.charAt(0)); + } + } + + if (buildCMTable) { + t.cmLookup = new CombinClassTable(); + t.cmLookup.generate(cm, count, index); + } + } + + /** + * Create closure + * + * @param t + * build table + * @param collator + * RuleBasedCollator + * @param colEl + * collation element iterator + * @param start + * @param limit + * @param type + * character type + * @return + */ + private boolean enumCategoryRangeClosureCategory(BuildTable t, + RuleBasedCollator collator, CollationElementIterator colEl, + int start, int limit, int type) { + if (type != UCharacterCategory.UNASSIGNED + && type != UCharacterCategory.PRIVATE_USE) { + // if the range is assigned - we might ommit more categories later + + for (int u32 = start; u32 < limit; u32++) { + String decomp = m_nfcImpl_.getDecomposition(u32); + if (decomp != null) { + String comp = UCharacter.toString(u32); + if (!collator.equals(comp, decomp)) { + m_utilElement_.m_cPoints_ = decomp; + m_utilElement_.m_prefix_ = 0; + Elements prefix = t.m_prefixLookup_.get(m_utilElement_); + if (prefix == null) 
{ + m_utilElement_.m_cPoints_ = comp; + m_utilElement_.m_prefix_ = 0; + m_utilElement_.m_prefixChars_ = null; + colEl.setText(decomp); + int ce = colEl.next(); + m_utilElement_.m_CELength_ = 0; + while (ce != CollationElementIterator.NULLORDER) { + m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = ce; + ce = colEl.next(); + } + } else { + m_utilElement_.m_cPoints_ = comp; + m_utilElement_.m_prefix_ = 0; + m_utilElement_.m_prefixChars_ = null; + m_utilElement_.m_CELength_ = 1; + m_utilElement_.m_CEs_[0] = prefix.m_mapCE_; + // This character uses a prefix. We have to add it + // to the unsafe table, as it decomposed form is + // already in. In Japanese, this happens for \u309e + // & \u30fe + // Since unsafeCPSet is static in ucol_elm, we are + // going to wrap it up in the unsafeCPAddCCNZ + // function + } + addAnElement(t, m_utilElement_); + } + } + } + } + return true; + } + + /** + * Determine if a character is a Jamo + * + * @param ch + * character to test + * @return true if ch is a Jamo, false otherwise + */ + private static final boolean isJamo(char ch) { + return (ch >= 0x1100 && ch <= 0x1112) || (ch >= 0x1175 && ch <= 0x1161) + || (ch >= 0x11A8 && ch <= 0x11C2); + } + + /** + * Produces canonical closure + */ + private void canonicalClosure(BuildTable t) { + BuildTable temp = new BuildTable(t); + assembleTable(temp, temp.m_collator_); + // produce canonical closure + CollationElementIterator coleiter = temp.m_collator_ + .getCollationElementIterator(""); + RangeValueIterator typeiter = UCharacter.getTypeIterator(); + RangeValueIterator.Element element = new RangeValueIterator.Element(); + while (typeiter.next(element)) { + enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter, + element.start, element.limit, element.value); + } + + t.cmLookup = temp.cmLookup; + temp.cmLookup = null; + + for (int i = 0; i < m_parser_.m_resultLength_; i++) { + char baseChar, firstCM; + // now we need to generate the CEs + // We stuff the initial value in the 
/**
 * Adds closure elements for a tailored sequence that contains a base
 * character followed by a combining mark. For each candidate mark taken
 * from the combining-class lookup table that composes with
 * <code>baseChar</code> into a single char, a variant of the current
 * <code>m_utilElement_</code> code points is built (with
 * <code>cMark</code> replaced by the candidate and <code>cMark</code>
 * appended at the end), its CEs are collected, and the element is added
 * to the build table.
 *
 * @param t
 *            build table; nothing is done if its cmLookup is null
 * @param m_collator
 *            not referenced in this method
 * @param colEl
 *            collation element iterator used to fetch the CEs of the
 *            decomposed string
 * @param baseChar
 *            base character of the tailored sequence
 * @param cMark
 *            first combining mark following the base character
 */
private void addTailCanonicalClosures(BuildTable t,
        RuleBasedCollator m_collator, CollationElementIterator colEl,
        char baseChar, char cMark) {
    if (t.cmLookup == null) {
        return;
    }
    CombinClassTable cmLookup = t.cmLookup;
    int[] index = cmLookup.index;
    // low byte of the FCD value is used as the combining class of cMark
    int cClass = m_nfcImpl_.getFCD16FromSingleLead(cMark) & 0xff; // TODO: review for handling supplementary characters
    int maxIndex = 0;
    // single-char compositions found so far, and the combining class of
    // the mark that produced each of them
    char[] precompCh = new char[256];
    int[] precompClass = new int[256];
    int precompLen = 0;
    // scratch element, reused for every candidate mark
    Elements element = new Elements();

    // NOTE(review): presumably index[cc] is the end position in
    // cmLookup.cPoints of the marks with combining class <= cc, so only
    // marks with a lower class than cMark are scanned - confirm against
    // CombinClassTable.
    if (cClass > 0) {
        maxIndex = index[cClass - 1];
    }
    for (int i = 0; i < maxIndex; i++) {
        StringBuilder decompBuf = new StringBuilder();
        decompBuf.append(baseChar).append(cmLookup.cPoints[i]);
        String comp = Normalizer.compose(decompBuf.toString(), false);
        // only marks that fully compose with the base char are of interest
        if (comp.length() == 1) {
            precompCh[precompLen] = comp.charAt(0);
            precompClass[precompLen] = (m_nfcImpl_.getFCD16FromSingleLead(cmLookup.cPoints[i]) & 0xff); // TODO: review for handling supplementary characters
            precompLen++;
            // copy the current element's code points, substituting the
            // candidate mark for every occurrence of cMark
            StringBuilder decomp = new StringBuilder();
            for (int j = 0; j < m_utilElement_.m_cPoints_.length(); j++) {
                if (m_utilElement_.m_cPoints_.charAt(j) == cMark) {
                    decomp.append(cmLookup.cPoints[i]);
                } else {
                    decomp.append(m_utilElement_.m_cPoints_.charAt(j));
                }
            }
            // comp = composed form + cMark, decomp = substituted form + cMark
            comp = Normalizer.compose(decomp.toString(), false);
            StringBuilder buf = new StringBuilder(comp);
            buf.append(cMark);
            decomp.append(cMark);
            comp = buf.toString();

            // the prefix lookup is keyed on the decomposed code points;
            // afterwards the element is switched to the composed form
            element.m_cPoints_ = decomp.toString();
            element.m_CELength_ = 0;
            element.m_prefix_ = 0;
            Elements prefix = t.m_prefixLookup_.get(element);
            element.m_cPoints_ = comp;
            element.m_uchars_ = comp;

            if (prefix == null) {
                // no prefix entry: take the CEs the collator produces for
                // the decomposed string
                element.m_prefix_ = 0;
                element.m_prefixChars_ = null;
                colEl.setText(decomp.toString());
                int ce = colEl.next();
                element.m_CELength_ = 0;
                while (ce != CollationElementIterator.NULLORDER) {
                    element.m_CEs_[element.m_CELength_++] = ce;
                    ce = colEl.next();
                }
            } else {
                // prefix entry exists: reuse its mapped CE as the only CE
                element.m_cPoints_ = comp;
                element.m_prefix_ = 0;
                element.m_prefixChars_ = null;
                element.m_CELength_ = 1;
                element.m_CEs_[0] = prefix.m_mapCE_;
            }
            setMapCE(t, element);
            finalizeAddition(t, element);

            if (comp.length() > 2) {
                // This is a fix for tailoring contractions with accented
                // character at the end of contraction string.
                addFCD4AccentedContractions(t, colEl, comp, element);
            }
            if (precompLen > 1) {
                // the helper returns the updated number of precomposed
                // entries
                precompLen = addMultiCMontractions(t, colEl, element,
                        precompCh, precompClass, precompLen, cMark, i,
                        decomp.toString());
            }
        }
    }

}
t.m_maxExpansions_); + } + } + + private int addMultiCMontractions(BuildTable t, + CollationElementIterator colEl, Elements element, char[] precompCh, + int[] precompClass, int maxComp, char cMark, int cmPos, + String decomp) { + + CombinClassTable cmLookup = t.cmLookup; + char[] combiningMarks = { cMark }; + int cMarkClass = UCharacter.getCombiningClass(cMark) & 0xFF; + String comMark = new String(combiningMarks); + int noOfPrecomposedChs = maxComp; + + for (int j = 0; j < maxComp; j++) { + int count = 0; + StringBuilder temp; + + do { + String newDecomp, comp; + + if (count == 0) { // Decompose the saved precomposed char. + newDecomp = Normalizer.decompose( + new String(precompCh, j, 1), false); + temp = new StringBuilder(newDecomp); + temp.append(cmLookup.cPoints[cmPos]); + newDecomp = temp.toString(); + } else { + temp = new StringBuilder(decomp); + temp.append(precompCh[j]); + newDecomp = temp.toString(); + } + comp = Normalizer.compose(newDecomp, false); + if (comp.length() == 1) { + temp.append(cMark); + element.m_cPoints_ = temp.toString(); + element.m_CELength_ = 0; + element.m_prefix_ = 0; + Elements prefix = t.m_prefixLookup_.get(element); + element.m_cPoints_ = comp + comMark; + if (prefix == null) { + element.m_prefix_ = 0; + element.m_prefixChars_ = null; + colEl.setText(temp.toString()); + int ce = colEl.next(); + element.m_CELength_ = 0; + while (ce != CollationElementIterator.NULLORDER) { + element.m_CEs_[element.m_CELength_++] = ce; + ce = colEl.next(); + } + } else { + element.m_cPoints_ = comp; + element.m_prefix_ = 0; + element.m_prefixChars_ = null; + element.m_CELength_ = 1; + element.m_CEs_[0] = prefix.m_mapCE_; + } + setMapCE(t, element); + finalizeAddition(t, element); + precompCh[noOfPrecomposedChs] = comp.charAt(0); + precompClass[noOfPrecomposedChs] = cMarkClass; + noOfPrecomposedChs++; + } + } while (++count < 2 && (precompClass[j] == cMarkClass)); + } + return noOfPrecomposedChs; + } + + private void 
addFCD4AccentedContractions(BuildTable t, + CollationElementIterator colEl, String data, Elements element) { + String decomp = Normalizer.decompose(data, false); + String comp = Normalizer.compose(data, false); + + element.m_cPoints_ = decomp; + element.m_CELength_ = 0; + element.m_prefix_ = 0; + Elements prefix = t.m_prefixLookup_.get(element); + if (prefix == null) { + element.m_cPoints_ = comp; + element.m_prefix_ = 0; + element.m_prefixChars_ = null; + element.m_CELength_ = 0; + colEl.setText(decomp); + int ce = colEl.next(); + element.m_CELength_ = 0; + while (ce != CollationElementIterator.NULLORDER) { + element.m_CEs_[element.m_CELength_++] = ce; + ce = colEl.next(); + } + addAnElement(t, element); + } + } + + private void processUCACompleteIgnorables(BuildTable t) { + TrieIterator trieiterator = new TrieIterator( + RuleBasedCollator.UCA_.m_trie_); + RangeValueIterator.Element element = new RangeValueIterator.Element(); + while (trieiterator.next(element)) { + int start = element.start; + int limit = element.limit; + if (element.value == 0) { + while (start < limit) { + int CE = t.m_mapping_.getValue(start); + if (CE == CE_NOT_FOUND_) { + m_utilElement_.m_prefix_ = 0; + m_utilElement_.m_uchars_ = UCharacter.toString(start); + m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; + m_utilElement_.m_cPointsOffset_ = 0; + m_utilElement_.m_CELength_ = 1; + m_utilElement_.m_CEs_[0] = 0; + addAnElement(t, m_utilElement_); + } + start++; + } + } + } + } +} diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java b/main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java new file mode 100644 index 00000000000..f2351628897 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java @@ -0,0 +1,2110 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.text.ParseException; +import java.util.Arrays; +import java.util.Hashtable; + +import com.ibm.icu.impl.UCharacterProperty; +import com.ibm.icu.lang.UCharacter; + +/** +* Class for parsing collation rules, produces a list of tokens that will be +* turned into collation elements +* @author Syn Wee Quek +* @since release 2.2, June 7 2002 +*/ +final class CollationRuleParser +{ + // public data members --------------------------------------------------- + + // package private constructors ------------------------------------------ + + /** + *

    RuleBasedCollator constructor that takes the rules. + * Please see RuleBasedCollator class description for more details on the + * collation rule syntax.

    + * @see java.util.Locale + * @param rules the collation rules to build the collation table from. + * @exception ParseException thrown when argument rules have an invalid + * syntax. + */ + CollationRuleParser(String rules) throws ParseException + { + extractSetsFromRules(rules); + m_source_ = new StringBuilder(Normalizer.decompose(rules, false).trim()); + m_rules_ = m_source_.toString(); + m_current_ = 0; + m_extraCurrent_ = m_source_.length(); + m_variableTop_ = null; + m_parsedToken_ = new ParsedToken(); + m_hashTable_ = new Hashtable(); + m_options_ = new OptionSet(RuleBasedCollator.UCA_); + m_listHeader_ = new TokenListHeader[512]; + m_resultLength_ = 0; + m_prevStrength_ = TOKEN_UNSET_; + // call assembleTokenList() manually, so that we can + // init a parser and manually parse tokens + //assembleTokenList(); + } + + // package private inner classes ----------------------------------------- + + /** + * Collation options set + */ + static class OptionSet + { + // package private constructor --------------------------------------- + + /** + * Initializes the option set with the argument collators + * @param collator option to use + */ + OptionSet(RuleBasedCollator collator) + { + m_variableTopValue_ = collator.m_variableTopValue_; + m_isFrenchCollation_ = collator.isFrenchCollation(); + m_isAlternateHandlingShifted_ + = collator.isAlternateHandlingShifted(); + m_caseFirst_ = collator.m_caseFirst_; + m_isCaseLevel_ = collator.isCaseLevel(); + m_decomposition_ = collator.getDecomposition(); + m_strength_ = collator.getStrength(); + m_isHiragana4_ = collator.m_isHiragana4_; + } + + // package private data members -------------------------------------- + + int m_variableTopValue_; + boolean m_isFrenchCollation_; + /** + * Attribute for handling variable elements + */ + boolean m_isAlternateHandlingShifted_; + /** + * who goes first, lower case or uppercase + */ + int m_caseFirst_; + /** + * do we have an extra case level + */ + boolean m_isCaseLevel_; + /** + * 
attribute for normalization + */ + int m_decomposition_; + /** + * attribute for strength + */ + int m_strength_; + /** + * attribute for special Hiragana + */ + boolean m_isHiragana4_; + } + + /** + * List of tokens used by the collation rules + */ + static class TokenListHeader + { + Token m_first_; + Token m_last_; + Token m_reset_; + boolean m_indirect_; + int m_baseCE_; + int m_baseContCE_; + int m_nextCE_; + int m_nextContCE_; + int m_previousCE_; + int m_previousContCE_; + int m_pos_[] = new int[Collator.IDENTICAL + 1]; + int m_gapsLo_[] = new int[3 * (Collator.TERTIARY + 1)]; + int m_gapsHi_[] = new int[3 * (Collator.TERTIARY + 1)]; + int m_numStr_[] = new int[3 * (Collator.TERTIARY + 1)]; + Token m_fStrToken_[] = new Token[Collator.TERTIARY + 1]; + Token m_lStrToken_[] = new Token[Collator.TERTIARY + 1]; + } + + /** + * Token wrapper for collation rules + */ + static class Token + { + // package private data members --------------------------------------- + + int m_CE_[]; + int m_CELength_; + int m_expCE_[]; + int m_expCELength_; + int m_source_; + int m_expansion_; + int m_prefix_; + int m_strength_; + int m_toInsert_; + int m_polarity_; // 1 for <, <<, <<<, , ; and 0 for >, >>, >>> + TokenListHeader m_listHeader_; + Token m_previous_; + Token m_next_; + StringBuilder m_rules_; + char m_flags_; + + // package private constructors --------------------------------------- + + Token() + { + m_CE_ = new int[128]; + m_expCE_ = new int[128]; + // TODO: this should also handle reverse + m_polarity_ = TOKEN_POLARITY_POSITIVE_; + m_next_ = null; + m_previous_ = null; + m_CELength_ = 0; + m_expCELength_ = 0; + } + + // package private methods -------------------------------------------- + + /** + * Hashcode calculation for token + * @return the hashcode + */ + public int hashCode() + { + int result = 0; + int len = (m_source_ & 0xFF000000) >>> 24; + int inc = ((len - 32) / 32) + 1; + + int start = m_source_ & 0x00FFFFFF; + int limit = start + len; + + while (start < 
limit) { + result = (result * 37) + m_rules_.charAt(start); + start += inc; + } + return result; + } + + /** + * Equals calculation. Two tokens are equal when their source ranges + * have the same length and hold the same characters in their + * respective rule buffers. A token with an unset (zero) source is + * never equal to another token. + * @param target object to compare + * @return true if target is the same as this object + */ + public boolean equals(Object target) + { + if (target == this) { + return true; + } + if (target instanceof Token) { + Token t = (Token)target; + int sstart = m_source_ & 0x00FFFFFF; + int tstart = t.m_source_ & 0x00FFFFFF; + // length is the top byte; use the unsigned shift, as hashCode() + // does, so that lengths of 128 or more do not become negative + int slimit = (m_source_ & 0xFF000000) >>> 24; + // the target's length must come from the target's own source + int tlimit = (t.m_source_ & 0xFF000000) >>> 24; + + int end = sstart + slimit - 1; + + if (m_source_ == 0 || t.m_source_ == 0) { + return false; + } + if (slimit != tlimit) { + return false; + } + if (m_source_ == t.m_source_) { + return true; + } + + while (sstart < end + && m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) + { + ++ sstart; + ++ tstart; + } + if (m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) { + return true; + } + } + return false; + } + } + + // package private data member ------------------------------------------- + + /** + * Indicator that the token is a reset, i.e. & in the rules + */ + static final int TOKEN_RESET_ = 0xDEADBEEF; + + /** + * Size of the number of tokens + */ + int m_resultLength_; + /** + * List of parsed tokens + */ + TokenListHeader m_listHeader_[]; + /** + * Variable top token + */ + Token m_variableTop_; + /** + * Collation options + */ + OptionSet m_options_; + /** + * Normalized collation rules with some extra characters + */ + StringBuilder m_source_; + /** + * Hash table to keep all tokens + */ + Hashtable<Token, Token> m_hashTable_; + + // package private method ------------------------------------------------ + + void setDefaultOptionsInCollator(RuleBasedCollator collator) + { + collator.m_defaultStrength_ = m_options_.m_strength_; + collator.m_defaultDecomposition_ = m_options_.m_decomposition_; + collator.m_defaultIsFrenchCollation_ = m_options_.m_isFrenchCollation_; + collator.m_defaultIsAlternateHandlingShifted_ + = 
m_options_.m_isAlternateHandlingShifted_; + collator.m_defaultIsCaseLevel_ = m_options_.m_isCaseLevel_; + collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_; + collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_; + collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_; + } + + // private inner classes ------------------------------------------------- + + /** + * This is a token that has been parsed but not yet processed. Used to + * reduce the number of arguments in the parser + */ + private static class ParsedToken + { + // private constructor ---------------------------------------------- + + /** + * Empty constructor + */ + ParsedToken() + { + m_charsLen_ = 0; + m_charsOffset_ = 0; + m_extensionLen_ = 0; + m_extensionOffset_ = 0; + m_prefixLen_ = 0; + m_prefixOffset_ = 0; + m_flags_ = 0; + m_strength_ = TOKEN_UNSET_; + } + + // private data members --------------------------------------------- + + int m_strength_; + int m_charsOffset_; + int m_charsLen_; + int m_extensionOffset_; + int m_extensionLen_; + int m_prefixOffset_; + int m_prefixLen_; + char m_flags_; + char m_indirectIndex_; + } + + /** + * Boundary wrappers + */ + private static class IndirectBoundaries + { + // package private constructor --------------------------------------- + + IndirectBoundaries(int startce[], int limitce[]) + { + // Set values for the top - TODO: once we have values for all the + // indirects, we are going to initalize here. 
+ m_startCE_ = startce[0]; + m_startContCE_ = startce[1]; + if (limitce != null) { + m_limitCE_ = limitce[0]; + m_limitContCE_ = limitce[1]; + } + else { + m_limitCE_ = 0; + m_limitContCE_ = 0; + } + } + + // package private data members -------------------------------------- + + int m_startCE_; + int m_startContCE_; + int m_limitCE_; + int m_limitContCE_; + } + + /** + * Collation option rule tag + */ + private static class TokenOption + { + // package private constructor --------------------------------------- + + TokenOption(String name, int attribute, String suboptions[], + int suboptionattributevalue[]) + { + m_name_ = name; + m_attribute_ = attribute; + m_subOptions_ = suboptions; + m_subOptionAttributeValues_ = suboptionattributevalue; + } + + // package private data member --------------------------------------- + + private String m_name_; + private int m_attribute_; + private String m_subOptions_[]; + private int m_subOptionAttributeValues_[]; + } + + // private variables ----------------------------------------------------- + + /** + * Current parsed token + */ + private ParsedToken m_parsedToken_; + /** + * Collation rule + */ + private String m_rules_; + private int m_current_; + /** + * End of the option while reading. + * Need it for UnicodeSet reading support. + */ + private int m_optionEnd_; + /* + * Current offset in m_source + */ + //private int m_sourceLimit_; + /** + * Offset to m_source_ ofr the extra expansion characters + */ + private int m_extraCurrent_; + + /** + * UnicodeSet that contains code points to be copied from the UCA + */ + UnicodeSet m_copySet_; + + /** + * UnicodeSet that contains code points for which we want to remove + * UCA contractions. It implies copying of these code points from + * the UCA. + */ + UnicodeSet m_removeSet_; + /** + * Stores the previous token's strength when making a list of same level + * differences. 
+ */ + private int m_prevStrength_; + + /* + * This is space for the extra strings that need to be unquoted during the + * parsing of the rules + */ + //private static final int TOKEN_EXTRA_RULE_SPACE_SIZE_ = 2048; + /** + * Indicator that the token is not set yet + */ + private static final int TOKEN_UNSET_ = 0xFFFFFFFF; + /* + * Indicator that the rule is in the > polarity, ie everything on the + * right of the rule is less than + */ + //private static final int TOKEN_POLARITY_NEGATIVE_ = 0; + /** + * Indicator that the rule is in the < polarity, ie everything on the + * right of the rule is greater than + */ + private static final int TOKEN_POLARITY_POSITIVE_ = 1; + /** + * Flag mask to determine if top is set + */ + private static final int TOKEN_TOP_MASK_ = 0x04; + /** + * Flag mask to determine if variable top is set + */ + private static final int TOKEN_VARIABLE_TOP_MASK_ = 0x08; + /** + * Flag mask to determine if a before attribute is set + */ + private static final int TOKEN_BEFORE_ = 0x03; + /** + * For use in parsing token options + */ + private static final int TOKEN_SUCCESS_MASK_ = 0x10; + + /** + * These values are used for finding CE values for indirect positioning. + * Indirect positioning is a mechanism for allowing resets on symbolic + * values. It only works for resets and you cannot tailor indirect names. + * An indirect name can define either an anchor point or a range. An anchor + * point behaves in exactly the same way as a code point in reset would, + * except that it cannot be tailored. A range (we currently only know for + * the [top] range will explicitly set the upper bound for generated CEs, + * thus allowing for better control over how many CEs can be squeezed + * between in the range without performance penalty. In that respect, we use + * [top] for tailoring of locales that use CJK characters. 
Other indirect + * values are currently a pure convenience, they can be used to assure that + * the CEs will be always positioned in the same place relative to a point + * with known properties (e.g. first primary ignorable). + */ + private static final IndirectBoundaries INDIRECT_BOUNDARIES_[]; + +// /** +// * Inverse UCA constants +// */ +// private static final int INVERSE_SIZE_MASK_ = 0xFFF00000; +// private static final int INVERSE_OFFSET_MASK_ = 0x000FFFFF; +// private static final int INVERSE_SHIFT_VALUE_ = 20; + + /** + * Collation option tags + * [last variable] last variable value + * [last primary ignorable] largest CE for primary ignorable + * [last secondary ignorable] largest CE for secondary ignorable + * [last tertiary ignorable] largest CE for tertiary ignorable + * [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) + */ + private static final TokenOption RULES_OPTIONS_[]; + + static + { + INDIRECT_BOUNDARIES_ = new IndirectBoundaries[15]; + // UCOL_RESET_TOP_VALUE + INDIRECT_BOUNDARIES_[0] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_, + RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_); + // UCOL_FIRST_PRIMARY_IGNORABLE + INDIRECT_BOUNDARIES_[1] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_PRIMARY_IGNORABLE_, + null); + // UCOL_LAST_PRIMARY_IGNORABLE + INDIRECT_BOUNDARIES_[2] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_PRIMARY_IGNORABLE_, + null); + + // UCOL_FIRST_SECONDARY_IGNORABLE + INDIRECT_BOUNDARIES_[3] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_SECONDARY_IGNORABLE_, + null); + // UCOL_LAST_SECONDARY_IGNORABLE + INDIRECT_BOUNDARIES_[4] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_SECONDARY_IGNORABLE_, + null); + // UCOL_FIRST_TERTIARY_IGNORABLE + INDIRECT_BOUNDARIES_[5] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_TERTIARY_IGNORABLE_, + null); + // 
UCOL_LAST_TERTIARY_IGNORABLE + INDIRECT_BOUNDARIES_[6] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_TERTIARY_IGNORABLE_, + null); + // UCOL_FIRST_VARIABLE; + INDIRECT_BOUNDARIES_[7] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_VARIABLE_, + null); + // UCOL_LAST_VARIABLE + INDIRECT_BOUNDARIES_[8] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_VARIABLE_, + null); + // UCOL_FIRST_NON_VARIABLE + INDIRECT_BOUNDARIES_[9] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_NON_VARIABLE_, + null); + // UCOL_LAST_NON_VARIABLE + INDIRECT_BOUNDARIES_[10] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_, + RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_); + // UCOL_FIRST_IMPLICIT + INDIRECT_BOUNDARIES_[11] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_, + null); + // UCOL_LAST_IMPLICIT + INDIRECT_BOUNDARIES_[12] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_IMPLICIT_, + RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_); + // UCOL_FIRST_TRAILING + INDIRECT_BOUNDARIES_[13] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_, + null); + // UCOL_LAST_TRAILING + INDIRECT_BOUNDARIES_[14] = new IndirectBoundaries( + RuleBasedCollator.UCA_CONSTANTS_.LAST_TRAILING_, + null); + INDIRECT_BOUNDARIES_[14].m_limitCE_ + = RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24; + + RULES_OPTIONS_ = new TokenOption[19]; + String option[] = {"non-ignorable", "shifted"}; + int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_, + RuleBasedCollator.AttributeValue.SHIFTED_}; + RULES_OPTIONS_[0] = new TokenOption("alternate", + RuleBasedCollator.Attribute.ALTERNATE_HANDLING_, + option, value); + option = new String[1]; + option[0] = "2"; + value = new int[1]; + value[0] = RuleBasedCollator.AttributeValue.ON_; + RULES_OPTIONS_[1] = new TokenOption("backwards", + 
RuleBasedCollator.Attribute.FRENCH_COLLATION_, + option, value); + String offonoption[] = new String[2]; + offonoption[0] = "off"; + offonoption[1] = "on"; + int offonvalue[] = new int[2]; + offonvalue[0] = RuleBasedCollator.AttributeValue.OFF_; + offonvalue[1] = RuleBasedCollator.AttributeValue.ON_; + RULES_OPTIONS_[2] = new TokenOption("caseLevel", + RuleBasedCollator.Attribute.CASE_LEVEL_, + offonoption, offonvalue); + option = new String[3]; + option[0] = "lower"; + option[1] = "upper"; + option[2] = "off"; + value = new int[3]; + value[0] = RuleBasedCollator.AttributeValue.LOWER_FIRST_; + value[1] = RuleBasedCollator.AttributeValue.UPPER_FIRST_; + value[2] = RuleBasedCollator.AttributeValue.OFF_; + RULES_OPTIONS_[3] = new TokenOption("caseFirst", + RuleBasedCollator.Attribute.CASE_FIRST_, + option, value); + RULES_OPTIONS_[4] = new TokenOption("normalization", + RuleBasedCollator.Attribute.NORMALIZATION_MODE_, + offonoption, offonvalue); + RULES_OPTIONS_[5] = new TokenOption("hiraganaQ", + RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_, + offonoption, offonvalue); + option = new String[5]; + option[0] = "1"; + option[1] = "2"; + option[2] = "3"; + option[3] = "4"; + option[4] = "I"; + value = new int[5]; + value[0] = RuleBasedCollator.AttributeValue.PRIMARY_; + value[1] = RuleBasedCollator.AttributeValue.SECONDARY_; + value[2] = RuleBasedCollator.AttributeValue.TERTIARY_; + value[3] = RuleBasedCollator.AttributeValue.QUATERNARY_; + value[4] = RuleBasedCollator.AttributeValue.IDENTICAL_; + RULES_OPTIONS_[6] = new TokenOption("strength", + RuleBasedCollator.Attribute.STRENGTH_, + option, value); + RULES_OPTIONS_[7] = new TokenOption("variable top", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[8] = new TokenOption("rearrange", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + option = new String[3]; + option[0] = "1"; + option[1] = "2"; + option[2] = "3"; + value = new int[3]; + value[0] = 
RuleBasedCollator.AttributeValue.PRIMARY_; + value[1] = RuleBasedCollator.AttributeValue.SECONDARY_; + value[2] = RuleBasedCollator.AttributeValue.TERTIARY_; + RULES_OPTIONS_[9] = new TokenOption("before", + RuleBasedCollator.Attribute.LIMIT_, + option, value); + RULES_OPTIONS_[10] = new TokenOption("top", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + String firstlastoption[] = new String[7]; + firstlastoption[0] = "primary"; + firstlastoption[1] = "secondary"; + firstlastoption[2] = "tertiary"; + firstlastoption[3] = "variable"; + firstlastoption[4] = "regular"; + firstlastoption[5] = "implicit"; + firstlastoption[6] = "trailing"; + + int firstlastvalue[] = new int[7]; + Arrays.fill(firstlastvalue, RuleBasedCollator.AttributeValue.PRIMARY_); + + RULES_OPTIONS_[11] = new TokenOption("first", + RuleBasedCollator.Attribute.LIMIT_, + firstlastoption, firstlastvalue); + RULES_OPTIONS_[12] = new TokenOption("last", + RuleBasedCollator.Attribute.LIMIT_, + firstlastoption, firstlastvalue); + RULES_OPTIONS_[13] = new TokenOption("optimize", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[14] = new TokenOption("suppressContractions", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[15] = new TokenOption("undefined", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[16] = new TokenOption("scriptOrder", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[17] = new TokenOption("charsetname", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[18] = new TokenOption("charset", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + } + + /** + * Utility data members + */ + private Token m_utilToken_ = new Token(); + private CollationElementIterator m_UCAColEIter_ + = RuleBasedCollator.UCA_.getCollationElementIterator(""); + private int m_utilCEBuffer_[] = new int[2]; + + // private methods ------------------------------------------------------- + + /** + * Assembles the 
token list + * @exception ParseException thrown when rules syntax fails + */ + int assembleTokenList() throws ParseException + { + Token lastToken = null; + m_parsedToken_.m_strength_ = TOKEN_UNSET_; + int sourcelimit = m_source_.length(); + int expandNext = 0; + + while (m_current_ < sourcelimit) { + m_parsedToken_.m_prefixOffset_ = 0; + if (parseNextToken(lastToken == null) < 0) { + // we have reached the end + continue; + } + char specs = m_parsedToken_.m_flags_; + boolean variableTop = ((specs & TOKEN_VARIABLE_TOP_MASK_) != 0); + boolean top = ((specs & TOKEN_TOP_MASK_) != 0); + int lastStrength = TOKEN_UNSET_; + if (lastToken != null) { + lastStrength = lastToken.m_strength_; + } + m_utilToken_.m_source_ = m_parsedToken_.m_charsLen_ << 24 + | m_parsedToken_.m_charsOffset_; + m_utilToken_.m_rules_ = m_source_; + // 4 Lookup each source in the CharsToToken map, and find a + // sourcetoken + Token sourceToken = m_hashTable_.get(m_utilToken_); + if (m_parsedToken_.m_strength_ != TOKEN_RESET_) { + if (lastToken == null) { + // this means that rules haven't started properly + throwParseException(m_source_.toString(), 0); + } + // 6 Otherwise (when relation != reset) + if (sourceToken == null) { + // If sourceToken is null, create new one + sourceToken = new Token(); + sourceToken.m_rules_ = m_source_; + sourceToken.m_source_ = m_parsedToken_.m_charsLen_ << 24 + | m_parsedToken_.m_charsOffset_; + sourceToken.m_prefix_ = m_parsedToken_.m_prefixLen_ << 24 + | m_parsedToken_.m_prefixOffset_; + // TODO: this should also handle reverse + sourceToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_; + sourceToken.m_next_ = null; + sourceToken.m_previous_ = null; + sourceToken.m_CELength_ = 0; + sourceToken.m_expCELength_ = 0; + m_hashTable_.put(sourceToken, sourceToken); + } + else { + // we could have fished out a reset here + if (sourceToken.m_strength_ != TOKEN_RESET_ + && lastToken != sourceToken) { + // otherwise remove sourceToken from where it was. 
+ if (sourceToken.m_next_ != null) { + if (sourceToken.m_next_.m_strength_ + > sourceToken.m_strength_) { + sourceToken.m_next_.m_strength_ + = sourceToken.m_strength_; + } + sourceToken.m_next_.m_previous_ + = sourceToken.m_previous_; + } + else { + sourceToken.m_listHeader_.m_last_ + = sourceToken.m_previous_; + } + if (sourceToken.m_previous_ != null) { + sourceToken.m_previous_.m_next_ + = sourceToken.m_next_; + } + else { + sourceToken.m_listHeader_.m_first_ + = sourceToken.m_next_; + } + sourceToken.m_next_ = null; + sourceToken.m_previous_ = null; + } + } + sourceToken.m_strength_ = m_parsedToken_.m_strength_; + sourceToken.m_listHeader_ = lastToken.m_listHeader_; + + // 1. Find the strongest strength in each list, and set + // strongestP and strongestN accordingly in the headers. + if (lastStrength == TOKEN_RESET_ + || sourceToken.m_listHeader_.m_first_ == null) { + // If LAST is a reset insert sourceToken in the list. + if (sourceToken.m_listHeader_.m_first_ == null) { + sourceToken.m_listHeader_.m_first_ = sourceToken; + sourceToken.m_listHeader_.m_last_ = sourceToken; + } + else { // we need to find a place for us + // and we'll get in front of the same strength + if (sourceToken.m_listHeader_.m_first_.m_strength_ + <= sourceToken.m_strength_) { + sourceToken.m_next_ + = sourceToken.m_listHeader_.m_first_; + sourceToken.m_next_.m_previous_ = sourceToken; + sourceToken.m_listHeader_.m_first_ = sourceToken; + sourceToken.m_previous_ = null; + } + else { + lastToken = sourceToken.m_listHeader_.m_first_; + while (lastToken.m_next_ != null + && lastToken.m_next_.m_strength_ + > sourceToken.m_strength_) { + lastToken = lastToken.m_next_; + } + if (lastToken.m_next_ != null) { + lastToken.m_next_.m_previous_ = sourceToken; + } + else { + sourceToken.m_listHeader_.m_last_ + = sourceToken; + } + sourceToken.m_previous_ = lastToken; + sourceToken.m_next_ = lastToken.m_next_; + lastToken.m_next_ = sourceToken; + } + } + } + else { + // Otherwise (when LAST is not a 
reset) + // if polarity (LAST) == polarity(relation), insert + // sourceToken after LAST, otherwise insert before. + // when inserting after or before, search to the next + // position with the same strength in that direction. + // (This is called postpone insertion). + if (sourceToken != lastToken) { + if (lastToken.m_polarity_ == sourceToken.m_polarity_) { + while (lastToken.m_next_ != null + && lastToken.m_next_.m_strength_ + > sourceToken.m_strength_) { + lastToken = lastToken.m_next_; + } + sourceToken.m_previous_ = lastToken; + if (lastToken.m_next_ != null) { + lastToken.m_next_.m_previous_ = sourceToken; + } + else { + sourceToken.m_listHeader_.m_last_ = sourceToken; + } + sourceToken.m_next_ = lastToken.m_next_; + lastToken.m_next_ = sourceToken; + } + else { + while (lastToken.m_previous_ != null + && lastToken.m_previous_.m_strength_ + > sourceToken.m_strength_) { + lastToken = lastToken.m_previous_; + } + sourceToken.m_next_ = lastToken; + if (lastToken.m_previous_ != null) { + lastToken.m_previous_.m_next_ = sourceToken; + } + else { + sourceToken.m_listHeader_.m_first_ + = sourceToken; + } + sourceToken.m_previous_ = lastToken.m_previous_; + lastToken.m_previous_ = sourceToken; + } + } + else { // repeated one thing twice in rules, stay with the + // stronger strength + if (lastStrength < sourceToken.m_strength_) { + sourceToken.m_strength_ = lastStrength; + } + } + } + // if the token was a variable top, we're gonna put it in + if (variableTop == true && m_variableTop_ == null) { + variableTop = false; + m_variableTop_ = sourceToken; + } + // Treat the expansions. + // There are two types of expansions: explicit (x / y) and + // reset based propagating expansions + // (&abc * d * e <=> &ab * d / c * e / c) + // if both of them are in effect for a token, they are combined. 
+ sourceToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 + | m_parsedToken_.m_extensionOffset_; + if (expandNext != 0) { + if (sourceToken.m_strength_ == RuleBasedCollator.PRIMARY) { + // primary strength kills off the implicit expansion + expandNext = 0; + } + else if (sourceToken.m_expansion_ == 0) { + // if there is no expansion, implicit is just added to + // the token + sourceToken.m_expansion_ = expandNext; + } + else { + // there is both explicit and implicit expansion. + // We need to make a combination + int start = expandNext & 0xFFFFFF; + int size = expandNext >>> 24; + if (size > 0) { + m_source_.append(m_source_.substring(start, + start + size)); + } + start = m_parsedToken_.m_extensionOffset_; + m_source_.append(m_source_.substring(start, + start + m_parsedToken_.m_extensionLen_)); + sourceToken.m_expansion_ = (size + + m_parsedToken_.m_extensionLen_) << 24 + | m_extraCurrent_; + m_extraCurrent_ += size + m_parsedToken_.m_extensionLen_; + } + } + // if the previous token was a reset before, the strength of this + // token must match the strength of before. Otherwise we have an + // undefined situation. + // In other words, we currently have a cludge which we use to + // represent &a >> x. This is written as &[before 2]a << x. 
+ if((lastToken.m_flags_ & TOKEN_BEFORE_) != 0) { + int beforeStrength = (lastToken.m_flags_ & TOKEN_BEFORE_) - 1; + if(beforeStrength != sourceToken.m_strength_) { + throwParseException(m_source_.toString(), m_current_); + } + } + + } + else { + if (lastToken != null && lastStrength == TOKEN_RESET_) { + // if the previous token was also a reset, this means that + // we have two consecutive resets and we want to remove the + // previous one if empty + if (m_resultLength_ > 0 && m_listHeader_[m_resultLength_ - 1].m_first_ == null) { + m_resultLength_ --; + } + } + if (sourceToken == null) { + // this is a reset, but it might still be somewhere in the + // tailoring, in shorter form + int searchCharsLen = m_parsedToken_.m_charsLen_; + while (searchCharsLen > 1 && sourceToken == null) { + searchCharsLen --; + // key = searchCharsLen << 24 | charsOffset; + m_utilToken_.m_source_ = searchCharsLen << 24 + | m_parsedToken_.m_charsOffset_; + m_utilToken_.m_rules_ = m_source_; + sourceToken = m_hashTable_.get(m_utilToken_); + } + if (sourceToken != null) { + expandNext = (m_parsedToken_.m_charsLen_ + - searchCharsLen) << 24 + | (m_parsedToken_.m_charsOffset_ + + searchCharsLen); + } + } + if ((specs & TOKEN_BEFORE_) != 0) { + if (top == false) { + // we're doing before & there is no indirection + int strength = (specs & TOKEN_BEFORE_) - 1; + if (sourceToken != null + && sourceToken.m_strength_ != TOKEN_RESET_) { + // this is a before that is already ordered in the UCA + // - so we need to get the previous with good strength + while (sourceToken.m_strength_ > strength + && sourceToken.m_previous_ != null) { + sourceToken = sourceToken.m_previous_; + } + // here, either we hit the strength or NULL + if (sourceToken.m_strength_ == strength) { + if (sourceToken.m_previous_ != null) { + sourceToken = sourceToken.m_previous_; + } + else { // start of list + sourceToken + = sourceToken.m_listHeader_.m_reset_; + } + } + else { // we hit NULL, we should be doing the else part + 
sourceToken + = sourceToken.m_listHeader_.m_reset_; + sourceToken = getVirginBefore(sourceToken, + strength); + } + } + else { + sourceToken + = getVirginBefore(sourceToken, strength); + } + } + else { + // this is both before and indirection + top = false; + m_listHeader_[m_resultLength_] = new TokenListHeader(); + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = true; + // we need to do slightly more work. we need to get the + // baseCE using the inverse UCA & getPrevious. The next + // bound is not set, and will be decided in ucol_bld + int strength = (specs & TOKEN_BEFORE_) - 1; + int baseCE = INDIRECT_BOUNDARIES_[ + m_parsedToken_.m_indirectIndex_].m_startCE_; + int baseContCE = INDIRECT_BOUNDARIES_[ + m_parsedToken_.m_indirectIndex_].m_startContCE_; + int ce[] = new int[2]; + if((baseCE >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_) + && (baseCE >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */ + int primary = baseCE & RuleBasedCollator.CE_PRIMARY_MASK_ | (baseContCE & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16; + int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary); + int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1); + ce[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505; + ce[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_; + } else { + CollationParsedRuleBuilder.InverseUCA invuca + = CollationParsedRuleBuilder.INVERSE_UCA_; + invuca.getInversePrevCE(baseCE, baseContCE, strength, + ce); + } + m_listHeader_[m_resultLength_].m_baseCE_ = ce[0]; + m_listHeader_[m_resultLength_].m_baseContCE_ = ce[1]; + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + + sourceToken = new Token(); + expandNext = initAReset(0, sourceToken); + } + } + // 5 If the relation is a 
reset: + // If sourceToken is null + // Create new list, create new sourceToken, make the baseCE + // from source, put the sourceToken in ListHeader of the new + // list + if (sourceToken == null) { + if (m_listHeader_[m_resultLength_] == null) { + m_listHeader_[m_resultLength_] = new TokenListHeader(); + } + // 3 Consider each item: relation, source, and expansion: + // e.g. ...< x / y ... + // First convert all expansions into normal form. + // Examples: + // If "xy" doesn't occur earlier in the list or in the UCA, + // convert &xy * c * d * ... into &x * c/y * d * ... + // Note: reset values can never have expansions, although + // they can cause the very next item to have one. They may + // be contractions, if they are found earlier in the list. + if (top == false) { + CollationElementIterator coleiter + = RuleBasedCollator.UCA_.getCollationElementIterator( + m_source_.substring(m_parsedToken_.m_charsOffset_, + m_parsedToken_.m_charsOffset_ + + m_parsedToken_.m_charsLen_)); + + int CE = coleiter.next(); + // offset to the character in the full rule string + int expand = coleiter.getOffset() + + m_parsedToken_.m_charsOffset_; + int SecondCE = coleiter.next(); + + m_listHeader_[m_resultLength_].m_baseCE_ + = CE & 0xFFFFFF3F; + if (RuleBasedCollator.isContinuation(SecondCE)) { + m_listHeader_[m_resultLength_].m_baseContCE_ + = SecondCE; + } + else { + m_listHeader_[m_resultLength_].m_baseContCE_ = 0; + } + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = false; + sourceToken = new Token(); + expandNext = initAReset(expand, sourceToken); + } + else { // top == TRUE + top = false; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = true; + IndirectBoundaries ib = 
INDIRECT_BOUNDARIES_[ + m_parsedToken_.m_indirectIndex_]; + m_listHeader_[m_resultLength_].m_baseCE_ + = ib.m_startCE_; + m_listHeader_[m_resultLength_].m_baseContCE_ + = ib.m_startContCE_; + m_listHeader_[m_resultLength_].m_nextCE_ + = ib.m_limitCE_; + m_listHeader_[m_resultLength_].m_nextContCE_ + = ib.m_limitContCE_; + sourceToken = new Token(); + expandNext = initAReset(0, sourceToken); + } + } + else { // reset to something already in rules + top = false; + } + } + // 7 After all this, set LAST to point to sourceToken, and goto + // step 3. + lastToken = sourceToken; + } + + if (m_resultLength_ > 0 + && m_listHeader_[m_resultLength_ - 1].m_first_ == null) { + m_resultLength_ --; + } + return m_resultLength_; + } + + /** + * Formats and throws a ParseException + * @param rules collation rule that failed + * @param offset failed offset in rules + * @throws ParseException with failure information + */ + private static final void throwParseException(String rules, int offset) + throws ParseException + { + // for pre-context + String precontext = rules.substring(0, offset); + String postcontext = rules.substring(offset, rules.length()); + StringBuilder error = new StringBuilder( + "Parse error occurred in rule at offset "); + error.append(offset); + error.append("\n after the prefix \""); + error.append(precontext); + error.append("\" before the suffix \""); + error.append(postcontext); + throw new ParseException(error.toString(), offset); + } + + private final boolean doSetTop() { + m_parsedToken_.m_charsOffset_ = m_extraCurrent_; + m_source_.append((char)0xFFFE); + IndirectBoundaries ib = + INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_]; + m_source_.append((char)(ib.m_startCE_ >> 16)); + m_source_.append((char)(ib.m_startCE_ & 0xFFFF)); + m_extraCurrent_ += 3; + if (INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_ + ].m_startContCE_ == 0) { + m_parsedToken_.m_charsLen_ = 3; + } + else { + m_source_.append((char)(INDIRECT_BOUNDARIES_[ + 
m_parsedToken_.m_indirectIndex_ + ].m_startContCE_ >> 16)); + m_source_.append((char)(INDIRECT_BOUNDARIES_[ + m_parsedToken_.m_indirectIndex_ + ].m_startContCE_ & 0xFFFF)); + m_extraCurrent_ += 2; + m_parsedToken_.m_charsLen_ = 5; + } + return true; + } + + private static boolean isCharNewLine(char c) { + switch (c) { + case 0x000A: /* LF */ + case 0x000D: /* CR */ + case 0x000C: /* FF */ + case 0x0085: /* NEL */ + case 0x2028: /* LS */ + case 0x2029: /* PS */ + return true; + default: + return false; + } + } + + /** + * Getting the next token + * + * @param startofrules + * flag indicating if we are at the start of rules + * @return the offset of the rules + * @exception ParseException + * thrown when rule parsing fails + */ + @SuppressWarnings("fallthrough") + private int parseNextToken(boolean startofrules) throws ParseException + { + // parsing part + boolean variabletop = false; + boolean top = false; + boolean inchars = true; + boolean inquote = false; + boolean wasinquote = false; + byte before = 0; + boolean isescaped = false; + int /*newcharslen = 0,*/ newextensionlen = 0; + int /*charsoffset = 0,*/ extensionoffset = 0; + int newstrength = TOKEN_UNSET_; + + m_parsedToken_.m_charsLen_ = 0; + m_parsedToken_.m_charsOffset_ = 0; + m_parsedToken_.m_prefixOffset_ = 0; + m_parsedToken_.m_prefixLen_ = 0; + m_parsedToken_.m_indirectIndex_ = 0; + + int limit = m_rules_.length(); + while (m_current_ < limit) { + char ch = m_source_.charAt(m_current_); + if (inquote) { + if (ch == 0x0027) { // '\'' + inquote = false; + } + else { + if ((m_parsedToken_.m_charsLen_ == 0) || inchars) { + if (m_parsedToken_.m_charsLen_ == 0) { + m_parsedToken_.m_charsOffset_ = m_extraCurrent_; + } + m_parsedToken_.m_charsLen_ ++; + } + else { + if (newextensionlen == 0) { + extensionoffset = m_extraCurrent_; + } + newextensionlen ++; + } + } + } + else if (isescaped) { + isescaped = false; + if (newstrength == TOKEN_UNSET_) { + throwParseException(m_rules_, m_current_); + } + if (ch != 0 
&& m_current_ != limit) { + if (inchars) { + if (m_parsedToken_.m_charsLen_ == 0) { + m_parsedToken_.m_charsOffset_ = m_current_; + } + m_parsedToken_.m_charsLen_ ++; + } + else { + if (newextensionlen == 0) { + extensionoffset = m_current_; + } + newextensionlen ++; + } + } + } + else { + if (!UCharacterProperty.isRuleWhiteSpace(ch)) { + // Sets the strength for this entry + switch (ch) { + case 0x003D : // '=' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + m_parsedToken_.m_indirectIndex_ = 5; + top = doSetTop(); + return doEndParseNextToken(TOKEN_RESET_, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = Collator.IDENTICAL; + if(m_source_.charAt(m_current_ + 1) == 0x002A) { // '*' + m_current_++; + m_prevStrength_ = newstrength; + }else{ + m_prevStrength_ = TOKEN_UNSET_; + } + break; + case 0x002C : // ',' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + m_parsedToken_.m_indirectIndex_ = 5; + top = doSetTop(); + return doEndParseNextToken(TOKEN_RESET_, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = Collator.TERTIARY; + m_prevStrength_ = TOKEN_UNSET_; + break; + case 0x003B : // ';' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + m_parsedToken_.m_indirectIndex_ = 5; + top = doSetTop(); + return doEndParseNextToken(TOKEN_RESET_, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = Collator.SECONDARY; 
+ m_prevStrength_ = TOKEN_UNSET_; + break; + case 0x003C : // '<' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + m_parsedToken_.m_indirectIndex_ = 5; + top = doSetTop(); + return doEndParseNextToken(TOKEN_RESET_, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + // before this, do a scan to verify whether this is + // another strength + if (m_source_.charAt(m_current_ + 1) == 0x003C) { + m_current_ ++; + if (m_source_.charAt(m_current_ + 1) == 0x003C) { + m_current_ ++; // three in a row! + newstrength = Collator.TERTIARY; + } + else { // two in a row + newstrength = Collator.SECONDARY; + } + } + else { // just one + newstrength = Collator.PRIMARY; + } + + if(m_source_.charAt(m_current_ + 1) == 0x002A) { // '*' + m_current_++; + m_prevStrength_ = newstrength; + }else{ + m_prevStrength_ = TOKEN_UNSET_; + } + break; + case 0x0026 : // '&' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, + top, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = TOKEN_RESET_; // PatternEntry::RESET = 0 + m_prevStrength_ = TOKEN_UNSET_; + break; + case 0x005b : // '[' + // options - read an option, analyze it + m_optionEnd_ = m_rules_.indexOf(0x005d, m_current_); + if (m_optionEnd_ != -1) { // ']' + byte result = readAndSetOption(); + m_current_ = m_optionEnd_; + if ((result & TOKEN_TOP_MASK_) != 0) { + if (newstrength == TOKEN_RESET_) { + top = doSetTop(); + if (before != 0) { + // This is a combination of before and + // indirection like + // '&[before 2][first regular]>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_) + && (basece >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */ + + int primary = basece & RuleBasedCollator.CE_PRIMARY_MASK_ | (basecontce & 
RuleBasedCollator.CE_PRIMARY_MASK_) >> 16; + int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary); + ch = RuleBasedCollator.impCEGen_.getCodePointFromRaw(raw-1); + int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1); + m_utilCEBuffer_[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505; + m_utilCEBuffer_[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_; + + m_parsedToken_.m_charsOffset_ = m_extraCurrent_; + m_source_.append('\uFFFE'); + m_source_.append((char)ch); + m_extraCurrent_ += 2; + m_parsedToken_.m_charsLen_++; + + m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24) + | m_parsedToken_.m_charsOffset_; + m_utilToken_.m_rules_ = m_source_; + sourcetoken = m_hashTable_.get(m_utilToken_); + + if(sourcetoken == null) { + m_listHeader_[m_resultLength_] = new TokenListHeader(); + m_listHeader_[m_resultLength_].m_baseCE_ + = m_utilCEBuffer_[0] & 0xFFFFFF3F; + if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) { + m_listHeader_[m_resultLength_].m_baseContCE_ + = m_utilCEBuffer_[1]; + } + else { + m_listHeader_[m_resultLength_].m_baseContCE_ = 0; + } + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = false; + + sourcetoken = new Token(); + initAReset(-1, sourcetoken); + } + + } else { + + // first ce and second ce m_utilCEBuffer_ + /*int invpos = */CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE( + basece, basecontce, + strength, m_utilCEBuffer_); + // we got the previous CE. Now we need to see if the difference between + // the two CEs is really of the requested strength. + // if it's a bigger difference (we asked for secondary and got primary), we + // need to modify the CE. 
+ if(CollationParsedRuleBuilder.INVERSE_UCA_.getCEStrengthDifference(basece, basecontce, m_utilCEBuffer_[0], m_utilCEBuffer_[1]) < strength) { + // adjust the strength + // now we are in the situation where our baseCE should actually be modified in + // order to get the CE in the right position. + if(strength == Collator.SECONDARY) { + m_utilCEBuffer_[0] = basece - 0x0200; + } else { // strength == UCOL_TERTIARY + m_utilCEBuffer_[0] = basece - 0x02; + } + if(RuleBasedCollator.isContinuation(basecontce)) { + if(strength == Collator.SECONDARY) { + m_utilCEBuffer_[1] = basecontce - 0x0200; + } else { // strength == UCOL_TERTIARY + m_utilCEBuffer_[1] = basecontce - 0x02; + } + } + } + +/* + // the code below relies on getting a code point from the inverse table, in order to be + // able to merge the situations like &x < 9 &[before 1]a < d. This won't work: + // 1. There are many code points that have the same CE + // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2] are broken. + // Also, in case when there is no equivalent strength before an element, we have to actually + // construct one. For example, &[before 2]a << x won't result in x << a, because the element + // before a is a primary difference. + ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_table_[3 * invpos + + 2]; + if ((ch & INVERSE_SIZE_MASK_) != 0) { + int offset = ch & INVERSE_OFFSET_MASK_; + ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_continuations_[ + offset]; + } + m_source_.append((char)ch); + m_extraCurrent_ ++; + m_parsedToken_.m_charsOffset_ = m_extraCurrent_ - 1; + m_parsedToken_.m_charsLen_ = 1; + + // We got an UCA before. However, this might have been tailored. + // example: + // &\u30ca = \u306a + // &[before 3]\u306a<<<\u306a|\u309d + + m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24) + | m_parsedToken_.m_charsOffset_; + m_utilToken_.m_rules_ = m_source_; + sourcetoken = (Token)m_hashTable_.get(m_utilToken_); +*/ + + // here is how it should be. 
The situation such as &[before 1]a < x, should be + // resolved exactly as if we wrote &a > x. + // therefore, I don't really care if the UCA value before a has been changed. + // However, I do care if the strength between my element and the previous element + // is bigger then I wanted. So, if CE < baseCE and I wanted &[before 2], then i'll + // have to construct the base CE. + + // if we found a tailored thing, we have to use the UCA value and + // construct a new reset token with constructed name + //if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) { + // character to which we want to anchor is already tailored. + // We need to construct a new token which will be the anchor point + //m_source_.setCharAt(m_extraCurrent_ - 1, '\uFFFE'); + //m_source_.append(ch); + //m_extraCurrent_ ++; + //m_parsedToken_.m_charsLen_ ++; + // grab before + m_parsedToken_.m_charsOffset_ -= 10; + m_parsedToken_.m_charsLen_ += 10; + m_listHeader_[m_resultLength_] = new TokenListHeader(); + m_listHeader_[m_resultLength_].m_baseCE_ + = m_utilCEBuffer_[0] & 0xFFFFFF3F; + if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) { + m_listHeader_[m_resultLength_].m_baseContCE_ + = m_utilCEBuffer_[1]; + } + else { + m_listHeader_[m_resultLength_].m_baseContCE_ = 0; + } + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = false; + sourcetoken = new Token(); + initAReset(-1, sourcetoken); + //} + } + return sourcetoken; + } + + /** + * Processing Description. + * 1. Build a m_listHeader_. Each list has a header, which contains two lists + * (positive and negative), a reset token, a baseCE, nextCE, and + * previousCE. The lists and reset may be null. + * 2. As you process, you keep a LAST pointer that points to the last token + * you handled. 
+ * @param expand string offset, -1 for null strings + * @param targetToken token to update + * @return expandnext offset + * @throws ParseException thrown when rules syntax failed + */ + private int initAReset(int expand, Token targetToken) throws ParseException + { + if (m_resultLength_ == m_listHeader_.length - 1) { + // Unfortunately, this won't work, as we store addresses of lhs in + // token + TokenListHeader temp[] = new TokenListHeader[m_resultLength_ << 1]; + System.arraycopy(m_listHeader_, 0, temp, 0, m_resultLength_ + 1); + m_listHeader_ = temp; + } + // do the reset thing + targetToken.m_rules_ = m_source_; + targetToken.m_source_ = m_parsedToken_.m_charsLen_ << 24 + | m_parsedToken_.m_charsOffset_; + targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 + | m_parsedToken_.m_extensionOffset_; + // keep the flags around so that we know about before + targetToken.m_flags_ = m_parsedToken_.m_flags_; + + if (m_parsedToken_.m_prefixOffset_ != 0) { + throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1); + } + + targetToken.m_prefix_ = 0; + // TODO: this should also handle reverse + targetToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_; + targetToken.m_strength_ = TOKEN_RESET_; + targetToken.m_next_ = null; + targetToken.m_previous_ = null; + targetToken.m_CELength_ = 0; + targetToken.m_expCELength_ = 0; + targetToken.m_listHeader_ = m_listHeader_[m_resultLength_]; + m_listHeader_[m_resultLength_].m_first_ = null; + m_listHeader_[m_resultLength_].m_last_ = null; + m_listHeader_[m_resultLength_].m_first_ = null; + m_listHeader_[m_resultLength_].m_last_ = null; + m_listHeader_[m_resultLength_].m_reset_ = targetToken; + + /* 3 Consider each item: relation, source, and expansion: + * e.g. ...< x / y ... + * First convert all expansions into normal form. Examples: + * If "xy" doesn't occur earlier in the list or in the UCA, convert + * &xy * c * d * ... into &x * c/y * d * ... 
+ * Note: reset values can never have expansions, although they can + * cause the very next item to have one. They may be contractions, if + * they are found earlier in the list. + */ + int result = 0; + if (expand > 0) { + // check to see if there is an expansion + if (m_parsedToken_.m_charsLen_ > 1) { + targetToken.m_source_ = ((expand + - m_parsedToken_.m_charsOffset_ ) + << 24) + | m_parsedToken_.m_charsOffset_; + result = ((m_parsedToken_.m_charsLen_ + + m_parsedToken_.m_charsOffset_ - expand) << 24) + | expand; + } + } + + m_resultLength_ ++; + m_hashTable_.put(targetToken, targetToken); + return result; + } + + /** + * Checks if an character is special + * @param ch character to test + * @return true if the character is special + */ + private static final boolean isSpecialChar(char ch) + { + return (ch <= 0x002F && ch >= 0x0020) || (ch <= 0x003F && ch >= 0x003A) + || (ch <= 0x0060 && ch >= 0x005B) + || (ch <= 0x007E && ch >= 0x007D) || ch == 0x007B; + } + + private + UnicodeSet readAndSetUnicodeSet(String source, int start) throws ParseException + { + while(source.charAt(start) != '[') { /* advance while we find the first '[' */ + start++; + } + // now we need to get a balanced set of '[]'. The problem is that a set can have + // many, and *end point to the first closing '[' + int noOpenBraces = 1; + int current = 1; // skip the opening brace + while(start+current < source.length() && noOpenBraces != 0) { + if(source.charAt(start+current) == '[') { + noOpenBraces++; + } else if(source.charAt(start+current) == ']') { // closing brace + noOpenBraces--; + } + current++; + } + //int nextBrace = -1; + + if(noOpenBraces != 0 || (/*nextBrace =*/ source.indexOf("]", start+current) /*']'*/) == -1) { + throwParseException(m_rules_, start); + } + return new UnicodeSet(source.substring(start, start+current)); //uset_openPattern(start, current); + } + + + /** in C, optionarg is passed by reference to function. + * We use a private int to simulate this. 
+ */ + private int m_optionarg_ = 0; + + private int readOption(String rules, int start, int optionend) + { + m_optionarg_ = 0; + int i = 0; + while (i < RULES_OPTIONS_.length) { + String option = RULES_OPTIONS_[i].m_name_; + int optionlength = option.length(); + if (rules.length() > start + optionlength + && option.equalsIgnoreCase(rules.substring(start, + start + optionlength))) { + if (optionend - start > optionlength) { + m_optionarg_ = start + optionlength; + // start of the options, skip space + while (m_optionarg_ < optionend && (UCharacter.isWhitespace(rules.charAt(m_optionarg_)) || UCharacterProperty.isRuleWhiteSpace(rules.charAt(m_optionarg_)))) + { // eat whitespace + m_optionarg_ ++; + } + } + break; + } + i ++; + } + if(i == RULES_OPTIONS_.length) { + i = -1; + } + return i; + } + /** + * Reads and set collation options + * @return TOKEN_SUCCESS if option is set correct, 0 otherwise + * @exception ParseException thrown when options in rules are wrong + */ + private byte readAndSetOption() throws ParseException + { + int start = m_current_ + 1; // skip opening '[' + int i = readOption(m_rules_, start, m_optionEnd_); + + int optionarg = m_optionarg_; + + if (i < 0) { + throwParseException(m_rules_, start); + } + + if (i < 7) { + if (optionarg != 0) { + for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; + j ++) { + String subname = RULES_OPTIONS_[i].m_subOptions_[j]; + int size = optionarg + subname.length(); + if (m_rules_.length() > size + && subname.equalsIgnoreCase(m_rules_.substring( + optionarg, size))) { + setOptions(m_options_, RULES_OPTIONS_[i].m_attribute_, + RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]); + return TOKEN_SUCCESS_MASK_; + } + } + } + throwParseException(m_rules_, optionarg); + } + else if (i == 7) { // variable top + return TOKEN_SUCCESS_MASK_ | TOKEN_VARIABLE_TOP_MASK_; + } + else if (i == 8) { // rearange + return TOKEN_SUCCESS_MASK_; + } + else if (i == 9) { // before + if (optionarg != 0) { + for (int j = 0; j < 
RULES_OPTIONS_[i].m_subOptions_.length; + j ++) { + String subname = RULES_OPTIONS_[i].m_subOptions_[j]; + int size = optionarg + subname.length(); + if (m_rules_.length() > size + && subname.equalsIgnoreCase( + m_rules_.substring(optionarg, + optionarg + subname.length()))) { + return (byte)(TOKEN_SUCCESS_MASK_ + | RULES_OPTIONS_[i].m_subOptionAttributeValues_[j] + + 1); + } + } + } + throwParseException(m_rules_, optionarg); + } + else if (i == 10) { // top, we are going to have an array with + // structures of limit CEs index to this array will be + // src->parsedToken.indirectIndex + m_parsedToken_.m_indirectIndex_ = 0; + return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_; + } + else if (i < 13) { // first, last + for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; j ++) { + String subname = RULES_OPTIONS_[i].m_subOptions_[j]; + int size = optionarg + subname.length(); + if (m_rules_.length() > size + && subname.equalsIgnoreCase(m_rules_.substring(optionarg, + size))) { + m_parsedToken_.m_indirectIndex_ = (char)(i - 10 + (j << 1)); + return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_; + } + } + throwParseException(m_rules_, optionarg); + } + else if(i == 13 || i == 14) { // copy and remove are handled before normalization + // we need to move end here + int noOpenBraces = 1; + m_current_++; // skip opening brace + while(m_current_ < m_source_.length() && noOpenBraces != 0) { + if(m_source_.charAt(m_current_) == '[') { + noOpenBraces++; + } else if(m_source_.charAt(m_current_) == ']') { // closing brace + noOpenBraces--; + } + m_current_++; + } + m_optionEnd_ = m_current_-1; + return TOKEN_SUCCESS_MASK_; + } + else { + throwParseException(m_rules_, optionarg); + } + return TOKEN_SUCCESS_MASK_; // we will never reach here. 
+ } + + /** + * Set collation option + * @param optionset option set to set + * @param attribute type to set + * @param value attribute value + */ + private void setOptions(OptionSet optionset, int attribute, int value) + { + switch (attribute) { + case RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_ : + optionset.m_isHiragana4_ + = (value == RuleBasedCollator.AttributeValue.ON_); + break; + case RuleBasedCollator.Attribute.FRENCH_COLLATION_ : + optionset.m_isFrenchCollation_ + = (value == RuleBasedCollator.AttributeValue.ON_); + break; + case RuleBasedCollator.Attribute.ALTERNATE_HANDLING_ : + optionset.m_isAlternateHandlingShifted_ + = (value + == RuleBasedCollator.AttributeValue.SHIFTED_); + break; + case RuleBasedCollator.Attribute.CASE_FIRST_ : + optionset.m_caseFirst_ = value; + break; + case RuleBasedCollator.Attribute.CASE_LEVEL_ : + optionset.m_isCaseLevel_ + = (value == RuleBasedCollator.AttributeValue.ON_); + break; + case RuleBasedCollator.Attribute.NORMALIZATION_MODE_ : + if (value == RuleBasedCollator.AttributeValue.ON_) { + value = Collator.CANONICAL_DECOMPOSITION; + } + optionset.m_decomposition_ = value; + break; + case RuleBasedCollator.Attribute.STRENGTH_ : + optionset.m_strength_ = value; + break; + default : + break; + } + } + + UnicodeSet getTailoredSet() throws ParseException + { + boolean startOfRules = true; + UnicodeSet tailored = new UnicodeSet(); + String pattern; + CanonicalIterator it = new CanonicalIterator(""); + + m_parsedToken_.m_strength_ = TOKEN_UNSET_; + int sourcelimit = m_source_.length(); + //int expandNext = 0; + + while (m_current_ < sourcelimit) { + m_parsedToken_.m_prefixOffset_ = 0; + if (parseNextToken(startOfRules) < 0) { + // we have reached the end + continue; + } + startOfRules = false; + // The idea is to tokenize the rule set. 
For each non-reset token, + // we add all the canonicaly equivalent FCD sequences + if(m_parsedToken_.m_strength_ != TOKEN_RESET_) { + it.setSource(m_source_.substring( + m_parsedToken_.m_charsOffset_, + m_parsedToken_.m_charsOffset_+m_parsedToken_.m_charsLen_)); + pattern = it.next(); + while(pattern != null) { + if(Normalizer.quickCheck(pattern, Normalizer.FCD,0) != Normalizer.NO) { + tailored.add(pattern); + } + pattern = it.next(); + } + } + } + return tailored; + } + + final private void extractSetsFromRules(String rules) throws ParseException { + int optionNumber = -1; + int setStart = 0; + int i = 0; + while(i < rules.length()) { + if(rules.charAt(i) == 0x005B) { + optionNumber = readOption(rules, i+1, rules.length()); + setStart = m_optionarg_; + if(optionNumber == 13) { /* copy - parts of UCA to tailoring */ + UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart); + if(m_copySet_ == null) { + m_copySet_ = newSet; + } else { + m_copySet_.addAll(newSet); + } + } else if(optionNumber == 14) { + UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart); + if(m_removeSet_ == null) { + m_removeSet_ = newSet; + } else { + m_removeSet_.addAll(newSet); + } + } + } + i++; + } + } +} diff --git a/main/classes/collate/src/com/ibm/icu/text/Collator.java b/main/classes/collate/src/com/ibm/icu/text/Collator.java new file mode 100644 index 00000000000..5447181cc5d --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/Collator.java @@ -0,0 +1,1097 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.util.Comparator; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Locale; +import java.util.MissingResourceException; +import java.util.Set; + +import com.ibm.icu.impl.ICUDebug; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.VersionInfo; + +/** +* {@icuenhanced java.text.Collator}.{@icu _usage_} +* +*

    Collator performs locale-sensitive string comparison. A concrete +* subclass, RuleBasedCollator, allows customization of the collation +* ordering by the use of rule sets.

    +* +*

    Following the Unicode +* Consortium's specifications for the +* Unicode Collation +* Algorithm (UCA), there are 5 different levels of strength used +* in comparisons: +* +*

      +*
    • PRIMARY strength: Typically, this is used to denote differences between +* base characters (for example, "a" < "b"). +* It is the strongest difference. For example, dictionaries are divided +* into different sections by base character. +*
    • SECONDARY strength: Accents in the characters are considered secondary +* differences (for example, "as" < "às" < "at"). Other +* differences +* between letters can also be considered secondary differences, depending +* on the language. A secondary difference is ignored when there is a +* primary difference anywhere in the strings. +*
    • TERTIARY strength: Upper and lower case differences in characters are +* distinguished at tertiary strength (for example, "ao" < "Ao" < +* "aò"). In addition, a variant of a letter differs from the base +* form on the tertiary strength (such as "A" and "Ⓐ"). Another +* example is the +* difference between large and small Kana. A tertiary difference is ignored +* when there is a primary or secondary difference anywhere in the strings. +*
    • QUATERNARY strength: When punctuation is ignored +* +* (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY +* strength, an additional strength level can +* be used to distinguish words with and without punctuation (for example, +* "ab" < "a-b" < "aB"). +* This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY +* difference. The QUATERNARY strength should only be used if ignoring +* punctuation is required. +*
    • IDENTICAL strength: +* When all other strengths are equal, the IDENTICAL strength is used as a +* tiebreaker. The Unicode code point values of the NFD form of each string +* are compared, just in case there is no difference. +* For example, Hebrew cantillation marks are only distinguished at this +* strength. This strength should be used sparingly, as a difference only in +* code point values between two strings is an extremely rare occurrence. +* Using this strength substantially decreases the performance for both +* comparison and collation key generation APIs. This strength also +* increases the size of the collation key. +*
    +* +* Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes, +* the canonical decomposition mode and one that does not use any decomposition. +* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION +* is not supported here. If the canonical +* decomposition mode is set, the Collator handles un-normalized text properly, +* producing the same results as if the text were normalized in NFD. If +* canonical decomposition is turned off, it is the user's responsibility to +* ensure that all text is already in the appropriate form before performing +* a comparison or before getting a CollationKey.

    +* +*

    For more information about the collation service see the +* users +* guide.

    +* +*

    Examples of use +*

    +* // Get the Collator for US English and set its strength to PRIMARY
    +* Collator usCollator = Collator.getInstance(Locale.US);
    +* usCollator.setStrength(Collator.PRIMARY);
    +* if (usCollator.compare("abc", "ABC") == 0) {
    +*     System.out.println("Strings are equivalent");
    +* }
    +*
    +* The following example shows how to compare two strings using the
    +* Collator for the default locale.
    +*
    +* // Compare two strings in the default locale
    +* Collator myCollator = Collator.getInstance();
    +* myCollator.setDecomposition(NO_DECOMPOSITION);
    +* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
    +*     System.out.println("à\u0325 is not equal to a\u0325̀ without decomposition");
    +*     myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
    +*     if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
    +*         System.out.println("Error: à\u0325 should be equal to a\u0325̀ with decomposition");
    +*     }
    +*     else {
    +*         System.out.println("à\u0325 is equal to a\u0325̀ with decomposition");
    +*     }
    +* }
    +* else {
    +*     System.out.println("Error: à\u0325 should not be equal to a\u0325̀ without decomposition");
    +* }
    +* 
    +*

    +* @see RuleBasedCollator +* @see CollationKey +* @author Syn Wee Quek +* @stable ICU 2.8 +*/ +public abstract class Collator implements Comparator, Cloneable +{ + // public data members --------------------------------------------------- + + /** + * Strongest collator strength value. Typically used to denote differences + * between base characters. See class documentation for more explanation. + * @see #setStrength + * @see #getStrength + * @stable ICU 2.8 + */ + public final static int PRIMARY = 0; + + /** + * Second level collator strength value. + * Accents in the characters are considered secondary differences. + * Other differences between letters can also be considered secondary + * differences, depending on the language. + * See class documentation for more explanation. + * @see #setStrength + * @see #getStrength + * @stable ICU 2.8 + */ + public final static int SECONDARY = 1; + + /** + * Third level collator strength value. + * Upper and lower case differences in characters are distinguished at this + * strength level. In addition, a variant of a letter differs from the base + * form on the tertiary level. + * See class documentation for more explanation. + * @see #setStrength + * @see #getStrength + * @stable ICU 2.8 + */ + public final static int TERTIARY = 2; + + /** + * {@icu} Fourth level collator strength value. + * When punctuation is ignored + * + * (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY + * strength, an additional strength level can + * be used to distinguish words with and without punctuation. + * See class documentation for more explanation. + * @see #setStrength + * @see #getStrength + * @stable ICU 2.8 + */ + public final static int QUATERNARY = 3; + + /** + * Smallest Collator strength value. When all other strengths are equal, + * the IDENTICAL strength is used as a tiebreaker. The Unicode code point + * values of the NFD form of each string are compared, just in case there + * is no difference. 
+ * See class documentation for more explanation. + *

    + *

    + * Note this value is different from JDK's + *

    + * @stable ICU 2.8 + */ + public final static int IDENTICAL = 15; + + /** + * {@icunote} This is for backwards compatibility with Java APIs only. It + * should not be used, IDENTICAL should be used instead. ICU's + * collation does not support Java's FULL_DECOMPOSITION mode. + * @stable ICU 3.4 + */ + public final static int FULL_DECOMPOSITION = IDENTICAL; + + /** + * Decomposition mode value. With NO_DECOMPOSITION set, Strings + * will not be decomposed for collation. This is the default + * decomposition setting unless otherwise specified by the locale + * used to create the Collator.

    + * + *

    Note this value is different from the JDK's.

    + * @see #CANONICAL_DECOMPOSITION + * @see #getDecomposition + * @see #setDecomposition + * @stable ICU 2.8 + */ + public final static int NO_DECOMPOSITION = 16; + + /** + * Decomposition mode value. With CANONICAL_DECOMPOSITION set, + * characters that are canonical variants according to the Unicode standard + * will be decomposed for collation.

    + * + *

    CANONICAL_DECOMPOSITION corresponds to Normalization Form D as + * described in + * Unicode Technical Report #15. + *

    + * @see #NO_DECOMPOSITION + * @see #getDecomposition + * @see #setDecomposition + * @stable ICU 2.8 + */ + public final static int CANONICAL_DECOMPOSITION = 17; + + // public methods -------------------------------------------------------- + + // public setters -------------------------------------------------------- + + /** + * Sets this Collator's strength property. The strength property + * determines the minimum level of difference considered significant + * during comparison.

    + * + *

    The default strength for the Collator is TERTIARY, unless specified + * otherwise by the locale used to create the Collator.

    + * + *

    See the Collator class description for an example of use.

    + * @param newStrength the new strength value. + * @see #getStrength + * @see #PRIMARY + * @see #SECONDARY + * @see #TERTIARY + * @see #QUATERNARY + * @see #IDENTICAL + * @throws IllegalArgumentException if the new strength value is not one + * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. + * @stable ICU 2.8 + */ + public void setStrength(int newStrength) + { + if ((newStrength != PRIMARY) && + (newStrength != SECONDARY) && + (newStrength != TERTIARY) && + (newStrength != QUATERNARY) && + (newStrength != IDENTICAL)) { + throw new IllegalArgumentException("Incorrect comparison level."); + } + m_strength_ = newStrength; + } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + public Collator setStrength2(int newStrength) + { + setStrength(newStrength); + return this; + } + + /** + * Sets the decomposition mode of this Collator. Setting this + * decomposition property with CANONICAL_DECOMPOSITION allows the + * Collator to handle un-normalized text properly, producing the + * same results as if the text were normalized. If + * NO_DECOMPOSITION is set, it is the user's responsibility to + * insure that all text is already in the appropriate form before + * a comparison or before getting a CollationKey. Adjusting + * decomposition mode allows the user to select between faster and + * more complete collation behavior.

    + * + *

    Since a great many of the world's languages do not require + * text normalization, most locales set NO_DECOMPOSITION as the + * default decomposition mode.

    + * + * The default decomposition mode for the Collator is + * NO_DECOMPOSITION, unless specified otherwise by the locale used + * to create the Collator.

    + * + *

    See getDecomposition for a description of decomposition + * mode.

    + * + * @param decomposition the new decomposition mode + * @see #getDecomposition + * @see #NO_DECOMPOSITION + * @see #CANONICAL_DECOMPOSITION + * @throws IllegalArgumentException If the given value is not a valid + * decomposition mode. + * @stable ICU 2.8 + */ + public void setDecomposition(int decomposition) + { + if ((decomposition != NO_DECOMPOSITION) && + (decomposition != CANONICAL_DECOMPOSITION)) { + throw new IllegalArgumentException("Wrong decomposition mode."); + } + m_decomposition_ = decomposition; + } + + // public getters -------------------------------------------------------- + + /** + * Returns the Collator for the current default locale. + * The default locale is determined by java.util.Locale.getDefault(). + * @return the Collator for the default locale (for example, en_US) if it + * is created successfully. Otherwise if there is no Collator + * associated with the current locale, the default UCA collator + * will be returned. + * @see java.util.Locale#getDefault() + * @see #getInstance(Locale) + * @stable ICU 2.8 + */ + public static final Collator getInstance() + { + return getInstance(ULocale.getDefault()); + } + + /** + * Clones the collator. + * @stable ICU 2.6 + * @return a clone of this collator. + */ + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + // begin registry stuff + + /** + * A factory used with registerFactory to register multiple collators and provide + * display names for them. If standard locale display names are sufficient, + * Collator instances may be registered instead. + *

    Note: as of ICU4J 3.2, the default API for CollatorFactory uses + * ULocale instead of Locale. Instead of overriding createCollator(Locale), + * new implementations should override createCollator(ULocale). Note that + * one of these two methods MUST be overridden or else an infinite + * loop will occur. + * @stable ICU 2.6 + */ + public static abstract class CollatorFactory { + /** + * Return true if this factory will be visible. Default is true. + * If not visible, the locales supported by this factory will not + * be listed by getAvailableLocales. + * + * @return true if this factory is visible + * @stable ICU 2.6 + */ + public boolean visible() { + return true; + } + + /** + * Return an instance of the appropriate collator. If the locale + * is not supported, return null. + * Note: as of ICU4J 3.2, implementations should override + * this method instead of createCollator(Locale). + * @param loc the locale for which this collator is to be created. + * @return the newly created collator. + * @stable ICU 3.2 + */ + public Collator createCollator(ULocale loc) { + return createCollator(loc.toLocale()); + } + + /** + * Return an instance of the appropriate collator. If the locale + * is not supported, return null. + *

    Note: as of ICU4J 3.2, implementations should override + * createCollator(ULocale) instead of this method, and inherit this + * method's implementation. This method is no longer abstract + * and instead delegates to createCollator(ULocale). + * @param loc the locale for which this collator is to be created. + * @return the newly created collator. + * @stable ICU 2.6 + */ + public Collator createCollator(Locale loc) { + return createCollator(ULocale.forLocale(loc)); + } + + /** + * Return the name of the collator for the objectLocale, localized for the displayLocale. + * If objectLocale is not visible or not defined by the factory, return null. + * @param objectLocale the locale identifying the collator + * @param displayLocale the locale for which the display name of the collator should be localized + * @return the display name + * @stable ICU 2.6 + */ + public String getDisplayName(Locale objectLocale, Locale displayLocale) { + return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale)); + } + + /** + * Return the name of the collator for the objectLocale, localized for the displayLocale. + * If objectLocale is not visible or not defined by the factory, return null. + * @param objectLocale the locale identifying the collator + * @param displayLocale the locale for which the display name of the collator should be localized + * @return the display name + * @stable ICU 3.2 + */ + public String getDisplayName(ULocale objectLocale, ULocale displayLocale) { + if (visible()) { + Set supported = getSupportedLocaleIDs(); + String name = objectLocale.getBaseName(); + if (supported.contains(name)) { + return objectLocale.getDisplayName(displayLocale); + } + } + return null; + } + + /** + * Return an unmodifiable collection of the locale names directly + * supported by this factory. + * + * @return the set of supported locale IDs. + * @stable ICU 2.6 + */ + public abstract Set getSupportedLocaleIDs(); + + /** + * Empty default constructor. 
+ * @stable ICU 2.6 + */ + protected CollatorFactory() { + } + } + + static abstract class ServiceShim { + abstract Collator getInstance(ULocale l); + abstract Object registerInstance(Collator c, ULocale l); + abstract Object registerFactory(CollatorFactory f); + abstract boolean unregister(Object k); + abstract Locale[] getAvailableLocales(); // TODO remove + abstract ULocale[] getAvailableULocales(); + abstract String getDisplayName(ULocale ol, ULocale dl); + } + + private static ServiceShim shim; + private static ServiceShim getShim() { + // Note: this instantiation is safe on loose-memory-model configurations + // despite lack of synchronization, since the shim instance has no state-- + // it's all in the class init. The worst problem is we might instantiate + // two shim instances, but they'll share the same state so that's ok. + if (shim == null) { + try { + Class cls = Class.forName("com.ibm.icu.text.CollatorServiceShim"); + shim = (ServiceShim)cls.newInstance(); + } + catch (MissingResourceException e) + { + ///CLOVER:OFF + throw e; + ///CLOVER:ON + } + catch (Exception e) { + ///CLOVER:OFF + if(DEBUG){ + e.printStackTrace(); + } + throw new RuntimeException(e.getMessage()); + ///CLOVER:ON + } + } + return shim; + } + + /** + * {@icu} Returns the Collator for the desired locale. + * @param locale the desired locale. + * @return Collator for the desired locale if it is created successfully. + * Otherwise if there is no Collator + * associated with the current locale, a default UCA collator will + * be returned. + * @see java.util.Locale + * @see java.util.ResourceBundle + * @see #getInstance(Locale) + * @see #getInstance() + * @stable ICU 3.0 + */ + public static final Collator getInstance(ULocale locale) { + // fetching from service cache is faster than instantiation + return getShim().getInstance(locale); + } + + /** + * Returns the Collator for the desired locale. + * @param locale the desired locale. 
+ * @return Collator for the desired locale if it is created successfully. + * Otherwise if there is no Collator + * associated with the current locale, a default UCA collator will + * be returned. + * @see java.util.Locale + * @see java.util.ResourceBundle + * @see #getInstance(ULocale) + * @see #getInstance() + * @stable ICU 2.8 + */ + public static final Collator getInstance(Locale locale) { + return getInstance(ULocale.forLocale(locale)); + } + + /** + * {@icu} Registers a collator as the default collator for the provided locale. The + * collator should not be modified after it is registered. + * + * @param collator the collator to register + * @param locale the locale for which this is the default collator + * @return an object that can be used to unregister the registered collator. + * + * @stable ICU 3.2 + */ + public static final Object registerInstance(Collator collator, ULocale locale) { + return getShim().registerInstance(collator, locale); + } + + /** + * {@icu} Registers a collator factory. + * + * @param factory the factory to register + * @return an object that can be used to unregister the registered factory. + * + * @stable ICU 2.6 + */ + public static final Object registerFactory(CollatorFactory factory) { + return getShim().registerFactory(factory); + } + + /** + * {@icu} Unregisters a collator previously registered using registerInstance. + * @param registryKey the object previously returned by registerInstance. + * @return true if the collator was successfully unregistered. + * @stable ICU 2.6 + */ + public static final boolean unregister(Object registryKey) { + if (shim == null) { + return false; + } + return shim.unregister(registryKey); + } + + /** + * Returns the set of locales, as Locale objects, for which collators + * are installed. Note that Locale objects do not support RFC 3066. + * @return the list of locales in which collators are installed. 
+ * This list includes any that have been registered, in addition to + * those that are installed with ICU4J. + * @stable ICU 2.4 + */ + public static Locale[] getAvailableLocales() { + // TODO make this wrap getAvailableULocales later + if (shim == null) { + ClassLoader cl = Collator.class.getClassLoader(); + return ICUResourceBundle.getAvailableLocales( + ICUResourceBundle.ICU_COLLATION_BASE_NAME, cl); + } + return shim.getAvailableLocales(); + } + + /** + * {@icu} Returns the set of locales, as ULocale objects, for which collators + * are installed. ULocale objects support RFC 3066. + * @return the list of locales in which collators are installed. + * This list includes any that have been registered, in addition to + * those that are installed with ICU4J. + * @stable ICU 3.0 + */ + public static final ULocale[] getAvailableULocales() { + if (shim == null) { + ClassLoader cl = Collator.class.getClassLoader(); + return ICUResourceBundle.getAvailableULocales( + ICUResourceBundle.ICU_COLLATION_BASE_NAME, cl); + } + return shim.getAvailableULocales(); + } + + /** + * The list of keywords for this service. This must be kept in sync with + * the resource data. + * @since ICU 3.0 + */ + private static final String[] KEYWORDS = { "collation" }; + + /** + * The resource name for this service. Note that this is not the same as + * the keyword for this service. + * @since ICU 3.0 + */ + private static final String RESOURCE = "collations"; + + /** + * The resource bundle base name for this service. + * *since ICU 3.0 + */ + private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME; + + /** + * {@icu} Returns an array of all possible keywords that are relevant to + * collation. At this point, the only recognized keyword for this + * service is "collation". + * @return an array of valid collation keywords. 
+ * @see #getKeywordValues + * @stable ICU 3.0 + */ + public static final String[] getKeywords() { + return KEYWORDS; + } + + /** + * {@icu} Given a keyword, returns an array of all values for + * that keyword that are currently in use. + * @param keyword one of the keywords returned by getKeywords. + * @see #getKeywords + * @stable ICU 3.0 + */ + public static final String[] getKeywordValues(String keyword) { + if (!keyword.equals(KEYWORDS[0])) { + throw new IllegalArgumentException("Invalid keyword: " + keyword); + } + return ICUResourceBundle.getKeywordValues(BASE, RESOURCE); + } + + /** + * {@icu} Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "collation" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @return an array of string values for the given key and the locale. + * @stable ICU 4.2 + */ + public static final String[] getKeywordValuesForLocale(String key, ULocale locale, + boolean commonlyUsed) { + // Note: The parameter commonlyUsed is actually not used. + // The switch is in the method signature for consistency + // with other locale services. 
+ + // Read available collation values from collation bundles + String baseLoc = locale.getBaseName(); + LinkedList values = new LinkedList(); + + UResourceBundle bundle = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc); + + String defcoll = null; + while (bundle != null) { + UResourceBundle collations = bundle.get("collations"); + Enumeration collEnum = collations.getKeys(); + while (collEnum.hasMoreElements()) { + String collkey = collEnum.nextElement(); + if (collkey.equals("default")) { + if (defcoll == null) { + // Keep the default + defcoll = collations.getString("default"); + } + } else if (!values.contains(collkey)) { + values.add(collkey); + } + } + bundle = ((ICUResourceBundle)bundle).getParent(); + } + // Reordering + Iterator itr = values.iterator(); + String[] result = new String[values.size()]; + result[0] = defcoll; + int idx = 1; + while (itr.hasNext()) { + String collKey = itr.next(); + if (!collKey.equals(defcoll)) { + result[idx++] = collKey; + } + } + return result; + } + + /** + * {@icu} Returns the functionally equivalent locale for the given + * requested locale, with respect to given keyword, for the + * collation service. If two locales return the same result, then + * collators instantiated for these locales will behave + * equivalently. The converse is not always true; two collators + * may in fact be equivalent, but return different results, due to + * internal details. The return result has no other meaning than + * that stated above, and implies nothing as to the relationship + * between the two locales. This is intended for use by + * applications who wish to cache collators, or otherwise reuse + * collators when possible. The functional equivalent may change + * over time. For more information, please see the + * Locales and Services section of the ICU User Guide. + * @param keyword a particular keyword as enumerated by + * getKeywords. 
+ * @param locID The requested locale + * @param isAvailable If non-null, isAvailable[0] will receive and + * output boolean that indicates whether the requested locale was + * 'available' to the collation service. If non-null, isAvailable + * must have length >= 1. + * @return the locale + * @stable ICU 3.0 + */ + public static final ULocale getFunctionalEquivalent(String keyword, + ULocale locID, + boolean isAvailable[]) { + ClassLoader cl = Collator.class.getClassLoader(); + return ICUResourceBundle.getFunctionalEquivalent(BASE, cl, RESOURCE, + keyword, locID, isAvailable, true); + } + + /** + * {@icu} Returns the functionally equivalent locale for the given + * requested locale, with respect to given keyword, for the + * collation service. + * @param keyword a particular keyword as enumerated by + * getKeywords. + * @param locID The requested locale + * @return the locale + * @see #getFunctionalEquivalent(String,ULocale,boolean[]) + * @stable ICU 3.0 + */ + public static final ULocale getFunctionalEquivalent(String keyword, + ULocale locID) { + return getFunctionalEquivalent(keyword, locID, null); + } + + /** + * {@icu} Returns the name of the collator for the objectLocale, localized for the + * displayLocale. + * @param objectLocale the locale of the collator + * @param displayLocale the locale for the collator's display name + * @return the display name + * @stable ICU 2.6 + */ + static public String getDisplayName(Locale objectLocale, Locale displayLocale) { + return getShim().getDisplayName(ULocale.forLocale(objectLocale), + ULocale.forLocale(displayLocale)); + } + + /** + * {@icu} Returns the name of the collator for the objectLocale, localized for the + * displayLocale. 
+ * @param objectLocale the locale of the collator + * @param displayLocale the locale for the collator's display name + * @return the display name + * @stable ICU 3.2 + */ + static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) { + return getShim().getDisplayName(objectLocale, displayLocale); + } + + /** + * {@icu} Returns the name of the collator for the objectLocale, localized for the + * current locale. + * @param objectLocale the locale of the collator + * @return the display name + * @stable ICU 2.6 + */ + static public String getDisplayName(Locale objectLocale) { + return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault()); + } + + /** + * {@icu} Returns the name of the collator for the objectLocale, localized for the + * current locale. + * @param objectLocale the locale of the collator + * @return the display name + * @stable ICU 3.2 + */ + static public String getDisplayName(ULocale objectLocale) { + return getShim().getDisplayName(objectLocale, ULocale.getDefault()); + } + + /** + * Returns this Collator's strength property. The strength property + * determines the minimum level of difference considered significant. + *

    + * {@icunote} This can return QUATERNARY strength, which is not supported by the + * JDK version. + *

    + * See the Collator class description for more details. + *

    + * @return this Collator's current strength property. + * @see #setStrength + * @see #PRIMARY + * @see #SECONDARY + * @see #TERTIARY + * @see #QUATERNARY + * @see #IDENTICAL + * @stable ICU 2.8 + */ + public int getStrength() + { + return m_strength_; + } + + /** + * Returns the decomposition mode of this Collator. The decomposition mode + * determines how Unicode composed characters are handled. + *

    + *

    + * See the Collator class description for more details. + *

    + * @return the decomposition mode + * @see #setDecomposition + * @see #NO_DECOMPOSITION + * @see #CANONICAL_DECOMPOSITION + * @stable ICU 2.8 + */ + public int getDecomposition() + { + return m_decomposition_; + } + + // public other methods ------------------------------------------------- + + /** + * Compares the equality of two text Strings using + * this Collator's rules, strength and decomposition mode. Convenience method. + * @param source the source string to be compared. + * @param target the target string to be compared. + * @return true if the strings are equal according to the collation + * rules, otherwise false. + * @see #compare + * @throws NullPointerException thrown if either arguments is null. + * @stable ICU 2.8 + */ + public boolean equals(String source, String target) + { + return (compare(source, target) == 0); + } + + /** + * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored + * in this collator. + * @return a pointer to a UnicodeSet object containing all the + * code points and sequences that may sort differently than + * in the UCA. + * @stable ICU 2.4 + */ + public UnicodeSet getTailoredSet() + { + return new UnicodeSet(0, 0x10FFFF); + } + + /** + * Compares the source text String to the target text String according to + * this Collator's rules, strength and decomposition mode. + * Returns an integer less than, + * equal to or greater than zero depending on whether the source String is + * less than, equal to or greater than the target String. See the Collator + * class description for an example of use. + *

    + * @param source the source String. + * @param target the target String. + * @return Returns an integer value. Value is less than zero if source is + * less than target, value is zero if source and target are equal, + * value is greater than zero if source is greater than target. + * @see CollationKey + * @see #getCollationKey + * @throws NullPointerException thrown if either argument is null. + * @stable ICU 2.8 + */ + public abstract int compare(String source, String target); + + /** + * Compares the source Object to the target Object. + *

    + * @param source the source Object. + * @param target the target Object. + * @return Returns an integer value. Value is less than zero if source is + * less than target, value is zero if source and target are equal, + * value is greater than zero if source is greater than target. + * @throws ClassCastException thrown if either arguments cannot be cast to String. + * @stable ICU 4.2 + */ + public int compare(Object source, Object target) { + return compare((String)source, (String)target); + } + + /** + *

    + * Transforms the String into a CollationKey suitable for efficient + * repeated comparison. The resulting key depends on the collator's + * rules, strength and decomposition mode. + *

    + *

    See the CollationKey class documentation for more information.

    + * @param source the string to be transformed into a CollationKey. + * @return the CollationKey for the given String based on this Collator's + * collation rules. If the source String is null, a null + * CollationKey is returned. + * @see CollationKey + * @see #compare(String, String) + * @see #getRawCollationKey + * @stable ICU 2.8 + */ + public abstract CollationKey getCollationKey(String source); + + /** + * {@icu} Returns the simpler form of a CollationKey for the String source following + * the rules of this Collator and stores the result into the user provided argument + * key. If key has a internal byte array of length that's too small for the result, + * the internal byte array will be grown to the exact required size. + * @param source the text String to be transformed into a RawCollationKey + * @return If key is null, a new instance of RawCollationKey will be + * created and returned, otherwise the user provided key will be + * returned. + * @see #compare(String, String) + * @see #getCollationKey + * @see RawCollationKey + * @stable ICU 2.8 + */ + public abstract RawCollationKey getRawCollationKey(String source, + RawCollationKey key); + + /** + * {@icu} Variable top is a two byte primary value which causes all the codepoints + * with primary values that are less or equal than the variable top to be + * shifted when alternate handling is set to SHIFTED. + *

    + *

    + * Sets the variable top to a collation element value of a string supplied. + *

+     * @param varTop one or more (if contraction) characters to which the
+     *               variable top should be set
+     * @return an int value containing the value of the variable top in the upper 16
+     *         bits. Lower 16 bits are undefined.
+     * @throws IllegalArgumentException if the varTop argument is not
+     *         a valid variable top element. A variable top element is
+     *         invalid when it is a contraction that does not exist in the
+     *         Collation order or when the PRIMARY strength collation
+     *         element for the variable top has more than two bytes
+     * @see #getVariableTop
+     * @see RuleBasedCollator#setAlternateHandlingShifted
+     * @stable ICU 2.6
+     */
+    public abstract int setVariableTop(String varTop);
+
+    /**
+     * {@icu} Returns the variable top value of a Collator.
+     * Lower 16 bits are undefined and should be ignored.
+     * @return the variable top value of a Collator.
+     * @see #setVariableTop
+     * @stable ICU 2.6
+     */
+    public abstract int getVariableTop();
+
+    /**
+     * {@icu} Sets the variable top to a collation element value supplied.
+     * Variable top is set to the upper 16 bits.
+     * Lower 16 bits are ignored.
+     * @param varTop Collation element value, as returned by setVariableTop or
+     *               getVariableTop
+     * @see #getVariableTop
+     * @see #setVariableTop
+     * @stable ICU 2.6
+     */
+    public abstract void setVariableTop(int varTop);
+
+    /**
+     * {@icu} Returns the version of this collator object.
+     * @return the version object associated with this collator
+     * @stable ICU 2.8
+     */
+    public abstract VersionInfo getVersion();
+
+    /**
+     * {@icu} Returns the UCA version of this collator object.
+     * @return the UCA version object associated with this collator
+     * @stable ICU 2.8
+     */
+    public abstract VersionInfo getUCAVersion();
+
+    // protected constructor -------------------------------------------------
+
+    /**
+     * Empty default constructor to make javadocs happy
+     * @stable ICU 2.4
+     */
+    protected Collator()
+    {
+    }
+
+    // package private methods -----------------------------------------------
+
+    // private data members --------------------------------------------------
+
+    /**
+     * Collation strength; initialized to TERTIARY.
+     */
+    private int m_strength_ = TERTIARY;
+
+    /**
+     * Decomposition mode; initialized to CANONICAL_DECOMPOSITION.
+     */
+    private int m_decomposition_ = CANONICAL_DECOMPOSITION;
+
+    // Debug switch, taken from ICUDebug.enabled("collator").
+    private static final boolean DEBUG = ICUDebug.enabled("collator");
+
+    // private methods -------------------------------------------------------
+
+    // end registry stuff
+
+    // -------- BEGIN ULocale boilerplate --------
+
+    /**
+     * {@icu} Returns the locale that was used to create this object, or null.
+     * This may differ from the locale requested at the time of
+     * this object's creation.  For example, if an object is created
+     * for locale en_US_CALIFORNIA, the actual data may be
+     * drawn from en (the actual locale), and
+     * en_US may be the most specific locale that exists (the
+     * valid locale).
+     *
+     *

    Note: This method will be implemented in ICU 3.0; ICU 2.8 + * contains a partial preview implementation. The * actual + * locale is returned correctly, but the valid locale is + * not, in most cases. + * @param type type of information requested, either {@link + * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link + * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}. + * @return the information specified by type, or null if + * this object was not constructed from locale data. + * @see com.ibm.icu.util.ULocale + * @see com.ibm.icu.util.ULocale#VALID_LOCALE + * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE + * @draft ICU 2.8 (retain) + * @provisional This API might change or be removed in a future release. + */ + public final ULocale getLocale(ULocale.Type type) { + return type == ULocale.ACTUAL_LOCALE ? + this.actualLocale : this.validLocale; + } + + /* + * Set information about the locales that were used to create this + * object. If the object was not constructed from locale data, + * both arguments should be set to null. Otherwise, neither + * should be null. The actual locale must be at the same level or + * less specific than the valid locale. This method is intended + * for use by factories or other entities that create objects of + * this class. 
+     * @param valid the most specific locale containing any resource
+     * data, or null
+     * @param actual the locale containing data used to construct this
+     * object, or null
+     * @throws IllegalArgumentException if exactly one of the two arguments is null
+     * @see com.ibm.icu.util.ULocale
+     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+     */
+    final void setLocale(ULocale valid, ULocale actual) {
+        // Change the following to an assertion later
+        ///CLOVER:OFF
+        // The following would not happen since the method is called
+        //  by other protected functions that check and make sure that
+        //  valid and actual are not null before passing
+        // Rejects the mixed case only: both null or both non-null is accepted.
+        if ((valid == null) != (actual == null)) {
+            throw new IllegalArgumentException();
+        }
+        ///CLOVER:ON
+        // Another check we could do is that the actual locale is at
+        // the same level or less specific than the valid locale.
+        this.validLocale = valid;
+        this.actualLocale = actual;
+    }
+
+    /*
+     * The most specific locale containing any resource data, or null.
+     * Returned by getLocale for any type other than ACTUAL_LOCALE.
+     * @see com.ibm.icu.util.ULocale
+     */
+    private ULocale validLocale;
+
+    /*
+     * The locale containing data used to construct this object, or
+     * null.  Returned by getLocale for ACTUAL_LOCALE.
+     * @see com.ibm.icu.util.ULocale
+     */
+    private ULocale actualLocale;
+
+    // -------- END ULocale boilerplate --------
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java b/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java
new file mode 100644
index 00000000000..d3d02ed69c0
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java
@@ -0,0 +1,681 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and    *
+* others. All Rights Reserved.
* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +import com.ibm.icu.impl.ICUBinary; +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.IntTrie; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA; +import com.ibm.icu.text.RuleBasedCollator.UCAConstants; +import com.ibm.icu.util.VersionInfo; + +/** +*

    Internal reader class for the ICU data file ucadata.icu, which contains
+* Unicode Collation Algorithm data.

    +*

    This class reads ucadata.icu, verifies that it is a valid
+* ICU data file, and splits its contents into blocks of data for use by
+* com.ibm.icu.text.Collator.
+*

    +*

    ucadata.icu, which is in big-endian format, is packaged in the same jar as this
+* package.

    +* @author Syn Wee Quek +* @since release 2.2, April 18 2002 +*/ + +final class CollatorReader +{ + static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException { + InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/ucadata.icu"); + BufferedInputStream b = new BufferedInputStream(i, 90000); + CollatorReader reader = new CollatorReader(b); + char[] result = reader.readImp(rbc, ucac); + b.close(); + return result; + } + + public static InputStream makeByteBufferInputStream(final ByteBuffer buf) { + return new InputStream() { + public int read() throws IOException { + if (!buf.hasRemaining()) { + return -1; + } + return buf.get() & 0xff; + } + public int read(byte[] bytes, int off, int len) throws IOException { + len = Math.min(len, buf.remaining()); + buf.get(bytes, off, len); + return len; + } + }; + } + + static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException { + final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2; + int dataLength = data.remaining(); + // TODO: Change the rest of this class to use the ByteBuffer directly, rather than + // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader(). + // Consider changing ICUBinary to also work with a ByteBuffer. 
+ CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false); + if (dataLength > MIN_BINARY_DATA_SIZE_) { + reader.readImp(rbc, null); + } else { + reader.readHeader(rbc); + reader.readOptions(rbc); + // duplicating UCA_'s data + rbc.setWithUCATables(); + } + } + + static InverseUCA getInverseUCA() throws IOException { + InverseUCA result = null; + InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/coll/invuca.icu"); +// try { +// String invdat = "/com/ibm/icu/impl/data/invuca.icu"; +// InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat); + BufferedInputStream b = new BufferedInputStream(i, 110000); + result = CollatorReader.readInverseUCA(b); + b.close(); + i.close(); + return result; +// } catch (Exception e) { +// throw new RuntimeException(e.getMessage()); +// } + } + + // protected constructor --------------------------------------------- + + /** + *

    Private constructor that reads and verifies the ICU data header.

+     * @param inputStream ICU collator file input stream
+     * @exception IOException thrown if the data file fails authentication
+     */
+    private CollatorReader(InputStream inputStream) throws IOException
+    {
+        this(inputStream, true);
+        // Header authentication and the Unicode version check are done by the
+        // two-argument constructor because readICUHeader is passed as true.
+    }
+
+    /**
+     *

    Private constructor that optionally reads and verifies the ICU data header.

    + * @param inputStream ICU uprops.icu file input stream + * @param readICUHeader flag to indicate if the ICU header has to be read + * @exception IOException throw if data file fails authentication + */ + private CollatorReader(InputStream inputStream, boolean readICUHeader) + throws IOException + { + if (readICUHeader) { + byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, + UCA_AUTHENTICATE_); + // weiv: check that we have the correct Unicode version in + // binary files + VersionInfo UCDVersion = UCharacter.getUnicodeVersion(); + if(UnicodeVersion[0] != UCDVersion.getMajor() + || UnicodeVersion[1] != UCDVersion.getMinor()) { + throw new IOException(WRONG_UNICODE_VERSION_ERROR_); + } + } + m_dataInputStream_ = new DataInputStream(inputStream); + } + + // protected methods ------------------------------------------------- + + /** + * Read and break up the header stream of data passed in as arguments into + * meaningful Collator data. + * @param rbc RuleBasedCollator to populate with header information + * @exception IOException thrown when there's a data error. 
+ */ + private void readHeader(RuleBasedCollator rbc) throws IOException + { + m_size_ = m_dataInputStream_.readInt(); + // all the offsets are in bytes + // to get the address add to the header address and cast properly + // Default options int options + m_headerSize_ = m_dataInputStream_.readInt(); // start of options + int readcount = 8; // for size and headersize + // structure which holds values for indirect positioning and implicit + // ranges + int UCAConst = m_dataInputStream_.readInt(); + readcount += 4; + // this one is needed only for UCA, to copy the appropriate + // contractions + m_dataInputStream_.skip(4); + readcount += 4; + // reserved for future use + m_dataInputStream_.skipBytes(4); + readcount += 4; + // const uint8_t *mappingPosition; + int mapping = m_dataInputStream_.readInt(); + readcount += 4; + // uint32_t *expansion; + rbc.m_expansionOffset_ = m_dataInputStream_.readInt(); + readcount += 4; + // UChar *contractionIndex; + rbc.m_contractionOffset_ = m_dataInputStream_.readInt(); + readcount += 4; + // uint32_t *contractionCEs; + int contractionCE = m_dataInputStream_.readInt(); + readcount += 4; + // needed for various closures int contractionSize + /*int contractionSize = */m_dataInputStream_.readInt(); + readcount += 4; + // array of last collation element in expansion + int expansionEndCE = m_dataInputStream_.readInt(); + readcount += 4; + // array of maximum expansion size corresponding to the expansion + // collation elements with last element in expansionEndCE + int expansionEndCEMaxSize = m_dataInputStream_.readInt(); + readcount += 4; + // size of endExpansionCE int expansionEndCESize + m_dataInputStream_.skipBytes(4); + readcount += 4; + // hash table of unsafe code points + int unsafe = m_dataInputStream_.readInt(); + readcount += 4; + // hash table of final code points in contractions. 
+ int contractionEnd = m_dataInputStream_.readInt(); + readcount += 4; + // int CEcount = m_dataInputStream_.readInt(); + m_dataInputStream_.skipBytes(4); + readcount += 4; + // is jamoSpecial + rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean(); + readcount++; + // padding + m_dataInputStream_.skipBytes(3); + readcount += 3; + rbc.m_version_ = readVersion(m_dataInputStream_); + readcount += 4; + rbc.m_UCA_version_ = readVersion(m_dataInputStream_); + readcount += 4; + rbc.m_UCD_version_ = readVersion(m_dataInputStream_); + readcount += 4; + // byte charsetName[] = new byte[32]; // for charset CEs + m_dataInputStream_.skipBytes(32); + readcount += 32; + m_dataInputStream_.skipBytes(56); // for future use + readcount += 56; + if (m_headerSize_ < readcount) { + ///CLOVER:OFF + throw new IOException("Internal Error: Header size error"); + ///CLOVER:ON + } + m_dataInputStream_.skipBytes(m_headerSize_ - readcount); + + if (rbc.m_contractionOffset_ == 0) { // contraction can be null + rbc.m_contractionOffset_ = mapping; + contractionCE = mapping; + } + m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_; + m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_; + m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_; + m_contractionCESize_ = mapping - contractionCE; + //m_trieSize_ = expansionEndCE - mapping; + m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE; + m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize; + m_unsafeSize_ = contractionEnd - unsafe; + m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled + // later + // treat it as normal collator first + // for normal collator there is no UCA contraction + m_contractionEndSize_ = m_size_ - contractionEnd; + + rbc.m_contractionOffset_ >>= 1; // casting to ints + rbc.m_expansionOffset_ >>= 2; // casting to chars + } + + /** + * Read and break up the collation options passed in the stream of data and + * update the argument Collator with the 
results + * + * @param rbc + * RuleBasedCollator to populate + * @exception IOException + * thrown when there's a data error. + */ + private void readOptions(RuleBasedCollator rbc) throws IOException + { + int readcount = 0; + rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt(); + readcount += 4; + rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); + readcount += 4; + rbc.m_defaultIsAlternateHandlingShifted_ + = (m_dataInputStream_.readInt() == + RuleBasedCollator.AttributeValue.SHIFTED_); + readcount += 4; + rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt(); + readcount += 4; + rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); + readcount += 4; + int value = m_dataInputStream_.readInt(); + readcount += 4; + if (value == RuleBasedCollator.AttributeValue.ON_) { + value = Collator.CANONICAL_DECOMPOSITION; + } + else { + value = Collator.NO_DECOMPOSITION; + } + rbc.m_defaultDecomposition_ = value; + rbc.m_defaultStrength_ = m_dataInputStream_.readInt(); + readcount += 4; + rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); + readcount += 4; + rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); + readcount += 4; + m_dataInputStream_.skip(60); // reserved for future use + readcount += 60; + m_dataInputStream_.skipBytes(m_optionSize_ - readcount); + if (m_optionSize_ < readcount) { + ///CLOVER:OFF + throw new IOException("Internal Error: Option size error"); + ///CLOVER:ON + } + } + + /** + * Read and break up the stream of data passed in as arguments into + * meaningful Collator data. 
+ * @param rbc RuleBasedCollator to populate + * @param UCAConst object to fill up with UCA constants if we are reading + * the UCA collator, if not use a null + * @return UCAContractions array filled up with the UCA contractions if we + * are reading the UCA collator + * @exception IOException thrown when there's a data error. + */ + private char[] readImp(RuleBasedCollator rbc, + RuleBasedCollator.UCAConstants UCAConst) + throws IOException + { + readHeader(rbc); + // header size has been checked by readHeader + int readcount = m_headerSize_; + // option size has been checked by readOptions + readOptions(rbc); + readcount += m_optionSize_; + m_expansionSize_ >>= 2; + rbc.m_expansion_ = new int[m_expansionSize_]; + for (int i = 0; i < m_expansionSize_; i ++) { + rbc.m_expansion_[i] = m_dataInputStream_.readInt(); + } + readcount += (m_expansionSize_ << 2); + if (m_contractionIndexSize_ > 0) { + m_contractionIndexSize_ >>= 1; + rbc.m_contractionIndex_ = new char[m_contractionIndexSize_]; + for (int i = 0; i < m_contractionIndexSize_; i ++) { + rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar(); + } + readcount += (m_contractionIndexSize_ << 1); + m_contractionCESize_ >>= 2; + rbc.m_contractionCE_ = new int[m_contractionCESize_]; + for (int i = 0; i < m_contractionCESize_; i ++) { + rbc.m_contractionCE_[i] = m_dataInputStream_.readInt(); + } + readcount += (m_contractionCESize_ << 2); + } + rbc.m_trie_ = new IntTrie(m_dataInputStream_, + RuleBasedCollator.DataManipulate.getInstance()); + if (!rbc.m_trie_.isLatin1Linear()) { + throw new IOException("Data corrupted, " + + "Collator Tries expected to have linear " + + "latin one data arrays"); + } + readcount += rbc.m_trie_.getSerializedDataSize(); + m_expansionEndCESize_ >>= 2; + rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_]; + for (int i = 0; i < m_expansionEndCESize_; i ++) { + rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt(); + } + readcount += (m_expansionEndCESize_ << 2); + 
rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_]; + for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) { + rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte(); + } + readcount += m_expansionEndCEMaxSizeSize_; + rbc.m_unsafe_ = new byte[m_unsafeSize_]; + for (int i = 0; i < m_unsafeSize_; i ++) { + rbc.m_unsafe_[i] = m_dataInputStream_.readByte(); + } + readcount += m_unsafeSize_; + if (UCAConst != null) { + // we are reading the UCA + // unfortunately the UCA offset in any collator data is not 0 and + // only refers to the UCA data + m_contractionEndSize_ -= m_UCAValuesSize_; + } + rbc.m_contractionEnd_ = new byte[m_contractionEndSize_]; + for (int i = 0; i < m_contractionEndSize_; i ++) { + rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte(); + } + readcount += m_contractionEndSize_; + if (UCAConst != null) { + UCAConst.FIRST_TERTIARY_IGNORABLE_[0] + = m_dataInputStream_.readInt(); + int readUCAConstcount = 4; + UCAConst.FIRST_TERTIARY_IGNORABLE_[1] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_TERTIARY_IGNORABLE_[0] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_TERTIARY_IGNORABLE_[1] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_PRIMARY_IGNORABLE_[0] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_PRIMARY_IGNORABLE_[1] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_SECONDARY_IGNORABLE_[0] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_SECONDARY_IGNORABLE_[1] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_SECONDARY_IGNORABLE_[0] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_SECONDARY_IGNORABLE_[1] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_PRIMARY_IGNORABLE_[0] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + 
UCAConst.LAST_PRIMARY_IGNORABLE_[1] + = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_TRAILING_MIN_ = 
m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt(); + readUCAConstcount += 4; + int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1; + char result[] = new char[resultsize]; + for (int i = 0; i < resultsize; i ++) { + result[i] = m_dataInputStream_.readChar(); + } + readcount += m_UCAValuesSize_; + if (readcount != m_size_) { + ///CLOVER:OFF + throw new IOException("Internal Error: Data file size error"); + ///CLOVER:ON + } + return result; + } + if (readcount != m_size_) { + ///CLOVER:OFF + throw new IOException("Internal Error: Data file size error"); + ///CLOVER:ON + } + return null; + } + + /** + * Reads in the inverse uca data + * @param input input stream with the inverse uca data + * @return an object containing the inverse uca data + * @exception IOException thrown when error occurs while reading the + * inverse uca + */ + private static CollationParsedRuleBuilder.InverseUCA readInverseUCA( + InputStream inputStream) + throws IOException + { + byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, + INVERSE_UCA_AUTHENTICATE_); + + // weiv: check that we have the correct Unicode version in + // binary files + VersionInfo UCDVersion = UCharacter.getUnicodeVersion(); + if(UnicodeVersion[0] != UCDVersion.getMajor() + || UnicodeVersion[1] != UCDVersion.getMinor()) { + throw new IOException(WRONG_UNICODE_VERSION_ERROR_); + } + + CollationParsedRuleBuilder.InverseUCA result = + new CollationParsedRuleBuilder.InverseUCA(); + DataInputStream input = new DataInputStream(inputStream); + input.readInt(); // bytesize + int tablesize = input.readInt(); // in int size + int contsize = input.readInt(); // in char size + input.readInt(); // table in bytes + input.readInt(); // conts in 
bytes + result.m_UCA_version_ = readVersion(input); + input.skipBytes(8); // skip padding + + int size = tablesize * 3; // one column for each strength + result.m_table_ = new int[size]; + result.m_continuations_ = new char[contsize]; + + for (int i = 0; i < size; i ++) { + result.m_table_[i] = input.readInt(); + } + for (int i = 0; i < contsize; i ++) { + result.m_continuations_[i] = input.readChar(); + } + input.close(); + return result; + } + + /** + * Reads four bytes from the input and returns a VersionInfo + * object. Use it to read different collator versions. + * @param input already instantiated DataInputStream, positioned + * at the start of four version bytes + * @return a ready VersionInfo object + * @throws IOException thrown when error occurs while reading + * version bytes + */ + + protected static VersionInfo readVersion(DataInputStream input) + throws IOException { + byte[] version = new byte[4]; + version[0] = input.readByte(); + version[1] = input.readByte(); + version[2] = input.readByte(); + version[3] = input.readByte(); + + VersionInfo result = + VersionInfo.getInstance( + (int)version[0], (int)version[1], + (int)version[2], (int)version[3]); + + return result; + } + + // private inner class ----------------------------------------------- + + // private variables ------------------------------------------------- + + /** + * Authenticate uca data format version + */ + private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ + = new ICUBinary.Authenticate() { + public boolean isDataVersionAcceptable(byte version[]) + { + return version[0] == DATA_FORMAT_VERSION_[0] + && version[1] >= DATA_FORMAT_VERSION_[1]; + // Too harsh + //&& version[1] == DATA_FORMAT_VERSION_[1] + //&& version[2] == DATA_FORMAT_VERSION_[2] + //&& version[3] == DATA_FORMAT_VERSION_[3]; + } + }; + + /** + * Authenticate uca data format version + */ + private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ + = new ICUBinary.Authenticate() { + public boolean 
isDataVersionAcceptable(byte version[]) + { + return version[0] + == INVERSE_UCA_DATA_FORMAT_VERSION_[0] + && version[1] + >= INVERSE_UCA_DATA_FORMAT_VERSION_[1]; + } + }; + + /** + * Data input stream for uca.icu + */ + private DataInputStream m_dataInputStream_; + + /** + * File format version and id that this class understands. + * No guarantees are made if a older version is used + */ + private static final byte DATA_FORMAT_VERSION_[] = + {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0}; + private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43, + (byte)0x6f, (byte)0x6c}; + /** + * Inverse UCA file format version and id that this class understands. + * No guarantees are made if a older version is used + */ + private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = + {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0}; + private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, + (byte)0x6e, + (byte)0x76, + (byte)0x43}; + + /** + * Wrong unicode version error string + */ + private static final String WRONG_UNICODE_VERSION_ERROR_ = + "Unicode version in binary image is not compatible with the current Unicode version"; + + /** + * Size of expansion table in bytes + */ + private int m_expansionSize_; + /** + * Size of contraction index table in bytes + */ + private int m_contractionIndexSize_; + /** + * Size of contraction table in bytes + */ + private int m_contractionCESize_; + /* + * Size of the Trie in bytes + */ + //private int m_trieSize_; + /** + * Size of the table that contains information about collation elements + * that end with an expansion + */ + private int m_expansionEndCESize_; + /** + * Size of the table that contains information about the maximum size of + * collation elements that end with a particular expansion CE corresponding + * to the ones in expansionEndCE + */ + private int m_expansionEndCEMaxSizeSize_; + /** + * Size of the option table that contains information about the collation + * options + */ + private int 
m_optionSize_; + /** + * Size of the whole data file minusing the ICU header + */ + private int m_size_; + /** + * Size of the collation data header + */ + private int m_headerSize_; + /** + * Size of the table that contains information about the "Unsafe" + * codepoints + */ + private int m_unsafeSize_; + /** + * Size of the table that contains information about codepoints that ends + * with a contraction + */ + private int m_contractionEndSize_; + /** + * Size of the table that contains UCA contraction information + */ + private int m_UCAValuesSize_; + + // private methods --------------------------------------------------- + +} + diff --git a/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java b/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java new file mode 100644 index 00000000000..6d08c82ee0f --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java @@ -0,0 +1,145 @@ +/** +******************************************************************************* +* Copyright (C) 2003-2009, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ + +package com.ibm.icu.text; + +import java.util.Locale; +import java.util.MissingResourceException; +import java.util.Set; + +import com.ibm.icu.impl.ICULocaleService; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.ICUService; +import com.ibm.icu.impl.ICULocaleService.LocaleKeyFactory; +import com.ibm.icu.impl.ICUService.Factory; +import com.ibm.icu.text.Collator.CollatorFactory; +import com.ibm.icu.util.ULocale; + +final class CollatorServiceShim extends Collator.ServiceShim { + + Collator getInstance(ULocale locale) { + // use service cache, it's faster than instantiation +// if (service.isDefault()) { +// return new RuleBasedCollator(locale); +// } + try { + ULocale[] actualLoc = new ULocale[1]; + Collator coll = (Collator)service.get(locale, actualLoc); + if (coll == null) { + ///CLOVER:OFF + //Can't really change coll after it's been initialized + throw new MissingResourceException("Could not locate Collator data", "", ""); + ///CLOVER:ON + } + coll = (Collator) coll.clone(); + coll.setLocale(actualLoc[0], actualLoc[0]); // services make no distinction between actual & valid + return coll; + } + catch (CloneNotSupportedException e) { + ///CLOVER:OFF + throw new IllegalStateException(e.getMessage()); + ///CLOVER:ON + } + } + + Object registerInstance(Collator collator, ULocale locale) { + return service.registerObject(collator, locale); + } + + Object registerFactory(CollatorFactory f) { + class CFactory extends LocaleKeyFactory { + CollatorFactory delegate; + + CFactory(CollatorFactory fctry) { + super(fctry.visible()); + this.delegate = fctry; + } + + public Object handleCreate(ULocale loc, int kind, ICUService srvc) { + Object coll = delegate.createCollator(loc); + return coll; + } + + public String getDisplayName(String id, ULocale displayLocale) { + ULocale objectLocale = new ULocale(id); + return delegate.getDisplayName(objectLocale, 
displayLocale); + } + + public Set getSupportedIDs() { + return delegate.getSupportedLocaleIDs(); + } + } + + return service.registerFactory(new CFactory(f)); + } + + boolean unregister(Object registryKey) { + return service.unregisterFactory((Factory)registryKey); + } + + Locale[] getAvailableLocales() { + // TODO rewrite this to just wrap getAvailableULocales later + Locale[] result; + if (service.isDefault()) { + ClassLoader cl = getClass().getClassLoader(); + result = ICUResourceBundle.getAvailableLocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME, cl); + } else { + result = service.getAvailableLocales(); + } + return result; + } + + ULocale[] getAvailableULocales() { + ULocale[] result; + if (service.isDefault()) { + ClassLoader cl = getClass().getClassLoader(); + result = ICUResourceBundle.getAvailableULocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME, cl); + } else { + result = service.getAvailableULocales(); + } + return result; + } + + String getDisplayName(ULocale objectLocale, ULocale displayLocale) { + String id = objectLocale.getName(); + return service.getDisplayName(id, displayLocale); + } + + private static class CService extends ICULocaleService { + CService() { + super("Collator"); + + class CollatorFactory extends ICUResourceBundleFactory { + CollatorFactory() { + super(ICUResourceBundle.ICU_COLLATION_BASE_NAME); + } + + protected Object handleCreate(ULocale uloc, int kind, ICUService srvc) { + return new RuleBasedCollator(uloc); + } + } + + this.registerFactory(new CollatorFactory()); + markDefault(); + } + ///CLOVER:OFF + // The following method can not be reached by testing + protected Object handleDefault(Key key, String[] actualIDReturn) { + if (actualIDReturn != null) { + actualIDReturn[0] = "root"; + } + try { + return new RuleBasedCollator(ULocale.ROOT); + } + catch (MissingResourceException e) { + return null; + } + } + ///CLOVER:ON + } + private static ICULocaleService service = new CService(); +} diff --git 
a/main/classes/collate/src/com/ibm/icu/text/IndexCharacters.java b/main/classes/collate/src/com/ibm/icu/text/IndexCharacters.java new file mode 100644 index 00000000000..3fc61d2cd34 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/IndexCharacters.java @@ -0,0 +1,288 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2010, Google Inc, International Business Machines Corporation + * and others. All Rights Reserved. + ******************************************************************************* + */ +package com.ibm.icu.text; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import com.ibm.icu.impl.MultiComparator; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.util.LocaleData; +import com.ibm.icu.util.ULocale; + +/** + * A set of characters for use as a UI "index", that is, a + * list of clickable characters (or character sequences) that allow the user to + * see a segment of a larger "target" list. That is, each character corresponds + * to a bucket in the target list, where everything in the bucket is greater + * than or equal to the character (according to the locale's collation). The + * intention is to have two main functions; one that produces an index list that + * is relatively static, and the other is a list that produces roughly + * equally-sized buckets. Only the first is currently provided. + *

    + * The static list would be presented as something like + * + *

    + *  A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
    + * 
    + * + * In the UI, an index character could be omitted if its bucket is empty. For + * example, if there is nothing in the bucket for Q, then Q could be omitted. + *

    + * Important Notes: + *

      + *
    • Although we say "character" above, the index character could be a + * sequence, like "CH".
    • + *
    • There could be items in a target list that are less than the first or + * (much) greater than the last; examples include words from other scripts. The + * UI could bucket them with the first or last respectively, or have some symbol + * for those categories.
    • + *
    • The use of the list requires that the target list be sorted according to + * the locale that is used to create that list.
    • + *
    • For languages without widely accepted sorting methods (e.g., Chinese/Japanese) + * the results may appear arbitrary, and it may be best not to use these methods.
    • + *
    • In the initial version, an arbitrary limit of 100 is placed on these lists.
    • + *
    + * + * @author markdavis + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ +//TODO(markdavis) return an additional character that is the "least greater" character than +//the last character. +public class IndexCharacters { + private static final char CGJ = '\u034F'; + private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]"); + private static final UnicodeSet HANGUL = new UnicodeSet("[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]"); + private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"); + private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]"); + + private ULocale locale; + private Collator comparator; + private Set indexCharacters; + private LinkedHashMap> alreadyIn = new LinkedHashMap>(); + private List noDistinctSorting = new ArrayList(); + private List notAlphabetic = new ArrayList(); + + /** + * Create the index object. + * @param locale The locale to be passed. + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ + public IndexCharacters(ULocale locale) { + this(locale, LocaleData.getExemplarSet(locale, LocaleData.ES_STANDARD), Collator.getInstance(locale)); + } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @SuppressWarnings("unchecked") + public IndexCharacters(ULocale locale, UnicodeSet exemplarSet, Collator collator) { + this.locale = locale; + try { + comparator = (Collator) collator.clone(); + } catch (CloneNotSupportedException e) { + throw new IllegalArgumentException(e); + } + comparator.setStrength(Collator.PRIMARY); + + // get the exemplars, and handle special cases + + UnicodeSet exemplars = exemplarSet.cloneAsThawed(); + // question: should we add auxiliary exemplars? 
+ if (exemplars.containsSome(CORE_LATIN)) { + exemplars.addAll(CORE_LATIN); + } + if (exemplars.containsSome(HANGUL)) { + // cut down to small list + exemplars.removeAll(new UnicodeSet("[:block=hangul_syllables:]")).addAll(HANGUL); + } + if (exemplars.containsSome(ETHIOPIC)) { + // cut down to small list + // make use of the fact that Ethiopic is allocated in 8's, where + // the base is 0 mod 8. + for (UnicodeSetIterator it = new UnicodeSetIterator(ETHIOPIC); it.next();) { + if ((it.codepoint & 0x7) != 0) { + exemplars.remove(it.codepoint); + } + } + } + + // first sort them, with an "best" ordering among items that are the same according + // to the collator + Comparator[] comparators = (Comparator[])new Comparator[2]; + comparators[0] = comparator; + comparators[1] = new PreferenceComparator(Collator.getInstance(locale)); + + Set preferenceSorting = new TreeSet(new MultiComparator(comparators)); + for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) { + preferenceSorting.add(it.getString()); + } + + indexCharacters = new TreeSet(comparator); + + // We nw make a sorted array of elements, uppercased + // Some of the input may, however, be redundant. + // That is, we might have c, ch, d, where "ch" sorts just like "c", "h" + // So we make a pass through, filtering out those cases. 
+ + for (String item : preferenceSorting) { + item = UCharacter.toUpperCase(locale, item); + if (indexCharacters.contains(item)) { + for (String itemAlreadyIn : indexCharacters) { + if (comparator.compare(item, itemAlreadyIn) == 0) { + Set targets = alreadyIn.get(itemAlreadyIn); + if (targets == null) { + alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet()); + } + targets.add(item); + break; + } + } + } else if (UTF16.countCodePoint(item) > 1 && comparator.compare(item, separated(item)) == 0){ + noDistinctSorting.add(item); + } else if (!ALPHABETIC.containsSome(item)) { + notAlphabetic.add(item); + } else { + indexCharacters.add(item); + } + } + + // if the result is still too large, cut down to 100 elements + + final int size = indexCharacters.size() - 1; + if (size > 99) { + int count = 0; + int old = -1; + for (Iterator it = indexCharacters.iterator(); it.hasNext();) { + ++ count; + it.next(); + final int bump = count * 99 / size; + if (bump == old) { + it.remove(); + } else { + old = bump; + } + } + } + indexCharacters = Collections.unmodifiableSet(indexCharacters); + } + + /* + * Return the string with interspersed CGJs. Input must have more than 2 codepoints. + */ + private String separated(String item) { + StringBuilder result = new StringBuilder(); + // add a CGJ except within surrogates + char last = item.charAt(0); + result.append(last); + for (int i = 1; i < item.length(); ++i) { + char ch = item.charAt(i); + if (!UCharacter.isHighSurrogate(last) || !UCharacter.isLowSurrogate(ch)) { + result.append(CGJ); + } + result.append(ch); + last = ch; + } + return result.toString(); + } + + /** + * Get the index characters. + * @return A collection including the index characters + * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ + public Collection getIndexCharacters() { + return indexCharacters; + } + + /** + * Get the locale + * @return The locale. 
+ * @draft ICU 4.2 + * @provisional This API might change or be removed in a future release. + */ + public ULocale getLocale() { + return locale; + } + + /** + * As the index is built, items may be discarded from the exemplars. + * This contains some of the discards, and is intended for debugging. + * @internal + * @deprecated This API is ICU internal only. + */ + public Map> getAlreadyIn() { + return alreadyIn; + } + + /** + * As the index is built, items may be discarded from the exemplars. + * This contains some of the discards, and is intended for debugging. + * @internal + * @deprecated This API is ICU internal only. + */ + public List getNoDistinctSorting() { + return noDistinctSorting; + } + + /** + * As the index is built, items may be discarded from the exemplars. + * This contains some of the discards, and is intended for debugging. + * @internal + * @deprecated This API is ICU internal only. + */ + public List getNotAlphabetic() { + return notAlphabetic; + } + + /* + * Comparator that returns "better" items first, where shorter NFKD is better, + * and otherwise NFKD binary order is better, and otherwise binary order is better. 
+ */ + private static class PreferenceComparator implements Comparator { + static final Comparator binary = new UTF16.StringComparator(true,false,0); + final Collator collator; + + public PreferenceComparator(Collator collator) { + this.collator = collator; + } + + public int compare(Object o1, Object o2) { + return compare((String)o1, (String)o2); + } + + public int compare(String s1, String s2) { + if (s1 == s2) { + return 0; + } + String n1 = Normalizer.decompose(s1, true); + String n2 = Normalizer.decompose(s2, true); + int result = n1.length() - n2.length(); + if (result != 0) { + return result; + } + result = collator.compare(n1, n2); + if (result != 0) { + return result; + } + return binary.compare(s1, s2); + } + } +} diff --git a/main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java b/main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java new file mode 100644 index 00000000000..68e7863e387 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java @@ -0,0 +1,102 @@ +/** + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.text; + +import com.ibm.icu.util.ByteArrayWrapper; + +/** + *

    + * Simple class wrapper to store the internal byte representation of a + * CollationKey. Unlike the CollationKey, this class does not contain information + * on the source string the sort order represents. RawCollationKey is mutable + * and users can reuse its objects with the method in + * RuleBasedCollator.getRawCollationKey(..). + *

    + *

    + * Please refer to the documentation on CollationKey for a detailed description + * of the internal byte representation. Note the internal byte representation + * is always null-terminated. + *

    + * + * Example of use:
    + * String str[] = {.....}; + * RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(); + * RawCollationKey key = new RawCollationKey(128); + * for (int i = 0; i < str.length; i ++) { + * collator.getRawCollationKey(str[i], key); + * // do something with key.bytes + * } + *
    + *

    Note: Comparison between RawCollationKeys created by + * different Collators might return incorrect results. + * See class documentation for Collator.

    + * @stable ICU 2.8 + * @see RuleBasedCollator + * @see CollationKey + */ +public final class RawCollationKey extends ByteArrayWrapper +{ + // public constructors -------------------------------------------------- + + /** + * Default constructor, internal byte array is null and its size set to 0. + * @stable ICU 2.8 + */ + public RawCollationKey() + { + } + + /** + * RawCollationKey created with an empty internal byte array of length + * capacity. Size of the internal byte array will be set to 0. + * @param capacity length of internal byte array + * @stable ICU 2.8 + */ + public RawCollationKey(int capacity) + { + bytes = new byte[capacity]; + } + + /** + * RawCollationKey created, adopting bytes as the internal byte array. + * Size of the internal byte array will be set to 0. + * @param bytes byte array to be adopted by RawCollationKey + * @stable ICU 2.8 + */ + public RawCollationKey(byte[] bytes) + { + this.bytes = bytes; + } + + /** + * Construct a RawCollationKey from a byte array and size. + * @param bytesToAdopt the byte array to adopt + * @param size the length of valid data in the byte array + * @throws IndexOutOfBoundsException if bytesToAdopt == null and size != 0, or + * size < 0, or size > bytesToAdopt.length. + * @stable ICU 2.8 + */ + public RawCollationKey(byte[] bytesToAdopt, int size) + { + super(bytesToAdopt, size); + } + + /** + * Compare this RawCollationKey to another, which must not be null. This overrides + * the inherited implementation to ensure the returned values are -1, 0, or 1. + * @param rhs the RawCollationKey to compare to. + * @return -1, 0, or 1 as this compares less than, equal to, or + * greater than rhs. + * @throws ClassCastException if the other object is not a RawCollationKey. + * @stable ICU 4.4 + */ + public int compareTo(RawCollationKey rhs) { + int result = super.compareTo(rhs); + return result < 0 ? -1 : result == 0 ? 
0 : 1; + } +} diff --git a/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java b/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java new file mode 100644 index 00000000000..55e19a1c5c7 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java @@ -0,0 +1,273 @@ +/* +******************************************************************************* +* Copyright (C) 2009-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.text; + +import java.util.HashMap; +import java.util.Map; + +import com.ibm.icu.util.ULocale; + +/** + * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat + * implementation behind setLenientParseMode, which is based on Collator. + * @internal + * @deprecated This API is ICU internal only. + */ +public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider { + private Map cache; + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + public RbnfScannerProviderImpl() { + cache = new HashMap(); + } + + /** + * Returns a collation-based scanner. + * + * Only primary differences are treated as significant. This means that case + * differences, accent differences, alternate spellings of the same letter + * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in + * matching the text. In many cases, numerals will be accepted in place of words + * or phrases as well. + * + * For example, all of the following will correctly parse as 255 in English in + * lenient-parse mode: + *
    "two hundred fifty-five" + *
    "two hundred fifty five" + *
    "TWO HUNDRED FIFTY-FIVE" + *
    "twohundredfiftyfive" + *
    "2 hundred fifty-5" + * + * The Collator used is determined by the locale that was + * passed to this object on construction. The description passed to this object + * on construction may supply additional collation rules that are appended to the + * end of the default collator for the locale, enabling additional equivalences + * (such as adding more ignorable characters or permitting spelled-out version of + * symbols; see the demo program for examples). + * + * It's important to emphasize that even strict parsing is relatively lenient: it + * will accept some text that it won't produce as output. In English, for example, + * it will correctly parse "two hundred zero" and "fifteen hundred". + * + * @internal + * @deprecated This API is ICU internal only. + */ + public RbnfLenientScanner get(ULocale locale, String extras) { + RbnfLenientScanner result = null; + String key = locale.toString() + "/" + extras; + synchronized(cache) { + result = cache.get(key); + if (result != null) { + return result; + } + } + result = createScanner(locale, extras); + synchronized(cache) { + cache.put(key, result); + } + return result; + } + + /** + * @internal + * @deprecated This API is ICU internal only. 
+ */ + protected RbnfLenientScanner createScanner(ULocale locale, String extras) { + RuleBasedCollator collator = null; + try { + // create a default collator based on the locale, + // then pull out that collator's rules, append any additional + // rules specified in the description, and create a _new_ + // collator based on the combination of those rules + collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale()); + if (extras != null) { + String rules = collator.getRules() + extras; + collator = new RuleBasedCollator(rules); + } + collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); + } + catch (Exception e) { + // If we get here, it means we have a malformed set of + // collation rules, which hopefully won't happen + ///CLOVER:OFF + if (true){ // debug hook + e.printStackTrace(); System.out.println("++++"); + } + collator = null; + ///CLOVER:ON + } + + return new RbnfLenientScannerImpl(collator); + } + + private static class RbnfLenientScannerImpl implements RbnfLenientScanner { + private final RuleBasedCollator collator; + + private RbnfLenientScannerImpl(RuleBasedCollator rbc) { + this.collator = rbc; + } + + public boolean allIgnorable(String s) { + CollationElementIterator iter = collator.getCollationElementIterator(s); + + int o = iter.next(); + while (o != CollationElementIterator.NULLORDER + && CollationElementIterator.primaryOrder(o) == 0) { + o = iter.next(); + } + return o == CollationElementIterator.NULLORDER; + } + + public int[] findText(String str, String key, int startingAt) { + int p = startingAt; + int keyLen = 0; + + // basically just isolate smaller and smaller substrings of + // the target string (each running to the end of the string, + // and with the first one running from startingAt to the end) + // and then use prefixLength() to see if the search key is at + // the beginning of each substring. This is excruciatingly + // slow, but it will locate the key and tell use how long the + // matching text was. 
+ while (p < str.length() && keyLen == 0) { + keyLen = prefixLength(str.substring(p), key); + if (keyLen != 0) { + return new int[] { p, keyLen }; + } + ++p; + } + // if we make it to here, we didn't find it. Return -1 for the + // location. The length should be ignored, but set it to 0, + // which should be "safe" + return new int[] { -1, 0 }; + } + + ///CLOVER:OFF + // The following method contains the same signature as findText + // and has never been used by anything once. + @SuppressWarnings("unused") + public int[] findText2(String str, String key, int startingAt) { + + CollationElementIterator strIter = collator.getCollationElementIterator(str); + CollationElementIterator keyIter = collator.getCollationElementIterator(key); + + int keyStart = -1; + + strIter.setOffset(startingAt); + + int oStr = strIter.next(); + int oKey = keyIter.next(); + while (oKey != CollationElementIterator.NULLORDER) { + while (oStr != CollationElementIterator.NULLORDER && + CollationElementIterator.primaryOrder(oStr) == 0) + oStr = strIter.next(); + + while (oKey != CollationElementIterator.NULLORDER && + CollationElementIterator.primaryOrder(oKey) == 0) + oKey = keyIter.next(); + + if (oStr == CollationElementIterator.NULLORDER) { + return new int[] { -1, 0 }; + } + + if (oKey == CollationElementIterator.NULLORDER) { + break; + } + + if (CollationElementIterator.primaryOrder(oStr) == + CollationElementIterator.primaryOrder(oKey)) { + keyStart = strIter.getOffset(); + oStr = strIter.next(); + oKey = keyIter.next(); + } else { + if (keyStart != -1) { + keyStart = -1; + keyIter.reset(); + } else { + oStr = strIter.next(); + } + } + } + + if (oKey == CollationElementIterator.NULLORDER) { + return new int[] { keyStart, strIter.getOffset() - keyStart }; + } + + return new int[] { -1, 0 }; + } + ///CLOVER:ON + + public int prefixLength(String str, String prefix) { + // Create two collation element iterators, one over the target string + // and another over the prefix. 
+ // + // Previous code was matching "fifty-" against " fifty" and leaving + // the number " fifty-7" to parse as 43 (50 - 7). + // Also it seems that if we consume the entire prefix, that's ok even + // if we've consumed the entire string, so I switched the logic to + // reflect this. + + CollationElementIterator strIter = collator.getCollationElementIterator(str); + CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix); + + // match collation elements between the strings + int oStr = strIter.next(); + int oPrefix = prefixIter.next(); + + while (oPrefix != CollationElementIterator.NULLORDER) { + // skip over ignorable characters in the target string + while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr != + CollationElementIterator.NULLORDER) { + oStr = strIter.next(); + } + + // skip over ignorable characters in the prefix + while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix != + CollationElementIterator.NULLORDER) { + oPrefix = prefixIter.next(); + } + + // if skipping over ignorables brought to the end of + // the prefix, we DID match: drop out of the loop + if (oPrefix == CollationElementIterator.NULLORDER) { + break; + } + + // if skipping over ignorables brought us to the end + // of the target string, we didn't match and return 0 + if (oStr == CollationElementIterator.NULLORDER) { + return 0; + } + + // match collation elements from the two strings + // (considering only primary differences). 
If we + // get a mismatch, dump out and return 0 + if (CollationElementIterator.primaryOrder(oStr) != + CollationElementIterator.primaryOrder(oPrefix)) { + return 0; + } + + // otherwise, advance to the next character in each string + // and loop (we drop out of the loop when we exhaust + // collation elements in the prefix) + + oStr = strIter.next(); + oPrefix = prefixIter.next(); + } + + int result = strIter.getOffset(); + if (oStr != CollationElementIterator.NULLORDER) { + --result; + } + return result; + } + } +} \ No newline at end of file diff --git a/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java b/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java new file mode 100644 index 00000000000..98868e811aa --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java @@ -0,0 +1,4679 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.text.CharacterIterator; +import java.text.ParseException; +import java.util.Arrays; +import java.util.MissingResourceException; + +import com.ibm.icu.impl.BOCU; +import com.ibm.icu.impl.ICUDebug; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.ImplicitCEGenerator; +import com.ibm.icu.impl.IntTrie; +import com.ibm.icu.impl.StringUCharacterIterator; +import com.ibm.icu.impl.Trie; +import com.ibm.icu.impl.TrieIterator; +import com.ibm.icu.impl.Utility; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.util.RangeValueIterator; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.VersionInfo; + +/** + *

    RuleBasedCollator is a concrete subclass of Collator. It allows + * customization of the Collator via user-specified rule sets. + * RuleBasedCollator is designed to be fully compliant with the Unicode + * Collation Algorithm (UCA) and conforms to ISO 14651.

    + * + *

    Users are strongly encouraged to read + * the users guide for more information about the collation + * service before using this class.

    + * + *

    Create a RuleBasedCollator from a locale by calling the + * getInstance(Locale) factory method in the base class Collator. + * Collator.getInstance(Locale) creates a RuleBasedCollator object + * based on the collation rules defined by the argument locale. If a + * customized collation ordering or attributes is required, use the + * RuleBasedCollator(String) constructor with the appropriate + * rules. The customized RuleBasedCollator will base its ordering on + * UCA, while re-adjusting the attributes and orders of the characters + * in the specified rule accordingly.

    + * + *

    RuleBasedCollator provides correct collation orders for most + * locales supported in ICU. If specific data for a locale is not + * available, the order eventually falls back to the UCA collation + * order.

    + * + *

    For information about the collation rule syntax and details + * about customization, please refer to the + * + * Collation customization section of the user's guide.

    + * + *

    Note that there are some differences between + * the Collation rule syntax used in Java and ICU4J: + * + *

      + *
    • According to the JDK documentation: + * + *

      + * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule + * is in force when a Thai vowel of the range \U0E40-\U0E44 precedes a + * Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the + * range \U0EC0-\U0EC4 precedes a Lao consonant of the range + * \U0E81-\U0EAE then the + * vowel is placed after the consonant for collation purposes. + *

      + *

      + * If a rule is without the modifier '!', the Thai/Lao vowel-consonant + * swapping is not turned on. + *

      + *
      + *

      + * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao + * vowel-consonant swapping, since the UCA clearly states that it has to be + * supported to ensure a correct sorting order. If a '!' is encountered, it is + * ignored. + *

      + *
    • As mentioned in the documentation of the base class Collator, + * compatibility decomposition mode is not supported. + *
    + *

    + * Examples + *

    + *

    + * Creating Customized RuleBasedCollators: + *

    + *
    + * String simple = "& a < b < c < d";
    + * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
    + *
    + * String norwegian = "& a , A < b , B < c , C < d , D < e , E "
    + *                    + "< f , F < g , G < h , H < i , I < j , "
    + *                    + "J < k , K < l , L < m , M < n , N < "
    + *                    + "o , O < p , P < q , Q < r , R < s , S < "
    + *                    + "t , T < u , U < v , V < w , W < x , X "
    + *                    + "< y , Y < z , Z < \u00E5 = a\u030A "
    + *                    + ", \u00C5 = A\u030A ; aa , AA < \u00E6 "
    + *                    + ", \u00C6 < \u00F8 , \u00D8";
    + * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
    + * 
    + *
    + * + * Concatenating rules to combine Collators: + *
    + *
    + * // Create an en_US Collator object
    + * RuleBasedCollator en_USCollator = (RuleBasedCollator)
    + *     Collator.getInstance(new Locale("en", "US", ""));
    + * // Create a da_DK Collator object
    + * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
    + *     Collator.getInstance(new Locale("da", "DK", ""));
    + * // Combine the two
    + * // First, get the collation rules from en_USCollator
    + * String en_USRules = en_USCollator.getRules();
    + * // Second, get the collation rules from da_DKCollator
    + * String da_DKRules = da_DKCollator.getRules();
    + * RuleBasedCollator newCollator =
    + *                             new RuleBasedCollator(en_USRules + da_DKRules);
    + * // newCollator has the combined rules
    + * 
    + *
    + * + * Making changes to an existing RuleBasedCollator to create a new + * Collator object, by appending changes to the existing rule: + *
    + *
    + * // Create a new Collator object with additional rules
    + * String addRules = "& C < ch, cH, Ch, CH";
    + * RuleBasedCollator myCollator =
    + *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
    + * // myCollator contains the new rules
    + * 
    + *
    + * + * How to change the order of non-spacing accents: + *
    + *
    + * // old rule with main accents
    + * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 "
    + *                 + "; \u0327 ; \u0303 ; \u0304 ; \u0305 "
    + *                 + "; \u0306 ; \u0307 ; \u0309 ; \u030A "
    + *                 + "; \u030B ; \u030C ; \u030D ; \u030E "
    + *                 + "; \u030F ; \u0310 ; \u0311 ; \u0312 "
    + *                 + "< a , A ; ae, AE ; \u00e6 , \u00c6 "
    + *                 + "< b , B < c, C < e, E & C < d , D";
    + * // change the order of accent characters
    + * String addOn = "& \u0300 ; \u0308 ; \u0302";
    + * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
    + * 
    + *
    + * + * Putting in a new primary ordering before the default setting, + * e.g. sort English characters before or after Japanese characters in the Japanese + * Collator: + *
    + *
    + * // get en_US Collator rules
    + * RuleBasedCollator en_USCollator
    + *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
    + * // add a few Japanese characters to sort before English characters
    + * // suppose the last character before the first base letter 'a' in
    + * // the English collation rule is \u2212
    + * String jaString = "& \u2212 < \u3041, \u3042 < \u3043, "
    + *                   + "\u3044";
    + * RuleBasedCollator myJapaneseCollator
    + *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
    + * 
    + *
    + *

    + *

    + * This class is not subclassable + *

    + * @author Syn Wee Quek + * @stable ICU 2.8 + */ +public final class RuleBasedCollator extends Collator +{ + // public constructors --------------------------------------------------- + + /** + *

    + * Constructor that takes the argument rules for + * customization. The collator will be based on UCA, + * with the attributes and re-ordering of the characters specified in the + * argument rules. + *

    + *

    See the user guide's section on + * + * Collation Customization for details on the rule syntax. + *

    + * @param rules the collation rules to build the collation table from. + * @exception ParseException and IOException thrown. ParseException thrown + * when argument rules have an invalid syntax. IOException + * thrown when an error occured while reading internal data. + * @stable ICU 2.8 + */ + public RuleBasedCollator(String rules) throws Exception + { + checkUCA(); + if (rules == null) { + throw new IllegalArgumentException( + "Collation rules can not be null"); + } + init(rules); + } + + // public methods -------------------------------------------------------- + + /** + * Clones the RuleBasedCollator + * @return a new instance of this RuleBasedCollator object + * @stable ICU 2.8 + */ + public Object clone() throws CloneNotSupportedException + { + RuleBasedCollator result = (RuleBasedCollator)super.clone(); + if (latinOneCEs_ != null) { + result.m_reallocLatinOneCEs_ = true; + result.m_ContInfo_ = new ContractionInfo(); + } + + // since all collation data in the RuleBasedCollator do not change + // we can safely assign the result.fields to this collator + result.initUtility(false); // let the new clone have their own util + // iterators + return result; + } + + /** + * Return a CollationElementIterator for the given String. + * @see CollationElementIterator + * @stable ICU 2.8 + */ + public CollationElementIterator getCollationElementIterator(String source) + { + return new CollationElementIterator(source, this); + } + + /** + * Return a CollationElementIterator for the given CharacterIterator. + * The source iterator's integrity will be preserved since a new copy + * will be created for use. + * @see CollationElementIterator + * @stable ICU 2.8 + */ + public CollationElementIterator getCollationElementIterator( + CharacterIterator source) + { + CharacterIterator newsource = (CharacterIterator)source.clone(); + return new CollationElementIterator(newsource, this); + } + + /** + * Return a CollationElementIterator for the given UCharacterIterator. 
+ * The source iterator's integrity will be preserved since a new copy + * will be created for use. + * @see CollationElementIterator + * @stable ICU 2.8 + */ + public CollationElementIterator getCollationElementIterator( + UCharacterIterator source) + { + return new CollationElementIterator(source, this); + } + + // public setters -------------------------------------------------------- + + /** + * Sets the Hiragana Quaternary mode to be on or off. + * When the Hiragana Quaternary mode is turned on, the collator + * positions Hiragana characters before all non-ignorable characters in + * QUATERNARY strength. This is to produce a correct JIS collation order, + * distinguishing between Katakana and Hiragana characters. + * @param flag true if Hiragana Quaternary mode is to be on, false + * otherwise + * @see #setHiraganaQuaternaryDefault + * @see #isHiraganaQuaternary + * @stable ICU 2.8 + */ + public void setHiraganaQuaternary(boolean flag) + { + m_isHiragana4_ = flag; + updateInternalState(); + } + + /** + * Sets the Hiragana Quaternary mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setHiraganaQuaternary(boolean) for more details. + * @see #setHiraganaQuaternary(boolean) + * @see #isHiraganaQuaternary + * @stable ICU 2.8 + */ + public void setHiraganaQuaternaryDefault() + { + m_isHiragana4_ = m_defaultIsHiragana4_; + updateInternalState(); + } + + /** + * Sets whether uppercase characters sort before lowercase + * characters or vice versa, in strength TERTIARY. The default + * mode is false, and so lowercase characters sort before uppercase + * characters. + * If true, sort upper case characters first. 
+ * @param upperfirst true to sort uppercase characters before + * lowercase characters, false to sort lowercase + * characters before uppercase characters + * @see #isLowerCaseFirst + * @see #isUpperCaseFirst + * @see #setLowerCaseFirst + * @see #setCaseFirstDefault + * @stable ICU 2.8 + */ + public void setUpperCaseFirst(boolean upperfirst) + { + if (upperfirst) { + if(m_caseFirst_ != AttributeValue.UPPER_FIRST_) { + latinOneRegenTable_ = true; + } + m_caseFirst_ = AttributeValue.UPPER_FIRST_; + } + else { + if(m_caseFirst_ != AttributeValue.OFF_) { + latinOneRegenTable_ = true; + } + m_caseFirst_ = AttributeValue.OFF_; + } + updateInternalState(); + } + + /** + * Sets the orders of lower cased characters to sort before upper cased + * characters, in strength TERTIARY. The default + * mode is false. + * If true is set, the RuleBasedCollator will sort lower cased characters + * before the upper cased ones. + * Otherwise, if false is set, the RuleBasedCollator will ignore case + * preferences. + * @param lowerfirst true for sorting lower cased characters before + * upper cased characters, false to ignore case + * preferences. + * @see #isLowerCaseFirst + * @see #isUpperCaseFirst + * @see #setUpperCaseFirst + * @see #setCaseFirstDefault + * @stable ICU 2.8 + */ + public void setLowerCaseFirst(boolean lowerfirst) + { + if (lowerfirst) { + if(m_caseFirst_ != AttributeValue.LOWER_FIRST_) { + latinOneRegenTable_ = true; + } + m_caseFirst_ = AttributeValue.LOWER_FIRST_; + } + else { + if(m_caseFirst_ != AttributeValue.OFF_) { + latinOneRegenTable_ = true; + } + m_caseFirst_ = AttributeValue.OFF_; + } + updateInternalState(); + } + + /** + * Sets the case first mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more + * details. 
+ * @see #isLowerCaseFirst + * @see #isUpperCaseFirst + * @see #setLowerCaseFirst(boolean) + * @see #setUpperCaseFirst(boolean) + * @stable ICU 2.8 + */ + public final void setCaseFirstDefault() + { + if(m_caseFirst_ != m_defaultCaseFirst_) { + latinOneRegenTable_ = true; + } + m_caseFirst_ = m_defaultCaseFirst_; + updateInternalState(); + } + + /** + * Sets the alternate handling mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setAlternateHandlingShifted(boolean) for more details. + * @see #setAlternateHandlingShifted(boolean) + * @see #isAlternateHandlingShifted() + * @stable ICU 2.8 + */ + public void setAlternateHandlingDefault() + { + m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; + updateInternalState(); + } + + /** + * Sets the case level mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setCaseLevel(boolean) for more details. + * @see #setCaseLevel(boolean) + * @see #isCaseLevel + * @stable ICU 2.8 + */ + public void setCaseLevelDefault() + { + m_isCaseLevel_ = m_defaultIsCaseLevel_; + updateInternalState(); + } + + /** + * Sets the decomposition mode to the initial mode set during construction + * of the RuleBasedCollator. + * See setDecomposition(int) for more details. + * @see #getDecomposition + * @see #setDecomposition(int) + * @stable ICU 2.8 + */ + public void setDecompositionDefault() + { + setDecomposition(m_defaultDecomposition_); + updateInternalState(); + } + + /** + * Sets the French collation mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setFrenchCollation(boolean) for more details.
+ * @see #isFrenchCollation + * @see #setFrenchCollation(boolean) + * @stable ICU 2.8 + */ + public void setFrenchCollationDefault() + { + if(m_isFrenchCollation_ != m_defaultIsFrenchCollation_) { + latinOneRegenTable_ = true; + } + m_isFrenchCollation_ = m_defaultIsFrenchCollation_; + updateInternalState(); + } + + /** + * Sets the collation strength to the initial mode set during the + * construction of the RuleBasedCollator. + * See setStrength(int) for more details. + * @see #setStrength(int) + * @see #getStrength + * @stable ICU 2.8 + */ + public void setStrengthDefault() + { + setStrength(m_defaultStrength_); + updateInternalState(); + } + + /** + * Method to set numeric collation to its default value. + * When numeric collation is turned on, this Collator generates a collation + * key for the numeric value of substrings of digits. This is a way to get + * '100' to sort AFTER '2' + * @see #getNumericCollation + * @see #setNumericCollation + * @stable ICU 2.8 + */ + public void setNumericCollationDefault() + { + setNumericCollation(m_defaultIsNumericCollation_); + updateInternalState(); + } + + /** + * Sets the mode for the direction of SECONDARY weights to be used in + * French collation. + * The default value is false, which treats SECONDARY weights in the order + * they appear. + * If set to true, the SECONDARY weights will be sorted backwards. + * See the section on + * + * French collation for more information. + * @param flag true to set the French collation on, false to set it off + * @stable ICU 2.8 + * @see #isFrenchCollation + * @see #setFrenchCollationDefault + */ + public void setFrenchCollation(boolean flag) + { + if(m_isFrenchCollation_ != flag) { + latinOneRegenTable_ = true; + } + m_isFrenchCollation_ = flag; + updateInternalState(); + } + + /** + * Sets the alternate handling for QUATERNARY strength to be either + * shifted or non-ignorable. + * See the UCA definition on + * + * Alternate Weighting. 
+ * This attribute will only be effective when QUATERNARY strength is set. + * The default value for this mode is false, corresponding to the + * NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the + * RuleBasedCollator treats all the codepoints with non-ignorable + * primary weights in the same way. + * If the mode is set to true, the behaviour corresponds to SHIFTED defined + * in UCA; this causes codepoints with PRIMARY orders that are equal or + * below the variable top value to be ignored in PRIMARY order and + * moved to the QUATERNARY order. + * @param shifted true if SHIFTED behaviour for alternate handling is + * desired, false for the NON_IGNORABLE behaviour. + * @see #isAlternateHandlingShifted + * @see #setAlternateHandlingDefault + * @stable ICU 2.8 + */ + public void setAlternateHandlingShifted(boolean shifted) + { + m_isAlternateHandlingShifted_ = shifted; + updateInternalState(); + } + + /** + *

    + * When case level is set to true, an additional weight is formed + * between the SECONDARY and TERTIARY weight, known as the case level. + * The case level is used to distinguish large and small Japanese Kana + * characters. Case level could also be used in other situations. + * For example to distinguish certain Pinyin characters. + * The default value is false, which means the case level is not generated. + * The contents of the case level are affected by the case first + * mode. A simple way to ignore accent differences in a string is to set + * the strength to PRIMARY and enable case level. + *

    + *

    + * See the section on + * + * case level for more information. + *

    + * @param flag true if case level sorting is required, false otherwise + * @stable ICU 2.8 + * @see #setCaseLevelDefault + * @see #isCaseLevel + */ + public void setCaseLevel(boolean flag) + { + m_isCaseLevel_ = flag; + updateInternalState(); + } + + /** + *

    + * Sets this Collator's strength property. The strength property + * determines the minimum level of difference considered significant + * during comparison. + *

    + *

    See the Collator class description for an example of use.

    + * @param newStrength the new strength value. + * @see #getStrength + * @see #setStrengthDefault + * @see #PRIMARY + * @see #SECONDARY + * @see #TERTIARY + * @see #QUATERNARY + * @see #IDENTICAL + * @exception IllegalArgumentException If the new strength value is not one + * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. + * @stable ICU 2.8 + */ + public void setStrength(int newStrength) + { + super.setStrength(newStrength); + updateInternalState(); + } + + /** + *

    + * Variable top is a two byte primary value which causes all the codepoints + * with primary values that are less than or equal to the variable top to be + * shifted when alternate handling is set to SHIFTED. + *

    + *

    + * Sets the variable top to a collation element value of a string supplied. + *

    + * @param varTop one or more (if contraction) characters to which the + * variable top should be set + * @return an int value containing the value of the variable top in the upper 16 + * bits. Lower 16 bits are undefined. + * @exception IllegalArgumentException is thrown if varTop argument is not + * a valid variable top element. A variable top element is + * invalid when + *
      + *
    • it is a contraction that does not exist in the + * Collation order + *
    • when the PRIMARY strength collation element for the + * variable top has more than two bytes + *
    • when the varTop argument is null or zero in length. + *
    + * @see #getVariableTop + * @see RuleBasedCollator#setAlternateHandlingShifted + * @stable ICU 2.6 + */ + public int setVariableTop(String varTop) + { + if (varTop == null || varTop.length() == 0) { + throw new IllegalArgumentException( + "Variable top argument string can not be null or zero in length."); + } + if (m_srcUtilIter_ == null) { + initUtility(true); + } + + m_srcUtilColEIter_.setText(varTop); + int ce = m_srcUtilColEIter_.next(); + + // here we check if we have consumed all characters + // you can put in either one character or a contraction + // you shouldn't put more... + if (m_srcUtilColEIter_.getOffset() != varTop.length() + || ce == CollationElementIterator.NULLORDER) { + throw new IllegalArgumentException( + "Variable top argument string is a contraction that does not exist " + + "in the Collation order"); + } + + int nextCE = m_srcUtilColEIter_.next(); + + if ((nextCE != CollationElementIterator.NULLORDER) + && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) { + throw new IllegalArgumentException( + "Variable top argument string can only have a single collation " + + "element that has less than or equal to two PRIMARY strength " + + "bytes"); + } + + m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16; + + return ce & CE_PRIMARY_MASK_; + } + + /** + * Sets the variable top to a collation element value supplied. + * Variable top is set to the upper 16 bits. + * Lower 16 bits are ignored. + * @param varTop Collation element value, as returned by setVariableTop or + * getVariableTop + * @see #getVariableTop + * @see #setVariableTop(String) + * @stable ICU 2.6 + */ + public void setVariableTop(int varTop) + { + m_variableTopValue_ = (varTop & CE_PRIMARY_MASK_) >> 16; + } + + /** + * When numeric collation is turned on, this Collator generates a collation + * key for the numeric value of substrings of digits. 
This is a way to get + * '100' to sort AFTER '2' + * @param flag true to turn numeric collation on and false to turn it off + * @see #getNumericCollation + * @see #setNumericCollationDefault + * @stable ICU 2.8 + */ + public void setNumericCollation(boolean flag) + { + // sort substrings of digits as numbers + m_isNumericCollation_ = flag; + updateInternalState(); + } + + // public getters -------------------------------------------------------- + + /** + * Gets the collation rules for this RuleBasedCollator. + * Returns the same rule string as getRules(false), i.e. without the UCA + * rules prepended. + * @return returns the collation rules + * @see #getRules(boolean) + * @stable ICU 2.8 + */ + public String getRules() + { + return m_rules_; + } + + /** + * Returns current rules. The argument defines whether the full rules + * (UCA + tailoring) are returned or just the tailoring. + * @param fullrules true if the rules that define the full set of + * collation order are required, otherwise false for returning only + * the tailored rules + * @return the current rules that define this Collator. + * @see #getRules() + * @stable ICU 2.6 + */ + public String getRules(boolean fullrules) + { + if (!fullrules) { + return m_rules_; + } + // take the UCA rules and append real rules at the end + return UCA_.m_rules_.concat(m_rules_); + } + + /** + * Get an UnicodeSet that contains all the characters and sequences + * tailored in this collator. + * @return a UnicodeSet object containing all the + * code points and sequences that may sort differently than + * in the UCA. + * @exception IllegalStateException thrown when the rules of this collator + * can not be processed; the original failure is kept as the cause. + * @stable ICU 2.4 + */ + public UnicodeSet getTailoredSet() + { + try { + CollationRuleParser src = new CollationRuleParser(getRules()); + return src.getTailoredSet(); + } catch(Exception e) { + // chain the cause so the underlying failure is not lost + throw new IllegalStateException("A tailoring rule should not " + + "have errors. Something is quite wrong!", e);
+ } + } + + private class contContext { + RuleBasedCollator coll; + UnicodeSet contractions; + UnicodeSet expansions; + UnicodeSet removedContractions; + boolean addPrefixes; + contContext(RuleBasedCollator coll, UnicodeSet contractions, UnicodeSet expansions, + UnicodeSet removedContractions, boolean addPrefixes) { + this.coll = coll; + this.contractions = contractions; + this.expansions = expansions; + this.removedContractions = removedContractions; + this.addPrefixes = addPrefixes; + } + } + + private void + addSpecial(contContext c, StringBuilder buffer, int CE) + { + StringBuilder b = new StringBuilder(); + int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_; + int newCE = c.coll.m_contractionCE_[offset]; + // we might have a contraction that ends from previous level + if(newCE != CollationElementIterator.CE_NOT_FOUND_) { + if(isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_ + && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_ + && c.addPrefixes) { + addSpecial(c, buffer, newCE); + } + if(buffer.length() > 1) { + if(c.contractions != null) { + c.contractions.add(buffer.toString()); + } + if(c.expansions != null && isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) { + c.expansions.add(buffer.toString()); + } + } + } + + offset++; + // check whether we're doing contraction or prefix + if(getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) { + while(c.coll.m_contractionIndex_[offset] != 0xFFFF) { + b.delete(0, b.length()); + b.append(buffer); + newCE = c.coll.m_contractionCE_[offset]; + b.insert(0, c.coll.m_contractionIndex_[offset]); + if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) { + addSpecial(c, b, newCE); + } else { + if(c.contractions != null) { + c.contractions.add(b.toString()); + } + if(c.expansions != null && 
isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) { + c.expansions.add(b.toString()); + } + } + offset++; + } + } else if(getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) { + while(c.coll.m_contractionIndex_[offset] != 0xFFFF) { + b.delete(0, b.length()); + b.append(buffer); + newCE = c.coll.m_contractionCE_[offset]; + b.append(c.coll.m_contractionIndex_[offset]); + if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) { + addSpecial(c, b, newCE); + } else { + if(c.contractions != null) { + c.contractions.add(b.toString()); + } + if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) { + c.expansions.add(b.toString()); + } + } + offset++; + } + } + } + + private + void processSpecials(contContext c) + { + int internalBufferSize = 512; + TrieIterator trieiterator + = new TrieIterator(c.coll.m_trie_); + RangeValueIterator.Element element = new RangeValueIterator.Element(); + while (trieiterator.next(element)) { + int start = element.start; + int limit = element.limit; + int CE = element.value; + StringBuilder contraction = new StringBuilder(internalBufferSize); + + if(isSpecial(CE)) { + if(((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) { + while(start < limit) { + // if there are suppressed contractions, we don't + // want to add them. 
+ if(c.removedContractions != null && c.removedContractions.contains(start)) { + start++; + continue; + } + // we start our contraction from middle, since we don't know if it + // will grow toward right or left + contraction.append((char) start); + addSpecial(c, contraction, CE); + start++; + } + } else if(c.expansions != null && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) { + while(start < limit) { + c.expansions.add(start++); + } + } + } + } + } + + /** + * Gets unicode sets containing contractions and/or expansions of a collator + * @param contractions if not null, set to contain contractions + * @param expansions if not null, set to contain expansions + * @param addPrefixes add the prefix contextual elements to contractions + * @throws Exception Throws an exception if any errors occurs. + * @stable ICU 3.4 + */ + public void + getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, + boolean addPrefixes) throws Exception { + if(contractions != null) { + contractions.clear(); + } + if(expansions != null) { + expansions.clear(); + } + String rules = getRules(); + try { + CollationRuleParser src = new CollationRuleParser(rules); + contContext c = new contContext(RuleBasedCollator.UCA_, + contractions, expansions, src.m_removeSet_, addPrefixes); + + // Add the UCA contractions + processSpecials(c); + // This is collator specific. Add contractions from a collator + c.coll = this; + c.removedContractions = null; + processSpecials(c); + } catch (Exception e) { + throw e; + } + } + + /** + *

    + * Get a Collation key for the argument String source from this + * RuleBasedCollator. + *

    + *

    + * General recommendation:
    + * If comparisons are to be done on the same String multiple times, it would + * be more efficient to generate CollationKeys for the Strings and use + * CollationKey.compareTo(CollationKey) for the comparisons. + * If each String is compared only once, using the method + * RuleBasedCollator.compare(String, String) will have better performance. + *

    + *

    + * See the class documentation for an explanation about CollationKeys. + *

    + * @param source the text String to be transformed into a collation key. + * @return the CollationKey for the given String based on this + * RuleBasedCollator's collation rules. If the source String is + * null, a null CollationKey is returned. + * @see CollationKey + * @see #compare(String, String) + * @see #getRawCollationKey + * @stable ICU 2.8 + */ + public CollationKey getCollationKey(String source) { + if (source == null) { + return null; + } + m_utilRawCollationKey_ = getRawCollationKey(source, + m_utilRawCollationKey_); + return new CollationKey(source, m_utilRawCollationKey_); + } + + /** + * Gets the simpler form of a CollationKey for the String source following + * the rules of this Collator and stores the result into the user provided + * argument key. + * If key has a internal byte array of length that's too small for the + * result, the internal byte array will be grown to the exact required + * size. + * @param source the text String to be transformed into a RawCollationKey + * @param key output RawCollationKey to store results + * @return If key is null, a new instance of RawCollationKey will be + * created and returned, otherwise the user provided key will be + * returned. 
+ * @see #getCollationKey + * @see #compare(String, String) + * @see RawCollationKey + * @stable ICU 2.8 + */ + public RawCollationKey getRawCollationKey(String source, + RawCollationKey key) + { + if (source == null) { + return null; + } + int strength = getStrength(); + m_utilCompare0_ = m_isCaseLevel_; + //m_utilCompare1_ = true; + m_utilCompare2_ = strength >= SECONDARY; + m_utilCompare3_ = strength >= TERTIARY; + m_utilCompare4_ = strength >= QUATERNARY; + m_utilCompare5_ = strength == IDENTICAL; + + m_utilBytesCount0_ = 0; + m_utilBytesCount1_ = 0; + m_utilBytesCount2_ = 0; + m_utilBytesCount3_ = 0; + m_utilBytesCount4_ = 0; + //m_utilBytesCount5_ = 0; + //m_utilCount0_ = 0; + //m_utilCount1_ = 0; + m_utilCount2_ = 0; + m_utilCount3_ = 0; + m_utilCount4_ = 0; + //m_utilCount5_ = 0; + boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_; + // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. + // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so + // high. + int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1) & LAST_BYTE_MASK_; + byte hiragana4 = 0; + if (m_isHiragana4_ && m_utilCompare4_) { + // allocate one more space for hiragana, value for hiragana + hiragana4 = (byte)commonBottom4; + commonBottom4 ++; + } + + int bottomCount4 = 0xFF - commonBottom4; + // If we need to normalize, we'll do it all at once at the beginning! + if (m_utilCompare5_ && Normalizer.quickCheck(source, Normalizer.NFD,0) + != Normalizer.YES) { + // if it is identical strength, we have to normalize the string to + // NFD so that it will be appended correctly to the end of the sort + // key + source = Normalizer.decompose(source, false); + } + else if (getDecomposition() != NO_DECOMPOSITION + && Normalizer.quickCheck(source, Normalizer.FCD,0) + != Normalizer.YES) { + // for the rest of the strength, if decomposition is on, FCD is + // enough for us to work on. 
+ source = Normalizer.normalize(source,Normalizer.FCD); + } + getSortKeyBytes(source, doFrench, hiragana4, commonBottom4, + bottomCount4); + if (key == null) { + key = new RawCollationKey(); + } + getSortKey(source, doFrench, commonBottom4, bottomCount4, key); + return key; + } + + /** + * Return true if an uppercase character is sorted before the corresponding lowercase character. + * See setUpperCaseFirst(boolean) for details. + * @see #setUpperCaseFirst + * @see #setLowerCaseFirst + * @see #isLowerCaseFirst + * @see #setCaseFirstDefault + * @return true if upper cased characters are sorted before lower cased + * characters, false otherwise + * @stable ICU 2.8 + */ + public boolean isUpperCaseFirst() + { + return (m_caseFirst_ == AttributeValue.UPPER_FIRST_); + } + + /** + * Return true if a lowercase character is sorted before the corresponding uppercase character. + * See setLowerCaseFirst(boolean) for details. + * @see #setUpperCaseFirst + * @see #setLowerCaseFirst + * @see #isUpperCaseFirst + * @see #setCaseFirstDefault + * @return true if lower cased characters are sorted before upper cased + * characters, false otherwise + * @stable ICU 2.8 + */ + public boolean isLowerCaseFirst() + { + return (m_caseFirst_ == AttributeValue.LOWER_FIRST_); + } + + /** + * Checks if the alternate handling behaviour is the UCA defined SHIFTED or + * NON_IGNORABLE. + * If return value is true, then the alternate handling attribute for the + * Collator is SHIFTED. Otherwise if return value is false, then the + * alternate handling attribute for the Collator is NON_IGNORABLE. + * See setAlternateHandlingShifted(boolean) for more details. + * @return true if the alternate handling attribute is SHIFTED, false if + * it is NON_IGNORABLE + * @see #setAlternateHandlingShifted(boolean) + * @see #setAlternateHandlingDefault + * @stable ICU 2.8 + */ + public boolean isAlternateHandlingShifted() + { + return m_isAlternateHandlingShifted_; + } + + /** + * Checks if case level is set to true. + * See setCaseLevel(boolean) for details.
+ * @return the case level mode + * @see #setCaseLevelDefault + * @see #isCaseLevel + * @see #setCaseLevel(boolean) + * @stable ICU 2.8 + */ + public boolean isCaseLevel() + { + return m_isCaseLevel_; + } + + /** + * Checks if French Collation is set to true. + * See setFrenchCollation(boolean) for details. + * @return true if French Collation is set to true, false otherwise + * @see #setFrenchCollation(boolean) + * @see #setFrenchCollationDefault + * @stable ICU 2.8 + */ + public boolean isFrenchCollation() + { + return m_isFrenchCollation_; + } + + /** + * Checks if the Hiragana Quaternary mode is set on. + * See setHiraganaQuaternary(boolean) for more details. + * @return flag true if Hiragana Quaternary mode is on, false otherwise + * @see #setHiraganaQuaternaryDefault + * @see #setHiraganaQuaternary(boolean) + * @stable ICU 2.8 + */ + public boolean isHiraganaQuaternary() + { + return m_isHiragana4_; + } + + /** + * Gets the variable top value of a Collator. + * Lower 16 bits are undefined and should be ignored. + * @return the variable top value of a Collator. + * @see #setVariableTop + * @stable ICU 2.6 + */ + public int getVariableTop() + { + return m_variableTopValue_ << 16; + } + + /** + * Method to retrieve the numeric collation value. + * When numeric collation is turned on, this Collator generates a collation + * key for the numeric value of substrings of digits. This is a way to get + * '100' to sort AFTER '2' + * @see #setNumericCollation + * @see #setNumericCollationDefault + * @return true if numeric collation is turned on, false otherwise + * @stable ICU 2.8 + */ + public boolean getNumericCollation() + { + return m_isNumericCollation_; + } + + // public other methods ------------------------------------------------- + + /** + * Compares the equality of two RuleBasedCollator objects. + * RuleBasedCollator objects are equal if they have the same collation + * rules and the same attributes. + * @param obj the RuleBasedCollator to be compared to. 
+ * @return true if this RuleBasedCollator has exactly the same + * collation behaviour as obj, false otherwise. + * @stable ICU 2.8 + */ + public boolean equals(Object obj) + { + if (obj == null) { + return false; // super does class check + } + if (this == obj) { + return true; + } + if (getClass() != obj.getClass()) { + return false; + } + RuleBasedCollator other = (RuleBasedCollator)obj; + // all other non-transient information is also contained in rules. + if (getStrength() != other.getStrength() + || getDecomposition() != other.getDecomposition() + || other.m_caseFirst_ != m_caseFirst_ + || other.m_caseSwitch_ != m_caseSwitch_ + || other.m_isAlternateHandlingShifted_ + != m_isAlternateHandlingShifted_ + || other.m_isCaseLevel_ != m_isCaseLevel_ + || other.m_isFrenchCollation_ != m_isFrenchCollation_ + || other.m_isHiragana4_ != m_isHiragana4_) { + return false; + } + boolean rules = m_rules_ == other.m_rules_; + if (!rules && (m_rules_ != null && other.m_rules_ != null)) { + rules = m_rules_.equals(other.m_rules_); + } + if (!rules || !ICUDebug.enabled("collation")) { + return rules; + } + if (m_addition3_ != other.m_addition3_ + || m_bottom3_ != other.m_bottom3_ + || m_bottomCount3_ != other.m_bottomCount3_ + || m_common3_ != other.m_common3_ + || m_isSimple3_ != other.m_isSimple3_ + || m_mask3_ != other.m_mask3_ + || m_minContractionEnd_ != other.m_minContractionEnd_ + || m_minUnsafe_ != other.m_minUnsafe_ + || m_top3_ != other.m_top3_ + || m_topCount3_ != other.m_topCount3_ + || !Arrays.equals(m_unsafe_, other.m_unsafe_)) { + return false; + } + if (!m_trie_.equals(other.m_trie_)) { + // we should use the trie iterator here, but then this part is + // only used in the test. 
+ for (int i = UCharacter.MAX_VALUE; i >= UCharacter.MIN_VALUE; i --) + { + int v = m_trie_.getCodePointValue(i); + int otherv = other.m_trie_.getCodePointValue(i); + if (v != otherv) { + int mask = v & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_); + if (mask == (otherv & 0xff000000)) { + v &= 0xffffff; + otherv &= 0xffffff; + if (mask == 0xf1000000) { + v -= (m_expansionOffset_ << 4); + otherv -= (other.m_expansionOffset_ << 4); + } + else if (mask == 0xf2000000) { + v -= m_contractionOffset_; + otherv -= other.m_contractionOffset_; + } + if (v == otherv) { + continue; + } + } + return false; + } + } + } + if (!Arrays.equals(m_contractionCE_, other.m_contractionCE_) + || !Arrays.equals(m_contractionEnd_, other.m_contractionEnd_) + || !Arrays.equals(m_contractionIndex_, other.m_contractionIndex_) + || !Arrays.equals(m_expansion_, other.m_expansion_) + || !Arrays.equals(m_expansionEndCE_, other.m_expansionEndCE_)) { + return false; + } + // not comparing paddings + for (int i = 0; i < m_expansionEndCE_.length; i++) { + if (m_expansionEndCEMaxSize_[i] != other.m_expansionEndCEMaxSize_[i]) { + return false; + } + } + return true; + } + + /** + * Generates a hash code for this RuleBasedCollator from its collation + * rules. Collators with the same rule string share the same hash code, + * so the value is not unique to an instance. + * @return the hash code of the rule string returned by getRules(), or of + * the empty string if the rules are null + * @stable ICU 2.8 + */ + public int hashCode() + { + String rules = getRules(); + if (rules == null) { + rules = ""; + } + return rules.hashCode(); + } + + /** + * Compares the source text String to the target text String according to + * the collation rules, strength and decomposition mode for this + * RuleBasedCollator. + * Returns an integer less than, + * equal to or greater than zero depending on whether the source String is + * less than, equal to or greater than the target String. See the Collator + * class description for an example of use. + *

    + *

    + * General recommendation:
    + * If comparisons are to be done on the same String multiple times, it would + * be more efficient to generate CollationKeys for the Strings and use + * CollationKey.compareTo(CollationKey) for the comparisons. + * If speed performance is critical and object instantiation is to be + * reduced, further optimization may be achieved by generating a simpler + * key of the form RawCollationKey and reusing this RawCollationKey + * object with the method RuleBasedCollator.getRawCollationKey. Internal + * byte representation can be directly accessed via RawCollationKey and + * stored for future use. Like CollationKey, RawCollationKey provides a + * method RawCollationKey.compareTo for key comparisons. + * If each String is compared only once, using the method + * RuleBasedCollator.compare(String, String) will have better performance. + *

    + * @param source the source text String. + * @param target the target text String. + * @return Returns an integer value. Value is less than zero if source is + * less than target, value is zero if source and target are equal, + * value is greater than zero if source is greater than target. + * @see CollationKey + * @see #getCollationKey + * @stable ICU 2.8 + */ + public int compare(String source, String target) + { + if (source == target) { + return 0; + } + + // Find the length of any leading portion that is equal + int offset = getFirstUnmatchedOffset(source, target); + //return compareRegular(source, target, offset); + if(latinOneUse_) { + if ((offset < source.length() + && source.charAt(offset) > ENDOFLATINONERANGE_) + || (offset < target.length() + && target.charAt(offset) > ENDOFLATINONERANGE_)) { + // source or target start with non-latin-1 + return compareRegular(source, target, offset); + } else { + return compareUseLatin1(source, target, offset); + } + } else { + return compareRegular(source, target, offset); + } + } + + // package private inner interfaces -------------------------------------- + + /** + * Attribute values to be used when setting the Collator options + */ + static interface AttributeValue + { + /** + * Indicates that the default attribute value will be used. + * See individual attribute for details on its default value. 
+ */ + static final int DEFAULT_ = -1; + /** + * Primary collation strength + */ + static final int PRIMARY_ = Collator.PRIMARY; + /** + * Secondary collation strength + */ + static final int SECONDARY_ = Collator.SECONDARY; + /** + * Tertiary collation strength + */ + static final int TERTIARY_ = Collator.TERTIARY; + /** + * Default collation strength + */ + static final int DEFAULT_STRENGTH_ = Collator.TERTIARY; + /** + * Internal use for strength checks in Collation elements + */ + static final int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1; + /** + * Quaternary collation strength + */ + static final int QUATERNARY_ = 3; + /** + * Identical collation strength + */ + static final int IDENTICAL_ = Collator.IDENTICAL; + /** + * Internal use for strength checks + */ + static final int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1; + /** + * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL, + * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE + */ + static final int OFF_ = 16; + /** + * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL, + * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE + */ + static final int ON_ = 17; + /** + * Valid for ALTERNATE_HANDLING. Alternate handling will be shifted + */ + static final int SHIFTED_ = 20; + /** + * Valid for ALTERNATE_HANDLING. Alternate handling will be non + * ignorable + */ + static final int NON_IGNORABLE_ = 21; + /** + * Valid for CASE_FIRST - lower case sorts before upper case + */ + static final int LOWER_FIRST_ = 24; + /** + * Upper case sorts before lower case + */ + static final int UPPER_FIRST_ = 25; + /** + * Number of attribute values + */ + static final int LIMIT_ = 29; + } + + /** + * Attributes that collation service understands. All the attributes can + * take DEFAULT value, as well as the values specific to each one. + */ + static interface Attribute + { + /** + * Attribute for direction of secondary weights - used in French. 
+ * Acceptable values are ON, which results in secondary weights being + * considered backwards and OFF which treats secondary weights in the + * order they appear. + */ + static final int FRENCH_COLLATION_ = 0; + /** + * Attribute for handling variable elements. Acceptable values are + * NON_IGNORABLE (default) which treats all the codepoints with + * non-ignorable primary weights in the same way, and SHIFTED which + * causes codepoints with primary weights that are equal or below the + * variable top value to be ignored on primary level and moved to the + * quaternary level. + */ + static final int ALTERNATE_HANDLING_ = 1; + /** + * Controls the ordering of upper and lower case letters. Acceptable + * values are OFF (default), which orders upper and lower case letters + * in accordance to their tertiary weights, UPPER_FIRST which forces + * upper case letters to sort before lower case letters, and + * LOWER_FIRST which does the opposite. + */ + static final int CASE_FIRST_ = 2; + /** + * Controls whether an extra case level (positioned before the third + * level) is generated or not. Acceptable values are OFF (default), + * when case level is not generated, and ON which causes the case + * level to be generated. Contents of the case level are affected by + * the value of CASE_FIRST attribute. A simple way to ignore accent + * differences in a string is to set the strength to PRIMARY and + * enable case level. + */ + static final int CASE_LEVEL_ = 3; + /** + * Controls whether the normalization check and necessary + * normalizations are performed. When set to OFF (default) no + * normalization check is performed. The correctness of the result is + * guaranteed only if the input data is in so-called FCD form (see + * users manual for more info). When set to ON, an incremental check + * is performed to see whether the input data is in the FCD form. If + * the data is not in the FCD form, incremental NFD normalization is + * performed. 
+ */ + static final int NORMALIZATION_MODE_ = 4; + /** + * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY, + * QUATERNARY or IDENTICAL. The usual strength for most locales + * (except Japanese) is tertiary. Quaternary strength is useful when + * combined with shifted setting for alternate handling attribute and + * for JIS x 4061 collation, when it is used to distinguish between + * Katakana and Hiragana (this is achieved by setting the + * HIRAGANA_QUATERNARY mode to on). Otherwise, quaternary level is + * affected only by the number of non ignorable code points in the + * string. Identical strength is rarely useful, as it amounts to + * codepoints of the NFD form of the string. + */ + static final int STRENGTH_ = 5; + /** + * When turned on, this attribute positions Hiragana before all + * non-ignorables on quaternary level. This is a sneaky way to produce + * JIS sort order. + */ + static final int HIRAGANA_QUATERNARY_MODE_ = 6; + /** + * Attribute count + */ + static final int LIMIT_ = 7; + } + + /** + * Stateless singleton implementation of Trie.DataManipulate. It extracts + * the trail surrogate data offset from the collation element of a lead + * surrogate. Obtain the shared instance through getInstance(). + */ + static class DataManipulate implements Trie.DataManipulate + { + // public methods ---------------------------------------------------- + + /** + * Internal method called to parse a lead surrogate's ce for the offset + * to the next trail surrogate data. 
+ * @param ce collation element of the lead surrogate + * @return the low 24 bits of ce when ce is a special ce tagged + * CE_SURROGATE_TAG_, otherwise 0 + * @stable ICU 2.8 + */ + public final int getFoldingOffset(int ce) + { + if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) { + return (ce & 0xFFFFFF); + } + return 0; + } + + /** + * Get singleton object. The instance is created when this class is + * initialized, so no lazy check or synchronization is needed here. + * @return the shared DataManipulate instance, never null + */ + public static final DataManipulate getInstance() + { + return m_instance_; + } + + // private data member ---------------------------------------------- + + /** + * Singleton instance, created eagerly during class initialization so + * that it is safely visible to every thread + */ + private static final DataManipulate m_instance_ + = new DataManipulate(); + + // private constructor ---------------------------------------------- + + /** + * private to prevent initialization + */ + private DataManipulate() + { + } + } + + /** + * UCAConstants + */ + static final class UCAConstants + { + int FIRST_TERTIARY_IGNORABLE_[] = new int[2]; // 0x00000000 + int LAST_TERTIARY_IGNORABLE_[] = new int[2]; // 0x00000000 + int FIRST_PRIMARY_IGNORABLE_[] = new int[2]; // 0x00008705 + int FIRST_SECONDARY_IGNORABLE_[] = new int[2]; // 0x00000000 + int LAST_SECONDARY_IGNORABLE_[] = new int[2]; // 0x00000500 + int LAST_PRIMARY_IGNORABLE_[] = new int[2]; // 0x0000DD05 + int FIRST_VARIABLE_[] = new int[2]; // 0x05070505 + int LAST_VARIABLE_[] = new int[2]; // 0x13CF0505 + int FIRST_NON_VARIABLE_[] = new int[2]; // 0x16200505 + int LAST_NON_VARIABLE_[] = new int[2]; // 0x767C0505 + int RESET_TOP_VALUE_[] = new int[2]; // 0x9F000303 + int FIRST_IMPLICIT_[] = new int[2]; + int LAST_IMPLICIT_[] = new int[2]; + int FIRST_TRAILING_[] = new int[2]; + int LAST_TRAILING_[] = new int[2]; + int PRIMARY_TOP_MIN_; + int PRIMARY_IMPLICIT_MIN_; // 0xE8000000 + int PRIMARY_IMPLICIT_MAX_; // 0xF0000000 + int PRIMARY_TRAILING_MIN_; // 0xE8000000 + int PRIMARY_TRAILING_MAX_; // 0xF0000000 + int PRIMARY_SPECIAL_MIN_; // 0xE8000000 + int PRIMARY_SPECIAL_MAX_; // 0xF0000000 + } + + // package private data member 
------------------------------------------- + + static final byte BYTE_FIRST_TAILORED_ = (byte)0x04; + static final byte BYTE_COMMON_ = (byte)0x05; + static final int COMMON_TOP_2_ = 0x86; // int for unsigness + static final int COMMON_BOTTOM_2_ = BYTE_COMMON_; + static final int COMMON_BOTTOM_3 = 0x05; + /** + * Case strength mask + */ + static final int CE_CASE_BIT_MASK_ = 0xC0; + static final int CE_TAG_SHIFT_ = 24; + static final int CE_TAG_MASK_ = 0x0F000000; + + static final int CE_SPECIAL_FLAG_ = 0xF0000000; + /** + * Lead surrogate that is tailored and doesn't start a contraction + */ + static final int CE_SURROGATE_TAG_ = 5; + /** + * Mask to get the primary strength of the collation element + */ + static final int CE_PRIMARY_MASK_ = 0xFFFF0000; + /** + * Mask to get the secondary strength of the collation element + */ + static final int CE_SECONDARY_MASK_ = 0xFF00; + /** + * Mask to get the tertiary strength of the collation element + */ + static final int CE_TERTIARY_MASK_ = 0xFF; + /** + * Primary strength shift + */ + static final int CE_PRIMARY_SHIFT_ = 16; + /** + * Secondary strength shift + */ + static final int CE_SECONDARY_SHIFT_ = 8; + /** + * Continuation marker + */ + static final int CE_CONTINUATION_MARKER_ = 0xC0; + + /** + * Size of collator raw data headers and options before the expansion + * data. This is used when expansion ces are to be retrieved. ICU4C uses + * the expansion offset starting from UCollator.UColHeader, hence ICU4J + * will have to minus that off to get the right expansion ce offset. In + * number of ints. + */ + int m_expansionOffset_; + /** + * Size of collator raw data headers, options and expansions before + * contraction data. This is used when contraction ces are to be retrieved. + * ICU4C uses contraction offset starting from UCollator.UColHeader, hence + * ICU4J will have to minus that off to get the right contraction ce + * offset. In number of chars. 
+ */ + int m_contractionOffset_; + /** + * Flag indicator if Jamo is special + */ + boolean m_isJamoSpecial_; + + // Collator options ------------------------------------------------------ + + int m_defaultVariableTopValue_; + boolean m_defaultIsFrenchCollation_; + boolean m_defaultIsAlternateHandlingShifted_; + int m_defaultCaseFirst_; + boolean m_defaultIsCaseLevel_; + int m_defaultDecomposition_; + int m_defaultStrength_; + boolean m_defaultIsHiragana4_; + boolean m_defaultIsNumericCollation_; + + /** + * Value of the variable top + */ + int m_variableTopValue_; + /** + * Attribute for special Hiragana + */ + boolean m_isHiragana4_; + /** + * Case sorting customization + */ + int m_caseFirst_; + /** + * Numeric collation option + */ + boolean m_isNumericCollation_; + + // end Collator options -------------------------------------------------- + + /** + * Expansion table + */ + int m_expansion_[]; + /** + * Contraction index table + */ + char m_contractionIndex_[]; + /** + * Contraction CE table + */ + int m_contractionCE_[]; + /** + * Data trie + */ + IntTrie m_trie_; + /** + * Table to store all collation elements that are the last element of an + * expansion. This is for use in StringSearch. + */ + int m_expansionEndCE_[]; + /** + * Table to store the maximum size of any expansions that end with the + * corresponding collation element in m_expansionEndCE_. For use in + * StringSearch too + */ + byte m_expansionEndCEMaxSize_[]; + /** + * Heuristic table to store information on whether a char character is + * considered "unsafe". "Unsafe" character are combining marks or those + * belonging to some contraction sequence from the offset 1 onwards. + * E.g. if "ABC" is the only contraction, then 'B' and 'C' are considered + * unsafe. If we have another contraction "ZA" with the one above, then + * 'A', 'B', 'C' are "unsafe" but 'Z' is not. 
+ */ + byte m_unsafe_[]; + /** + * Table to store information on whether a codepoint can occur as the last + * character in a contraction + */ + byte m_contractionEnd_[]; + /** + * Original collation rules + */ + String m_rules_; + /** + * The smallest "unsafe" codepoint + */ + char m_minUnsafe_; + /** + * The smallest codepoint that could be the end of a contraction + */ + char m_minContractionEnd_; + /** + * General version of the collator + */ + VersionInfo m_version_; + /** + * UCA version + */ + VersionInfo m_UCA_version_; + /** + * UCD version + */ + VersionInfo m_UCD_version_; + + /** + * The UCA collator instance, assigned once by the static initializer. + * checkUCA() tests it for null before a collator is constructed. + */ + static final RuleBasedCollator UCA_; + /** + * UCA Constants + */ + static final UCAConstants UCA_CONSTANTS_; + /** + * Table for UCA and builder use + */ + static final char UCA_CONTRACTIONS_[]; + + /** + * Set to true as the last step of the static initializer; read by + * checkUCA() + */ + private static boolean UCA_INIT_COMPLETE; + + /** + * Implicit generator + */ + static final ImplicitCEGenerator impCEGen_; +// /** +// * Implicit constants +// */ +// static final int IMPLICIT_BASE_BYTE_; +// static final int IMPLICIT_LIMIT_BYTE_; +// static final int IMPLICIT_4BYTE_BOUNDARY_; +// static final int LAST_MULTIPLIER_; +// static final int LAST2_MULTIPLIER_; +// static final int IMPLICIT_BASE_3BYTE_; +// static final int IMPLICIT_BASE_4BYTE_; +// static final int BYTES_TO_AVOID_ = 3; +// static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_; +// static final int LAST_COUNT_ = OTHER_COUNT_ / 2; +// /** +// * Room for intervening, without expanding to 5 bytes +// */ +// static final int LAST_COUNT2_ = OTHER_COUNT_ / 21; +// static final int IMPLICIT_3BYTE_COUNT_ = 1; +// + static final byte SORT_LEVEL_TERMINATOR_ = 1; + +// These are values from UCA required for +// implicit generation and suppressing sort key compression +// they should regularly be in the UCA, but if one +// is running without UCA, it could be a problem + static final int maxRegularPrimary = 0xA0; + static final int minImplicitPrimary = 0xE0; + static final 
int maxImplicitPrimary = 0xE4; + + + // block to initialise character property database + static + { + // take pains to let static class init succeed, otherwise the class itself won't exist and + // clients will get a NoClassDefFoundError. Instead, make the constructors fail if + // we can't load the UCA data. + // NOTE(review): if an exception is thrown after iUCA_ has been allocated, UCA_ ends up + // non-null but possibly only partially initialized, and checkUCA() will not report it - + // confirm whether that is intended. + + RuleBasedCollator iUCA_ = null; + UCAConstants iUCA_CONSTANTS_ = null; + char iUCA_CONTRACTIONS_[] = null; + ImplicitCEGenerator iimpCEGen_ = null; + try + { + // !!! note what's going on here... + // even though the static init of the class is not yet complete, we + // instantiate an instance of the class. So we'd better be sure that + // instantiation doesn't rely on the static initialization that's + // not complete yet! + iUCA_ = new RuleBasedCollator(); + iUCA_CONSTANTS_ = new UCAConstants(); + iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_); + + // called before doing canonical closure for the UCA. + iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary); + //iimpCEGen_ = new ImplicitCEGenerator(iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_, iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_); + iUCA_.init(); + ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH); + iUCA_.m_rules_ = (String)rb.getObject("UCARules"); + } + catch (MissingResourceException ex) + { + // deliberately ignored so that class initialization still succeeds +// throw ex; + } + catch (IOException e) + { + // deliberately ignored so that class initialization still succeeds + // e.printStackTrace(); +// throw new MissingResourceException(e.getMessage(),"",""); + } + + // the static finals are assigned exactly once, from whatever the try block managed to build + UCA_ = iUCA_; + UCA_CONSTANTS_ = iUCA_CONSTANTS_; + UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_; + impCEGen_ = iimpCEGen_; + + UCA_INIT_COMPLETE = true; + } + + + /** + * Fails construction when the UCA collator is unavailable. + * The check is skipped while the static initializer is still running + * (UCA_INIT_COMPLETE is false), which lets the initializer itself + * create the UCA collator through the no-argument constructor. + * @throws MissingResourceException if static initialization has finished + * and UCA_ is still null + */ + private static void checkUCA() throws MissingResourceException { + if (UCA_INIT_COMPLETE && UCA_ == null) { + throw new MissingResourceException("Collator UCA data unavailable", "", ""); + } + } + + // package private constructors ------------------------------------------ + + /** + *

    Package-private constructor for use by subclasses. + * Public access to creating Collators is handled by the API + * Collator.getInstance() or RuleBasedCollator(String rules). + *

    + *

    + * This constructor constructs the UCA collator internally + *

    + */ + RuleBasedCollator() + { + checkUCA(); + initUtility(false); + } + + /** + * Constructors a RuleBasedCollator from the argument locale. + * If no resource bundle is associated with the locale, UCA is used + * instead. + * @param locale + */ + RuleBasedCollator(ULocale locale) + { + checkUCA(); + ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale); + initUtility(false); + if (rb != null) { + try { + // Use keywords, if supplied for lookup + String collkey = locale.getKeywordValue("collation"); + if(collkey == null) { + collkey = rb.getStringWithFallback("collations/default"); + } + + // collations/default will always give a string back + // keyword for the real collation data + // if "collations/collkey" will return null if collkey == null + ICUResourceBundle elements = rb.getWithFallback("collations/" + collkey); + if (elements != null) { + // TODO: Determine actual & valid locale correctly + ULocale uloc = rb.getULocale(); + setLocale(uloc, uloc); + + m_rules_ = elements.getString("Sequence"); + ByteBuffer buf = elements.get("%%CollationBin").getBinary(); + // %%CollationBin + if(buf!=null){ + // m_rules_ = (String)rules[1][1]; + CollatorReader.initRBC(this, buf); + /* + BufferedInputStream input = + new BufferedInputStream( + new ByteArrayInputStream(map)); + /* + CollatorReader reader = new CollatorReader(input, false); + if (map.length > MIN_BINARY_DATA_SIZE_) { + reader.read(this, null); + } + else { + reader.readHeader(this); + reader.readOptions(this); + // duplicating UCA_'s data + setWithUCATables(); + } + */ + // at this point, we have read in the collator + // now we need to check whether the binary image has + // the right UCA and other versions + if(!m_UCA_version_.equals(UCA_.m_UCA_version_) || + !m_UCD_version_.equals(UCA_.m_UCD_version_)) { + init(m_rules_); + return; + } + init(); + return; + } + else { + init(m_rules_); + return; + } + } + } + catch (Exception e) 
{ + // e.printStackTrace(); + // if failed use UCA. + } + } + setWithUCAData(); + } + + // package private methods ----------------------------------------------- + + /** + * Sets this collator to use the tables in UCA. Note options not taken + * care of here. + */ + final void setWithUCATables() + { + m_contractionOffset_ = UCA_.m_contractionOffset_; + m_expansionOffset_ = UCA_.m_expansionOffset_; + m_expansion_ = UCA_.m_expansion_; + m_contractionIndex_ = UCA_.m_contractionIndex_; + m_contractionCE_ = UCA_.m_contractionCE_; + m_trie_ = UCA_.m_trie_; + m_expansionEndCE_ = UCA_.m_expansionEndCE_; + m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_; + m_unsafe_ = UCA_.m_unsafe_; + m_contractionEnd_ = UCA_.m_contractionEnd_; + m_minUnsafe_ = UCA_.m_minUnsafe_; + m_minContractionEnd_ = UCA_.m_minContractionEnd_; + } + + /** + * Sets this collator to use the all options and tables in UCA. + */ + final void setWithUCAData() + { + latinOneFailed_ = true; + + m_addition3_ = UCA_.m_addition3_; + m_bottom3_ = UCA_.m_bottom3_; + m_bottomCount3_ = UCA_.m_bottomCount3_; + m_caseFirst_ = UCA_.m_caseFirst_; + m_caseSwitch_ = UCA_.m_caseSwitch_; + m_common3_ = UCA_.m_common3_; + m_contractionOffset_ = UCA_.m_contractionOffset_; + setDecomposition(UCA_.getDecomposition()); + m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_; + m_defaultDecomposition_ = UCA_.m_defaultDecomposition_; + m_defaultIsAlternateHandlingShifted_ + = UCA_.m_defaultIsAlternateHandlingShifted_; + m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_; + m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_; + m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_; + m_defaultStrength_ = UCA_.m_defaultStrength_; + m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_; + m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_; + m_expansionOffset_ = UCA_.m_expansionOffset_; + m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_; + m_isCaseLevel_ = UCA_.m_isCaseLevel_; + 
m_isFrenchCollation_ = UCA_.m_isFrenchCollation_; + m_isHiragana4_ = UCA_.m_isHiragana4_; + m_isJamoSpecial_ = UCA_.m_isJamoSpecial_; + m_isSimple3_ = UCA_.m_isSimple3_; + m_mask3_ = UCA_.m_mask3_; + m_minContractionEnd_ = UCA_.m_minContractionEnd_; + m_minUnsafe_ = UCA_.m_minUnsafe_; + m_rules_ = UCA_.m_rules_; + setStrength(UCA_.getStrength()); + m_top3_ = UCA_.m_top3_; + m_topCount3_ = UCA_.m_topCount3_; + m_variableTopValue_ = UCA_.m_variableTopValue_; + m_isNumericCollation_ = UCA_.m_isNumericCollation_; + setWithUCATables(); + latinOneFailed_ = false; + } + + /** + * Test whether a char character is potentially "unsafe" for use as a + * collation starting point. "Unsafe" characters are combining marks or + * those belonging to some contraction sequence from the offset 1 onwards. + * E.g. if "ABC" is the only contraction, then 'B' and + * 'C' are considered unsafe. If we have another contraction "ZA" with + * the one above, then 'A', 'B', 'C' are "unsafe" but 'Z' is not. + * @param ch character to test + * @return true if ch is unsafe, false otherwise + */ + final boolean isUnsafe(char ch) + { + // anything below the smallest recorded unsafe char is safe + if (ch < m_minUnsafe_) { + return false; + } + + // ch is at or above HEURISTIC_SIZE_ << HEURISTIC_SHIFT_, so it is + // masked and offset below before being used as a table index + if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { + if (UTF16.isLeadSurrogate(ch) + || UTF16.isTrailSurrogate(ch)) { + // Lead and trail surrogates are always considered unsafe. + return true; + } + ch &= HEURISTIC_OVERFLOW_MASK_; + ch += HEURISTIC_OVERFLOW_OFFSET_; + } + // one bit per char: select the byte, then the bit within that byte + int value = m_unsafe_[ch >> HEURISTIC_SHIFT_]; + return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0; + } + + /** + * Approximate determination if a char character is at a contraction end. + * Guaranteed to be true if a character is at the end of a contraction, + * otherwise it is not deterministic. 
+ * @param ch character to be determined + */ + final boolean isContractionEnd(char ch) + { + if (UTF16.isTrailSurrogate(ch)) { + return true; + } + + if (ch < m_minContractionEnd_) { + return false; + } + + if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { + ch &= HEURISTIC_OVERFLOW_MASK_; + ch += HEURISTIC_OVERFLOW_OFFSET_; + } + int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_]; + return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0; + } + + /** + * Retrieve the tag of a special ce + * @param ce ce to test + * @return tag of ce + */ + static int getTag(int ce) + { + return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_; + } + + /** + * Checking if ce is special + * @param ce to check + * @return true if ce is special + */ + static boolean isSpecial(int ce) + { + return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_; + } + + /** + * Checks if the argument ce is a continuation + * @param ce collation element to test + * @return true if ce is a continuation + */ + static final boolean isContinuation(int ce) + { + return ce != CollationElementIterator.NULLORDER + && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_; + } + + // private inner classes ------------------------------------------------ + + // private variables ----------------------------------------------------- + + /** + * The smallest natural unsafe or contraction end char character before + * tailoring. + * This is a combining mark. + */ + private static final int DEFAULT_MIN_HEURISTIC_ = 0x300; + /** + * Heuristic table table size. Size is 32 bytes, 1 bit for each + * latin 1 char, and some power of two for hashing the rest of the chars. + * Size in bytes. + */ + private static final char HEURISTIC_SIZE_ = 1056; + /** + * Mask value down to "some power of two" - 1, + * number of bits, not num of bytes. 
+ */ + private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff; + /** + * Unsafe character shift + */ + private static final int HEURISTIC_SHIFT_ = 3; + /** + * Unsafe character addition for character too large, it has to be folded + * then incremented. + */ + private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256; + /** + * Mask value to get offset in heuristic table. + */ + private static final char HEURISTIC_MASK_ = 7; + + private int m_caseSwitch_; + private int m_common3_; + private int m_mask3_; + /** + * When switching case, we need to add or subtract different values. + */ + private int m_addition3_; + /** + * Upper range when compressing + */ + private int m_top3_; + /** + * Upper range when compressing + */ + private int m_bottom3_; + private int m_topCount3_; + private int m_bottomCount3_; + /** + * Case first constants + */ + private static final int CASE_SWITCH_ = 0xC0; + private static final int NO_CASE_SWITCH_ = 0; + /** + * Case level constants + */ + private static final int CE_REMOVE_CASE_ = 0x3F; + private static final int CE_KEEP_CASE_ = 0xFF; + /** + * Case strength mask + */ + private static final int CE_CASE_MASK_3_ = 0xFF; + /** + * Sortkey size factor. Values can be changed. 
+ */ + private static final double PROPORTION_2_ = 0.5; + private static final double PROPORTION_3_ = 0.667; + + // These values come from the UCA ---------------------------------------- + + /** + * This is an enum that lists magic special byte values from the + * fractional UCA + */ + //private static final byte BYTE_ZERO_ = 0x0; + //private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01; + //private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02; + private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03; + /*private*/ static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_; + //private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_; + static final byte CODAN_PLACEHOLDER = 0x27; + //private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C; + private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D; + private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF; + private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1; + private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80; + private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40; + private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85; + private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45; + private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5; + private static final int COMMON_BOTTOM_3_ = 0x05; + private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86; + private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ = + COMMON_BOTTOM_3_; + private static final int TOP_COUNT_2_ = (int)(PROPORTION_2_ * TOTAL_2_); + private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_; + private static final int COMMON_2_ = COMMON_BOTTOM_2_; + private static final int COMMON_UPPER_FIRST_3_ = 0xC5; + private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_; + //private static final int COMMON_4_ = (byte)0xFF; + + + + /* + * Minimum size required for the binary collation data in bytes. 
+ * Size of UCA header + size of options to 4 bytes + */ + //private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2; + + /** + * If this collator is to generate only simple tertiaries for fast path + */ + private boolean m_isSimple3_; + + /** + * French collation sorting flag + */ + private boolean m_isFrenchCollation_; + /** + * Flag indicating if shifted is requested for Quaternary alternate + * handling. If this is not true, the default for alternate handling will + * be non-ignorable. + */ + private boolean m_isAlternateHandlingShifted_; + /** + * Extra case level for sorting + */ + private boolean m_isCaseLevel_; + + private static final int SORT_BUFFER_INIT_SIZE_ = 128; + private static final int SORT_BUFFER_INIT_SIZE_1_ = + SORT_BUFFER_INIT_SIZE_ << 3; + private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_; + private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_; + private static final int SORT_BUFFER_INIT_SIZE_CASE_ = + SORT_BUFFER_INIT_SIZE_ >> 2; + private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_; + + private static final int CE_CONTINUATION_TAG_ = 0xC0; + private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F; + + private static final int LAST_BYTE_MASK_ = 0xFF; + + //private static final int CE_RESET_TOP_VALUE_ = 0x9F000303; + //private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303; + + private static final byte SORT_CASE_BYTE_START_ = (byte)0x80; + private static final byte SORT_CASE_SHIFT_START_ = (byte)7; + + /** + * CE buffer size + */ + private static final int CE_BUFFER_SIZE_ = 512; + + // variables for Latin-1 processing + boolean latinOneUse_ = false; + boolean latinOneRegenTable_ = false; + boolean latinOneFailed_ = false; + + int latinOneTableLen_ = 0; + int latinOneCEs_[] = null; + /** + * Bunch of utility iterators + */ + private StringUCharacterIterator m_srcUtilIter_; + private CollationElementIterator m_srcUtilColEIter_; + private 
StringUCharacterIterator m_tgtUtilIter_; + private CollationElementIterator m_tgtUtilColEIter_; + /** + * Utility comparison flags + */ + private boolean m_utilCompare0_; + //private boolean m_utilCompare1_; + private boolean m_utilCompare2_; + private boolean m_utilCompare3_; + private boolean m_utilCompare4_; + private boolean m_utilCompare5_; + /** + * Utility byte buffer + */ + private byte m_utilBytes0_[]; + private byte m_utilBytes1_[]; + private byte m_utilBytes2_[]; + private byte m_utilBytes3_[]; + private byte m_utilBytes4_[]; + //private byte m_utilBytes5_[]; + private RawCollationKey m_utilRawCollationKey_; + + private int m_utilBytesCount0_; + private int m_utilBytesCount1_; + private int m_utilBytesCount2_; + private int m_utilBytesCount3_; + private int m_utilBytesCount4_; + //private int m_utilBytesCount5_; + //private int m_utilCount0_; + //private int m_utilCount1_; + private int m_utilCount2_; + private int m_utilCount3_; + private int m_utilCount4_; + //private int m_utilCount5_; + + private int m_utilFrenchStart_; + private int m_utilFrenchEnd_; + + /** + * Preparing the CE buffers. 
will be filled during the primary phase + */ + private int m_srcUtilCEBuffer_[]; + private int m_tgtUtilCEBuffer_[]; + private int m_srcUtilCEBufferSize_; + private int m_tgtUtilCEBufferSize_; + + private int m_srcUtilContOffset_; + private int m_tgtUtilContOffset_; + + private int m_srcUtilOffset_; + private int m_tgtUtilOffset_; + + // private methods ------------------------------------------------------- + + private void init(String rules) throws Exception + { + setWithUCAData(); + CollationParsedRuleBuilder builder + = new CollationParsedRuleBuilder(rules); + builder.setRules(this); + m_rules_ = rules; + init(); + initUtility(false); + } + + private final int compareRegular(String source, String target, int offset) { + if (m_srcUtilIter_ == null) { + initUtility(true); + } + int strength = getStrength(); + // setting up the collator parameters + m_utilCompare0_ = m_isCaseLevel_; + //m_utilCompare1_ = true; + m_utilCompare2_ = strength >= SECONDARY; + m_utilCompare3_ = strength >= TERTIARY; + m_utilCompare4_ = strength >= QUATERNARY; + m_utilCompare5_ = strength == IDENTICAL; + boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_; + boolean doShift4 = m_isAlternateHandlingShifted_ && m_utilCompare4_; + boolean doHiragana4 = m_isHiragana4_ && m_utilCompare4_; + + if (doHiragana4 && doShift4) { + String sourcesub = source.substring(offset); + String targetsub = target.substring(offset); + return compareBySortKeys(sourcesub, targetsub); + } + + // This is the lowest primary value that will not be ignored if shifted + int lowestpvalue = m_isAlternateHandlingShifted_ + ? 
m_variableTopValue_ << 16 : 0; + m_srcUtilCEBufferSize_ = 0; + m_tgtUtilCEBufferSize_ = 0; + int result = doPrimaryCompare(doHiragana4, lowestpvalue, source, + target, offset); + if (m_srcUtilCEBufferSize_ == -1 + && m_tgtUtilCEBufferSize_ == -1) { + // since the cebuffer is cleared when we have determined that + // either source is greater than target or vice versa, the return + // result is the comparison result and not the hiragana result + return result; + } + + int hiraganaresult = result; + + if (m_utilCompare2_) { + result = doSecondaryCompare(doFrench); + if (result != 0) { + return result; + } + } + // doing the case bit + if (m_utilCompare0_) { + result = doCaseCompare(); + if (result != 0) { + return result; + } + } + // Tertiary level + if (m_utilCompare3_) { + result = doTertiaryCompare(); + if (result != 0) { + return result; + } + } + + if (doShift4) { // checkQuad + result = doQuaternaryCompare(lowestpvalue); + if (result != 0) { + return result; + } + } + else if (doHiragana4 && hiraganaresult != 0) { + // If we're fine on quaternaries, we might be different + // on Hiragana. This, however, might fail us in shifted. + return hiraganaresult; + } + + // For IDENTICAL comparisons, we use a bitwise character comparison + // as a tiebreaker if all else is equal. + // Getting here should be quite rare - strings are not identical - + // that is checked first, but compared == through all other checks. 
+ if (m_utilCompare5_) { + return doIdenticalCompare(source, target, offset, true); + } + return 0; + } + + /** + * Gets the 2 bytes of primary order and adds it to the primary byte array + * @param ce current ce + * @param notIsContinuation flag indicating if the current bytes belong to + * a continuation ce + * @param doShift flag indicating if ce is to be shifted + * @param leadPrimary lead primary used for compression + * @param commonBottom4 common byte value for Quaternary + * @param bottomCount4 smallest byte value for Quaternary + * @return the new lead primary for compression + */ + private final int doPrimaryBytes(int ce, boolean notIsContinuation, + boolean doShift, int leadPrimary, + int commonBottom4, int bottomCount4) + { + + int p2 = (ce >>>= 16) & LAST_BYTE_MASK_; // in ints for unsigned + int p1 = ce >>> 8; // comparison + if (doShift) { + if (m_utilCount4_ > 0) { + while (m_utilCount4_ > bottomCount4) { + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)(commonBottom4 + bottomCount4)); + m_utilBytesCount4_ ++; + m_utilCount4_ -= bottomCount4; + } + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)(commonBottom4 + + (m_utilCount4_ - 1))); + m_utilBytesCount4_ ++; + m_utilCount4_ = 0; + } + // dealing with a variable and we're treating them as shifted + // This is a shifted ignorable + if (p1 != 0) { + // we need to check this since we could be in continuation + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)p1); + m_utilBytesCount4_ ++; + } + if (p2 != 0) { + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)p2); + m_utilBytesCount4_ ++; + } + } + else { + // Note: This code assumes that the table is well built + // i.e. not having 0 bytes where they are not supposed to be. 
+ // Usually, we'll have non-zero primary1 & primary2, except + // in cases of LatinOne and friends, when primary2 will be + // regular and simple sortkey calc + if (p1 != CollationElementIterator.IGNORABLE) { + if (notIsContinuation) { + if (leadPrimary == p1) { + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, (byte)p2); + m_utilBytesCount1_ ++; + } + else { + if (leadPrimary != 0) { + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + ((p1 > leadPrimary) + ? BYTE_UNSHIFTED_MAX_ + : BYTE_UNSHIFTED_MIN_)); + m_utilBytesCount1_ ++; + } + if (p2 == CollationElementIterator.IGNORABLE) { + // one byter, not compressed + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)p1); + m_utilBytesCount1_ ++; + leadPrimary = 0; + } + else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_ + || (p1 > maxRegularPrimary + //> (RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_[0] + // >>> 24) + && p1 < minImplicitPrimary + //< (RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_[0] + // >>> 24) + )) { + // not compressible + leadPrimary = 0; + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)p1); + m_utilBytesCount1_ ++; + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)p2); + m_utilBytesCount1_ ++; + } + else { // compress + leadPrimary = p1; + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)p1); + m_utilBytesCount1_ ++; + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, (byte)p2); + m_utilBytesCount1_ ++; + } + } + } + else { + // continuation, add primary to the key, no compression + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, (byte)p1); + m_utilBytesCount1_ ++; + if (p2 != CollationElementIterator.IGNORABLE) { + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, (byte)p2); + // second part + m_utilBytesCount1_ ++; + } + } + } + } + return leadPrimary; + } + + /** + * Gets the secondary byte and adds it to the secondary byte array + * @param ce current ce + * 
@param notIsContinuation flag indicating if the current bytes belong to + * a continuation ce + * @param doFrench flag indicator if french sort is to be performed + */ + private final void doSecondaryBytes(int ce, boolean notIsContinuation, + boolean doFrench) + { + int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison + if (s != 0) { + if (!doFrench) { + // This is compression code. + if (s == COMMON_2_ && notIsContinuation) { + m_utilCount2_ ++; + } + else { + if (m_utilCount2_ > 0) { + if (s > COMMON_2_) { // not necessary for 4th level. + while (m_utilCount2_ > TOP_COUNT_2_) { + m_utilBytes2_ = append(m_utilBytes2_, + m_utilBytesCount2_, + (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); + m_utilBytesCount2_ ++; + m_utilCount2_ -= TOP_COUNT_2_; + } + m_utilBytes2_ = append(m_utilBytes2_, + m_utilBytesCount2_, + (byte)(COMMON_TOP_2_ + - (m_utilCount2_ - 1))); + m_utilBytesCount2_ ++; + } + else { + while (m_utilCount2_ > BOTTOM_COUNT_2_) { + m_utilBytes2_ = append(m_utilBytes2_, + m_utilBytesCount2_, + (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_)); + m_utilBytesCount2_ ++; + m_utilCount2_ -= BOTTOM_COUNT_2_; + } + m_utilBytes2_ = append(m_utilBytes2_, + m_utilBytesCount2_, + (byte)(COMMON_BOTTOM_2_ + + (m_utilCount2_ - 1))); + m_utilBytesCount2_ ++; + } + m_utilCount2_ = 0; + } + m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, + (byte)s); + m_utilBytesCount2_ ++; + } + } + else { + m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, + (byte)s); + m_utilBytesCount2_ ++; + // Do the special handling for French secondaries + // We need to get continuation elements and do intermediate + // restore + // abc1c2c3de with french secondaries need to be edc1c2c3ba + // NOT edc3c2c1ba + if (notIsContinuation) { + if (m_utilFrenchStart_ != -1) { + // reverse secondaries from frenchStartPtr up to + // frenchEndPtr + reverseBuffer(m_utilBytes2_); + m_utilFrenchStart_ = -1; + } + } + else { + if (m_utilFrenchStart_ == -1) { + m_utilFrenchStart_ = m_utilBytesCount2_ - 
2; + } + m_utilFrenchEnd_ = m_utilBytesCount2_ - 1; + } + } + } + } + + /** + * Reverse the argument buffer + * @param buffer to reverse + */ + private void reverseBuffer(byte buffer[]) + { + int start = m_utilFrenchStart_; + int end = m_utilFrenchEnd_; + while (start < end) { + byte b = buffer[start]; + buffer[start ++] = buffer[end]; + buffer[end --] = b; + } + } + + /** + * Insert the case shifting byte if required + * @param caseshift value + * @return new caseshift value + */ + private final int doCaseShift(int caseshift) + { + if (caseshift == 0) { + m_utilBytes0_ = append(m_utilBytes0_, m_utilBytesCount0_, + SORT_CASE_BYTE_START_); + m_utilBytesCount0_ ++; + caseshift = SORT_CASE_SHIFT_START_; + } + return caseshift; + } + + /** + * Performs the casing sort + * @param tertiary byte in ints for easy comparison + * @param notIsContinuation flag indicating if the current bytes belong to + * a continuation ce + * @param caseshift + * @return the new value of case shift + */ + private final int doCaseBytes(int tertiary, boolean notIsContinuation, + int caseshift) + { + caseshift = doCaseShift(caseshift); + + if (notIsContinuation && tertiary != 0) { + byte casebits = (byte)(tertiary & 0xC0); + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + if (casebits == 0) { + m_utilBytes0_[m_utilBytesCount0_ - 1] + |= (1 << (-- caseshift)); + } + else { + // second bit + caseshift = doCaseShift(caseshift - 1); + m_utilBytes0_[m_utilBytesCount0_ - 1] + |= ((casebits >> 6) & 1) << (-- caseshift); + } + } + else { + if (casebits != 0) { + m_utilBytes0_[m_utilBytesCount0_ - 1] + |= 1 << (-- caseshift); + // second bit + caseshift = doCaseShift(caseshift); + m_utilBytes0_[m_utilBytesCount0_ - 1] + |= ((casebits >> 7) & 1) << (-- caseshift); + } + else { + caseshift --; + } + } + } + + return caseshift; + } + + /** + * Gets the tertiary byte and adds it to the tertiary byte array + * @param tertiary byte in int for easy comparison + * @param notIsContinuation flag indicating 
if the current bytes belong to + * a continuation ce + */ + private final void doTertiaryBytes(int tertiary, boolean notIsContinuation) + { + if (tertiary != 0) { + // This is compression code. + // sequence size check is included in the if clause + if (tertiary == m_common3_ && notIsContinuation) { + m_utilCount3_ ++; + } + else { + int common3 = m_common3_ & LAST_BYTE_MASK_; + if (tertiary > common3 && m_common3_ == COMMON_NORMAL_3_) { + tertiary += m_addition3_; + } + else if (tertiary <= common3 + && m_common3_ == COMMON_UPPER_FIRST_3_) { + tertiary -= m_addition3_; + } + if (m_utilCount3_ > 0) { + if (tertiary > common3) { + while (m_utilCount3_ > m_topCount3_) { + m_utilBytes3_ = append(m_utilBytes3_, + m_utilBytesCount3_, + (byte)(m_top3_ - m_topCount3_)); + m_utilBytesCount3_ ++; + m_utilCount3_ -= m_topCount3_; + } + m_utilBytes3_ = append(m_utilBytes3_, + m_utilBytesCount3_, + (byte)(m_top3_ + - (m_utilCount3_ - 1))); + m_utilBytesCount3_ ++; + } + else { + while (m_utilCount3_ > m_bottomCount3_) { + m_utilBytes3_ = append(m_utilBytes3_, + m_utilBytesCount3_, + (byte)(m_bottom3_ + m_bottomCount3_)); + m_utilBytesCount3_ ++; + m_utilCount3_ -= m_bottomCount3_; + } + m_utilBytes3_ = append(m_utilBytes3_, + m_utilBytesCount3_, + (byte)(m_bottom3_ + + (m_utilCount3_ - 1))); + m_utilBytesCount3_ ++; + } + m_utilCount3_ = 0; + } + m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, + (byte)tertiary); + m_utilBytesCount3_ ++; + } + } + } + + /** + * Gets the Quaternary byte and adds it to the Quaternary byte array + * @param isCodePointHiragana flag indicator if the previous codepoint + * we dealt with was Hiragana + * @param commonBottom4 smallest common Quaternary byte + * @param bottomCount4 smallest Quaternary byte + * @param hiragana4 hiragana Quaternary byte + */ + private final void doQuaternaryBytes(boolean isCodePointHiragana, + int commonBottom4, int bottomCount4, + byte hiragana4) + { + if (isCodePointHiragana) { // This was Hiragana, need to 
note it + if (m_utilCount4_ > 0) { // Close this part + while (m_utilCount4_ > bottomCount4) { + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)(commonBottom4 + + bottomCount4)); + m_utilBytesCount4_ ++; + m_utilCount4_ -= bottomCount4; + } + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)(commonBottom4 + + (m_utilCount4_ - 1))); + m_utilBytesCount4_ ++; + m_utilCount4_ = 0; + } + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + hiragana4); // Add the Hiragana + m_utilBytesCount4_ ++; + } + else { // This wasn't Hiragana, so we can continue adding stuff + m_utilCount4_ ++; + } + } + + /** + * Iterates through the argument string for all ces. + * Split the ces into their relevant primaries, secondaries etc. + * @param source normalized string + * @param doFrench flag indicator if special handling of French has to be + * done + * @param hiragana4 offset for Hiragana quaternary + * @param commonBottom4 smallest common quaternary byte + * @param bottomCount4 smallest quaternary byte + */ + private final void getSortKeyBytes(String source, boolean doFrench, + byte hiragana4, int commonBottom4, + int bottomCount4) + + { + if (m_srcUtilIter_ == null) { + initUtility(true); + } + int backupDecomposition = getDecomposition(); + setDecomposition(NO_DECOMPOSITION); // have to revert to backup later + m_srcUtilIter_.setText(source); + m_srcUtilColEIter_.setText(m_srcUtilIter_); + m_utilFrenchStart_ = -1; + m_utilFrenchEnd_ = -1; + + // scriptorder not implemented yet + // const uint8_t *scriptOrder = coll->scriptOrder; + + boolean doShift = false; + boolean notIsContinuation = false; + + int leadPrimary = 0; // int for easier comparison + int caseShift = 0; + + while (true) { + int ce = m_srcUtilColEIter_.next(); + if (ce == CollationElementIterator.NULLORDER) { + break; + } + + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + + notIsContinuation = !isContinuation(ce); + + /* + * if (notIsContinuation) { + if 
(scriptOrder != NULL) { + primary1 = scriptOrder[primary1]; + } + }*/ + boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0; + // actually we can just check that the first byte is 0 + // generation stuffs the order left first + boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_) + <= m_variableTopValue_; + doShift = (m_isAlternateHandlingShifted_ + && ((notIsContinuation && isSmallerThanVariableTop + && !isPrimaryByteIgnorable) // primary byte not 0 + || (!notIsContinuation && doShift)) + || (doShift && isPrimaryByteIgnorable)); + if (doShift && isPrimaryByteIgnorable) { + // amendment to the UCA says that primary ignorables and other + // ignorables should be removed if following a shifted code + // point + // if we were shifted and we got an ignorable code point + // we should just completely ignore it + continue; + } + leadPrimary = doPrimaryBytes(ce, notIsContinuation, doShift, + leadPrimary, commonBottom4, + bottomCount4); + if (doShift) { + continue; + } + if (m_utilCompare2_) { + doSecondaryBytes(ce, notIsContinuation, doFrench); + } + + int t = ce & LAST_BYTE_MASK_; + if (!notIsContinuation) { + t = ce & CE_REMOVE_CONTINUATION_MASK_; + } + + if (m_utilCompare0_ && (!isPrimaryByteIgnorable || m_utilCompare2_)) { + // do the case level if we need to do it. 
We don't want to calculate + // case level for primary ignorables if we have only primary strength and case level + // otherwise we would break well formedness of CEs + caseShift = doCaseBytes(t, notIsContinuation, caseShift); + } + else if (notIsContinuation) { + t ^= m_caseSwitch_; + } + + t &= m_mask3_; + + if (m_utilCompare3_) { + doTertiaryBytes(t, notIsContinuation); + } + + if (m_utilCompare4_ && notIsContinuation) { // compare quad + doQuaternaryBytes(m_srcUtilColEIter_.m_isCodePointHiragana_, + commonBottom4, bottomCount4, hiragana4); + } + } + setDecomposition(backupDecomposition); // reverts to original + if (m_utilFrenchStart_ != -1) { + // one last round of checks + reverseBuffer(m_utilBytes2_); + } + } + + /** + * From the individual strength byte results the final compact sortkey + * will be calculated. + * @param source text string + * @param doFrench flag indicating that special handling of French has to + * be done + * @param commonBottom4 smallest common quaternary byte + * @param bottomCount4 smallest quaternary byte + * @param key output RawCollationKey to store results, key cannot be null + */ + private final void getSortKey(String source, boolean doFrench, + int commonBottom4, + int bottomCount4, + RawCollationKey key) + { + // we have done all the CE's, now let's put them together to form + // a key + if (m_utilCompare2_) { + doSecondary(doFrench); + } + // adding case level should be independent of secondary level + if (m_utilCompare0_) { + doCase(); + } + if (m_utilCompare3_) { + doTertiary(); + if (m_utilCompare4_) { + doQuaternary(commonBottom4, bottomCount4); + if (m_utilCompare5_) { + doIdentical(source); + } + + } + } + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte)0); + m_utilBytesCount1_ ++; + + key.set(m_utilBytes1_, 0, m_utilBytesCount1_); + } + + /** + * Packs the French bytes + */ + private final void doFrench() + { + for (int i = 0; i < m_utilBytesCount2_; i ++) { + byte s = m_utilBytes2_[m_utilBytesCount2_ - 
i - 1]; + // This is compression code. + if (s == COMMON_2_) { + ++ m_utilCount2_; + } + else { + if (m_utilCount2_ > 0) { + // getting the unsigned value + if ((s & LAST_BYTE_MASK_) > COMMON_2_) { + // not necessary for 4th level. + while (m_utilCount2_ > TOP_COUNT_2_) { + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); + m_utilBytesCount1_ ++; + m_utilCount2_ -= TOP_COUNT_2_; + } + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)(COMMON_TOP_2_ + - (m_utilCount2_ - 1))); + m_utilBytesCount1_ ++; + } + else { + while (m_utilCount2_ > BOTTOM_COUNT_2_) { + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_)); + m_utilBytesCount1_ ++; + m_utilCount2_ -= BOTTOM_COUNT_2_; + } + m_utilBytes1_ = append(m_utilBytes1_, + m_utilBytesCount1_, + (byte)(COMMON_BOTTOM_2_ + + (m_utilCount2_ - 1))); + m_utilBytesCount1_ ++; + } + m_utilCount2_ = 0; + } + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, s); + m_utilBytesCount1_ ++; + } + } + if (m_utilCount2_ > 0) { + while (m_utilCount2_ > BOTTOM_COUNT_2_) { + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + (byte)(COMMON_BOTTOM_2_ + + BOTTOM_COUNT_2_)); + m_utilBytesCount1_ ++; + m_utilCount2_ -= BOTTOM_COUNT_2_; + } + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + (byte)(COMMON_BOTTOM_2_ + + (m_utilCount2_ - 1))); + m_utilBytesCount1_ ++; + } + } + + /** + * Compacts the secondary bytes and stores them into the primary array + * @param doFrench flag indicator that French has to be handled specially + */ + private final void doSecondary(boolean doFrench) + { + if (m_utilCount2_ > 0) { + while (m_utilCount2_ > BOTTOM_COUNT_2_) { + m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, + (byte)(COMMON_BOTTOM_2_ + + BOTTOM_COUNT_2_)); + m_utilBytesCount2_ ++; + m_utilCount2_ -= BOTTOM_COUNT_2_; + } + m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, + 
(byte)(COMMON_BOTTOM_2_ + + (m_utilCount2_ - 1))); + m_utilBytesCount2_ ++; + } + + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + SORT_LEVEL_TERMINATOR_); + m_utilBytesCount1_ ++; + + if (doFrench) { // do the reverse copy + doFrench(); + } + else { + if (m_utilBytes1_.length <= m_utilBytesCount1_ + + m_utilBytesCount2_) { + m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount2_); + } + System.arraycopy(m_utilBytes2_, 0, m_utilBytes1_, + m_utilBytesCount1_, m_utilBytesCount2_); + m_utilBytesCount1_ += m_utilBytesCount2_; + } + } + + /** + * Increase buffer size + * @param buffer array of bytes + * @param size of the byte array + * @param incrementsize size to increase + * @return the new buffer + */ + private static final byte[] increase(byte buffer[], int size, + int incrementsize) + { + byte result[] = new byte[buffer.length + incrementsize]; + System.arraycopy(buffer, 0, result, 0, size); + return result; + } + + /** + * Increase buffer size + * @param buffer array of ints + * @param size of the byte array + * @param incrementsize size to increase + * @return the new buffer + */ + private static final int[] increase(int buffer[], int size, + int incrementsize) + { + int result[] = new int[buffer.length + incrementsize]; + System.arraycopy(buffer, 0, result, 0, size); + return result; + } + + /** + * Compacts the case bytes and stores them into the primary array + */ + private final void doCase() + { + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + SORT_LEVEL_TERMINATOR_); + m_utilBytesCount1_ ++; + if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount0_) { + m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount0_); + } + System.arraycopy(m_utilBytes0_, 0, m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount0_); + m_utilBytesCount1_ += m_utilBytesCount0_; + } + + /** + * Compacts the tertiary bytes and stores them into the primary array + */ + private final void 
doTertiary() + { + if (m_utilCount3_ > 0) { + if (m_common3_ != COMMON_BOTTOM_3_) { + while (m_utilCount3_ >= m_topCount3_) { + m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, + (byte)(m_top3_ - m_topCount3_)); + m_utilBytesCount3_ ++; + m_utilCount3_ -= m_topCount3_; + } + m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, + (byte)(m_top3_ - m_utilCount3_)); + m_utilBytesCount3_ ++; + } + else { + while (m_utilCount3_ > m_bottomCount3_) { + m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, + (byte)(m_bottom3_ + + m_bottomCount3_)); + m_utilBytesCount3_ ++; + m_utilCount3_ -= m_bottomCount3_; + } + m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, + (byte)(m_bottom3_ + + (m_utilCount3_ - 1))); + m_utilBytesCount3_ ++; + } + } + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + SORT_LEVEL_TERMINATOR_); + m_utilBytesCount1_ ++; + if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount3_) { + m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount3_); + } + System.arraycopy(m_utilBytes3_, 0, m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount3_); + m_utilBytesCount1_ += m_utilBytesCount3_; + } + + /** + * Compacts the quaternary bytes and stores them into the primary array + */ + private final void doQuaternary(int commonbottom4, int bottomcount4) + { + if (m_utilCount4_ > 0) { + while (m_utilCount4_ > bottomcount4) { + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)(commonbottom4 + bottomcount4)); + m_utilBytesCount4_ ++; + m_utilCount4_ -= bottomcount4; + } + m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, + (byte)(commonbottom4 + + (m_utilCount4_ - 1))); + m_utilBytesCount4_ ++; + } + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + SORT_LEVEL_TERMINATOR_); + m_utilBytesCount1_ ++; + if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount4_) { + m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount4_); + } + 
System.arraycopy(m_utilBytes4_, 0, m_utilBytes1_, m_utilBytesCount1_, + m_utilBytesCount4_); + m_utilBytesCount1_ += m_utilBytesCount4_; + } + + /** + * Deals with the identical sort. + * Appends the BOCSU version of the source string to the ends of the + * byte buffer. + * @param source text string + */ + private final void doIdentical(String source) + { + int isize = BOCU.getCompressionLength(source); + m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, + SORT_LEVEL_TERMINATOR_); + m_utilBytesCount1_ ++; + if (m_utilBytes1_.length <= m_utilBytesCount1_ + isize) { + m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, + 1 + isize); + } + m_utilBytesCount1_ = BOCU.compress(source, m_utilBytes1_, + m_utilBytesCount1_); + } + + /** + * Gets the offset of the first unmatched characters in source and target. + * This method returns the offset of the start of a contraction or a + * combining sequence, if the first difference is in the middle of such a + * sequence. + * @param source string + * @param target string + * @return offset of the first unmatched characters in source and target. + */ + private final int getFirstUnmatchedOffset(String source, String target) + { + int result = 0; + int slength = source.length(); + int tlength = target.length(); + int minlength = slength; + if (minlength > tlength) { + minlength = tlength; + } + while (result < minlength + && source.charAt(result) == target.charAt(result)) { + result ++; + } + if (result > 0) { + // There is an identical portion at the beginning of the two + // strings. If the identical portion ends within a contraction or a + // combining character sequence, back up to the start of that + // sequence. 
+ char schar = 0; + char tchar = 0; + if (result < minlength) { + schar = source.charAt(result); // first differing chars + tchar = target.charAt(result); + } + else { + schar = source.charAt(minlength - 1); + if (isUnsafe(schar)) { + tchar = schar; + } + else if (slength == tlength) { + return result; + } + else if (slength < tlength) { + tchar = target.charAt(result); + } + else { + schar = source.charAt(result); + } + } + if (isUnsafe(schar) || isUnsafe(tchar)) + { + // We are stopped in the middle of a contraction or combining + // sequence. + // Look backwards for the part of the string for the start of + // the sequence + // It doesn't matter which string we scan, since they are the + // same in this region. + do { + result --; + } + while (result > 0 && isUnsafe(source.charAt(result))); + } + } + return result; + } + + /** + * Appending an byte to an array of bytes and increases it if we run out of + * space + * @param array of byte arrays + * @param appendindex index in the byte array to append + * @param value to append + * @return array if array size can accomodate the new value, otherwise + * a bigger array will be created and returned + */ + private static final byte[] append(byte array[], int appendindex, + byte value) + { + try { + array[appendindex] = value; + } + catch (ArrayIndexOutOfBoundsException e) { + array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_); + array[appendindex] = value; + } + return array; + } + + /** + * This is a trick string compare function that goes in and uses sortkeys + * to compare. It is used when compare gets in trouble and needs to bail + * out. 
+ * @param source text string + * @param target text string + */ + private final int compareBySortKeys(String source, String target) + + { + m_utilRawCollationKey_ = getRawCollationKey(source, + m_utilRawCollationKey_); + // this method is very seldom called + RawCollationKey targetkey = getRawCollationKey(target, null); + return m_utilRawCollationKey_.compareTo(targetkey); + } + + /** + * Performs the primary comparisons, and fills up the CE buffer at the + * same time. + * The return value toggles between the comparison result and the hiragana + * result. If either the source is greater than target or vice versa, the + * return result is the comparison result, ie 1 or -1, furthermore the + * cebuffers will be cleared when that happens. If the primary comparisons + * are equal, we'll have to continue with secondary comparison. In this case + * the cebuffer will not be cleared and the return result will be the + * hiragana result. + * @param doHiragana4 flag indicator that Hiragana Quaternary has to be + * observed + * @param lowestpvalue the lowest primary value that will not be ignored if + * alternate handling is shifted + * @param source text string + * @param target text string + * @param textoffset offset in text to start the comparison + * @return comparion result if a primary difference is found, otherwise + * hiragana result + */ + private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue, + String source, String target, + int textoffset) + + { + // Preparing the context objects for iterating over strings + m_srcUtilIter_.setText(source); + m_srcUtilColEIter_.setText(m_srcUtilIter_, textoffset); + m_tgtUtilIter_.setText(target); + m_tgtUtilColEIter_.setText(m_tgtUtilIter_, textoffset); + + // Non shifted primary processing is quite simple + if (!m_isAlternateHandlingShifted_) { + int hiraganaresult = 0; + while (true) { + int sorder = 0; + // We fetch CEs until we hit a non ignorable primary or end. 
+ do { + sorder = m_srcUtilColEIter_.next(); + m_srcUtilCEBuffer_ = append(m_srcUtilCEBuffer_, + m_srcUtilCEBufferSize_, sorder); + m_srcUtilCEBufferSize_ ++; + sorder &= CE_PRIMARY_MASK_; + } while (sorder == CollationElementIterator.IGNORABLE); + + int torder = 0; + do { + torder = m_tgtUtilColEIter_.next(); + m_tgtUtilCEBuffer_ = append(m_tgtUtilCEBuffer_, + m_tgtUtilCEBufferSize_, torder); + m_tgtUtilCEBufferSize_ ++; + torder &= CE_PRIMARY_MASK_; + } while (torder == CollationElementIterator.IGNORABLE); + + // if both primaries are the same + if (sorder == torder) { + // and there are no more CEs, we advance to the next level + // see if we are at the end of either string + if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] + == CollationElementIterator.NULLORDER) { + if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] + != CollationElementIterator.NULLORDER) { + return -1; + } + break; + } + else if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] + == CollationElementIterator.NULLORDER) { + return 1; + } + if (doHiragana4 && hiraganaresult == 0 + && m_srcUtilColEIter_.m_isCodePointHiragana_ != + m_tgtUtilColEIter_.m_isCodePointHiragana_) { + if (m_srcUtilColEIter_.m_isCodePointHiragana_) { + hiraganaresult = -1; + } + else { + hiraganaresult = 1; + } + } + } + else { + // if two primaries are different, we are done + return endPrimaryCompare(sorder, torder); + } + } + // no primary difference... do the rest from the buffers + return hiraganaresult; + } + else { // shifted - do a slightly more complicated processing :) + while (true) { + int sorder = getPrimaryShiftedCompareCE(m_srcUtilColEIter_, + lowestpvalue, true); + int torder = getPrimaryShiftedCompareCE(m_tgtUtilColEIter_, + lowestpvalue, false); + if (sorder == torder) { + if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] + == CollationElementIterator.NULLORDER) { + break; + } + else { + continue; + } + } + else { + return endPrimaryCompare(sorder, torder); + } + } // no primary difference... 
do the rest from the buffers + } + return 0; + } + + /** + * This is used only for primary strength when we know that sorder is + * already different from torder. + * Compares sorder and torder, returns -1 if sorder is less than torder. + * Clears the cebuffer at the same time. + * @param sorder source strength order + * @param torder target strength order + * @return the comparison result of sorder and torder + */ + private final int endPrimaryCompare(int sorder, int torder) + { + // if we reach here, the ce offset accessed is the last ce + // appended to the buffer + boolean isSourceNullOrder = (m_srcUtilCEBuffer_[ + m_srcUtilCEBufferSize_ - 1] + == CollationElementIterator.NULLORDER); + boolean isTargetNullOrder = (m_tgtUtilCEBuffer_[ + m_tgtUtilCEBufferSize_ - 1] + == CollationElementIterator.NULLORDER); + m_srcUtilCEBufferSize_ = -1; + m_tgtUtilCEBufferSize_ = -1; + if (isSourceNullOrder) { + return -1; + } + if (isTargetNullOrder) { + return 1; + } + // getting rid of the sign + sorder >>>= CE_PRIMARY_SHIFT_; + torder >>>= CE_PRIMARY_SHIFT_; + if (sorder < torder) { + return -1; + } + return 1; + } + + /** + * Calculates the next primary shifted value and fills up cebuffer with the + * next non-ignorable ce. 
+ * @param coleiter collation element iterator + * @param doHiragana4 flag indicator if hiragana quaternary is to be + * handled + * @param lowestpvalue lowest primary shifted value that will not be + * ignored + * @return result next modified ce + */ + private final int getPrimaryShiftedCompareCE( + CollationElementIterator coleiter, + int lowestpvalue, boolean isSrc) + + { + boolean shifted = false; + int result = CollationElementIterator.IGNORABLE; + int cebuffer[] = m_srcUtilCEBuffer_; + int cebuffersize = m_srcUtilCEBufferSize_; + if (!isSrc) { + cebuffer = m_tgtUtilCEBuffer_; + cebuffersize = m_tgtUtilCEBufferSize_; + } + while (true) { + result = coleiter.next(); + if (result == CollationElementIterator.NULLORDER) { + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize ++; + break; + } + else if (result == CollationElementIterator.IGNORABLE + || (shifted + && (result & CE_PRIMARY_MASK_) + == CollationElementIterator.IGNORABLE)) { + // UCA amendment - ignore ignorables that follow shifted code + // points + continue; + } + else if (isContinuation(result)) { + if ((result & CE_PRIMARY_MASK_) + != CollationElementIterator.IGNORABLE) { + // There is primary value + if (shifted) { + result = (result & CE_PRIMARY_MASK_) + | CE_CONTINUATION_MARKER_; + // preserve interesting continuation + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize ++; + continue; + } + else { + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize ++; + break; + } + } + else { // Just lower level values + if (!shifted) { + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize ++; + } + } + } + else { // regular + if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_, + lowestpvalue) > 0) { + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize ++; + break; + } + else { + if ((result & CE_PRIMARY_MASK_) != 0) { + shifted = true; + result &= CE_PRIMARY_MASK_; + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize 
++; + continue; + } + else { + cebuffer = append(cebuffer, cebuffersize, result); + cebuffersize ++; + shifted = false; + continue; + } + } + } + } + if (isSrc) { + m_srcUtilCEBuffer_ = cebuffer; + m_srcUtilCEBufferSize_ = cebuffersize; + } + else { + m_tgtUtilCEBuffer_ = cebuffer; + m_tgtUtilCEBufferSize_ = cebuffersize; + } + result &= CE_PRIMARY_MASK_; + return result; + } + + /** + * Appending an int to an array of ints and increases it if we run out of + * space + * @param array of int arrays + * @param appendindex index at which value will be appended + * @param value to append + * @return array if size is not increased, otherwise a new array will be + * returned + */ + private static final int[] append(int array[], int appendindex, int value) + { + if (appendindex + 1 >= array.length) { + array = increase(array, appendindex, CE_BUFFER_SIZE_); + } + array[appendindex] = value; + return array; + } + + /** + * Does secondary strength comparison based on the collected ces. + * @param doFrench flag indicates if French ordering is to be done + * @return the secondary strength comparison result + */ + private final int doSecondaryCompare(boolean doFrench) + { + // now, we're gonna reexamine collected CEs + if (!doFrench) { // normal + int soffset = 0; + int toffset = 0; + while (true) { + int sorder = CollationElementIterator.IGNORABLE; + while (sorder == CollationElementIterator.IGNORABLE) { + sorder = m_srcUtilCEBuffer_[soffset ++] + & CE_SECONDARY_MASK_; + } + int torder = CollationElementIterator.IGNORABLE; + while (torder == CollationElementIterator.IGNORABLE) { + torder = m_tgtUtilCEBuffer_[toffset ++] + & CE_SECONDARY_MASK_; + } + + if (sorder == torder) { + if (m_srcUtilCEBuffer_[soffset - 1] + == CollationElementIterator.NULLORDER) { + if (m_tgtUtilCEBuffer_[toffset - 1] + != CollationElementIterator.NULLORDER) { + return -1; + } + break; + } + else if (m_tgtUtilCEBuffer_[toffset - 1] + == CollationElementIterator.NULLORDER) { + return 1; + } + } + else { 
+ if (m_srcUtilCEBuffer_[soffset - 1] == + CollationElementIterator.NULLORDER) { + return -1; + } + if (m_tgtUtilCEBuffer_[toffset - 1] == + CollationElementIterator.NULLORDER) { + return 1; + } + return (sorder < torder) ? -1 : 1; + } + } + } + else { // do the French + m_srcUtilContOffset_ = 0; + m_tgtUtilContOffset_ = 0; + m_srcUtilOffset_ = m_srcUtilCEBufferSize_ - 2; + m_tgtUtilOffset_ = m_tgtUtilCEBufferSize_ - 2; + while (true) { + int sorder = getSecondaryFrenchCE(true); + int torder = getSecondaryFrenchCE(false); + if (sorder == torder) { + if ((m_srcUtilOffset_ < 0 && m_tgtUtilOffset_ < 0) + || (m_srcUtilOffset_ >= 0 + && m_srcUtilCEBuffer_[m_srcUtilOffset_] + == CollationElementIterator.NULLORDER)) { + break; + } + } + else { + return (sorder < torder) ? -1 : 1; + } + } + } + return 0; + } + + /** + * Calculates the next secondary french CE. + * @param isSrc flag indicator if we are calculating the src ces + * @return result next modified ce + */ + private final int getSecondaryFrenchCE(boolean isSrc) + { + int result = CollationElementIterator.IGNORABLE; + int offset = m_srcUtilOffset_; + int continuationoffset = m_srcUtilContOffset_; + int cebuffer[] = m_srcUtilCEBuffer_; + if (!isSrc) { + offset = m_tgtUtilOffset_; + continuationoffset = m_tgtUtilContOffset_; + cebuffer = m_tgtUtilCEBuffer_; + } + + while (result == CollationElementIterator.IGNORABLE + && offset >= 0) { + if (continuationoffset == 0) { + result = cebuffer[offset]; + while (isContinuation(cebuffer[offset --])){ + } + // after this, sorder is at the start of continuation, + // and offset points before that + if (isContinuation(cebuffer[offset + 1])) { + // save offset for later + continuationoffset = offset; + offset += 2; + } + } + else { + result = cebuffer[offset ++]; + if (!isContinuation(result)) { + // we have finished with this continuation + offset = continuationoffset; + // reset the pointer to before continuation + continuationoffset = 0; + continue; + } + } + result &= 
CE_SECONDARY_MASK_; // remove continuation bit + } + if (isSrc) { + m_srcUtilOffset_ = offset; + m_srcUtilContOffset_ = continuationoffset; + } + else { + m_tgtUtilOffset_ = offset; + m_tgtUtilContOffset_ = continuationoffset; + } + return result; + } + + /** + * Does case strength comparison based on the collected ces. + * @return the case strength comparison result + */ + private final int doCaseCompare() + { + int soffset = 0; + int toffset = 0; + while (true) { + int sorder = CollationElementIterator.IGNORABLE; + int torder = CollationElementIterator.IGNORABLE; + while ((sorder & CE_REMOVE_CASE_) + == CollationElementIterator.IGNORABLE) { + sorder = m_srcUtilCEBuffer_[soffset ++]; + if (!isContinuation(sorder) && ((sorder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) { + // primary ignorables should not be considered on the case level when the strength is primary + // otherwise, the CEs stop being well-formed + sorder &= CE_CASE_MASK_3_; + sorder ^= m_caseSwitch_; + } + else { + sorder = CollationElementIterator.IGNORABLE; + } + } + + while ((torder & CE_REMOVE_CASE_) + == CollationElementIterator.IGNORABLE) { + torder = m_tgtUtilCEBuffer_[toffset ++]; + if (!isContinuation(torder) && ((torder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) { + // primary ignorables should not be considered on the case level when the strength is primary + // otherwise, the CEs stop being well-formed + torder &= CE_CASE_MASK_3_; + torder ^= m_caseSwitch_; + } + else { + torder = CollationElementIterator.IGNORABLE; + } + } + + sorder &= CE_CASE_BIT_MASK_; + torder &= CE_CASE_BIT_MASK_; + if (sorder == torder) { + // checking end of strings + if (m_srcUtilCEBuffer_[soffset - 1] + == CollationElementIterator.NULLORDER) { + if (m_tgtUtilCEBuffer_[toffset - 1] + != CollationElementIterator.NULLORDER) { + return -1; + } + break; + } + else if (m_tgtUtilCEBuffer_[toffset - 1] + == CollationElementIterator.NULLORDER) { + return 1; + } + } + else { + if 
(m_srcUtilCEBuffer_[soffset - 1] + == CollationElementIterator.NULLORDER) { + return -1; + } + if (m_tgtUtilCEBuffer_[soffset - 1] + == CollationElementIterator.NULLORDER) { + return 1; + } + return (sorder < torder) ? -1 : 1; + } + } + return 0; + } + + /** + * Does tertiary strength comparison based on the collected ces. + * @return the tertiary strength comparison result + */ + private final int doTertiaryCompare() + { + int soffset = 0; + int toffset = 0; + while (true) { + int sorder = CollationElementIterator.IGNORABLE; + int torder = CollationElementIterator.IGNORABLE; + while ((sorder & CE_REMOVE_CASE_) + == CollationElementIterator.IGNORABLE) { + sorder = m_srcUtilCEBuffer_[soffset ++] & m_mask3_; + if (!isContinuation(sorder)) { + sorder ^= m_caseSwitch_; + } + else { + sorder &= CE_REMOVE_CASE_; + } + } + + while ((torder & CE_REMOVE_CASE_) + == CollationElementIterator.IGNORABLE) { + torder = m_tgtUtilCEBuffer_[toffset ++] & m_mask3_; + if (!isContinuation(torder)) { + torder ^= m_caseSwitch_; + } + else { + torder &= CE_REMOVE_CASE_; + } + } + + if (sorder == torder) { + if (m_srcUtilCEBuffer_[soffset - 1] + == CollationElementIterator.NULLORDER) { + if (m_tgtUtilCEBuffer_[toffset - 1] + != CollationElementIterator.NULLORDER) { + return -1; + } + break; + } + else if (m_tgtUtilCEBuffer_[toffset - 1] + == CollationElementIterator.NULLORDER) { + return 1; + } + } + else { + if (m_srcUtilCEBuffer_[soffset - 1] == + CollationElementIterator.NULLORDER) { + return -1; + } + if (m_tgtUtilCEBuffer_[toffset - 1] == + CollationElementIterator.NULLORDER) { + return 1; + } + return (sorder < torder) ? -1 : 1; + } + } + return 0; + } + + /** + * Does quaternary strength comparison based on the collected ces. 
+ * @param lowestpvalue the lowest primary value that will not be ignored if + * alternate handling is shifted + * @return the quaternary strength comparison result + */ + private final int doQuaternaryCompare(int lowestpvalue) + { + boolean sShifted = true; + boolean tShifted = true; + int soffset = 0; + int toffset = 0; + while (true) { + int sorder = CollationElementIterator.IGNORABLE; + int torder = CollationElementIterator.IGNORABLE; + while (sorder == CollationElementIterator.IGNORABLE + || (isContinuation(sorder) && !sShifted)) { + sorder = m_srcUtilCEBuffer_[soffset ++]; + if (isContinuation(sorder)) { + if (!sShifted) { + continue; + } + } + else if (Utility.compareUnsigned(sorder, lowestpvalue) > 0 + || (sorder & CE_PRIMARY_MASK_) + == CollationElementIterator.IGNORABLE) { + // non continuation + sorder = CE_PRIMARY_MASK_; + sShifted = false; + } + else { + sShifted = true; + } + } + sorder >>>= CE_PRIMARY_SHIFT_; + while (torder == CollationElementIterator.IGNORABLE + || (isContinuation(torder) && !tShifted)) { + torder = m_tgtUtilCEBuffer_[toffset ++]; + if (isContinuation(torder)) { + if (!tShifted) { + continue; + } + } + else if (Utility.compareUnsigned(torder, lowestpvalue) > 0 + || (torder & CE_PRIMARY_MASK_) + == CollationElementIterator.IGNORABLE) { + // non continuation + torder = CE_PRIMARY_MASK_; + tShifted = false; + } + else { + tShifted = true; + } + } + torder >>>= CE_PRIMARY_SHIFT_; + + if (sorder == torder) { + if (m_srcUtilCEBuffer_[soffset - 1] + == CollationElementIterator.NULLORDER) { + if (m_tgtUtilCEBuffer_[toffset - 1] + != CollationElementIterator.NULLORDER) { + return -1; + } + break; + } + else if (m_tgtUtilCEBuffer_[toffset - 1] + == CollationElementIterator.NULLORDER) { + return 1; + } + } + else { + if (m_srcUtilCEBuffer_[soffset - 1] == + CollationElementIterator.NULLORDER) { + return -1; + } + if (m_tgtUtilCEBuffer_[toffset - 1] == + CollationElementIterator.NULLORDER) { + return 1; + } + return (sorder < torder) ? 
-1 : 1; + } + } + return 0; + } + + /** + * Internal function. Does byte level string compare. Used by strcoll if + * strength == identical and strings are otherwise equal. This is a rare + * case. Comparison must be done on NFD normalized strings. FCD is not good + * enough. + * @param source text + * @param target text + * @param offset of the first difference in the text strings + * @param normalize flag indicating if we are to normalize the text before + * comparison + * @return 1 if source is greater than target, -1 less than and 0 if equals + */ + private static final int doIdenticalCompare(String source, String target, + int offset, boolean normalize) + + { + if (normalize) { + if (Normalizer.quickCheck(source, Normalizer.NFD,0) + != Normalizer.YES) { + source = Normalizer.decompose(source, false); + } + + if (Normalizer.quickCheck(target, Normalizer.NFD,0) + != Normalizer.YES) { + target = Normalizer.decompose(target, false); + } + offset = 0; + } + + return doStringCompare(source, target, offset); + } + + /** + * Compares string for their codepoint order. + * This comparison handles surrogate characters and place them after the + * all non surrogate characters. 
+ * @param source text + * @param target text + * @param offset start offset for comparison + * @return 1 if source is greater than target, -1 less than and 0 if equals + */ + private static final int doStringCompare(String source, + String target, + int offset) + { + // compare identical prefixes - they do not need to be fixed up + char schar = 0; + char tchar = 0; + int slength = source.length(); + int tlength = target.length(); + int minlength = Math.min(slength, tlength); + while (offset < minlength) { + schar = source.charAt(offset); + tchar = target.charAt(offset ++); + if (schar != tchar) { + break; + } + } + + if (schar == tchar && offset == minlength) { + if (slength > minlength) { + return 1; + } + if (tlength > minlength) { + return -1; + } + return 0; + } + + // if both values are in or above the surrogate range, Fix them up. + if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE + && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) { + schar = fixupUTF16(schar); + tchar = fixupUTF16(tchar); + } + + // now c1 and c2 are in UTF-32-compatible order + return (schar < tchar) ? -1 : 1; // schar and tchar has to be different + } + + /** + * Rotate surrogates to the top to get code point order + */ + private static final char fixupUTF16(char ch) + { + if (ch >= 0xe000) { + ch -= 0x800; + } + else { + ch += 0x2000; + } + return ch; + } + + /** + * Resets the internal case data members and compression values. 
+ */ + private void updateInternalState() + { + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + m_caseSwitch_ = CASE_SWITCH_; + } + else { + m_caseSwitch_ = NO_CASE_SWITCH_; + } + + if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) { + m_mask3_ = CE_REMOVE_CASE_; + m_common3_ = COMMON_NORMAL_3_; + m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_; + m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_; + m_bottom3_ = COMMON_BOTTOM_3_; + } + else { + m_mask3_ = CE_KEEP_CASE_; + m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_; + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + m_common3_ = COMMON_UPPER_FIRST_3_; + m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_; + m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_; + } else { + m_common3_ = COMMON_NORMAL_3_; + m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_; + m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_; + } + } + + // Set the compression values + int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1; + // we multilply double with int, but need only int + m_topCount3_ = (int)(PROPORTION_3_ * total3); + m_bottomCount3_ = total3 - m_topCount3_; + + if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_ + && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) { + m_isSimple3_ = true; + } + else { + m_isSimple3_ = false; + } + if(!m_isCaseLevel_ && getStrength() <= AttributeValue.TERTIARY_ && !m_isNumericCollation_ + && !m_isAlternateHandlingShifted_ && !latinOneFailed_) { + if(latinOneCEs_ == null || latinOneRegenTable_) { + if(setUpLatinOne()) { // if we succeed in building latin1 table, we'll use it + latinOneUse_ = true; + } else { + latinOneUse_ = false; + latinOneFailed_ = true; + } + latinOneRegenTable_ = false; + } else { // latin1Table exists and it doesn't need to be regenerated, just use it + latinOneUse_ = true; + } + } else { + latinOneUse_ = false; + } + + } + + /** + * Initializes the RuleBasedCollator + */ + private final void init() + { + for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; + 
m_minUnsafe_ ++) { + // Find the smallest unsafe char. + if (isUnsafe(m_minUnsafe_)) { + break; + } + } + + for (m_minContractionEnd_ = 0; + m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; + m_minContractionEnd_ ++) { + // Find the smallest contraction-ending char. + if (isContractionEnd(m_minContractionEnd_)) { + break; + } + } + latinOneFailed_ = true; + setStrength(m_defaultStrength_); + setDecomposition(m_defaultDecomposition_); + m_variableTopValue_ = m_defaultVariableTopValue_; + m_isFrenchCollation_ = m_defaultIsFrenchCollation_; + m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; + m_isCaseLevel_ = m_defaultIsCaseLevel_; + m_caseFirst_ = m_defaultCaseFirst_; + m_isHiragana4_ = m_defaultIsHiragana4_; + m_isNumericCollation_ = m_defaultIsNumericCollation_; + latinOneFailed_ = false; + updateInternalState(); + } + + /** + * Initializes utility iterators and byte buffer used by compare + */ + private final void initUtility(boolean allocate) { + if (allocate) { + if (m_srcUtilIter_ == null) { + m_srcUtilIter_ = new StringUCharacterIterator(); + m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this); + m_tgtUtilIter_ = new StringUCharacterIterator(); + m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this); + m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case + m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary + m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; // secondary + m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; // tertiary + m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_]; // Quaternary + m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_]; + m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_]; + } + } else { + m_srcUtilIter_ = null; + m_srcUtilColEIter_ = null; + m_tgtUtilIter_ = null; + m_tgtUtilColEIter_ = null; + m_utilBytes0_ = null; + m_utilBytes1_ = null; + m_utilBytes2_ = null; + m_utilBytes3_ = null; + m_utilBytes4_ = null; + m_srcUtilCEBuffer_ = null; + 
m_tgtUtilCEBuffer_ = null; + } + } + + // Consts for Latin-1 special processing + private static final int ENDOFLATINONERANGE_ = 0xFF; + private static final int LATINONETABLELEN_ = (ENDOFLATINONERANGE_+50); + private static final int BAIL_OUT_CE_ = 0xFF000000; + + /** + * Generate latin-1 tables + */ + + private class shiftValues { + int primShift = 24; + int secShift = 24; + int terShift = 24; + } + + private final void + addLatinOneEntry(char ch, int CE, shiftValues sh) { + int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0; + boolean reverseSecondary = false; + if(!isContinuation(CE)) { + tertiary = ((CE & m_mask3_)); + tertiary ^= m_caseSwitch_; + reverseSecondary = true; + } else { + tertiary = (byte)((CE & CE_REMOVE_CONTINUATION_MASK_)); + tertiary &= CE_REMOVE_CASE_; + reverseSecondary = false; + } + + secondary = ((CE >>>= 8) & LAST_BYTE_MASK_); + primary2 = ((CE >>>= 8) & LAST_BYTE_MASK_); + primary1 = (CE >>> 8); + + if(primary1 != 0) { + latinOneCEs_[ch] |= (primary1 << sh.primShift); + sh.primShift -= 8; + } + if(primary2 != 0) { + if(sh.primShift < 0) { + latinOneCEs_[ch] = BAIL_OUT_CE_; + latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_; + latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_; + return; + } + latinOneCEs_[ch] |= (primary2 << sh.primShift); + sh.primShift -= 8; + } + if(secondary != 0) { + if(reverseSecondary && m_isFrenchCollation_) { // reverse secondary + latinOneCEs_[latinOneTableLen_+ch] >>>= 8; // make space for secondary + latinOneCEs_[latinOneTableLen_+ch] |= (secondary << 24); + } else { // normal case + latinOneCEs_[latinOneTableLen_+ch] |= (secondary << sh.secShift); + } + sh.secShift -= 8; + } + if(tertiary != 0) { + latinOneCEs_[2*latinOneTableLen_+ch] |= (tertiary << sh.terShift); + sh.terShift -= 8; + } + } + + private final void + resizeLatinOneTable(int newSize) { + int newTable[] = new int[3*newSize]; + int sizeToCopy = ((newSize> 4) - m_expansionOffset_; //it.getExpansionOffset(this, CE); + int size = CE & 
0xF; // getExpansionCount(CE); + //CE = *CEOffset++; + if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */ + for(i = 0; iimage+getContractOffset(CE&0xFFF); + int UCharOffset = (CE & 0xFFF) - m_contractionOffset_; + int offset = 1; + int latinOneOffset = (CE & 0x00FFF000) >>> 12; + char schar = 0, tchar = 0; + + for(;;) { + /* + if(len == -1) { + if(s[*index] == 0) { // end of string + return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); + } else { + schar = s[*index]; + } + } else { + */ + if(m_ContInfo_.index == len) { + return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]); + } else { + schar = s.charAt(m_ContInfo_.index); + } + //} + + while(schar > (tchar = m_contractionIndex_[UCharOffset+offset]/**(UCharOffset+offset)*/)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */ + offset++; + } + + if (schar == tchar) { + m_ContInfo_.index++; + return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset+offset]); + } + else + { + if(schar > ENDOFLATINONERANGE_ /*& 0xFF00*/) { + return BAIL_OUT_CE_; + } + // skip completely ignorables + int isZeroCE = m_trie_.getLeadValue(schar); //UTRIE_GET32_FROM_LEAD(coll->mapping, schar); + if(isZeroCE == 0) { // we have to ignore completely ignorables + m_ContInfo_.index++; + continue; + } + + return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]); + } + } + } + + + /** + * This is a fast strcoll, geared towards text in Latin-1. + * It supports contractions of size two, French secondaries + * and case switching. You can use it with strengths primary + * to tertiary. It does not support shifted and case level. + * It relies on the table build by setupLatin1Table. If it + * doesn't understand something, it will go to the regular + * strcoll. 
+ */ + private final int + compareUseLatin1(String source, String target, int startOffset) + { + int sLen = source.length(); + int tLen = target.length(); + + int strength = getStrength(); + + int sIndex = startOffset, tIndex = startOffset; + char sChar = 0, tChar = 0; + int sOrder=0, tOrder=0; + + boolean endOfSource = false; + + //uint32_t *elements = coll->latinOneCEs; + + boolean haveContractions = false; // if we have contractions in our string + // we cannot do French secondary + + int offset = latinOneTableLen_; + + // Do the primary level + primLoop: + for(;;) { + while(sOrder==0) { // this loop skips primary ignorables + // sOrder=getNextlatinOneCE(source); + if(sIndex==sLen) { + endOfSource = true; + break; + } + sChar=source.charAt(sIndex++); //[sIndex++]; + //} + if(sChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out + //fprintf(stderr, "R"); + return compareRegular(source, target, startOffset); + } + sOrder = latinOneCEs_[sChar]; + if(isSpecial(sOrder)) { // if we got a special + // specials can basically be either contractions or bail-out signs. If we get anything + // else, we'll bail out anywasy + if(getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) { + m_ContInfo_.index = sIndex; + sOrder = getLatinOneContraction(0, sOrder, source); + sIndex = m_ContInfo_.index; + haveContractions = true; // if there are contractions, we cannot do French secondary + // However, if there are contractions in the table, but we always use just one char, + // we might be able to do French. This should be checked out. 
+ } + if(isSpecial(sOrder) /*== UCOL_BAIL_OUT_CE*/) { + //fprintf(stderr, "S"); + return compareRegular(source, target, startOffset); + } + } + } + + while(tOrder==0) { // this loop skips primary ignorables + // tOrder=getNextlatinOneCE(target); + if(tIndex==tLen) { + if(endOfSource) { + break primLoop; + } else { + return 1; + } + } + tChar=target.charAt(tIndex++); //[tIndex++]; + if(tChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out + //fprintf(stderr, "R"); + return compareRegular(source, target, startOffset); + } + tOrder = latinOneCEs_[tChar]; + if(isSpecial(tOrder)) { + // Handling specials, see the comments for source + if(getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) { + m_ContInfo_.index = tIndex; + tOrder = getLatinOneContraction(0, tOrder, target); + tIndex = m_ContInfo_.index; + haveContractions = true; + } + if(isSpecial(tOrder)/*== UCOL_BAIL_OUT_CE*/) { + //fprintf(stderr, "S"); + return compareRegular(source, target, startOffset); + } + } + } + if(endOfSource) { // source is finished, but target is not, say the result. 
+ return -1; + } + + if(sOrder == tOrder) { // if we have same CEs, we continue the loop + sOrder = 0; tOrder = 0; + continue; + } else { + // compare current top bytes + if(((sOrder^tOrder)&0xFF000000)!=0) { + // top bytes differ, return difference + if(sOrder >>> 8 < tOrder >>> 8) { + return -1; + } else { + return 1; + } + // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24); + // since we must return enum value + } + + // top bytes match, continue with following bytes + sOrder<<=8; + tOrder<<=8; + } + } + + // after primary loop, we definitely know the sizes of strings, + // so we set it and use simpler loop for secondaries and tertiaries + //sLen = sIndex; tLen = tIndex; + if(strength >= SECONDARY) { + // adjust the table beggining + //latinOneCEs_ += coll->latinOneTableLen; + endOfSource = false; + + if(!m_isFrenchCollation_) { // non French + // This loop is a simplified copy of primary loop + // at this point we know that whole strings are latin-1, so we don't + // check for that. We also know that we only have contractions as + // specials. 
+ //sIndex = 0; tIndex = 0; + sIndex = startOffset; tIndex = startOffset; + secLoop: + for(;;) { + while(sOrder==0) { + if(sIndex==sLen) { + endOfSource = true; + break; + } + sChar=source.charAt(sIndex++); //[sIndex++]; + sOrder = latinOneCEs_[offset+sChar]; + if(isSpecial(sOrder)) { + m_ContInfo_.index = sIndex; + sOrder = getLatinOneContraction(1, sOrder, source); + sIndex = m_ContInfo_.index; + } + } + + while(tOrder==0) { + if(tIndex==tLen) { + if(endOfSource) { + break secLoop; + } else { + return 1; + } + } + tChar=target.charAt(tIndex++); //[tIndex++]; + tOrder = latinOneCEs_[offset+tChar]; + if(isSpecial(tOrder)) { + m_ContInfo_.index = tIndex; + tOrder = getLatinOneContraction(1, tOrder, target); + tIndex = m_ContInfo_.index; + } + } + if(endOfSource) { + return -1; + } + + if(sOrder == tOrder) { + sOrder = 0; tOrder = 0; + continue; + } else { + // see primary loop for comments on this + if(((sOrder^tOrder)&0xFF000000)!=0) { + if(sOrder >>> 8 < tOrder >>> 8) { + return -1; + } else { + return 1; + } + } + sOrder<<=8; + tOrder<<=8; + } + } + } else { // French + if(haveContractions) { // if we have contractions, we have to bail out + // since we don't really know how to handle them here + return compareRegular(source, target, startOffset); + } + // For French, we go backwards + sIndex = sLen; tIndex = tLen; + secFLoop: + for(;;) { + while(sOrder==0) { + if(sIndex==startOffset) { + endOfSource = true; + break; + } + sChar=source.charAt(--sIndex); //[--sIndex]; + sOrder = latinOneCEs_[offset+sChar]; + // don't even look for contractions + } + + while(tOrder==0) { + if(tIndex==startOffset) { + if(endOfSource) { + break secFLoop; + } else { + return 1; + } + } + tChar=target.charAt(--tIndex); //[--tIndex]; + tOrder = latinOneCEs_[offset+tChar]; + // don't even look for contractions + } + if(endOfSource) { + return -1; + } + + if(sOrder == tOrder) { + sOrder = 0; tOrder = 0; + continue; + } else { + // see the primary loop for comments + 
if(((sOrder^tOrder)&0xFF000000)!=0) { + if(sOrder >>> 8 < tOrder >>> 8) { + return -1; + } else { + return 1; + } + } + sOrder<<=8; + tOrder<<=8; + } + } + } + } + + if(strength >= TERTIARY) { + // tertiary loop is the same as secondary (except no French) + offset += latinOneTableLen_; + //sIndex = 0; tIndex = 0; + sIndex = startOffset; tIndex = startOffset; + endOfSource = false; + for(;;) { + while(sOrder==0) { + if(sIndex==sLen) { + endOfSource = true; + break; + } + sChar=source.charAt(sIndex++); //[sIndex++]; + sOrder = latinOneCEs_[offset+sChar]; + if(isSpecial(sOrder)) { + m_ContInfo_.index = sIndex; + sOrder = getLatinOneContraction(2, sOrder, source); + sIndex = m_ContInfo_.index; + } + } + while(tOrder==0) { + if(tIndex==tLen) { + if(endOfSource) { + return 0; // if both strings are at the end, they are equal + } else { + return 1; + } + } + tChar=target.charAt(tIndex++); //[tIndex++]; + tOrder = latinOneCEs_[offset+tChar]; + if(isSpecial(tOrder)) { + m_ContInfo_.index = tIndex; + tOrder = getLatinOneContraction(2, tOrder, target); + tIndex = m_ContInfo_.index; + } + } + if(endOfSource) { + return -1; + } + if(sOrder == tOrder) { + sOrder = 0; tOrder = 0; + continue; + } else { + if(((sOrder^tOrder)&0xff000000)!=0) { + if(sOrder >>> 8 < tOrder >>> 8) { + return -1; + } else { + return 1; + } + } + sOrder<<=8; + tOrder<<=8; + } + } + } + return 0; + } + /** + * Get the version of this collator object. + * @return the version object associated with this collator + * @stable ICU 2.8 + */ + public VersionInfo getVersion() { + /* RunTime version */ + int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor(); + /* Builder version*/ + int bdVersion = m_version_.getMajor(); + + /* Charset Version. 
Need to get the version from cnv files + * makeconv should populate cnv files with version and + * an api has to be provided in ucnv.h to obtain this version + */ + int csVersion = 0; + + /* combine the version info */ + int cmbVersion = ((rtVersion<<11) | (bdVersion<<6) | (csVersion)) & 0xFFFF; + + /* Tailoring rules */ + return VersionInfo.getInstance(cmbVersion>>8, + cmbVersion & 0xFF, + m_version_.getMinor(), + UCA_.m_UCA_version_.getMajor()); + +// versionInfo[0] = (uint8_t)(cmbVersion>>8); +// versionInfo[1] = (uint8_t)cmbVersion; +// versionInfo[2] = coll->image->version[1]; +// versionInfo[3] = coll->UCA->image->UCAVersion[0]; + } + + /** + * Get the UCA version of this collator object. + * @return the version object associated with this collator + * @stable ICU 2.8 + */ + public VersionInfo getUCAVersion() { + return UCA_.m_UCA_version_; + } + + private transient boolean m_reallocLatinOneCEs_; +} diff --git a/main/classes/collate/src/com/ibm/icu/text/StringSearch.java b/main/classes/collate/src/com/ibm/icu/text/StringSearch.java new file mode 100644 index 00000000000..5429f34bac3 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/text/StringSearch.java @@ -0,0 +1,3177 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.text; + +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; +import java.util.Locale; + +import com.ibm.icu.impl.CharacterIteratorWrapper; +import com.ibm.icu.impl.Norm2AllModes; +import com.ibm.icu.impl.Normalizer2Impl; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.util.ULocale; + +/** + *

    + * StringSearch is the concrete subclass of + * SearchIterator that provides language-sensitive text searching + * based on the comparison rules defined in a {@link RuleBasedCollator} object. + *

    + *

    + * StringSearch uses a version of the fast Boyer-Moore search + * algorithm that has been adapted to work with the large character set of + * Unicode. Refer to + * + * "Efficient Text Searching in Java", published in the + * Java Report on February, 1999, for further information on the + * algorithm. + *

    + *

    + * Users are also strongly encouraged to read the section on + * + * String Search and + * + * Collation in the user guide before attempting to use this class. + *

    + *

    + * String searching becomes a little complicated when accents are encountered at + * match boundaries. If a match is found and it has preceding or trailing + * accents not part of the match, the result returned will include the + * preceding accents up to the first base character, if the pattern searched + * for starts with an accent. Likewise, + * if the pattern ends with an accent, all trailing accents up to the first + * base character will be included in the result. + *

    + *

    + * For example, if a match is found in target text "a\u0325\u0300" for + * the pattern + * "a\u0325", the result returned by StringSearch will be the index 0 and + * length 3 <0, 3>. If a match is found in the target + * "a\u0325\u0300" + * for the pattern "\u0300", then the result will be index 1 and length 2 + * <1, 2>. + *

    + *

    + * In the case where the decomposition mode is on for the RuleBasedCollator, + * all matches that start or end with an accent will have their results include + * preceding or following accents respectively. For example, if pattern "a" is + * looked for in the target text "á\u0325", the result will be + * index 0 and length 2 <0, 2>. + *

    + *

    + * The StringSearch class provides two options to handle accent matching + * described below: + *

    + *

    + * Let S' be the sub-string of a text string S between the offsets start and + * end <start, end>. + *
    + * A pattern string P matches a text string S at the offsets <start, + * length> + *
    + * if + *

     
    + * option 1. P matches some canonical equivalent string of S'. Suppose the 
    + *           RuleBasedCollator used for searching has a collation strength of 
    + *           TERTIARY, all accents are non-ignorable. If the pattern 
    + *           "a\u0300" is searched in the target text 
    + *           "a\u0325\u0300", 
    + *           a match will be found, since the target text is canonically 
    + *           equivalent to "a\u0300\u0325"
    + * option 2. P matches S' and if P starts or ends with a combining mark, 
    + *           there exists no non-ignorable combining mark before or after S' 
    + *           in S respectively. Following the example above, the pattern 
    + *           "a\u0300" will not find a match in "a\u0325\u0300", 
    + *           since
    + *           there exists a non-ignorable accent '\u0325' in the middle of 
    + *           'a' and '\u0300'. Even with a target text of 
    + *           "a\u0300\u0325" a match will not be found because of the 
    + *           non-ignorable trailing accent \u0325.
    + * 
    + * Option 2. will be the default mode for dealing with boundary accents unless + * otherwise specified via the API setCanonical(boolean). + * One restriction is to be noted for option 1. Currently there are no + * composite characters that consist of a character with combining class > 0 + * before a character with combining class == 0. However, if such a character + * exists in the future, the StringSearch may not work correctly with option 1 + * when such characters are encountered. + *

    + *

    + * SearchIterator provides APIs to specify the starting position + * within the text string to be searched, e.g. setIndex, + * preceding and following. Since the starting position will + * be set as it is specified, please take note that there are some dangerous + * positions at which the search may return incorrect results: + *

      + *
    • The midst of a substring that requires decomposition. + *
    • If the following match is to be found, the position should not be the + * second character of a pair that needs to be swapped with the preceding + * character. Vice versa, if the preceding match is to be found, the + * position to search from should not be the first character of a pair that + * needs to be swapped with the next character. E.g. certain Thai and + * Lao characters require swapping. + *
    • If a following pattern match is to be found, any position within a + * contracting sequence except the first will fail. Vice versa if a + * preceding pattern match is to be found, an invalid starting point + * would be any character within a contracting sequence except the last. + *
    + *

    + *

    + * Though collator attributes will be taken into consideration while + * performing matches, there are no APIs provided in StringSearch for setting + * and getting the attributes. These attributes can be set by getting the + * collator from getCollator and using the APIs in + * com.ibm.icu.text.Collator. To update StringSearch to the new + * collator attributes, reset() or + * setCollator(RuleBasedCollator) has to be called. + *

    + *

    + * Consult the + * + * String Search user guide and the SearchIterator + * documentation for more information and examples of use. + *

    + *

    + * This class is not subclassable + *

    + * @see SearchIterator + * @see RuleBasedCollator + * @author Laura Werner, synwee + * @stable ICU 2.0 + */ +// internal notes: all methods do not guarantee the correct status of the +// characteriterator. the caller has to maintain the original index position +// if necessary. methods could change the index position as it deems fit +public final class StringSearch extends SearchIterator +{ + + // public constructors -------------------------------------------------- + + /** + * Initializes the iterator to use the language-specific rules defined in + * the argument collator to search for argument pattern in the argument + * target text. The argument breakiter is used to define logical matches. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @param collator RuleBasedCollator that defines the language rules + * @param breakiter A {@link BreakIterator} that is used to determine the + * boundaries of a logical match. This argument can be null. 
+ * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0 + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator + * @stable ICU 2.0 + */ + public StringSearch(String pattern, CharacterIterator target, + RuleBasedCollator collator, BreakIterator breakiter) + { + super(target, breakiter); + m_textBeginOffset_ = targetText.getBeginIndex(); + m_textLimitOffset_ = targetText.getEndIndex(); + m_collator_ = collator; + m_colEIter_ = m_collator_.getCollationElementIterator(target); + m_utilColEIter_ = collator.getCollationElementIterator(""); + m_ceMask_ = getMask(m_collator_.getStrength()); + m_isCanonicalMatch_ = false; + m_pattern_ = new Pattern(pattern); + m_matchedIndex_ = DONE; + m_charBreakIter_ = BreakIterator.getCharacterInstance(/*m_collator_.getLocale(ULocale.ACTUAL_LOCALE)*/); + m_charBreakIter_.setText(target); + initialize(); + } + + /** + * Initializes the iterator to use the language-specific rules defined in + * the argument collator to search for argument pattern in the argument + * target text. No BreakIterators are set to test for logical matches. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @param collator RuleBasedCollator that defines the language rules + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0 + * @see RuleBasedCollator + * @see SearchIterator + * @stable ICU 2.0 + */ + public StringSearch(String pattern, CharacterIterator target, + RuleBasedCollator collator) + { + this(pattern, target, collator, null/*BreakIterator.getCharacterInstance()*/); + } + + /** + * Initializes the iterator to use the language-specific rules and + * break iterator rules defined in the argument locale to search for + * argument pattern in the argument target text. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. 
+ * @param target target text to search for pattern. + * @param locale locale to use for language and break iterator rules + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0. ClassCastException thrown if the collator for + * the specified locale is not a RuleBasedCollator. + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator + * @stable ICU 2.0 + */ + public StringSearch(String pattern, CharacterIterator target, Locale locale) + { + this(pattern, target, ULocale.forLocale(locale)); + } + + /** + * Initializes the iterator to use the language-specific rules and + * break iterator rules defined in the argument locale to search for + * argument pattern in the argument target text. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @param locale ulocale to use for language and break iterator rules + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0. ClassCastException thrown if the collator for + * the specified locale is not a RuleBasedCollator. + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator + * @stable ICU 3.2 + */ + public StringSearch(String pattern, CharacterIterator target, ULocale locale) + { + this(pattern, target, (RuleBasedCollator)Collator.getInstance(locale), + null/*BreakIterator.getCharacterInstance(locale)*/); + } + + /** + * Initializes the iterator to use the language-specific rules and + * break iterator rules defined in the default locale to search for + * argument pattern in the argument target text. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. + * @param target target text to search for pattern. 
+ * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0. ClassCastException thrown if the collator for + * the default locale is not a RuleBasedCollator. + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator + * @stable ICU 2.0 + */ + public StringSearch(String pattern, String target) + { + this(pattern, new StringCharacterIterator(target), + (RuleBasedCollator)Collator.getInstance(), + null/*BreakIterator.getCharacterInstance()*/); + } + + // public getters ----------------------------------------------------- + + /** + *

    + * Gets the RuleBasedCollator used for the language rules. + *

    + *

    + * Since StringSearch depends on the returned RuleBasedCollator, any + * changes to the RuleBasedCollator result should follow with a call to + * either StringSearch.reset() or + * StringSearch.setCollator(RuleBasedCollator) to ensure the correct + * search behaviour. + *

    + * @return RuleBasedCollator used by this StringSearch + * @see RuleBasedCollator + * @see #setCollator + * @stable ICU 2.0 + */ + public RuleBasedCollator getCollator() + { + return m_collator_; + } + + /** + * Returns the pattern for which StringSearch is searching for. + * @return the pattern searched for + * @stable ICU 2.0 + */ + public String getPattern() + { + return m_pattern_.targetText; + } + + /** + * Return the index in the target text where the iterator is currently + * positioned at. + * If the iteration has gone past the end of the target text or past + * the beginning for a backwards search, {@link #DONE} is returned. + * @return index in the target text where the iterator is currently + * positioned at + * @stable ICU 2.8 + */ + public int getIndex() + { + int result = m_colEIter_.getOffset(); + if (isOutOfBounds(m_textBeginOffset_, m_textLimitOffset_, result)) { + return DONE; + } + return result; + } + + /** + * Determines whether canonical matches (option 1, as described in the + * class documentation) is set. + * See setCanonical(boolean) for more information. + * @see #setCanonical + * @return true if canonical matches is set, false otherwise + * @stable ICU 2.8 + */ + public boolean isCanonical() + { + return m_isCanonicalMatch_; + } + + // public setters ----------------------------------------------------- + + /** + *

    + * Sets the RuleBasedCollator to be used for language-specific searching. + *

    + *

    + * This method causes internal data such as Boyer-Moore shift tables + * to be recalculated, but the iterator's position is unchanged. + *

    + * @param collator to use for this StringSearch + * @exception IllegalArgumentException thrown when collator is null + * @see #getCollator + * @stable ICU 2.0 + */ + public void setCollator(RuleBasedCollator collator) + { + if (collator == null) { + throw new IllegalArgumentException("Collator can not be null"); + } + m_collator_ = collator; + m_ceMask_ = getMask(m_collator_.getStrength()); + // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT + initialize(); + m_colEIter_.setCollator(m_collator_); + m_utilColEIter_.setCollator(m_collator_); + m_charBreakIter_ = BreakIterator.getCharacterInstance(/*collator.getLocale(ULocale.VALID_LOCALE)*/); + m_charBreakIter_.setText(targetText); + } + + /** + *

    + * Set the pattern to search for. + *

    + *

    + * This method causes internal data such as Boyer-Moore shift tables + * to be recalculated, but the iterator's position is unchanged. + *

    + * @param pattern for searching + * @see #getPattern + * @exception IllegalArgumentException thrown if pattern is null or of + * length 0 + * @stable ICU 2.0 + */ + public void setPattern(String pattern) + { + if (pattern == null || pattern.length() <= 0) { + throw new IllegalArgumentException( + "Pattern to search for can not be null or of length 0"); + } + m_pattern_.targetText = pattern; + initialize(); + } + + /** + * Set the target text to be searched. Text iteration will hence begin at + * the start of the text string. This method is useful if you want to + * re-use an iterator to search within a different body of text. + * @param text new text iterator to look for match, + * @exception IllegalArgumentException thrown when text is null or has + * 0 length + * @see #getTarget + * @stable ICU 2.8 + */ + public void setTarget(CharacterIterator text) + { + super.setTarget(text); + m_textBeginOffset_ = targetText.getBeginIndex(); + m_textLimitOffset_ = targetText.getEndIndex(); + m_colEIter_.setText(targetText); + m_charBreakIter_.setText(targetText); + } + + /** + *

    + * Sets the position in the target text which the next search will start + * from to the argument. This method clears all previous states. + *

    + *

    + * This method takes the argument position and sets the position in the + * target text accordingly, without checking if position is pointing to a + * valid starting point to begin searching. + *

    + *

    + * Search positions that may render incorrect results are highlighted in + * the class documentation. + *

    + * @param position index to start next search from. + * @exception IndexOutOfBoundsException thrown if argument position is out + * of the target text range. + * @see #getIndex + * @stable ICU 2.8 + */ + public void setIndex(int position) + { + super.setIndex(position); + m_matchedIndex_ = DONE; + m_colEIter_.setExactOffset(position); + } + + /** + *

    +     * Set the canonical match mode. See class documentation for details.
+     * The default setting for this property is false.
+     * @param allowCanonical flag indicator if canonical matches are allowed
+     * @see #isCanonical
+     * @stable ICU 2.8
+     */
+    public void setCanonical(boolean allowCanonical)
+    {
+        m_isCanonicalMatch_ = allowCanonical;
+        if (!allowCanonical) {
+            // the accent buffers are only needed for canonical matching
+            return;
+        }
+
+        // create each accent buffer on first use, otherwise empty it
+        if (m_canonicalPrefixAccents_ == null) {
+            m_canonicalPrefixAccents_ = new StringBuilder();
+        }
+        else {
+            m_canonicalPrefixAccents_.setLength(0);
+        }
+        if (m_canonicalSuffixAccents_ == null) {
+            m_canonicalSuffixAccents_ = new StringBuilder();
+        }
+        else {
+            m_canonicalSuffixAccents_.setLength(0);
+        }
+    }
+
+    // public miscellaneous methods -----------------------------------------
+
+    /**
+     *

    + * Resets the search iteration. All properties will be reset to the + * default value. + *

    + *

    + * Search will begin at the start of the target text if a forward iteration + * is initiated before a backwards iteration. Otherwise if a + * backwards iteration is initiated before a forwards iteration, the search + * will begin at the end of the target text. + *

    + *

    + * Canonical match option will be reset to false, ie an exact match. + *

    + * @stable ICU 2.8 + */ + public void reset() + { + // reset is setting the attributes that are already in string search, + // hence all attributes in the collator should be retrieved without any + // problems + super.reset(); + m_isCanonicalMatch_ = false; + m_ceMask_ = getMask(m_collator_.getStrength()); + // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT + initialize(); + m_colEIter_.setCollator(m_collator_); + m_colEIter_.reset(); + m_utilColEIter_.setCollator(m_collator_); + } + + // protected methods ----------------------------------------------------- + + /** + *

    + * Concrete method to provide the mechanism + * for finding the next forwards match in the target text. + * See super class documentation for its use. + *

    + * @param start index in the target text at which the forwards search + * should begin. + * @return the starting index of the next forwards match if found, DONE + * otherwise + * @see #handlePrevious(int) + * @see #DONE + * @stable ICU 2.8 + */ + protected int handleNext(int start) + { + if (m_pattern_.m_CELength_ == 0) { + matchLength = 0; + if (m_matchedIndex_ == DONE && start == m_textBeginOffset_) { + m_matchedIndex_ = start; + return m_matchedIndex_; + } + + targetText.setIndex(start); + char ch = targetText.current(); + // ch can never be done, it is handled by next() + char ch2 = targetText.next(); + if (ch2 == CharacterIterator.DONE) { + m_matchedIndex_ = DONE; + } + else { + m_matchedIndex_ = targetText.getIndex(); + } + if (UTF16.isLeadSurrogate(ch) && UTF16.isTrailSurrogate(ch2)) { + targetText.next(); + m_matchedIndex_ = targetText.getIndex(); + } + } + else { + if (matchLength <= 0) { + // we must have reversed direction after we reached the start + // of the target text + // see SearchIterator next(), it checks the bounds and returns + // if it exceeds the range. It does not allow setting of + // m_matchedIndex + if (start == m_textBeginOffset_) { + m_matchedIndex_ = DONE; + } + else { + // for boundary check purposes. this will ensure that the + // next match will not preceed the current offset + // note search->matchedIndex will always be set to something + // in the code + m_matchedIndex_ = start - 1; + } + } + + // status checked below + if (m_isCanonicalMatch_) { + // can't use exact here since extra accents are allowed. + handleNextCanonical(start); + } + else { + handleNextExact(start); + } + } + if (m_matchedIndex_ == DONE) { + targetText.setIndex(m_textLimitOffset_); + } + else { + targetText.setIndex(m_matchedIndex_); + } + return m_matchedIndex_; + } + + /** + *

    + * Concrete method to provide the mechanism + * for finding the next backwards match in the target text. + * See super class documentation for its use. + *

    + * @param start index in the target text at which the backwards search + * should begin. + * @return the starting index of the next backwards match if found, DONE + * otherwise + * @see #handleNext(int) + * @see #DONE + * @stable ICU 2.8 + */ + protected int handlePrevious(int start) + { + if (m_pattern_.m_CELength_ == 0) { + matchLength = 0; + // start can never be DONE or 0, it is handled in previous + targetText.setIndex(start); + char ch = targetText.previous(); + if (ch == CharacterIterator.DONE) { + m_matchedIndex_ = DONE; + } + else { + m_matchedIndex_ = targetText.getIndex(); + if (UTF16.isTrailSurrogate(ch)) { + if (UTF16.isLeadSurrogate(targetText.previous())) { + m_matchedIndex_ = targetText.getIndex(); + } + } + } + } + else { + if (matchLength == 0) { + // we must have reversed direction after we reached the end + // of the target text + // see SearchIterator next(), it checks the bounds and returns + // if it exceeds the range. It does not allow setting of + // m_matchedIndex + m_matchedIndex_ = DONE; + } + if (m_isCanonicalMatch_) { + // can't use exact here since extra accents are allowed. 
+ handlePreviousCanonical(start); + } + else { + handlePreviousExact(start); + } + } + + if (m_matchedIndex_ == DONE) { + targetText.setIndex(m_textBeginOffset_); + } + else { + targetText.setIndex(m_matchedIndex_); + } + return m_matchedIndex_; + } + + // private static inner classes ---------------------------------------- + + private static class Pattern + { + // protected methods ----------------------------------------------- + + /** + * Pattern string + */ + protected String targetText; + /** + * Array containing the collation elements of targetText + */ + protected int m_CE_[]; + /** + * Number of collation elements in m_CE_ + */ + protected int m_CELength_; + /** + * Flag indicator if targetText starts with an accent + */ + protected boolean m_hasPrefixAccents_; + /** + * Flag indicator if targetText ends with an accent + */ + protected boolean m_hasSuffixAccents_; + /** + * Default number of characters to shift for Boyer Moore + */ + protected int m_defaultShiftSize_; + /** + * Number of characters to shift for Boyer Moore, depending on the + * source text to search + */ + protected char m_shift_[]; + /** + * Number of characters to shift backwards for Boyer Moore, depending + * on the source text to search + */ + protected char m_backShift_[]; + + // protected constructors ------------------------------------------ + + /** + * Empty constructor + */ + protected Pattern(String pattern) + { + targetText = pattern; + m_CE_ = new int[INITIAL_ARRAY_SIZE_]; + m_CELength_ = 0; + m_hasPrefixAccents_ = false; + m_hasSuffixAccents_ = false; + m_defaultShiftSize_ = 1; + m_shift_ = new char[MAX_TABLE_SIZE_]; + m_backShift_ = new char[MAX_TABLE_SIZE_]; + } + } + + + // private data members ------------------------------------------------ + + /** + * target text begin offset. Each targetText has a valid contiguous region + * to iterate and this data member is the offset to the first such + * character in the region. 
+     */
+    private int m_textBeginOffset_;
+    /**
+     * target text limit offset. Each targetText has a valid contiguous region
+     * to iterate and this data member is the offset to 1 after the last such
+     * character in the region.
+     */
+    private int m_textLimitOffset_;
+    /**
+     * Upon completion of a search, m_matchedIndex_ stores the starting offset
+     * of the match in the target text. The value DONE is the default value.
+     * If we are not at the start of the text or the end of the text and
+     * m_matchedIndex_ is DONE it means that we cannot find any more matches
+     * in that particular direction
+     */
+    private int m_matchedIndex_;
+    /**
+     * Current pattern to search for
+     */
+    private Pattern m_pattern_;
+    /**
+     * Collator whose rules are used to perform the search
+     */
+    private RuleBasedCollator m_collator_;
+    /**
+     * The collation element iterator for the text source.
+     */
+    private CollationElementIterator m_colEIter_;
+    /**
+     * Utility collation element iterator, used throughout the class for
+     * temporary iteration (for example over the pattern).
+     */
+    private CollationElementIterator m_utilColEIter_;
+    /**
+     * The mask used on the collation elements to retrieve the valid strength
+     * weight
+     */
+    private int m_ceMask_;
+    /**
+     * Buffer storing prefix accents during a canonical search
+     */
+    private StringBuilder m_canonicalPrefixAccents_;
+    /**
+     * Buffer storing suffix accents during a canonical search
+     */
+    private StringBuilder m_canonicalSuffixAccents_;
+    /**
+     * Flag to indicate if canonical search is to be done.
+     * E.g looking for "a\u0300" in "a\u0318\u0300" will yield the match at 0.
+     */
+    private boolean m_isCanonicalMatch_;
+    /**
+     * Character break iterator for boundary checking.
+ */ + private BreakIterator m_charBreakIter_; + private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl; + /** + * Size of the shift tables + */ + private static final int MAX_TABLE_SIZE_ = 257; + /** + * Initial array size + */ + private static final int INITIAL_ARRAY_SIZE_ = 256; + /** + * Utility mask + */ + private static final int SECOND_LAST_BYTE_SHIFT_ = 8; + /** + * Utility mask + */ + private static final int LAST_BYTE_MASK_ = 0xff; + /** + * Utility buffer for return values and temporary storage + */ + private int m_utilBuffer_[] = new int[2]; + /** + * Unsigned 32-Bit Integer Mask + */ + private static final long UNSIGNED_32BIT_MASK = 0xffffffffL; + + // private methods ------------------------------------------------------- + + /** + * Hash a collation element from its full size (32 bits) down into a + * value that can be used as an index into the shift tables. Right + * now we do a modulus by the size of the hash table. + * @param ce collation element + * @return collapsed version of the collation element + */ + private static final int hash(int ce) + { + // the old value UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_ does not work + // well with the new collation where most of the latin 1 characters + // are of the value xx000xxx. their hashes will most of the time be 0 + // to be discussed on the hash algo. + return CollationElementIterator.primaryOrder(ce) % MAX_TABLE_SIZE_; + } + + private final char getFCD(int c) { + return (char)m_nfcImpl_.getFCD16(c); + } + /** + * Gets the fcd value for a character at the argument index. + * This method takes into accounts of the supplementary characters. + * Note this method changes the offset in the character iterator. 
+ * @param str UTF16 string where character for fcd retrieval resides + * @param offset position of the character whose fcd is to be retrieved + * @return fcd value + */ + private final char getFCD(CharacterIterator str, int offset) + { + char ch = str.setIndex(offset); + int result = m_nfcImpl_.getFCD16FromSingleLead(ch); + if (result != 0 && Character.isHighSurrogate(ch)) { + char c2 = str.next(); + if (Character.isLowSurrogate(c2)) { + result = m_nfcImpl_.getFCD16(Character.toCodePoint(ch, c2)); + } else { + result = 0; + } + } + return (char)result; + } + /** + * Gets the FCD value for the code point before the input offset. + * Modifies the iterator's index. + * @param iter text iterator + * @param offset index after the character to test + * @return FCD value for the character before offset + */ + private final int getFCDBefore(CharacterIterator iter, int offset) { + int result; + iter.setIndex(offset); + char c = iter.previous(); + if (UTF16.isSurrogate(c)) { + if (Normalizer2Impl.UTF16Plus.isSurrogateLead(c)) { + result = 0; + } else { + char lead = iter.previous(); + if (Character.isHighSurrogate(lead)) { + result = m_nfcImpl_.getFCD16(Character.toCodePoint(lead, c)); + } else { + result = 0; + } + } + } else { + result = m_nfcImpl_.getFCD16FromSingleLead(c); + } + return result; + } + /** + * Gets the fcd value for a character at the argument index. + * This method takes into accounts of the supplementary characters. 
+ * @param str UTF16 string where character for fcd retrieval resides + * @param offset position of the character whose fcd is to be retrieved + * @return fcd value + */ + private final char getFCD(String str, int offset) + { + char ch = str.charAt(offset); + int result = m_nfcImpl_.getFCD16FromSingleLead(ch); + if (result != 0 && Character.isHighSurrogate(ch)) { + char c2; + if (++offset < str.length() && Character.isLowSurrogate(c2 = str.charAt(offset))) { + result = m_nfcImpl_.getFCD16(Character.toCodePoint(ch, c2)); + } else { + result = 0; + } + } + return (char)result; + } + + /** + * Getting the modified collation elements taking into account the collation + * attributes + * @param ce + * @return the modified collation element + */ + private final int getCE(int ce) + { + // note for tertiary we can't use the collator->tertiaryMask, that + // is a preprocessed mask that takes into account case options. since + // we are only concerned with exact matches, we don't need that. + ce &= m_ceMask_; + + if (m_collator_.isAlternateHandlingShifted()) { + // alternate handling here, since only the 16 most significant + // digits is only used, we can safely do a compare without masking + // if the ce is a variable, we mask and get only the primary values + // no shifting to quartenary is required since all primary values + // less than variabletop will need to be masked off anyway. + if (((m_collator_.m_variableTopValue_ << 16) & UNSIGNED_32BIT_MASK) > (ce & UNSIGNED_32BIT_MASK)) { + if (m_collator_.getStrength() == Collator.QUATERNARY) { + ce = CollationElementIterator.primaryOrder(ce); + } + else { + ce = CollationElementIterator.IGNORABLE; + } + } + } + + return ce; + } + + /** + * Appends a int to a int array, increasing the size of the array when + * we are out of space. 
+ * @param offset in array to append to + * @param value to append + * @param array to append to + * @return the array appended to, this could be a new and bigger array + */ + private static final int[] append(int offset, int value, int array[]) + { + if (offset >= array.length) { + int temp[] = new int[offset + INITIAL_ARRAY_SIZE_]; + System.arraycopy(array, 0, temp, 0, array.length); + array = temp; + } + array[offset] = value; + return array; + } + + /** + * Initializing the ce table for a pattern. Stores non-ignorable collation + * keys. Table size will be estimated by the size of the pattern text. + * Table expansion will be perform as we go along. Adding 1 to ensure that + * the table size definitely increases. + * Internal method, status assumed to be a success. + * @return total number of expansions + */ + private final int initializePatternCETable() + { + m_utilColEIter_.setText(m_pattern_.targetText); + + int offset = 0; + int result = 0; + int ce = m_utilColEIter_.next(); + + while (ce != CollationElementIterator.NULLORDER) { + int newce = getCE(ce); + if (newce != CollationElementIterator.IGNORABLE) { + m_pattern_.m_CE_ = append(offset, newce, m_pattern_.m_CE_); + offset ++; + } + result += m_utilColEIter_.getMaxExpansion(ce) - 1; + ce = m_utilColEIter_.next(); + } + + m_pattern_.m_CE_ = append(offset, 0, m_pattern_.m_CE_); + m_pattern_.m_CELength_ = offset; + + return result; + } + + /** + * Initializes the pattern struct. + * Internal method, status assumed to be success. 
+ * @return expansionsize the total expansion size of the pattern + */ + private final int initializePattern() + { + if (m_collator_.getStrength() == Collator.PRIMARY) { + m_pattern_.m_hasPrefixAccents_ = false; + m_pattern_.m_hasSuffixAccents_ = false; + } else { + m_pattern_.m_hasPrefixAccents_ = (getFCD(m_pattern_.targetText, 0) + >> SECOND_LAST_BYTE_SHIFT_) != 0; + m_pattern_.m_hasSuffixAccents_ = (getFCD(m_pattern_.targetText.codePointBefore( + m_pattern_.targetText.length())) + & LAST_BYTE_MASK_) != 0; + } + // since intializePattern is an internal method status is a success. + return initializePatternCETable(); + } + + /** + * Initializing shift tables, with the default values. + * If a corresponding default value is 0, the shift table is not set. + * @param shift table for forwards shift + * @param backshift table for backwards shift + * @param cetable table containing pattern ce + * @param cesize size of the pattern ces + * @param expansionsize total size of the expansions + * @param defaultforward the default forward value + * @param defaultbackward the default backward value + */ + private final void setShiftTable(char shift[], + char backshift[], + int cetable[], int cesize, + int expansionsize, + char defaultforward, + char defaultbackward) + { + // estimate the value to shift. to do that we estimate the smallest + // number of characters to give the relevant ces, ie approximately + // the number of ces minus their expansion, since expansions can come + // from a character. + for (int count = 0; count < MAX_TABLE_SIZE_; count ++) { + shift[count] = defaultforward; + } + cesize --; // down to the last index + for (int count = 0; count < cesize; count ++) { + // number of ces from right of array to the count + int temp = defaultforward - count - 1; + shift[hash(cetable[count])] = temp > 1 ? ((char)temp) : 1; + } + shift[hash(cetable[cesize])] = 1; + // for ignorables we just shift by one. see test examples. 
+ shift[hash(0)] = 1; + + for (int count = 0; count < MAX_TABLE_SIZE_; count ++) { + backshift[count] = defaultbackward; + } + for (int count = cesize; count > 0; count --) { + // the original value count does not seem to work + backshift[hash(cetable[count])] = (char)(count > expansionsize ? + count - expansionsize : 1); + } + backshift[hash(cetable[0])] = 1; + backshift[hash(0)] = 1; + } + + /** + *

    Building of the pattern collation element list and the Boyer Moore + * StringSearch table.

    + *

    The canonical match will only be performed after the default match + * fails.

    + *

    For both cases we need to remember the size of the composed and + * decomposed versions of the string. Since the Boyer-Moore shift + * calculations shifts by a number of characters in the text and tries to + * match the pattern from that offset, the shift value can not be too large + * in case we miss some characters. To choose a right shift size, we + * estimate the NFC form of the and use its size as a shift guide. The NFC + * form should be the small possible representation of the pattern. Anyways, + * we'll err on the smaller shift size. Hence the calculation for + * minlength. Canonical match will be performed slightly differently. We'll + * split the pattern into 3 parts, the prefix accents (PA), the middle + * string bounded by the first and last base character (MS), the ending + * accents (EA). Matches will be done on MS first, and only when we match + * MS then some processing will be required for the prefix and end accents + * in order to determine if they match PA and EA. Hence the default shift + * values for the canonical match will take the size of either end's accent + * into consideration. Forwards search will take the end accents into + * consideration for the default shift values and the backwards search will + * take the prefix accents into consideration.

    + *

    If pattern has no non-ignorable ce, we return a illegal argument + * error.

    + */ + private final void initialize() + { + int expandlength = initializePattern(); + if (m_pattern_.m_CELength_ > 0) { + char minlength = (char)(m_pattern_.m_CELength_ > expandlength + ? m_pattern_.m_CELength_ - expandlength : 1); + m_pattern_.m_defaultShiftSize_ = minlength; + setShiftTable(m_pattern_.m_shift_, m_pattern_.m_backShift_, + m_pattern_.m_CE_, m_pattern_.m_CELength_, + expandlength, minlength, minlength); + } + else { + m_pattern_.m_defaultShiftSize_ = 0; + } + } + + /** + * Determine whether the search text bounded by the offset start and end is + * one or more whole units of text as determined by the breakiterator in + * StringSearch. + * @param start target text start offset + * @param end target text end offset + */ + private final boolean isBreakUnit(int start, int end) + { + if (breakIterator != null) { + int startindex = breakIterator.first(); + int endindex = breakIterator.last(); + + // out-of-range indexes are never boundary positions + if (start < startindex || start > endindex || end < startindex + || end > endindex) { + return false; + } + // otherwise, we can use following() on the position before the + // specified one and return true of the position we get back is the + // one the user specified + boolean result = (start == startindex + || breakIterator.following(start - 1) == start) + && (end == endindex + || breakIterator.following(end - 1) == end); + if (result) { + // iterates the individual ces + m_utilColEIter_.setText( + new CharacterIteratorWrapper(targetText), start); + for (int count = 0; count < m_pattern_.m_CELength_; + count ++) { + int ce = getCE(m_utilColEIter_.next()); + if (ce == CollationElementIterator.IGNORABLE) { + count --; + continue; + } + if (ce != m_pattern_.m_CE_[count]) { + return false; + } + } + int nextce = m_utilColEIter_.next(); + while (m_utilColEIter_.getOffset() == end + && getCE(nextce) == CollationElementIterator.IGNORABLE) { + nextce = m_utilColEIter_.next(); + } + if (nextce != 
CollationElementIterator.NULLORDER + && m_utilColEIter_.getOffset() == end) { + // extra collation elements at the end of the match + return false; + } + } + return result; + } + return true; + } + + /** + * Getting the next base character offset if current offset is an accent, + * or the current offset if the current character contains a base character. + * accents the following base character will be returned + * @param text string + * @param textoffset current offset + * @param textlength length of text string + * @return the next base character or the current offset + * if the current character is contains a base character. + */ + private final int getNextBaseOffset(CharacterIterator text, int textoffset) + { + if (textoffset >= text.getEndIndex()) { + return textoffset; + } + // iteration ends with reading CharacterIterator.DONE which has fcd==0 + char c = text.setIndex(textoffset); + for (;;) { + if ((m_nfcImpl_.getFCD16FromSingleLead(c) >> SECOND_LAST_BYTE_SHIFT_) == 0) { + return textoffset; + } + char next = text.next(); + if (Character.isSurrogatePair(c, next)) { + int fcd = m_nfcImpl_.getFCD16(Character.toCodePoint(c, next)); + if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) { + return textoffset; + } + next = text.next(); + textoffset += 2; + } else { + ++textoffset; + } + c = next; + } + } + + /** + * Gets the next base character offset depending on the string search + * pattern data + * @param textoffset one offset away from the last character + * to search for. + * @return start index of the next base character or the current offset + * if the current character is contains a base character. 
+ */ + private final int getNextBaseOffset(int textoffset) + { + if (m_pattern_.m_hasSuffixAccents_ && textoffset < m_textLimitOffset_) { + if ((getFCDBefore(targetText, textoffset) & LAST_BYTE_MASK_) != 0) { + return getNextBaseOffset(targetText, textoffset); + } + } + return textoffset; + } + + /** + * Shifting the collation element iterator position forward to prepare for + * a following match. If the last character is a unsafe character, we'll + * only shift by 1 to capture contractions, normalization etc. + * Internal method, status assumed to be success. + * @param textoffset start text position to do search + * @param ce the text ce which failed the match. + * @param patternceindex index of the ce within the pattern ce buffer which + * failed the match + * @return final offset + */ + private int shiftForward(int textoffset, int ce, int patternceindex) + + { + if (ce != CollationElementIterator.NULLORDER) { + int shift = m_pattern_.m_shift_[hash(ce)]; + // this is to adjust for characters in the middle of the + // substring for matching that failed. + int adjust = m_pattern_.m_CELength_ - patternceindex; + if (adjust > 1 && shift >= adjust) { + shift -= adjust - 1; + } + textoffset += shift; + } + else { + textoffset += m_pattern_.m_defaultShiftSize_; + } + + textoffset = getNextBaseOffset(textoffset); + // check for unsafe characters + // * if it is the start or middle of a contraction: to be done after + // a initial match is found + // * thai or lao base consonant character: similar to contraction + // * high surrogate character: similar to contraction + // * next character is a accent: shift to the next base character + return textoffset; + } + + /** + * Gets the offset to the next safe point in text. + * ie. not the middle of a contraction, swappable characters or + * supplementary characters. 
+ * @param textoffset offset in string + * @param end offset in string + * @return offset to the next safe character + */ + private final int getNextSafeOffset(int textoffset, int end) + { + int result = textoffset; // first contraction character + targetText.setIndex(result); + while (result != end && + m_collator_.isUnsafe(targetText.current())) { + result ++; + targetText.setIndex(result); + } + return result; + } + + /** + * This checks for accents in the potential match started with a composite + * character. + * This is really painful... we have to check that composite character do + * not have any extra accents. We have to normalize the potential match and + * find the immediate decomposed character before the match. + * The first composite character would have been taken care of by the fcd + * checks in checkForwardExactMatch. + * This is the slow path after the fcd of the first character and + * the last character has been checked by checkForwardExactMatch and we + * determine that the potential match has extra non-ignorable preceding + * ces. + * E.g. looking for \u0301 acute in \u01FA A ring above and acute, + * checkExtraMatchAccent should fail since there is a middle ring in + * \u01FA Note here that accents checking are slow and cautioned in the API + * docs. + * Internal method, status assumed to be a success, caller should check + * status before calling this method + * @param start index of the potential unfriendly composite character + * @param end index of the potential unfriendly composite character + * @return true if there is non-ignorable accents before at the beginning + * of the match, false otherwise. 
+ */ + private final boolean checkExtraMatchAccents(int start, int end) + { + boolean result = false; + if (m_pattern_.m_hasPrefixAccents_) { + targetText.setIndex(start); + + if (UTF16.isLeadSurrogate(targetText.next())) { + if (!UTF16.isTrailSurrogate(targetText.next())) { + targetText.previous(); + } + } + // we are only concerned with the first composite character + String str = getString(targetText, start, end); + if (Normalizer.quickCheck(str, Normalizer.NFD,0) + == Normalizer.NO) { + int safeoffset = getNextSafeOffset(start, end); + if (safeoffset != end) { + safeoffset ++; + } + String decomp = Normalizer.decompose( + str.substring(0, safeoffset - start), false); + m_utilColEIter_.setText(decomp); + int firstce = m_pattern_.m_CE_[0]; + boolean ignorable = true; + int ce = CollationElementIterator.IGNORABLE; + int offset = 0; + while (ce != firstce) { + offset = m_utilColEIter_.getOffset(); + if (ce != firstce + && ce != CollationElementIterator.IGNORABLE) { + ignorable = false; + } + ce = m_utilColEIter_.next(); + } + m_utilColEIter_.setExactOffset(offset); // back up 1 to the + m_utilColEIter_.previous(); // right offset + offset = m_utilColEIter_.getOffset(); + result = !ignorable && (UCharacter.getCombiningClass( + UTF16.charAt(decomp, offset)) != 0); + } + } + + return result; + } + + /** + * Used by exact matches, checks if there are accents before the match. + * This is really painful... we have to check that composite characters at + * the start of the matches have to not have any extra accents. + * We check the FCD of the character first, if it starts with an accent and + * the first pattern ce does not match the first ce of the character, we + * bail. + * Otherwise we try normalizing the first composite + * character and find the immediate decomposed character before the match to + * see if it is an non-ignorable accent. 
* Now normalizing the first composite character is enough because we ensure
 * that when the match is passed in here with extra beginning ces, the
 * first or last ce that match has to occur within the first character.
 * E.g. looking for \u0301 acute in \u01FA A ring above and acute,
 * checkExtraMatchAccent should fail since there is a middle ring in \u01FA
 * Note here that accents checking are slow and cautioned in the API docs.
 * This method repositions both the collation element iterator and the
 * text iterator.
 * @param start offset
 * @param end offset
 * @return true if there are extra accents before the match,
 *         false otherwise
 */
private final boolean hasAccentsBeforeMatch(int start, int end)
{
    // only patterns that start with accents need this check
    if (m_pattern_.m_hasPrefixAccents_) {
        // we have been iterating forwards previously
        boolean ignorable = true;
        int firstce = m_pattern_.m_CE_[0];
        m_colEIter_.setExactOffset(start);
        int ce = getCE(m_colEIter_.next());
        // skip the ces in front of the first pattern ce, noting whether any
        // of them is non-ignorable
        while (ce != firstce) {
            if (ce != CollationElementIterator.IGNORABLE) {
                ignorable = false;
            }
            ce = getCE(m_colEIter_.next());
        }
        if (!ignorable && m_colEIter_.isInBuffer()) {
            // within normalization buffer, discontiguous handled here
            return true;
        }

        // within text
        boolean accent = (getFCD(targetText, start) >> SECOND_LAST_BYTE_SHIFT_)
                         != 0;
        if (!accent) {
            // slow path: inspect the decomposition of the first character
            return checkExtraMatchAccents(start, end);
        }
        if (!ignorable) {
            return true;
        }
        if (start > m_textBeginOffset_) {
            // look at the character in front of the match
            targetText.setIndex(start);
            targetText.previous();
            if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_)
                != 0) {
                m_colEIter_.setExactOffset(start);
                ce = m_colEIter_.previous();
                if (ce != CollationElementIterator.NULLORDER
                    && ce != CollationElementIterator.IGNORABLE) {
                    return true;
                }
            }
        }
    }

    return false;
}

/**
 * Used by exact matches, checks if there are accents bounding the match.
 * Note this is the initial boundary check. If the potential match
 * starts or ends with composite characters, the accents in those
 * characters will be determined later.
* Not doing backwards iteration here, since discontiguous contraction for
 * backwards collation element iterator, use up too many characters.
 * E.g. looking for \u030A ring in \u01FA A ring above and acute,
 * should fail since there is an acute at the end of \u01FA
 * Note here that accents checking are slow and cautioned in the API docs.
 * This method repositions both the collation element iterator and the
 * text iterator.
 * @param start offset of match
 * @param end end offset of the match
 * @return true if there are extra accents after the match,
 *         false otherwise
 */
private final boolean hasAccentsAfterMatch(int start, int end)
{
    // only patterns that end with accents need this check
    if (m_pattern_.m_hasSuffixAccents_) {
        // position the text iterator on the last character of the match,
        // stepping back over a whole surrogate pair where there is one
        targetText.setIndex(end);
        if (end > m_textBeginOffset_
            && UTF16.isTrailSurrogate(targetText.previous())) {
            if (targetText.getIndex() > m_textBeginOffset_ &&
                !UTF16.isLeadSurrogate(targetText.previous())) {
                targetText.next();
            }
        }
        if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) != 0) {
            int firstce = m_pattern_.m_CE_[0];
            m_colEIter_.setExactOffset(start);
            // advance to the first pattern ce
            while (getCE(m_colEIter_.next()) != firstce) {
            }
            // consume the remaining pattern ces; ignorable ces do not count
            int count = 1;
            while (count < m_pattern_.m_CELength_) {
                if (getCE(m_colEIter_.next())
                    == CollationElementIterator.IGNORABLE) {
                    count --;
                }
                count ++;
            }
            // the raw ce following the match is read first and only passed
            // through getCE when it is neither NULLORDER nor IGNORABLE
            //int ce = getCE(m_colEIter_.next());
            int ce = m_colEIter_.next();
            if (ce != CollationElementIterator.NULLORDER
                && ce != CollationElementIterator.IGNORABLE) {
                ce = getCE(ce);
            }
            if (ce != CollationElementIterator.NULLORDER
                && ce != CollationElementIterator.IGNORABLE) {
                // a non-ignorable ce that still belongs to the match range
                if (m_colEIter_.getOffset() <= end) {
                    return true;
                }
                // or the character at end is itself flagged by its FCD value
                if ((getFCD(targetText, end) >> SECOND_LAST_BYTE_SHIFT_)
                    != 0) {
                    return true;
                }
            }
        }
    }
    return false;
}

/**
 * Checks if the offset runs out of the text string range
 * @param textstart offset of the first character in the range
 * @param textlimit limit offset of the text string range
 * @param offset to test
 * @return true if offset is out of bounds, false otherwise
 */
private static final boolean isOutOfBounds(int textstart, int textlimit,
                                           int offset)
{
    // textlimit itself is still considered in bounds
    return offset < textstart || offset > textlimit;
}

/**
 * Checks for identical match.
 * When the collator strength is not IDENTICAL every candidate passes.
 * Otherwise the text range and the pattern are compared after both have
 * been decomposed where the NFD quick check reports NO.
 * @param start offset of possible match
 * @param end offset of possible match
 * @return true if identical match is found
 */
private final boolean checkIdentical(int start, int end)
{
    if (m_collator_.getStrength() != Collator.IDENTICAL) {
        return true;
    }

    String textstr = getString(targetText, start, end - start);
    if (Normalizer.quickCheck(textstr, Normalizer.NFD,0)
        == Normalizer.NO) {
        textstr = Normalizer.decompose(textstr, false);
    }
    String patternstr = m_pattern_.targetText;
    if (Normalizer.quickCheck(patternstr, Normalizer.NFD,0)
        == Normalizer.NO) {
        patternstr = Normalizer.decompose(patternstr, false);
    }
    return textstr.equals(patternstr);
}

/**
 * Checks to see if the match is repeated.
 * Without overlapping matches, any intersection with the previous match
 * counts as a repeat; with overlapping matches only a candidate that
 * covers the whole previous match does.
 * @param start new match start index
 * @param limit new match limit index
 * @return true if the match is repeated, false otherwise
 */
private final boolean checkRepeatedMatch(int start, int limit)
{
    // no previous match recorded
    if (m_matchedIndex_ == DONE) {
        return false;
    }
    int end = limit - 1; // last character in the match
    int lastmatchend = m_matchedIndex_ + matchLength - 1;
    if (!isOverlapping()) {
        return (start >= m_matchedIndex_ && start <= lastmatchend)
               || (end >= m_matchedIndex_ && end <= lastmatchend)
               || (start <= m_matchedIndex_ && end >= lastmatchend);

    }
    return start <= m_matchedIndex_ && end >= lastmatchend;
}

/**
 * Checks match for contraction.
 * If the match ends with a partial contraction we fail.
 * If the match starts too far off (because of backwards iteration) we try
 * to chip off the extra characters depending on whether a breakiterator
 * has been used.
 * Temporary utility buffer used to return modified start and end.
* @param start offset of potential match, to be modified if necessary
 * @param end offset of potential match, to be modified if necessary
 * @return true if match passes the contraction test, false otherwise.
 *         In both cases m_utilBuffer_[0] and m_utilBuffer_[1] hold the
 *         possibly modified start and end.
 */
private final boolean checkNextExactContractionMatch(int start, int end)
{
    // This part checks if either ends of the match contains potential
    // contraction. If so we'll have to iterate through them
    char endchar = 0;
    if (end < m_textLimitOffset_) {
        targetText.setIndex(end);
        endchar = targetText.current();
    }
    char poststartchar = 0;
    if (start + 1 < m_textLimitOffset_) {
        targetText.setIndex(start + 1);
        poststartchar = targetText.current();
    }
    if (m_collator_.isUnsafe(endchar)
        || m_collator_.isUnsafe(poststartchar)) {
        // expansion prefix, what's left to iterate
        int bufferedCEOffset = m_colEIter_.m_CEBufferOffset_;
        boolean hasBufferedCE = bufferedCEOffset > 0;
        m_colEIter_.setExactOffset(start);
        int temp = start;
        while (bufferedCEOffset > 0) {
            // getting rid of the redundant ce, caused by setOffset.
            // since backward contraction/expansion may have extra ces if
            // we are in the normalization buffer, hasAccentsBeforeMatch
            // would have taken care of it.
            // E.g. the character \u01FA will have an expansion of 3, but
            // if we are only looking for acute and ring \u030A and \u0301,
            // we'll have to skip the first ce in the expansion buffer.
            m_colEIter_.next();
            if (m_colEIter_.getOffset() != temp) {
                start = temp;
                temp = m_colEIter_.getOffset();
            }
            bufferedCEOffset --;
        }

        // re-verify the pattern ces one by one from the adjusted position
        int count = 0;
        while (count < m_pattern_.m_CELength_) {
            int ce = getCE(m_colEIter_.next());
            if (ce == CollationElementIterator.IGNORABLE) {
                continue;
            }
            if (hasBufferedCE && count == 0
                && m_colEIter_.getOffset() != temp) {
                start = temp;
                temp = m_colEIter_.getOffset();
            }
            if (ce != m_pattern_.m_CE_[count]) {
                // mismatch: report the next base offset after end so the
                // caller can continue searching from there
                end ++;
                end = getNextBaseOffset(end);
                m_utilBuffer_[0] = start;
                m_utilBuffer_[1] = end;
                return false;
            }
            count ++;
        }
    }
    m_utilBuffer_[0] = start;
    m_utilBuffer_[1] = end;
    return true;
}


/**
 * Checks and sets the match information if found.
 * Checks
 *
* <ul>
 * <li> the potential match does not repeat the previous match
 * <li> boundaries are correct
 * <li> exact matches have no extra accents
 * <li> identical matches
 * <li> potential match does not end in the middle of a contraction
 * </ul>
 * Otherwise the offset will be shifted to the next character.
 * The result m_matchedIndex_ and matchLength will be set to the truncated
 * more fitting result value.
 * Uses the temporary utility buffer for storing the modified textoffset.
 * @param textoffset offset in the collation element text.
 * @return true if the match is valid, false otherwise
 */
private final boolean checkNextExactMatch(int textoffset)
{
    int start = m_colEIter_.getOffset();
    if (!checkNextExactContractionMatch(start, textoffset)) {
        // returns the modified textoffset
        m_utilBuffer_[0] = m_utilBuffer_[1];
        return false;
    }

    // pick up the start and end adjusted by the contraction check
    start = m_utilBuffer_[0];
    textoffset = m_utilBuffer_[1];
    // this totally matches, however we need to check if it is repeating
    if (!isBreakUnit(start, textoffset)
        || checkRepeatedMatch(start, textoffset)
        || hasAccentsBeforeMatch(start, textoffset)
        || !checkIdentical(start, textoffset)
        || hasAccentsAfterMatch(start, textoffset)) {
        // rejected: hand back the next base offset to resume from
        textoffset ++;
        textoffset = getNextBaseOffset(textoffset);
        m_utilBuffer_[0] = textoffset;
        return false;
    }

    if (m_collator_.getStrength() == Collator.PRIMARY) {
        textoffset = checkBreakBoundary(textoffset);
    }

    // totally match, we will get rid of the ending ignorables.
    m_matchedIndex_ = start;
    matchLength = textoffset - start;
    return true;
}

/**
 * Getting the previous base character offset, or the current offset if the
 * current character is a base character.
 * Note that the index of the text iterator is moved by this method.
 * @param text the source text to work on
 * @param textoffset one offset after the current character
 * @return the offset of the next character after the base character or the
 *         first composed character with accents
 */
private final int getPreviousBaseOffset(CharacterIterator text,
                                        int textoffset)
{
    if (textoffset > m_textBeginOffset_) {
        while (true) {
            int result = textoffset;
            // step back one character, keeping surrogate pairs together
            text.setIndex(result);
            if (UTF16.isTrailSurrogate(text.previous())) {
                if (text.getIndex() != text.getBeginIndex() &&
                    !UTF16.isLeadSurrogate(text.previous())) {
                    text.next();
                }
            }
            textoffset = text.getIndex();
            char fcd = getFCD(text, textoffset);
            if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
                // the character does not start with an accent: return its
                // own offset if the low FCD byte is set, otherwise the
                // offset after it
                if ((fcd & LAST_BYTE_MASK_) != 0) {
                    return textoffset;
                }
                return result;
            }
            if (textoffset == m_textBeginOffset_) {
                return m_textBeginOffset_;
            }
        }
    }
    return textoffset;
}

/**
 * Getting the indexes of the accents that are not blocked in the argument
 * accent array
 * @param accents accents in nfd.
+ * @param accentsindex array to store the indexes of accents in accents that + * are not blocked + * @return the length of populated accentsindex + */ + private int getUnblockedAccentIndex(StringBuilder accents, + int accentsindex[]) + { + int index = 0; + int length = accents.length(); + int cclass = 0; + int result = 0; + while (index < length) { + int codepoint = UTF16.charAt(accents, index); + int tempclass = UCharacter.getCombiningClass(codepoint); + if (tempclass != cclass) { + cclass = tempclass; + accentsindex[result] = index; + result ++; + } + if (UCharacter.isSupplementary(codepoint)) { + index += 2; + } + else { + index ++; + } + } + accentsindex[result] = length; + return result; + } + + /** + * Appends 3 StringBuilder/CharacterIterator together into a destination + * string buffer. + * @param source1 string buffer + * @param source2 character iterator + * @param start2 start of the character iterator to merge + * @param end2 end of the character iterator to merge + * @param source3 string buffer + * @return appended string buffer + */ + private static final StringBuilder merge(StringBuilder source1, + CharacterIterator source2, + int start2, int end2, + StringBuilder source3) + { + StringBuilder result = new StringBuilder(); + if (source1 != null && source1.length() != 0) { + result.append(source1); + } + source2.setIndex(start2); + while (source2.getIndex() < end2) { + result.append(source2.current()); + source2.next(); + } + if (source3 != null && source3.length() != 0) { + result.append(source3); + } + return result; + } + + /** + * Running through a collation element iterator to see if the contents + * matches pattern in string search data + * @param coleiter collation element iterator to test + * @return true if a match if found, false otherwise + */ + private final boolean checkCollationMatch(CollationElementIterator coleiter) + { + int patternceindex = m_pattern_.m_CELength_; + int offset = 0; + while (patternceindex > 0) { + int ce = 
getCE(coleiter.next()); + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (ce != m_pattern_.m_CE_[offset]) { + return false; + } + offset ++; + patternceindex --; + } + return true; + } + + /** + * Rearranges the front accents to try matching. + * Prefix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. + * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings "\u030A", "\u0301", "\u0325", "\u030A\u0301", + * "\u030A\u0325", "\u0301\u0325". + * step 2: check if any of the generated substrings matches the pattern. + * Internal method, status is assumed to be success, caller has to check + * status before calling this method. + * @param start first offset of the accents to start searching + * @param end start of the last accent set + * @return DONE if a match is not found, otherwise return the starting + * offset of the match. Note this start includes all preceding + * accents. + */ + private int doNextCanonicalPrefixMatch(int start, int end) + { + if ((getFCD(targetText, start) & LAST_BYTE_MASK_) == 0) { + // die... 
failed at a base character + return DONE; + } + + start = targetText.getIndex(); // index changed by fcd + int offset = getNextBaseOffset(targetText, start); + start = getPreviousBaseOffset(start); + + StringBuilder accents = new StringBuilder(); + String accentstr = getString(targetText, start, offset - start); + // normalizing the offensive string + if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0) + == Normalizer.NO) { + accentstr = Normalizer.decompose(accentstr, false); + } + accents.append(accentstr); + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int accentsize = getUnblockedAccentIndex(accents, accentsindex); + int count = (2 << (accentsize - 1)) - 1; + while (count > 0) { + // copy the base characters + m_canonicalPrefixAccents_.delete(0, + m_canonicalPrefixAccents_.length()); + int k = 0; + for (; k < accentsindex[0]; k ++) { + m_canonicalPrefixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= accentsize - 1; i ++) { + int mask = 1 << (accentsize - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < accentsindex[i + 1]; + j ++) { + m_canonicalPrefixAccents_.append(accents.charAt(j)); + } + } + } + StringBuilder match = merge(m_canonicalPrefixAccents_, + targetText, offset, end, + m_canonicalSuffixAccents_); + + // if status is a failure, ucol_setText does nothing. + // run the collator iterator through this match + m_utilColEIter_.setText(match.toString()); + if (checkCollationMatch(m_utilColEIter_)) { + return start; + } + count --; + } + return DONE; + } + + /** + * Gets the offset to the safe point in text before textoffset. + * ie. not the middle of a contraction, swappable characters or + * supplementary characters. 
+ * @param start offset in string + * @param textoffset offset in string + * @return offset to the previous safe character + */ + private final int getPreviousSafeOffset(int start, int textoffset) + { + int result = textoffset; // first contraction character + targetText.setIndex(textoffset); + while (result >= start && m_collator_.isUnsafe(targetText.previous())) { + result = targetText.getIndex(); + } + if (result != start) { + // the first contraction character is consider unsafe here + result = targetText.getIndex(); // originally result --; + } + return result; + } + + /** + * Take the rearranged end accents and tries matching. If match failed at + * a seperate preceding set of accents (seperated from the rearranged on by + * at least a base character) then we rearrange the preceding accents and + * tries matching again. + * We allow skipping of the ends of the accent set if the ces do not match. + * However if the failure is found before the accent set, it fails. + * Internal method, status assumed to be success, caller has to check + * status before calling this method. + * @param textoffset of the start of the rearranged accent + * @return DONE if a match is not found, otherwise return the starting + * offset of the match. Note this start includes all preceding + * accents. 
*/
private int doNextCanonicalSuffixMatch(int textoffset)
{
    int safelength = 0;
    StringBuilder safetext;
    int safeoffset = m_textBeginOffset_;

    // when the rearranged accents start with an unsafe character, prepend
    // the text back to the previous safe offset so that the whole unsafe
    // run is iterated together
    if (textoffset != m_textBeginOffset_
        && m_canonicalSuffixAccents_.length() > 0
        && m_collator_.isUnsafe(m_canonicalSuffixAccents_.charAt(0))) {
        safeoffset = getPreviousSafeOffset(m_textBeginOffset_,
                                           textoffset);
        safelength = textoffset - safeoffset;
        safetext = merge(null, targetText, safeoffset, textoffset,
                         m_canonicalSuffixAccents_);
    }
    else {
        safetext = m_canonicalSuffixAccents_;
    }

    // first iterate over the safe text with the utility iterator
    CollationElementIterator coleiter = m_utilColEIter_;
    coleiter.setText(safetext.toString());

    int ceindex = m_pattern_.m_CELength_ - 1;
    boolean isSafe = true; // indication flag for position in safe zone

    // compare the pattern ces from last to first while iterating backwards
    while (ceindex >= 0) {
        int textce = coleiter.previous();
        if (textce == CollationElementIterator.NULLORDER) {
            // check if we have passed the safe buffer
            if (coleiter == m_colEIter_) {
                return DONE;
            }
            // safe text exhausted: continue in the real text with the
            // main iterator, starting at safeoffset
            coleiter = m_colEIter_;
            if (safetext != m_canonicalSuffixAccents_) {
                safetext.delete(0, safetext.length());
            }
            coleiter.setExactOffset(safeoffset);
            isSafe = false;
            continue;
        }
        textce = getCE(textce);
        if (textce != CollationElementIterator.IGNORABLE
            && textce != m_pattern_.m_CE_[ceindex]) {
            // do the beginning stuff
            int failedoffset = coleiter.getOffset();
            if (isSafe && failedoffset >= safelength) {
                // alas... no hope. failed at rearranged accent set
                return DONE;
            }
            else {
                if (isSafe) {
                    // translate the safe text offset into a text offset
                    failedoffset += safeoffset;
                }

                // try rearranging the front accents
                int result = doNextCanonicalPrefixMatch(failedoffset,
                                                        textoffset);
                if (result != DONE) {
                    m_colEIter_.setExactOffset(result);
                }
                return result;
            }
        }
        if (textce == m_pattern_.m_CE_[ceindex]) {
            ceindex --;
        }
    }
    // set offset here
    if (isSafe) {
        int result = coleiter.getOffset();
        // sets the text iterator with the correct expansion and offset
        int leftoverces = coleiter.m_CEBufferOffset_;
        if (result >= safelength) {
            result = textoffset;
        }
        else {
            result += safeoffset;
        }
        m_colEIter_.setExactOffset(result);
        m_colEIter_.m_CEBufferOffset_ = leftoverces;
        return result;
    }

    return coleiter.getOffset();
}

/**
 * Trying out the substring and sees if it can be a canonical match.
 * This will try normalizing the end accents and arranging them into
 * canonical equivalents and check their corresponding ces with the pattern
 * ce.
 * Suffix accents in the text will be grouped according to their combining
 * class and the groups will be mixed and matched to try find the perfect
 * match with the pattern.
 * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
 * step 1: split "\u030A\u0301" into 6 other type of potential accent
 *         substrings
 *         "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
 *         "\u0301\u0325".
 * step 2: check if any of the generated substrings matches the pattern.
+ * @param textoffset end offset in the collation element text that ends with + * the accents to be rearranged + * @return true if the match is valid, false otherwise + */ + private boolean doNextCanonicalMatch(int textoffset) + { + int offset = m_colEIter_.getOffset(); + targetText.setIndex(textoffset); + if (UTF16.isTrailSurrogate(targetText.previous()) + && targetText.getIndex() > m_textBeginOffset_) { + if (!UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) { + if (m_pattern_.m_hasPrefixAccents_) { + offset = doNextCanonicalPrefixMatch(offset, textoffset); + if (offset != DONE) { + m_colEIter_.setExactOffset(offset); + return true; + } + } + return false; + } + + if (!m_pattern_.m_hasSuffixAccents_) { + return false; + } + + StringBuilder accents = new StringBuilder(); + // offset to the last base character in substring to search + int baseoffset = getPreviousBaseOffset(targetText, textoffset); + // normalizing the offensive string + String accentstr = getString(targetText, baseoffset, + textoffset - baseoffset); + if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0) + == Normalizer.NO) { + accentstr = Normalizer.decompose(accentstr, false); + } + accents.append(accentstr); + // status checked in loop below + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int size = getUnblockedAccentIndex(accents, accentsindex); + + // 2 power n - 1 plus the full set of accents + int count = (2 << (size - 1)) - 1; + while (count > 0) { + m_canonicalSuffixAccents_.delete(0, + m_canonicalSuffixAccents_.length()); + // copy the base characters + for (int k = 0; k < accentsindex[0]; k ++) { + m_canonicalSuffixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= size - 1; i ++) { + int mask = 1 << (size - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < 
accentsindex[i + 1]; + j ++) { + m_canonicalSuffixAccents_.append(accents.charAt(j)); + } + } + } + offset = doNextCanonicalSuffixMatch(baseoffset); + if (offset != DONE) { + return true; // match found + } + count --; + } + return false; + } + + /** + * Gets the previous base character offset depending on the string search + * pattern data + * @param strsrch string search data + * @param textoffset current offset, current character + * @return the offset of the next character after this base character or + * itself if it is a composed character with accents + */ + private final int getPreviousBaseOffset(int textoffset) + { + if (m_pattern_.m_hasPrefixAccents_ && textoffset > m_textBeginOffset_) { + int offset = textoffset; + if ((getFCD(targetText, offset) >> SECOND_LAST_BYTE_SHIFT_) != 0) { + return getPreviousBaseOffset(targetText, textoffset); + } + } + return textoffset; + } + + /** + * Checks match for contraction. + * If the match ends with a partial contraction we fail. + * If the match starts too far off (because of backwards iteration) we try + * to chip off the extra characters. + * Uses the temporary util buffer for return values of the modified start + * and end. + * @param start offset of potential match, to be modified if necessary + * @param end offset of potential match, to be modified if necessary + * @return true if match passes the contraction test, false otherwise. + */ + private boolean checkNextCanonicalContractionMatch(int start, int end) + { + // This part checks if either ends of the match contains potential + // contraction. 
If so we'll have to iterate through them + char schar = 0; + char echar = 0; + if (end < m_textLimitOffset_) { + targetText.setIndex(end); + echar = targetText.current(); + } + if (start < m_textLimitOffset_) { + targetText.setIndex(start + 1); + schar = targetText.current(); + } + if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) { + int expansion = m_colEIter_.m_CEBufferOffset_; + boolean hasExpansion = expansion > 0; + m_colEIter_.setExactOffset(start); + int temp = start; + while (expansion > 0) { + // getting rid of the redundant ce, caused by setOffset. + // since backward contraction/expansion may have extra ces if + // we are in the normalization buffer, hasAccentsBeforeMatch + // would have taken care of it. + // E.g. the character \u01FA will have an expansion of 3, but + // if we are only looking for acute and ring \u030A and \u0301, + // we'll have to skip the first ce in the expansion buffer. + m_colEIter_.next(); + if (m_colEIter_.getOffset() != temp) { + start = temp; + temp = m_colEIter_.getOffset(); + } + expansion --; + } + + int count = 0; + while (count < m_pattern_.m_CELength_) { + int ce = getCE(m_colEIter_.next()); + // status checked below, note that if status is a failure + // ucol_next returns UCOL_NULLORDER + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (hasExpansion && count == 0 + && m_colEIter_.getOffset() != temp) { + start = temp; + temp = m_colEIter_.getOffset(); + } + + if (count == 0 && ce != m_pattern_.m_CE_[0]) { + // accents may have extra starting ces, this occurs when a + // pure accent pattern is matched without rearrangement + // text \u0325\u0300 and looking for \u0300 + int expected = m_pattern_.m_CE_[0]; + if ((getFCD(targetText, start) & LAST_BYTE_MASK_) != 0) { + ce = getCE(m_colEIter_.next()); + while (ce != expected + && ce != CollationElementIterator.NULLORDER + && m_colEIter_.getOffset() <= end) { + ce = getCE(m_colEIter_.next()); + } + } + } + if (ce != 
m_pattern_.m_CE_[count]) { + end ++; + end = getNextBaseOffset(end); + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return false; + } + count ++; + } + } + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return true; + } + + /** + * Checks and sets the match information if found. + * Checks + *
      + *
    • the potential match does not repeat the previous match + *
    • boundaries are correct + *
    • potential match does not end in the middle of a contraction + *
    • identical matches + *
    + * Otherwise the offset will be shifted to the next character. + * The result m_matchIndex_ and m_matchLength_ will be set to the truncated + * more fitting result value. + * Uses the temporary utility buffer for storing the modified textoffset. + * @param textoffset offset in the collation element text. + * @return true if the match is valid, false otherwise + */ + private boolean checkNextCanonicalMatch(int textoffset) + { + // to ensure that the start and ends are not composite characters + // if we have a canonical accent match + if ((m_pattern_.m_hasSuffixAccents_ + && m_canonicalSuffixAccents_.length() != 0) || + (m_pattern_.m_hasPrefixAccents_ + && m_canonicalPrefixAccents_.length() != 0)) { + m_matchedIndex_ = getPreviousBaseOffset(m_colEIter_.getOffset()); + matchLength = textoffset - m_matchedIndex_; + return true; + } + + int start = m_colEIter_.getOffset(); + if (!checkNextCanonicalContractionMatch(start, textoffset)) { + // return the modified textoffset + m_utilBuffer_[0] = m_utilBuffer_[1]; + return false; + } + start = m_utilBuffer_[0]; + textoffset = m_utilBuffer_[1]; + start = getPreviousBaseOffset(start); + // this totally matches, however we need to check if it is repeating + if (checkRepeatedMatch(start, textoffset) + || !isBreakUnit(start, textoffset) + || !checkIdentical(start, textoffset)) { + textoffset ++; + textoffset = getNextBaseOffset(targetText, textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + + m_matchedIndex_ = start; + matchLength = textoffset - start; + return true; + } + + /** + * Shifting the collation element iterator position forward to prepare for + * a preceding match. If the first character is a unsafe character, we'll + * only shift by 1 to capture contractions, normalization etc. + * @param textoffset start text position to do search + * @param ce the text ce which failed the match. 
* @param patternceindex index of the ce within the pattern ce buffer which
 *        failed the match
 * @return final offset
 */
private int reverseShift(int textoffset, int ce, int patternceindex)
{
    if (isOverlapping()) {
        // overlapping matches: step back a single position, except at the
        // text limit where the pattern default shift size is used
        if (textoffset != m_textLimitOffset_) {
            textoffset --;
        }
        else {
            textoffset -= m_pattern_.m_defaultShiftSize_;
        }
    }
    else {
        if (ce != CollationElementIterator.NULLORDER) {
            int shift = m_pattern_.m_backShift_[hash(ce)];

            // this is to adjust for characters in the middle of the substring
            // for matching that failed.
            int adjust = patternceindex;
            if (adjust > 1 && shift > adjust) {
                shift -= adjust - 1;
            }
            textoffset -= shift;
        }
        else {
            textoffset -= m_pattern_.m_defaultShiftSize_;
        }
    }

    textoffset = getPreviousBaseOffset(textoffset);
    return textoffset;
}

/**
 * Checks match for contraction.
 * If the match starts with a partial contraction we fail.
 * Uses the temporary utility buffer to return the modified start and end.
 * @param start offset of potential match, to be modified if necessary
 * @param end offset of potential match, to be modified if necessary
 * @return true if match passes the contraction test, false otherwise.
 *         In both cases m_utilBuffer_[0] and m_utilBuffer_[1] hold the
 *         possibly modified start and end.
 */
private boolean checkPreviousExactContractionMatch(int start, int end)
{
    // This part checks if either ends of the match contains potential
    // contraction. If so we'll have to iterate through them
    char echar = 0;
    if (end < m_textLimitOffset_) {
        targetText.setIndex(end);
        echar = targetText.current();
    }
    char schar = 0;
    if (start + 1 < m_textLimitOffset_) {
        targetText.setIndex(start + 1);
        schar = targetText.current();
    }
    if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
        // expansion suffix, what's left to iterate
        int expansion = m_colEIter_.m_CEBufferSize_
                        - m_colEIter_.m_CEBufferOffset_;
        boolean hasExpansion = expansion > 0;
        m_colEIter_.setExactOffset(end);
        int temp = end;
        while (expansion > 0) {
            // getting rid of the redundant ce
            // since forward contraction/expansion may have extra ces
            // if we are in the normalization buffer, hasAccentsBeforeMatch
            // would have taken care of it.
            // E.g. the character \u01FA will have an expansion of 3, but if
            // we are only looking for A ring A\u030A, we'll have to skip the
            // last ce in the expansion buffer
            m_colEIter_.previous();
            if (m_colEIter_.getOffset() != temp) {
                end = temp;
                temp = m_colEIter_.getOffset();
            }
            expansion --;
        }

        // re-verify the pattern ces from last to first while iterating
        // backwards from the adjusted end
        int count = m_pattern_.m_CELength_;
        while (count > 0) {
            int ce = getCE(m_colEIter_.previous());
            if (ce == CollationElementIterator.IGNORABLE) {
                continue;
            }
            // NOTE(review): count is always greater than 0 inside this loop,
            // so the branch below can never run. The forward variants test
            // count == 0 on their first pattern ce while counting upwards -
            // confirm which condition was intended here.
            if (hasExpansion && count == 0
                && m_colEIter_.getOffset() != temp) {
                end = temp;
                temp = m_colEIter_.getOffset();
            }
            if (ce != m_pattern_.m_CE_[count - 1]) {
                // mismatch: report the previous base offset before start so
                // the caller can continue searching from there
                start --;
                start = getPreviousBaseOffset(targetText, start);
                m_utilBuffer_[0] = start;
                m_utilBuffer_[1] = end;
                return false;
            }
            count --;
        }
    }
    m_utilBuffer_[0] = start;
    m_utilBuffer_[1] = end;
    return true;
}

/**
 * Checks and sets the match information if found.
 * Checks
 *
      + *
    • the current match does not repeat the last match + *
    • boundaries are correct + *
    • exact matches have no extra accents + *
    • identical matches + *
    + * Otherwise the offset will be shifted to the preceding character. + * Uses the temporary utility buffer to store the modified textoffset. + * @param textoffset offset in the collation element text. the returned value + * will be the truncated start offset of the match or the new start + * search offset. + * @return true if the match is valid, false otherwise + */ + private final boolean checkPreviousExactMatch(int textoffset) + { + // to ensure that the start and ends are not composite characters + int end = m_colEIter_.getOffset(); + if (!checkPreviousExactContractionMatch(textoffset, end)) { + return false; + } + textoffset = m_utilBuffer_[0]; + end = m_utilBuffer_[1]; + + // this totally matches, however we need to check if it is repeating + // the old match + if (checkRepeatedMatch(textoffset, end) + || !isBreakUnit(textoffset, end) + || hasAccentsBeforeMatch(textoffset, end) + || !checkIdentical(textoffset, end) + || hasAccentsAfterMatch(textoffset, end)) { + textoffset --; + textoffset = getPreviousBaseOffset(targetText, textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + + if (m_collator_.getStrength() == Collator.PRIMARY) { + end = checkBreakBoundary(end); + } + + m_matchedIndex_ = textoffset; + matchLength = end - textoffset; + return true; + } + + /** + * Rearranges the end accents to try matching. + * Suffix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. + * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings + * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", + * "\u0301\u0325". + * step 2: check if any of the generated substrings matches the pattern. 
+ * @param start offset of the first base character + * @param end start of the last accent set + * @return DONE if a match is not found, otherwise return the ending + * offset of the match. Note this start includes all following + * accents. + */ + private int doPreviousCanonicalSuffixMatch(int start, int end) + { + targetText.setIndex(end); + if (UTF16.isTrailSurrogate(targetText.previous()) + && targetText.getIndex() > m_textBeginOffset_) { + if (!UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) { + // die... failed at a base character + return DONE; + } + end = getNextBaseOffset(targetText, end); + + StringBuilder accents = new StringBuilder(); + int offset = getPreviousBaseOffset(targetText, end); + // normalizing the offensive string + String accentstr = getString(targetText, offset, end - offset); + if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0) + == Normalizer.NO) { + accentstr = Normalizer.decompose(accentstr, false); + } + accents.append(accentstr); + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int accentsize = getUnblockedAccentIndex(accents, accentsindex); + int count = (2 << (accentsize - 1)) - 1; + while (count > 0) { + m_canonicalSuffixAccents_.delete(0, + m_canonicalSuffixAccents_.length()); + // copy the base characters + for (int k = 0; k < accentsindex[0]; k ++) { + m_canonicalSuffixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= accentsize - 1; i ++) { + int mask = 1 << (accentsize - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < accentsindex[i + 1]; + j ++) { + m_canonicalSuffixAccents_.append(accents.charAt(j)); + } + } + } + StringBuilder match = merge(m_canonicalPrefixAccents_, targetText, + start, offset, + m_canonicalSuffixAccents_); + // run the collator iterator through this match + // if status 
is a failure ucol_setText does nothing + m_utilColEIter_.setText(match.toString()); + if (checkCollationMatch(m_utilColEIter_)) { + return end; + } + count --; + } + return DONE; + } + + /** + * Take the rearranged start accents and tries matching. If match failed at + * a seperate following set of accents (seperated from the rearranged on by + * at least a base character) then we rearrange the preceding accents and + * tries matching again. + * We allow skipping of the ends of the accent set if the ces do not match. + * However if the failure is found before the accent set, it fails. + * Internal method, status assumed to be success, caller has to check + * status before calling this method. + * @param textoffset of the ends of the rearranged accent + * @return DONE if a match is not found, otherwise return the ending offset + * of the match. Note this start includes all following accents. + */ + private int doPreviousCanonicalPrefixMatch(int textoffset) + { + // int safelength = 0; + StringBuilder safetext; + int safeoffset = textoffset; + + if (textoffset > m_textBeginOffset_ + && m_collator_.isUnsafe(m_canonicalPrefixAccents_.charAt( + m_canonicalPrefixAccents_.length() - 1))) { + safeoffset = getNextSafeOffset(textoffset, m_textLimitOffset_); + //safelength = safeoffset - textoffset; + safetext = merge(m_canonicalPrefixAccents_, targetText, textoffset, + safeoffset, null); + } + else { + safetext = m_canonicalPrefixAccents_; + } + + // if status is a failure, ucol_setText does nothing + CollationElementIterator coleiter = m_utilColEIter_; + coleiter.setText(safetext.toString()); + // status checked in loop below + + int ceindex = 0; + boolean isSafe = true; // safe zone indication flag for position + int prefixlength = m_canonicalPrefixAccents_.length(); + + while (ceindex < m_pattern_.m_CELength_) { + int textce = coleiter.next(); + if (textce == CollationElementIterator.NULLORDER) { + // check if we have passed the safe buffer + if (coleiter == m_colEIter_) 
{ + return DONE; + } + if (safetext != m_canonicalPrefixAccents_) { + safetext.delete(0, safetext.length()); + } + coleiter = m_colEIter_; + coleiter.setExactOffset(safeoffset); + // status checked at the start of the loop + isSafe = false; + continue; + } + textce = getCE(textce); + if (textce != CollationElementIterator.IGNORABLE + && textce != m_pattern_.m_CE_[ceindex]) { + // do the beginning stuff + int failedoffset = coleiter.getOffset(); + if (isSafe && failedoffset <= prefixlength) { + // alas... no hope. failed at rearranged accent set + return DONE; + } + else { + if (isSafe) { + failedoffset = safeoffset - failedoffset; + if (safetext != m_canonicalPrefixAccents_) { + safetext.delete(0, safetext.length()); + } + } + + // try rearranging the end accents + int result = doPreviousCanonicalSuffixMatch(textoffset, + failedoffset); + if (result != DONE) { + // if status is a failure, ucol_setOffset does nothing + m_colEIter_.setExactOffset(result); + } + return result; + } + } + if (textce == m_pattern_.m_CE_[ceindex]) { + ceindex ++; + } + } + // set offset here + if (isSafe) { + int result = coleiter.getOffset(); + // sets the text iterator here with the correct expansion and offset + int leftoverces = coleiter.m_CEBufferSize_ + - coleiter.m_CEBufferOffset_; + if (result <= prefixlength) { + result = textoffset; + } + else { + result = textoffset + (safeoffset - result); + } + m_colEIter_.setExactOffset(result); + m_colEIter_.m_CEBufferOffset_ = m_colEIter_.m_CEBufferSize_ + - leftoverces; + return result; + } + + return coleiter.getOffset(); + } + + /** + * Trying out the substring and sees if it can be a canonical match. + * This will try normalizing the starting accents and arranging them into + * canonical equivalents and check their corresponding ces with the pattern + * ce. + * Prefix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. 
+ * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings + * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", + * "\u0301\u0325". + * step 2: check if any of the generated substrings matches the pattern. + * @param textoffset start offset in the collation element text that starts + * with the accents to be rearranged + * @return true if the match is valid, false otherwise + */ + private boolean doPreviousCanonicalMatch(int textoffset) + { + int offset = m_colEIter_.getOffset(); + if ((getFCD(targetText, textoffset) >> SECOND_LAST_BYTE_SHIFT_) == 0) { + if (m_pattern_.m_hasSuffixAccents_) { + offset = doPreviousCanonicalSuffixMatch(textoffset, offset); + if (offset != DONE) { + m_colEIter_.setExactOffset(offset); + return true; + } + } + return false; + } + + if (!m_pattern_.m_hasPrefixAccents_) { + return false; + } + + StringBuilder accents = new StringBuilder(); + // offset to the last base character in substring to search + int baseoffset = getNextBaseOffset(targetText, textoffset); + // normalizing the offensive string + String textstr = getString(targetText, textoffset, + baseoffset - textoffset); + if (Normalizer.quickCheck(textstr, Normalizer.NFD,0) + == Normalizer.NO) { + textstr = Normalizer.decompose(textstr, false); + } + accents.append(textstr); + // status checked in loop + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int size = getUnblockedAccentIndex(accents, accentsindex); + + // 2 power n - 1 plus the full set of accents + int count = (2 << (size - 1)) - 1; + while (count > 0) { + m_canonicalPrefixAccents_.delete(0, + m_canonicalPrefixAccents_.length()); + // copy the base characters + for (int k = 0; k < accentsindex[0]; k ++) { + m_canonicalPrefixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= size - 1; i ++) { + int mask = 1 << (size 
- i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < accentsindex[i + 1]; + j ++) { + m_canonicalPrefixAccents_.append(accents.charAt(j)); + } + } + } + offset = doPreviousCanonicalPrefixMatch(baseoffset); + if (offset != DONE) { + return true; // match found + } + count --; + } + return false; + } + + /** + * Checks match for contraction. + * If the match starts with a partial contraction we fail. + * Uses the temporary utility buffer to return the modified start and end. + * @param start offset of potential match, to be modified if necessary + * @param end offset of potential match, to be modified if necessary + * @return true if match passes the contraction test, false otherwise. + */ + private boolean checkPreviousCanonicalContractionMatch(int start, int end) + { + int temp = end; + // This part checks if either ends of the match contains potential + // contraction. If so we'll have to iterate through them + char echar = 0; + char schar = 0; + if (end < m_textLimitOffset_) { + targetText.setIndex(end); + echar = targetText.current(); + } + if (start + 1 < m_textLimitOffset_) { + targetText.setIndex(start + 1); + schar = targetText.current(); + } + if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) { + int expansion = m_colEIter_.m_CEBufferSize_ + - m_colEIter_.m_CEBufferOffset_; + boolean hasExpansion = expansion > 0; + m_colEIter_.setExactOffset(end); + while (expansion > 0) { + // getting rid of the redundant ce + // since forward contraction/expansion may have extra ces + // if we are in the normalization buffer, hasAccentsBeforeMatch + // would have taken care of it. + // E.g. 
the character \u01FA will have an expansion of 3, but + // if we are only looking for A ring A\u030A, we'll have to + // skip the last ce in the expansion buffer + m_colEIter_.previous(); + if (m_colEIter_.getOffset() != temp) { + end = temp; + temp = m_colEIter_.getOffset(); + } + expansion --; + } + + int count = m_pattern_.m_CELength_; + while (count > 0) { + int ce = getCE(m_colEIter_.previous()); + // status checked below, note that if status is a failure + // previous() returns NULLORDER + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (hasExpansion && count == 0 + && m_colEIter_.getOffset() != temp) { + end = temp; + temp = m_colEIter_.getOffset(); + } + if (count == m_pattern_.m_CELength_ + && ce != m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1]) { + // accents may have extra starting ces, this occurs when a + // pure accent pattern is matched without rearrangement + int expected = m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1]; + targetText.setIndex(end); + if (UTF16.isTrailSurrogate(targetText.previous())) { + if (targetText.getIndex() > m_textBeginOffset_ && + !UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + end = targetText.getIndex(); + if ((getFCD(targetText, end) & LAST_BYTE_MASK_) != 0) { + ce = getCE(m_colEIter_.previous()); + while (ce != expected + && ce != CollationElementIterator.NULLORDER + && m_colEIter_.getOffset() <= start) { + ce = getCE(m_colEIter_.previous()); + } + } + } + if (ce != m_pattern_.m_CE_[count - 1]) { + start --; + start = getPreviousBaseOffset(start); + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return false; + } + count --; + } + } + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return true; + } + + /** + * Checks and sets the match information if found. + * Checks + *
      + *
    • the potential match does not repeat the previous match + *
    • boundaries are correct + *
    • potential match does not end in the middle of a contraction + *
    • identical matches + *
    + * Otherwise the offset will be shifted to the next character. + * Uses the temporary utility buffer for storing the modified textoffset. + * @param textoffset offset in the collation element text. the returned + * value will be the truncated start offset of the match or the + * new start search offset. + * @return true if the match is valid, false otherwise + */ + private boolean checkPreviousCanonicalMatch(int textoffset) + { + // to ensure that the start and ends are not composite characters + // if we have a canonical accent match + if (m_pattern_.m_hasSuffixAccents_ + && m_canonicalSuffixAccents_.length() != 0 + || m_pattern_.m_hasPrefixAccents_ + && m_canonicalPrefixAccents_.length() != 0) { + m_matchedIndex_ = textoffset; + matchLength = getNextBaseOffset(m_colEIter_.getOffset()) + - textoffset; + return true; + } + + int end = m_colEIter_.getOffset(); + if (!checkPreviousCanonicalContractionMatch(textoffset, end)) { + // storing the modified textoffset + return false; + } + textoffset = m_utilBuffer_[0]; + end = m_utilBuffer_[1]; + end = getNextBaseOffset(end); + // this totally matches, however we need to check if it is repeating + if (checkRepeatedMatch(textoffset, end) + || !isBreakUnit(textoffset, end) + || !checkIdentical(textoffset, end)) { + textoffset --; + textoffset = getPreviousBaseOffset(textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + + m_matchedIndex_ = textoffset; + matchLength = end - textoffset; + return true; + } + + /** + * Method that does the next exact match + * @param start the offset to start shifting from and performing the + * next exact match + */ + private void handleNextExact(int start) + { + int textoffset = shiftForward(start, + CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + int targetce = CollationElementIterator.IGNORABLE; + while (textoffset <= m_textLimitOffset_) { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = m_pattern_.m_CELength_ - 1; + boolean found = false; 
+ int lastce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the last pattern ce match, imagine composite + // characters. for example: search for pattern A in text \u00C0 + // we'll have to skip \u0300 the grave first before we get to A + targetce = m_colEIter_.previous(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE && + m_colEIter_.isInBuffer()) { + // this is for the text \u0315\u0300 that requires + // normalization and pattern \u0300, where \u0315 is ignorable + continue; + } + if (lastce == CollationElementIterator.NULLORDER + || lastce == CollationElementIterator.IGNORABLE) { + lastce = targetce; + } + if (targetce == m_pattern_.m_CE_[patternceindex]) { + // the first ce can be a contraction + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ <= 0) { + found = false; + break; + } + } + + while (found && patternceindex > 0) { + lastce = targetce; + targetce = m_colEIter_.previous(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + patternceindex --; + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + } + + targetce = lastce; + + if (!found) { + textoffset = shiftForward(textoffset, lastce, patternceindex); + // status checked at loop. 
+ patternceindex = m_pattern_.m_CELength_; + continue; + } + + if (checkNextExactMatch(textoffset)) { + // status checked in ucol_setOffset + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Method that does the next canonical match + * @param start the offset to start shifting from and performing the + * next canonical match + */ + private void handleNextCanonical(int start) + { + boolean hasPatternAccents = + m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_; + + // shifting it check for setting offset + // if setOffset is called previously or there was no previous match, we + // leave the offset as it is. + int textoffset = shiftForward(start, CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length()); + m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length()); + int targetce = CollationElementIterator.IGNORABLE; + + while (textoffset <= m_textLimitOffset_) + { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = m_pattern_.m_CELength_ - 1; + boolean found = false; + int lastce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the last pattern ce match, imagine composite characters + // for example: search for pattern A in text \u00C0 + // we'll have to skip \u0300 the grave first before we get to A + targetce = m_colEIter_.previous(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (lastce == CollationElementIterator.NULLORDER + || lastce == CollationElementIterator.IGNORABLE) { + lastce = targetce; + } + if (targetce == m_pattern_.m_CE_[patternceindex]) { + // the first ce can be a contraction + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ <= 0) { + found = false; + break; + } + } + + while (found && patternceindex > 0) { + targetce = m_colEIter_.previous(); + if (targetce == 
CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + patternceindex --; + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + } + + // initializing the rearranged accent array + if (hasPatternAccents && !found) { + found = doNextCanonicalMatch(textoffset); + } + + if (!found) { + textoffset = shiftForward(textoffset, lastce, patternceindex); + // status checked at loop + patternceindex = m_pattern_.m_CELength_; + continue; + } + + if (checkNextCanonicalMatch(textoffset)) { + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Method that does the previous exact match + * @param start the offset to start shifting from and performing the + * previous exact match + */ + private void handlePreviousExact(int start) + { + int textoffset = reverseShift(start, CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + while (textoffset >= m_textBeginOffset_) + { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = 1; + int targetce = CollationElementIterator.IGNORABLE; + boolean found = false; + int firstce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the first pattern ce match, imagine composite + // characters. 
for example: search for pattern \u0300 in text + // \u00C0, we'll have to skip A first before we get to + // \u0300 the grave accent + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (firstce == CollationElementIterator.NULLORDER + || firstce == CollationElementIterator.IGNORABLE) { + firstce = targetce; + } + if (targetce == CollationElementIterator.IGNORABLE && m_collator_.getStrength() != Collator.PRIMARY) { + continue; + } + if (targetce == m_pattern_.m_CE_[0]) { + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ == -1 + || m_colEIter_.m_CEBufferOffset_ + == m_colEIter_.m_CEBufferSize_) { + // checking for accents in composite character + found = false; + break; + } + } + + //targetce = firstce; + + while (found && patternceindex < m_pattern_.m_CELength_) { + firstce = targetce; + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + patternceindex ++; + } + + targetce = firstce; + + if (!found) { + textoffset = reverseShift(textoffset, targetce, patternceindex); + patternceindex = 0; + continue; + } + + if (checkPreviousExactMatch(textoffset)) { + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Method that does the previous canonical match + * @param start the offset to start shifting from and performing the + * previous canonical match + */ + private void handlePreviousCanonical(int start) + { + boolean hasPatternAccents = + m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_; + + // shifting it check for setting offset + // if setOffset is called previously or there was no previous match, we + // leave the offset as it is. 
+ int textoffset = reverseShift(start, CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length()); + m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length()); + + while (textoffset >= m_textBeginOffset_) + { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = 1; + int targetce = CollationElementIterator.IGNORABLE; + boolean found = false; + int firstce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the first pattern ce match, imagine composite + // characters. for example: search for pattern \u0300 in text + // \u00C0, we'll have to skip A first before we get to + // \u0300 the grave accent + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (firstce == CollationElementIterator.NULLORDER + || firstce == CollationElementIterator.IGNORABLE) { + firstce = targetce; + } + + if (targetce == m_pattern_.m_CE_[0]) { + // the first ce can be a contraction + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ == -1 + || m_colEIter_.m_CEBufferOffset_ + == m_colEIter_.m_CEBufferSize_) { + // checking for accents in composite character + found = false; + break; + } + } + + targetce = firstce; + + while (found && patternceindex < m_pattern_.m_CELength_) { + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + patternceindex ++; + } + + // initializing the rearranged accent array + if (hasPatternAccents && !found) { + found = doPreviousCanonicalMatch(textoffset); + } + + if (!found) { + textoffset = reverseShift(textoffset, targetce, patternceindex); + patternceindex = 0; + continue; + } + + if 
(checkPreviousCanonicalMatch(textoffset)) { + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Gets a substring out of a CharacterIterator + * @param text CharacterIterator + * @param start start offset + * @param length of substring + * @return substring from text starting at start and length length + */ + private static final String getString(CharacterIterator text, int start, + int length) + { + StringBuilder result = new StringBuilder(length); + int offset = text.getIndex(); + text.setIndex(start); + for (int i = 0; i < length; i ++) { + result.append(text.current()); + text.next(); + } + text.setIndex(offset); + return result.toString(); + } + + /** + * Getting the mask for collation strength + * @param strength collation strength + * @return collation element mask + */ + private static final int getMask(int strength) + { + switch (strength) + { + case Collator.PRIMARY: + return RuleBasedCollator.CE_PRIMARY_MASK_; + case Collator.SECONDARY: + return RuleBasedCollator.CE_SECONDARY_MASK_ + | RuleBasedCollator.CE_PRIMARY_MASK_; + default: + return RuleBasedCollator.CE_TERTIARY_MASK_ + | RuleBasedCollator.CE_SECONDARY_MASK_ + | RuleBasedCollator.CE_PRIMARY_MASK_; + } + } + + /** + * Sets match not found + */ + private void setMatchNotFound() + { + // this method resets the match result regardless of the error status. + m_matchedIndex_ = DONE; + setMatchLength(0); + } + + /** + * Check the boundaries of the match. 
+ */ + private int checkBreakBoundary(int end) { + if (!m_charBreakIter_.isBoundary(end)) { + end = m_charBreakIter_.following(end); + } + return end; + } +} diff --git a/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java b/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java new file mode 100644 index 00000000000..818c7e93ae4 --- /dev/null +++ b/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java @@ -0,0 +1,1514 @@ +/* + ******************************************************************************* + * Copyright (C) 2004-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* +*/ +package com.ibm.icu.util; + +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.MissingResourceException; +import java.util.ResourceBundle; + +import com.ibm.icu.impl.Utility; +import com.ibm.icu.impl.ZoneMeta; +import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.DateFormat; +import com.ibm.icu.text.NumberFormat; +import com.ibm.icu.text.SimpleDateFormat; + +/** + * This convenience class provides a mechanism for bundling together different + * globalization preferences. It includes: + *
      + *
    • A list of locales/languages in preference order
    • + *
    • A territory
    • + *
    • A currency
    • + *
    • A timezone
    • + *
    • A calendar
    • + *
    • A collator (for language-sensitive sorting, searching, and matching).
    • + *
    • Explicit overrides for date/time formats, etc.
    • + *
    + * The class will heuristically compute implicit values for the above + * based on available data if explicit values are not supplied. These implicit + * values can be presented to users for confirmation, or replacement if the + * values are incorrect. + *

    + * To reset any explicit field so that it will get heuristic values, pass in + * null. For example, myPreferences.setLocale(null); + *

    + * All of the heuristics can be customized by subclasses, by overriding + * getTerritory(), guessCollator(), etc. + *

    + * The class also supplies display names for languages, scripts, territories, + * currencies, timezones, etc. These are computed according to the + * locale/language preference list. Thus, if the preference is Breton; French; + * English, then the display name for a language will be returned in Breton if + * available, otherwise in French if available, otherwise in English. + *

    + * The codes used to reference territory, currency, etc. are as defined elsewhere + * in ICU, and are taken from CLDR (which reflects RFC 3066bis usage, ISO 4217, + * and the TZ Timezone database identifiers). + *

    + * This is at a prototype stage, and has not incorporated all the design + * changes that we would like yet; further feedback is welcome.

    + * Note: + *
      + *
    • to get the display name for the first day of the week, use the calendar + + * display names.
    • + *
    • to get the work days, ask the calendar (when that is available).
    • + *
    • to get papersize / measurement system/bidi-orientation, ask the locale + * (when that is available there)
    • + *
    • to get the field order in a date, and whether a time is 24hour or not, + * ask the DateFormat (when that is available there)
    • + *
    • it will support HOST locale when it becomes available (it is a special + * locale that will ask the services to use the host platform's values).
    • + *
    + * + * @draft ICU 3.6 (retainAll) + * @provisional This API might change or be removed in a future release. + */ + +//TODO: +// - Add Holidays +// - Add convenience to get/take Locale as well as ULocale. +// - Add Lenient datetime formatting when that is available. +// - Should this be serializable? +// - Other utilities? + +public class GlobalizationPreferences implements Freezable { + + /** + * Default constructor + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences(){} + /** + * Number Format type + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public static final int + NF_NUMBER = 0, // NumberFormat.NUMBERSTYLE + NF_CURRENCY = 1, // NumberFormat.CURRENCYSTYLE + NF_PERCENT = 2, // NumberFormat.PERCENTSTYLE + NF_SCIENTIFIC = 3, // NumberFormat.SCIENTIFICSTYLE + NF_INTEGER = 4; // NumberFormat.INTEGERSTYLE + + private static final int NF_LIMIT = NF_INTEGER + 1; + + /** + * Date Format type + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public static final int + DF_FULL = DateFormat.FULL, // 0 + DF_LONG = DateFormat.LONG, // 1 + DF_MEDIUM = DateFormat.MEDIUM, // 2 + DF_SHORT = DateFormat.SHORT, // 3 + DF_NONE = 4; + + private static final int DF_LIMIT = DF_NONE + 1; + + /** + * For selecting a choice of display names + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public static final int + ID_LOCALE = 0, + ID_LANGUAGE = 1, + ID_SCRIPT = 2, + ID_TERRITORY = 3, + ID_VARIANT = 4, + ID_KEYWORD = 5, + ID_KEYWORD_VALUE = 6, + ID_CURRENCY = 7, + ID_CURRENCY_SYMBOL = 8, + ID_TIMEZONE = 9; + + //private static final int ID_LIMIT = ID_TIMEZONE + 1; + + /** + * Break iterator type + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final int + BI_CHARACTER = BreakIterator.KIND_CHARACTER, // 0 + BI_WORD = BreakIterator.KIND_WORD, // 1 + BI_LINE = BreakIterator.KIND_LINE, // 2 + BI_SENTENCE = BreakIterator.KIND_SENTENCE, // 3 + BI_TITLE = BreakIterator.KIND_TITLE; // 4 + + private static final int BI_LIMIT = BI_TITLE + 1; + + /** + * Sets the language/locale priority list. If other information is + * not (yet) available, this is used to to produce a default value + * for the appropriate territory, currency, timezone, etc. The + * user should be given the opportunity to correct those defaults + * in case they are incorrect. + * + * @param inputLocales list of locales in priority order, eg {"be", "fr"} + * for Breton first, then French if that fails. + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setLocales(List inputLocales) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + locales = processLocales(inputLocales); + return this; + } + + /** + * Get a copy of the language/locale priority list + * + * @return a copy of the language/locale priority list. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public List getLocales() { + List result; + if (locales == null) { + result = guessLocales(); + } else { + result = new ArrayList(); + result.addAll(locales); + } + return result; + } + + /** + * Convenience function for getting the locales in priority order + * @param index The index (0..n) of the desired item. + * @return desired item. null if index is out of range + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ + public ULocale getLocale(int index) { + List lcls = locales; + if (lcls == null) { + lcls = guessLocales(); + } + if (index >= 0 && index < lcls.size()) { + return lcls.get(index); + } + return null; + } + + /** + * Convenience routine for setting the language/locale priority + * list from an array. + * + * @see #setLocales(List locales) + * @param uLocales list of locales in an array + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setLocales(ULocale[] uLocales) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + return setLocales(Arrays.asList(uLocales)); + } + + /** + * Convenience routine for setting the language/locale priority + * list from a single locale/language. + * + * @see #setLocales(List locales) + * @param uLocale single locale + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setLocale(ULocale uLocale) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + return setLocales(new ULocale[]{uLocale}); + } + + /** + * Convenience routine for setting the locale priority list from + * an Accept-Language string. + * @see #setLocales(List locales) + * @param acceptLanguageString Accept-Language list, as defined by + * Section 14.4 of the RFC 2616 (HTTP 1.1) + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ + public GlobalizationPreferences setLocales(String acceptLanguageString) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + ULocale[] acceptLocales = null; + try { + acceptLocales = ULocale.parseAcceptLanguage(acceptLanguageString, true); + } catch (ParseException pe) { + //TODO: revisit after 3.8 + throw new IllegalArgumentException("Invalid Accept-Language string"); + } + return setLocales(acceptLocales); + } + + /** + * Convenience function to get a ResourceBundle instance using + * the specified base name based on the language/locale priority list + * stored in this object. + * + * @param baseName the base name of the resource bundle, a fully qualified + * class name + * @return a resource bundle for the given base name and locale based on the + * language/locale priority list stored in this object + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public ResourceBundle getResourceBundle(String baseName) { + return getResourceBundle(baseName, null); + } + + /** + * Convenience function to get a ResourceBundle instance using + * the specified base name and class loader based on the language/locale + * priority list stored in this object. + * + * @param baseName the base name of the resource bundle, a fully qualified + * class name + * @param loader the class object from which to load the resource bundle + * @return a resource bundle for the given base name and locale based on the + * language/locale priority list stored in this object + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ + public ResourceBundle getResourceBundle(String baseName, ClassLoader loader) { + UResourceBundle urb = null; + UResourceBundle candidate = null; + String actualLocaleName = null; + List fallbacks = getLocales(); + for (int i = 0; i < fallbacks.size(); i++) { + String localeName = (fallbacks.get(i)).toString(); + if (actualLocaleName != null && localeName.equals(actualLocaleName)) { + // Actual locale name in the previous round may exactly matches + // with the next fallback locale + urb = candidate; + break; + } + try { + if (loader == null) { + candidate = UResourceBundle.getBundleInstance(baseName, localeName); + } + else { + candidate = UResourceBundle.getBundleInstance(baseName, localeName, loader); + } + if (candidate != null) { + actualLocaleName = candidate.getULocale().getName(); + if (actualLocaleName.equals(localeName)) { + urb = candidate; + break; + } + if (urb == null) { + // Preserve the available bundle as the last resort + urb = candidate; + } + } + } catch (MissingResourceException mre) { + actualLocaleName = null; + continue; + } + } + if (urb == null) { + throw new MissingResourceException("Can't find bundle for base name " + + baseName, baseName, ""); + } + return urb; + } + + /** + * Sets the territory, which is a valid territory according to for + * RFC 3066 (or successor). If not otherwise set, default + * currency and timezone values will be set from this. The user + * should be given the opportunity to correct those defaults in + * case they are incorrect. + * + * @param territory code + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setTerritory(String territory) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + this.territory = territory; // immutable, so don't need to clone + return this; + } + + /** + * Gets the territory setting. 
If it wasn't explicitly set, it is + * computed from the general locale setting. + * + * @return territory code, explicit or implicit. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public String getTerritory() { + if (territory == null) { + return guessTerritory(); + } + return territory; // immutable, so don't need to clone + } + + /** + * Sets the currency code. If this has not been set, uses default for territory. + * + * @param currency Valid ISO 4217 currency code. + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setCurrency(Currency currency) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + this.currency = currency; // immutable, so don't need to clone + return this; + } + + /** + * Get a copy of the currency computed according to the settings. + * + * @return currency code, explicit or implicit. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public Currency getCurrency() { + if (currency == null) { + return guessCurrency(); + } + return currency; // immutable, so don't have to clone + } + + /** + * Sets the calendar. If this has not been set, uses default for territory. + * + * @param calendar arbitrary calendar + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setCalendar(Calendar calendar) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + this.calendar = (Calendar) calendar.clone(); // clone for safety + return this; + } + + /** + * Get a copy of the calendar according to the settings. + * + * @return calendar explicit or implicit. 
+ * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public Calendar getCalendar() { + if (calendar == null) { + return guessCalendar(); + } + Calendar temp = (Calendar) calendar.clone(); // clone for safety + temp.setTimeZone(getTimeZone()); + temp.setTimeInMillis(System.currentTimeMillis()); + return temp; + } + + /** + * Sets the timezone ID. If this has not been set, uses default for territory. + * + * @param timezone a valid TZID (see UTS#35). + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setTimeZone(TimeZone timezone) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + this.timezone = (TimeZone) timezone.clone(); // clone for safety; + return this; + } + + /** + * Get the timezone. It was either explicitly set, or is + * heuristically computed from other settings. + * + * @return timezone, either implicitly or explicitly set + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public TimeZone getTimeZone() { + if (timezone == null) { + return guessTimeZone(); + } + return (TimeZone) timezone.clone(); // clone for safety + } + + /** + * Get a copy of the collator according to the settings. + * + * @return collator explicit or implicit. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public Collator getCollator() { + if (collator == null) { + return guessCollator(); + } + try { + return (Collator) collator.clone(); // clone for safety + } catch (CloneNotSupportedException e) { + throw new IllegalStateException("Error in cloning collator"); + } + } + + /** + * Explicitly set the collator for this object. + * @param collator The collator object to be passed. 
+ * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setCollator(Collator collator) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + try { + this.collator = (Collator) collator.clone(); // clone for safety + } catch (CloneNotSupportedException e) { + throw new IllegalStateException("Error in cloning collator"); + } + return this; + } + + /** + * Get a copy of the break iterator for the specified type according to the + * settings. + * + * @param type break type - BI_CHARACTER or BI_WORD, BI_LINE, BI_SENTENCE, BI_TITLE + * @return break iterator explicit or implicit + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public BreakIterator getBreakIterator(int type) { + if (type < BI_CHARACTER || type >= BI_LIMIT) { + throw new IllegalArgumentException("Illegal break iterator type"); + } + if (breakIterators == null || breakIterators[type] == null) { + return guessBreakIterator(type); + } + return (BreakIterator) breakIterators[type].clone(); // clone for safety + } + + /** + * Explicitly set the break iterator for this object. + * + * @param type break type - BI_CHARACTER or BI_WORD, BI_LINE, BI_SENTENCE, BI_TITLE + * @param iterator a break iterator + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+     */
+    public GlobalizationPreferences setBreakIterator(int type, BreakIterator iterator) {
+        if (type < BI_CHARACTER || type >= BI_LIMIT) {
+            throw new IllegalArgumentException("Illegal break iterator type");
+        }
+        if (isFrozen()) {
+            throw new UnsupportedOperationException("Attempt to modify immutable object");
+        }
+        // The per-type array is created lazily on the first explicit set.
+        if (breakIterators == null)
+            breakIterators = new BreakIterator[BI_LIMIT];
+        breakIterators[type] = (BreakIterator) iterator.clone(); // clone for safety
+        return this;
+    }
+
+    /**
+     * Get the display name for an ID: language, script, territory, currency, timezone...
+     * Uses the language priority list to do so: the locales are tried in
+     * priority order and the first name that differs from the raw id is
+     * returned. If no locale yields such a name, the last computed value
+     * (initially the id itself) is returned.
+     *
+     * @param id language code, script code, ...
+     * @param type specifies the type of the ID: ID_LANGUAGE, etc.
+     * @return the display name
+     * @throws IllegalArgumentException if type is not one of the ID_* values
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public String getDisplayName(String id, int type) {
+        String result = id;
+        for (ULocale locale : getLocales()) {
+            // Skip locales that are not registered as generally available.
+            if (!isAvailableLocale(locale, TYPE_GENERIC)) {
+                continue;
+            }
+            switch (type) {
+            case ID_LOCALE:
+                result = ULocale.getDisplayName(id, locale);
+                break;
+            case ID_LANGUAGE:
+                result = ULocale.getDisplayLanguage(id, locale);
+                break;
+            case ID_SCRIPT:
+                // A bare script code is wrapped in an "und" locale ID for the lookup.
+                result = ULocale.getDisplayScript("und-" + id, locale);
+                break;
+            case ID_TERRITORY:
+                result = ULocale.getDisplayCountry("und-" + id, locale);
+                break;
+            case ID_VARIANT:
+                // TODO fix variant parsing
+                result = ULocale.getDisplayVariant("und-QQ-" + id, locale);
+                break;
+            case ID_KEYWORD:
+                result = ULocale.getDisplayKeyword(id, locale);
+                break;
+            case ID_KEYWORD_VALUE:
+                // id is expected in "keyword=value" form.
+                String[] parts = new String[2];
+                Utility.split(id,'=',parts);
+                result = ULocale.getDisplayKeywordValue("und@"+id, parts[0], locale);
+                // TODO fix to tell when successful
+                // Getting the raw value back is treated as "no display name
+                // in this locale", so the next locale is tried.
+                if (result.equals(parts[1])) {
+                    continue;
+                }
+                break;
+            case ID_CURRENCY_SYMBOL:
+            case ID_CURRENCY:
+                Currency temp = new Currency(id);
+                result =temp.getName(locale, type==ID_CURRENCY
+                                     ? Currency.LONG_NAME
+                                     : Currency.SYMBOL_NAME, new boolean[1]);
+                // TODO: have method that doesn't take parameter. Add
+                // function to determine whether string is choice
+                // format.
+                // TODO: have method that doesn't require us
+                // to create a currency
+                break;
+            case ID_TIMEZONE:
+                // Format the current date with the generic long zone pattern
+                // to obtain the zone's display name in this locale.
+                SimpleDateFormat dtf = new SimpleDateFormat("vvvv",locale);
+                dtf.setTimeZone(TimeZone.getTimeZone(id));
+                result = dtf.format(new Date());
+                // TODO, have method that doesn't require us to create a timezone
+                // fix other hacks
+                // hack for couldn't match
+
+                // A result that is just two upper-case ASCII letters, or that
+                // contains such a pair in parentheses, is rejected below.
+                // NOTE(review): presumably this is a bare region code that
+                // signals a missing localized name - confirm.
+                boolean isBadStr = false;
+                // Matcher badTimeZone = Pattern.compile("[A-Z]{2}|.*\\s\\([A-Z]{2}\\)").matcher("");
+                // badtzstr = badTimeZone.reset(result).matches();
+                String teststr = result;
+                int sidx = result.indexOf('(');
+                int eidx = result.indexOf(')');
+                if (sidx != -1 && eidx != -1 && (eidx - sidx) == 3) {
+                    teststr = result.substring(sidx+1, eidx);
+                }
+                if (teststr.length() == 2) {
+                    isBadStr = true;
+                    for (int i = 0; i < 2; i++) {
+                        char c = teststr.charAt(i);
+                        if (c < 'A' || 'Z' < c) {
+                            isBadStr = false;
+                            break;
+                        }
+                    }
+                }
+                if (isBadStr) {
+                    continue;
+                }
+                break;
+            default:
+                throw new IllegalArgumentException("Unknown type: " + type);
+            }
+
+            // TODO need better way of seeing if we fell back to root!!
+            // This will not work at all for lots of stuff
+            if (!id.equals(result)) {
+                return result;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Set an explicit date format. Overrides the locale priority list for
+     * a particular combination of dateStyle and timeStyle. When only the
+     * date format or only the time format is being set, DF_NONE should be
+     * used for the other style.
+     *
+     * @param dateStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
+     * @param timeStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
+     * @param format The date format
+     * @return this, for chaining
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+ */ + public GlobalizationPreferences setDateFormat(int dateStyle, int timeStyle, DateFormat format) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + if (dateFormats == null) { + dateFormats = new DateFormat[DF_LIMIT][DF_LIMIT]; + } + dateFormats[dateStyle][timeStyle] = (DateFormat) format.clone(); // for safety + return this; + } + + /** + * Gets a date format according to the current settings. If there + * is an explicit (non-null) date/time format set, a copy of that + * is returned. Otherwise, the language priority list is used. + * DF_NONE should be used for the style, where only the date or + * time format individually is being gotten. + * + * @param dateStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE + * @param timeStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE + * @return a DateFormat, according to the above description + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public DateFormat getDateFormat(int dateStyle, int timeStyle) { + if (dateStyle == DF_NONE && timeStyle == DF_NONE + || dateStyle < 0 || dateStyle >= DF_LIMIT + || timeStyle < 0 || timeStyle >= DF_LIMIT) { + throw new IllegalArgumentException("Illegal date format style arguments"); + } + DateFormat result = null; + if (dateFormats != null) { + result = dateFormats[dateStyle][timeStyle]; + } + if (result != null) { + result = (DateFormat) result.clone(); // clone for safety + // Not sure overriding configuration is what we really want... + result.setTimeZone(getTimeZone()); + } else { + result = guessDateFormat(dateStyle, timeStyle); + } + return result; + } + + /** + * Gets a number format according to the current settings. If + * there is an explicit (non-null) number format set, a copy of + * that is returned. Otherwise, the language priority list is + * used. 
+ * + * @param style NF_NUMBER, NF_CURRENCY, NF_PERCENT, NF_SCIENTIFIC, NF_INTEGER + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public NumberFormat getNumberFormat(int style) { + if (style < 0 || style >= NF_LIMIT) { + throw new IllegalArgumentException("Illegal number format type"); + } + NumberFormat result = null; + if (numberFormats != null) { + result = numberFormats[style]; + } + if (result != null) { + result = (NumberFormat) result.clone(); // clone for safety (later optimize) + } else { + result = guessNumberFormat(style); + } + return result; + } + + /** + * Sets a number format explicitly. Overrides the general locale settings. + * + * @param style NF_NUMBER, NF_CURRENCY, NF_PERCENT, NF_SCIENTIFIC, NF_INTEGER + * @param format The number format + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences setNumberFormat(int style, NumberFormat format) { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + if (numberFormats == null) { + numberFormats = new NumberFormat[NF_LIMIT]; + } + numberFormats[style] = (NumberFormat) format.clone(); // for safety + return this; + } + + /** + * Restore the object to the initial state. + * + * @return this, for chaining + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences reset() { + if (isFrozen()) { + throw new UnsupportedOperationException("Attempt to modify immutable object"); + } + locales = null; + territory = null; + calendar = null; + collator = null; + breakIterators = null; + timezone = null; + currency = null; + dateFormats = null; + numberFormats = null; + implicitLocales = null; + return this; + } + + /** + * Process a language/locale priority list specified via setLocales. 
+ * The input locale list may be expanded or re-ordered to represent the prioritized + * language/locale order actually used by this object by the algorithm explained + * below. + *
    + *
    + * Step 1: Move later occurrence of more specific locale before earlier + * occurrence of less specific locale. + *
    + * Before: en, fr_FR, en_US, en_GB + *
    + * After: en_US, en_GB, en, fr_FR + *
    + *
    + * Step 2: Append a fallback locale to each locale. + *
    + * Before: en_US, en_GB, en, fr_FR + *
    + * After: en_US, en, en_GB, en, en, fr_FR, fr + *
    + *
    + * Step 3: Remove earlier occurrence of duplicated locale entries. + *
    + * Before: en_US, en, en_GB, en, en, fr_FR, fr + *
    + * After: en_US, en_GB, en, fr_FR, fr + *
    + *
    + * The final locale list is used to produce a default value for the appropriate territory, + * currency, timezone, etc. The list also represents the lookup order used in + * getResourceBundle for this object. A subclass may override this method + * to customize the algorithm used for populating the locale list. + * + * @param inputLocales The list of input locales + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected List processLocales(List inputLocales) { + List result = new ArrayList(); + /* + * Step 1: Relocate later occurrence of more specific locale + * before earlier occurrence of less specific locale. + * + * Example: + * Before - en_US, fr_FR, zh, en_US_Boston, zh_TW, zh_Hant, fr_CA + * After - en_US_Boston, en_US, fr_FR, zh_TW, zh_Hant, zh, fr_CA + */ + for (int i = 0; i < inputLocales.size(); i++) { + ULocale uloc = inputLocales.get(i); + + String language = uloc.getLanguage(); + String script = uloc.getScript(); + String country = uloc.getCountry(); + String variant = uloc.getVariant(); + + boolean bInserted = false; + for (int j = 0; j < result.size(); j++) { + // Check if this locale is more specific + // than existing locale entries already inserted + // in the destination list + ULocale u = result.get(j); + if (!u.getLanguage().equals(language)) { + continue; + } + String s = u.getScript(); + String c = u.getCountry(); + String v = u.getVariant(); + if (!s.equals(script)) { + if (s.length() == 0 && c.length() == 0 && v.length() == 0) { + result.add(j, uloc); + bInserted = true; + break; + } else if (s.length() == 0 && c.equals(country)) { + // We want to see zh_Hant_HK before zh_HK + result.add(j, uloc); + bInserted = true; + break; + } else if (script.length() == 0 && country.length() > 0 && c.length() == 0) { + // We want to see zh_HK before zh_Hant + result.add(j, uloc); + bInserted = true; + break; + } + continue; + } + if (!c.equals(country)) { + if (c.length() == 0 && v.length() == 
0) { + result.add(j, uloc); + bInserted = true; + break; + } + } + if (!v.equals(variant) && v.length() == 0) { + result.add(j, uloc); + bInserted = true; + break; + } + } + if (!bInserted) { + // Add this locale at the end of the list + result.add(uloc); + } + } + + // TODO: Locale aliases might be resolved here + // For example, zh_Hant_TW = zh_TW + + /* + * Step 2: Append fallback locales for each entry + * + * Example: + * Before - en_US_Boston, en_US, fr_FR, zh_TW, zh_Hant, zh, fr_CA + * After - en_US_Boston, en_US, en, en_US, en, fr_FR, fr, + * zh_TW, zn, zh_Hant, zh, zh, fr_CA, fr + */ + int index = 0; + while (index < result.size()) { + ULocale uloc = result.get(index); + while (true) { + uloc = uloc.getFallback(); + if (uloc.getLanguage().length() == 0) { + break; + } + index++; + result.add(index, uloc); + } + index++; + } + + /* + * Step 3: Remove earlier occurrence of duplicated locales + * + * Example: + * Before - en_US_Boston, en_US, en, en_US, en, fr_FR, fr, + * zh_TW, zn, zh_Hant, zh, zh, fr_CA, fr + * After - en_US_Boston, en_US, en, fr_FR, zh_TW, zh_Hant, + * zh, fr_CA, fr + */ + index = 0; + while (index < result.size() - 1) { + ULocale uloc = result.get(index); + boolean bRemoved = false; + for (int i = index + 1; i < result.size(); i++) { + if (uloc.equals(result.get(i))) { + // Remove earlier one + result.remove(index); + bRemoved = true; + break; + } + } + if (!bRemoved) { + index++; + } + } + return result; + } + + + /** + * This function can be overridden by subclasses to use different heuristics. + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @param dateStyle + * @param timeStyle + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ + protected DateFormat guessDateFormat(int dateStyle, int timeStyle) { + DateFormat result; + ULocale dfLocale = getAvailableLocale(TYPE_DATEFORMAT); + if (dfLocale == null) { + dfLocale = ULocale.ROOT; + } + if (timeStyle == DF_NONE) { + result = DateFormat.getDateInstance(getCalendar(), dateStyle, dfLocale); + } else if (dateStyle == DF_NONE) { + result = DateFormat.getTimeInstance(getCalendar(), timeStyle, dfLocale); + } else { + result = DateFormat.getDateTimeInstance(getCalendar(), dateStyle, timeStyle, dfLocale); + } + return result; + } + + /** + * This function can be overridden by subclasses to use different heuristics. + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @param style + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected NumberFormat guessNumberFormat(int style) { + NumberFormat result; + ULocale nfLocale = getAvailableLocale(TYPE_NUMBERFORMAT); + if (nfLocale == null) { + nfLocale = ULocale.ROOT; + } + switch (style) { + case NF_NUMBER: + result = NumberFormat.getInstance(nfLocale); + break; + case NF_SCIENTIFIC: + result = NumberFormat.getScientificInstance(nfLocale); + break; + case NF_INTEGER: + result = NumberFormat.getIntegerInstance(nfLocale); + break; + case NF_PERCENT: + result = NumberFormat.getPercentInstance(nfLocale); + break; + case NF_CURRENCY: + result = NumberFormat.getCurrencyInstance(nfLocale); + result.setCurrency(getCurrency()); + break; + default: + throw new IllegalArgumentException("Unknown number format style"); + } + return result; + } + + /** + * This function can be overridden by subclasses to use different heuristics. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected String guessTerritory() { + String result; + // pass through locales to see if there is a territory. 
+ for (ULocale locale : getLocales()) { + result = locale.getCountry(); + if (result.length() != 0) { + return result; + } + } + // if not, guess from the first language tag, or maybe from + // intersection of languages, eg nl + fr => BE + // TODO: fix using real data + // for now, just use fixed values + ULocale firstLocale = getLocale(0); + String language = firstLocale.getLanguage(); + String script = firstLocale.getScript(); + result = null; + if (script.length() != 0) { + result = language_territory_hack_map.get(language + "_" + script); + } + if (result == null) { + result = language_territory_hack_map.get(language); + } + if (result == null) { + result = "US"; // need *some* default + } + return result; + } + + /** + * This function can be overridden by subclasses to use different heuristics + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected Currency guessCurrency() { + return Currency.getInstance(new ULocale("und-" + getTerritory())); + } + + /** + * This function can be overridden by subclasses to use different heuristics + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected List guessLocales() { + if (implicitLocales == null) { + List result = new ArrayList(1); + result.add(ULocale.getDefault()); + implicitLocales = processLocales(result); + } + return implicitLocales; + } + + /** + * This function can be overridden by subclasses to use different heuristics. + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ + protected Collator guessCollator() { + ULocale collLocale = getAvailableLocale(TYPE_COLLATOR); + if (collLocale == null) { + collLocale = ULocale.ROOT; + } + return Collator.getInstance(collLocale); + } + + /** + * This function can be overridden by subclasses to use different heuristics. + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @param type + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected BreakIterator guessBreakIterator(int type) { + BreakIterator bitr = null; + ULocale brkLocale = getAvailableLocale(TYPE_BREAKITERATOR); + if (brkLocale == null) { + brkLocale = ULocale.ROOT; + } + switch (type) { + case BI_CHARACTER: + bitr = BreakIterator.getCharacterInstance(brkLocale); + break; + case BI_TITLE: + bitr = BreakIterator.getTitleInstance(brkLocale); + break; + case BI_WORD: + bitr = BreakIterator.getWordInstance(brkLocale); + break; + case BI_LINE: + bitr = BreakIterator.getLineInstance(brkLocale); + break; + case BI_SENTENCE: + bitr = BreakIterator.getSentenceInstance(brkLocale); + break; + default: + throw new IllegalArgumentException("Unknown break iterator type"); + } + return bitr; + } + + /** + * This function can be overridden by subclasses to use different heuristics. + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected TimeZone guessTimeZone() { + // TODO fix using real data + // for single-zone countries, pick that zone + // for others, pick the most populous zone + // for now, just use fixed value + // NOTE: in a few cases can do better by looking at language. 
+ // Eg haw+US should go to Pacific/Honolulu + // fr+CA should go to America/Montreal + String timezoneString = territory_tzid_hack_map.get(getTerritory()); + if (timezoneString == null) { + String[] attempt = ZoneMeta.getAvailableIDs(getTerritory()); + if (attempt.length == 0) { + timezoneString = "Etc/GMT"; // gotta do something + } else { + int i; + // this all needs to be fixed to use real data. But for now, do slightly better by skipping cruft + for (i = 0; i < attempt.length; ++i) { + if (attempt[i].indexOf("/") >= 0) break; + } + if (i > attempt.length) i = 0; + timezoneString = attempt[i]; + } + } + return TimeZone.getTimeZone(timezoneString); + } + + /** + * This function can be overridden by subclasses to use different heuristics. + * It MUST return a 'safe' value, + * one whose modification will not affect this object. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + protected Calendar guessCalendar() { + ULocale calLocale = getAvailableLocale(TYPE_CALENDAR); + if (calLocale == null) { + calLocale = ULocale.US; + } + return Calendar.getInstance(getTimeZone(), calLocale); + } + + // PRIVATES + + private List locales; + private String territory; + private Currency currency; + private TimeZone timezone; + private Calendar calendar; + private Collator collator; + private BreakIterator[] breakIterators; + private DateFormat[][] dateFormats; + private NumberFormat[] numberFormats; + private List implicitLocales; + + { + reset(); + } + + + private ULocale getAvailableLocale(int type) { + List locs = getLocales(); + ULocale result = null; + for (int i = 0; i < locs.size(); i++) { + ULocale l = locs.get(i); + if (isAvailableLocale(l, type)) { + result = l; + break; + } + } + return result; + } + + private boolean isAvailableLocale(ULocale loc, int type) { + BitSet bits = available_locales.get(loc); + if (bits != null && bits.get(type)) { + return true; + } + return false; + } + + /* + * Available locales for 
service types + */ + private static final HashMap available_locales = new HashMap(); + private static final int + TYPE_GENERIC = 0, + TYPE_CALENDAR = 1, + TYPE_DATEFORMAT= 2, + TYPE_NUMBERFORMAT = 3, + TYPE_COLLATOR = 4, + TYPE_BREAKITERATOR = 5, + TYPE_LIMIT = TYPE_BREAKITERATOR + 1; + + static { + BitSet bits; + ULocale[] allLocales = ULocale.getAvailableLocales(); + for (int i = 0; i < allLocales.length; i++) { + bits = new BitSet(TYPE_LIMIT); + available_locales.put(allLocales[i], bits); + bits.set(TYPE_GENERIC); + } + + ULocale[] calLocales = Calendar.getAvailableULocales(); + for (int i = 0; i < calLocales.length; i++) { + bits = available_locales.get(calLocales[i]); + if (bits == null) { + bits = new BitSet(TYPE_LIMIT); + available_locales.put(allLocales[i], bits); + } + bits.set(TYPE_CALENDAR); + } + + ULocale[] dateLocales = DateFormat.getAvailableULocales(); + for (int i = 0; i < dateLocales.length; i++) { + bits = available_locales.get(dateLocales[i]); + if (bits == null) { + bits = new BitSet(TYPE_LIMIT); + available_locales.put(allLocales[i], bits); + } + bits.set(TYPE_DATEFORMAT); + } + + ULocale[] numLocales = NumberFormat.getAvailableULocales(); + for (int i = 0; i < numLocales.length; i++) { + bits = available_locales.get(numLocales[i]); + if (bits == null) { + bits = new BitSet(TYPE_LIMIT); + available_locales.put(allLocales[i], bits); + } + bits.set(TYPE_NUMBERFORMAT); + } + + ULocale[] collLocales = Collator.getAvailableULocales(); + for (int i = 0; i < collLocales.length; i++) { + bits = available_locales.get(collLocales[i]); + if (bits == null) { + bits = new BitSet(TYPE_LIMIT); + available_locales.put(allLocales[i], bits); + } + bits.set(TYPE_COLLATOR); + } + + ULocale[] brkLocales = BreakIterator.getAvailableULocales(); + for (int i = 0; i < brkLocales.length; i++) { + bits = available_locales.get(brkLocales[i]); + bits.set(TYPE_BREAKITERATOR); + } + } + + /** WARNING: All of this data is temporary, until we start importing from CLDR!!! 
+     *
+     * Each row of language_territory_hack pairs a language code with a territory code;
+     * the static block below copies the rows into language_territory_hack_map.
+     */
+    private static final Map<String, String> language_territory_hack_map = new HashMap<String, String>();
+    private static final String[][] language_territory_hack = {
+        {"af", "ZA"},
+        {"am", "ET"},
+        {"ar", "SA"},
+        {"as", "IN"},
+        {"ay", "PE"},
+        {"az", "AZ"},
+        {"bal", "PK"},
+        {"be", "BY"},
+        {"bg", "BG"},
+        {"bn", "IN"},
+        {"bs", "BA"},
+        {"ca", "ES"},
+        {"ch", "MP"},
+        {"cpe", "SL"},
+        {"cs", "CZ"},
+        {"cy", "GB"},
+        {"da", "DK"},
+        {"de", "DE"},
+        {"dv", "MV"},
+        {"dz", "BT"},
+        {"el", "GR"},
+        {"en", "US"},
+        {"es", "ES"},
+        {"et", "EE"},
+        {"eu", "ES"},
+        {"fa", "IR"},
+        {"fi", "FI"},
+        {"fil", "PH"},
+        {"fj", "FJ"},
+        {"fo", "FO"},
+        {"fr", "FR"},
+        {"ga", "IE"},
+        {"gd", "GB"},
+        {"gl", "ES"},
+        {"gn", "PY"},
+        {"gu", "IN"},
+        {"gv", "GB"},
+        {"ha", "NG"},
+        {"he", "IL"},
+        {"hi", "IN"},
+        {"ho", "PG"},
+        {"hr", "HR"},
+        {"ht", "HT"},
+        {"hu", "HU"},
+        {"hy", "AM"},
+        {"id", "ID"},
+        {"is", "IS"},
+        {"it", "IT"},
+        {"ja", "JP"},
+        {"ka", "GE"},
+        {"kk", "KZ"},
+        {"kl", "GL"},
+        {"km", "KH"},
+        {"kn", "IN"},
+        {"ko", "KR"},
+        {"kok", "IN"},
+        {"ks", "IN"},
+        {"ku", "TR"},
+        {"ky", "KG"},
+        {"la", "VA"},
+        {"lb", "LU"},
+        {"ln", "CG"},
+        {"lo", "LA"},
+        {"lt", "LT"},
+        {"lv", "LV"},
+        {"mai", "IN"},
+        {"men", "GN"},
+        {"mg", "MG"},
+        {"mh", "MH"},
+        {"mk", "MK"},
+        {"ml", "IN"},
+        {"mn", "MN"},
+        {"mni", "IN"},
+        {"mo", "MD"},
+        {"mr", "IN"},
+        {"ms", "MY"},
+        {"mt", "MT"},
+        {"my", "MM"},
+        {"na", "NR"},
+        {"nb", "NO"},
+        {"nd", "ZA"},
+        {"ne", "NP"},
+        {"niu", "NU"},
+        {"nl", "NL"},
+        {"nn", "NO"},
+        {"no", "NO"},
+        {"nr", "ZA"},
+        {"nso", "ZA"},
+        {"ny", "MW"},
+        {"om", "KE"},
+        {"or", "IN"},
+        {"pa", "IN"},
+        {"pau", "PW"},
+        {"pl", "PL"},
+        {"ps", "PK"},
+        {"pt", "BR"},
+        {"qu", "PE"},
+        {"rn", "BI"},
+        {"ro", "RO"},
+        {"ru", "RU"},
+        {"rw", "RW"},
+        {"sd", "IN"},
+        {"sg", "CF"},
+        {"si", "LK"},
+        {"sk", "SK"},
+        {"sl", "SI"},
+        {"sm", "WS"},
+        {"so", "DJ"},
+        {"sq", "CS"},
+        {"sr", "CS"},
+        {"ss", "ZA"},
+        {"st", "ZA"},
+        {"sv", "SE"},
+        {"sw", "KE"},
+        {"ta", "IN"},
+        {"te", "IN"},
+        {"tem", "SL"},
+        {"tet", "TL"},
+        {"th", "TH"},
+        {"ti", "ET"},
+        {"tg", "TJ"},
+        {"tk", "TM"},
+        {"tkl", "TK"},
+        {"tvl", "TV"},
+        {"tl", "PH"},
+        {"tn", "ZA"},
+        {"to", "TO"},
+        {"tpi", "PG"},
+        {"tr", "TR"},
+        {"ts", "ZA"},
+        {"uk", "UA"},
+        {"ur", "IN"},
+        {"uz", "UZ"},
+        {"ve", "ZA"},
+        {"vi", "VN"},
+        {"wo", "SN"},
+        {"xh", "ZA"},
+        {"zh", "CN"},
+        {"zh_Hant", "TW"},
+        {"zu", "ZA"},
+        {"aa", "ET"},
+        {"byn", "ER"},
+        {"eo", "DE"},
+        {"gez", "ET"},
+        {"haw", "US"},
+        {"iu", "CA"},
+        {"kw", "GB"},
+        {"sa", "IN"},
+        {"sh", "HR"},
+        {"sid", "ET"},
+        {"syr", "SY"},
+        {"tig", "ER"},
+        {"tt", "RU"},
+        {"wal", "ET"},
+    };
+    static {
+        // Index the table by its first column; were a key repeated, the later row would win.
+        for (String[] pair : language_territory_hack) {
+            language_territory_hack_map.put(pair[0], pair[1]);
+        }
+    }
+
+    /*
+     * Each row of territory_tzid_hack pairs a territory code with a time zone ID.
+     * The map is keyed by territory and is consulted before falling back to
+     * ZoneMeta.getAvailableIDs when a time zone has to be guessed.
+     */
+    static final Map<String, String> territory_tzid_hack_map = new HashMap<String, String>();
+    static final String[][] territory_tzid_hack = {
+        {"AQ", "Antarctica/McMurdo"},
+        {"AR", "America/Buenos_Aires"},
+        {"AU", "Australia/Sydney"},
+        {"BR", "America/Sao_Paulo"},
+        {"CA", "America/Toronto"},
+        {"CD", "Africa/Kinshasa"},
+        {"CL", "America/Santiago"},
+        {"CN", "Asia/Shanghai"},
+        {"EC", "America/Guayaquil"},
+        {"ES", "Europe/Madrid"},
+        {"GB", "Europe/London"},
+        {"GL", "America/Godthab"},
+        {"ID", "Asia/Jakarta"},
+        {"ML", "Africa/Bamako"},
+        {"MX", "America/Mexico_City"},
+        {"MY", "Asia/Kuala_Lumpur"},
+        {"NZ", "Pacific/Auckland"},
+        {"PT", "Europe/Lisbon"},
+        {"RU", "Europe/Moscow"},
+        {"UA", "Europe/Kiev"},
+        {"US", "America/New_York"},
+        {"UZ", "Asia/Tashkent"},
+        {"PF", "Pacific/Tahiti"},
+        {"FM", "Pacific/Kosrae"},
+        {"KI", "Pacific/Tarawa"},
+        {"KZ", "Asia/Almaty"},
+        {"MH", "Pacific/Majuro"},
+        {"MN", "Asia/Ulaanbaatar"},
+        {"SJ", "Arctic/Longyearbyen"},
+        {"UM", "Pacific/Midway"},
+    };
+    static {
+        for (String[] pair : territory_tzid_hack) {
+            territory_tzid_hack_map.put(pair[0], pair[1]);
+        }
+    }
+
+    // Freezable implementation
+
+
private boolean frozen; /* set to true by freeze(); reset to false on the copy returned by cloneAsThawed() */ + + /** Returns the current value of the frozen flag. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public boolean isFrozen() { + return frozen; + } + + /** Sets the frozen flag to true and returns this object. + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences freeze() { + frozen = true; + return this; + } + + /** Returns a clone() of this object with its frozen flag reset to false, or null if clone() throws CloneNotSupportedException. + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public GlobalizationPreferences cloneAsThawed() { + try { + GlobalizationPreferences result = (GlobalizationPreferences) clone(); + result.frozen = false; + return result; + } catch (CloneNotSupportedException e) { + // will always work + return null; + } + } +} + diff --git a/main/classes/core/.classpath b/main/classes/core/.classpath new file mode 100644 index 00000000000..11e1777405a --- /dev/null +++ b/main/classes/core/.classpath @@ -0,0 +1,6 @@ + + + + + + diff --git a/main/classes/core/.externalToolBuilders/copy-data-core.launch b/main/classes/core/.externalToolBuilders/copy-data-core.launch new file mode 100644 index 00000000000..0bf20451c65 --- /dev/null +++ b/main/classes/core/.externalToolBuilders/copy-data-core.launch @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/core/.project b/main/classes/core/.project new file mode 100644 index 00000000000..cbd1fb16590 --- /dev/null +++ b/main/classes/core/.project @@ -0,0 +1,28 @@ + + + icu4j-core + + + icu4j-shared + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.ui.externaltools.ExternalToolBuilder + full,incremental, + + + LaunchConfigHandle + <project>/.externalToolBuilders/copy-data-core.launch + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/main/classes/core/.settings/org.eclipse.jdt.core.prefs b/main/classes/core/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000000..e46367f90ca --- /dev/null +++ 
b/main/classes/core/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,345 @@ +#Thu Aug 27 17:47:12 EDT 2009 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.doc.comment.support=enabled +org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.autoboxing=ignore +org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning +org.eclipse.jdt.core.compiler.problem.deadCode=warning +org.eclipse.jdt.core.compiler.problem.deprecation=ignore +org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled +org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled +org.eclipse.jdt.core.compiler.problem.discouragedReference=warning +org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning +org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled +org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore +org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning +org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning +org.eclipse.jdt.core.compiler.problem.forbiddenReference=error +org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning +org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning +org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore 
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore +org.eclipse.jdt.core.compiler.problem.invalidJavadoc=warning +org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=enabled +org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=public +org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore +org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning +org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled +org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public +org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=all_standard_tags +org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore +org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled +org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public +org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore +org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning +org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore +org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning +org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning +org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore +org.eclipse.jdt.core.compiler.problem.nullReference=warning +org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning +org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore +org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore 
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore +org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning +org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore +org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore +org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled +org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning +org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled +org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore +org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning +org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning +org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore +org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning +org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore +org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore +org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled +org.eclipse.jdt.core.compiler.problem.unusedImport=warning +org.eclipse.jdt.core.compiler.problem.unusedLabel=warning +org.eclipse.jdt.core.compiler.problem.unusedLocal=warning +org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore +org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled +org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled +org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning 
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning +org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning +org.eclipse.jdt.core.compiler.source=1.5 +org.eclipse.jdt.core.formatter.align_type_members_on_columns=false +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_assignment=0 +org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 +org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 +org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 +org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 +org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 +org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 +org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 +org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 +org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_after_package=1 +org.eclipse.jdt.core.formatter.blank_lines_before_field=0 
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 +org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 +org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 +org.eclipse.jdt.core.formatter.blank_lines_before_method=1 +org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 +org.eclipse.jdt.core.formatter.blank_lines_before_package=0 +org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 +org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 +org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line +org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false +org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false +org.eclipse.jdt.core.formatter.comment.format_block_comments=true +org.eclipse.jdt.core.formatter.comment.format_header=false +org.eclipse.jdt.core.formatter.comment.format_html=true +org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true +org.eclipse.jdt.core.formatter.comment.format_line_comments=true +org.eclipse.jdt.core.formatter.comment.format_source_code=true 
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true +org.eclipse.jdt.core.formatter.comment.indent_root_tags=true +org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert +org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert +org.eclipse.jdt.core.formatter.comment.line_length=120 +org.eclipse.jdt.core.formatter.compact_else_if=true +org.eclipse.jdt.core.formatter.continuation_indentation=2 +org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 +org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true +org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true +org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_empty_lines=false +org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true +org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true +org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false +org.eclipse.jdt.core.formatter.indentation.size=4 +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert +org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert 
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert +org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert +org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert 
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not 
insert +org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert +org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert 
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert 
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert +org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert 
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert +org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert +org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert +org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert +org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert +org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert +org.eclipse.jdt.core.formatter.join_lines_in_comments=true +org.eclipse.jdt.core.formatter.join_wrapped_lines=true +org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false +org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false +org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false 
+org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false +org.eclipse.jdt.core.formatter.lineSplit=120 +org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false +org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false +org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 +org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 +org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true +org.eclipse.jdt.core.formatter.tabulation.char=space +org.eclipse.jdt.core.formatter.tabulation.size=4 +org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false +org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true diff --git a/main/classes/core/.settings/org.eclipse.jdt.ui.prefs b/main/classes/core/.settings/org.eclipse.jdt.ui.prefs new file mode 100644 index 00000000000..646a3929f0a --- /dev/null +++ b/main/classes/core/.settings/org.eclipse.jdt.ui.prefs @@ -0,0 +1,10 @@ +#Wed Jun 17 11:09:27 EDT 2009 +eclipse.preferences.version=1 +formatter_profile=_ICU4J Standard +formatter_settings_version=11 +org.eclipse.jdt.ui.ignorelowercasenames=true +org.eclipse.jdt.ui.importorder=java;javax;org;com; +org.eclipse.jdt.ui.javadoc=true +org.eclipse.jdt.ui.ondemandthreshold=99 +org.eclipse.jdt.ui.staticondemandthreshold=99 +org.eclipse.jdt.ui.text.custom_code_templates= diff --git a/main/classes/core/build.properties b/main/classes/core/build.properties new file mode 100644 index 00000000000..a21fb196196 --- /dev/null +++ b/main/classes/core/build.properties @@ -0,0 +1,6 @@ +#******************************************************************************* +#* Copyright (C) 2009, International Business Machines Corporation and * +#* others. All Rights Reserved. 
* +#******************************************************************************* +shared.dir = ../../shared +javac.compilerarg = -Xlint:all,-deprecation,-dep-ann diff --git a/main/classes/core/build.xml b/main/classes/core/build.xml new file mode 100644 index 00000000000..6006c856306 --- /dev/null +++ b/main/classes/core/build.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/core/core-build.launch b/main/classes/core/core-build.launch new file mode 100644 index 00000000000..2b3b3d7d1ca --- /dev/null +++ b/main/classes/core/core-build.launch @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/main/classes/core/manifest.stub b/main/classes/core/manifest.stub new file mode 100644 index 00000000000..4e64b0553c6 --- /dev/null +++ b/main/classes/core/manifest.stub @@ -0,0 +1,11 @@ +Manifest-Version: 1.0 + +Name: com/ibm/icu/ +Specification-Title: ICU for Java +Specification-Version: @SPECVERSION@ +Specification-Vendor: ICU +Implementation-Title: ICU for Java +Implementation-Version: @IMPLVERSION@ +Implementation-Vendor: IBM Corporation +Implementation-Vendor-Id: com.ibm +Copyright-Info: @COPYRIGHT@ \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/ICUConfig.properties b/main/classes/core/src/com/ibm/icu/ICUConfig.properties new file mode 100644 index 00000000000..c25d565896f --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/ICUConfig.properties @@ -0,0 +1,23 @@ +#* +#******************************************************************************* +#* Copyright (C) 2008-2010, International Business Machines Corporation and * +#* others. All Rights Reserved. * +#******************************************************************************* +#* This properties file contains the ICU runtime configuration. +#* + +# +# The default TimeZone implementation type used by the ICU TimeZone +# factory method. 
[ ICU | JDK ] +# +com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU + +# +# By default, DecimalFormat uses some internal equivalent character +# data in addition to ones in DecimalFormatSymbols for parsing +# decimal/grouping separators. When this property is true, +# DecimalFormat uses separators configured by DecimalFormatSymbols only +# and does not try to find a match in the internal equivalent character +# data. +# +com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false diff --git a/main/classes/core/src/com/ibm/icu/impl/Assert.java b/main/classes/core/src/com/ibm/icu/impl/Assert.java new file mode 100644 index 00000000000..7d8b113e6dd --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Assert.java @@ -0,0 +1,23 @@ +/* +******************************************************************************* +* Copyright (C) 2005-2006, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +*/ +package com.ibm.icu.impl; + +// 1.3 compatibility layer +public class Assert { + public static void fail(Exception e) { + fail(e.toString()); // can't wrap exceptions in jdk 1.3 + } + public static void fail(String msg) { + throw new IllegalStateException("failure '" + msg + "'"); + } + public static void assrt(boolean val) { + if (!val) throw new IllegalStateException("assert failed"); + } + public static void assrt(String msg, boolean val) { + if (!val) throw new IllegalStateException("assert '" + msg + "' failed"); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/BMPSet.java b/main/classes/core/src/com/ibm/icu/impl/BMPSet.java new file mode 100644 index 00000000000..7cb471c9a2a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/BMPSet.java @@ -0,0 +1,500 @@ +/* + ****************************************************************************** + * + * Copyright (C) 2009-2010, International Business Machines + * Corporation and others. 
All Rights Reserved. + * + ****************************************************************************** + */ + +package com.ibm.icu.impl; + +import com.ibm.icu.text.UnicodeSet.SpanCondition; + +/* + * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points. + * + * Latin-1: Look up bytes. 2-byte characters: Bits organized vertically. 3-byte characters: Use zero/one/mixed data + * per 64-block in U+0000..U+FFFF, with mixed for illegal ranges. Supplementary characters: Call contains() on the + * parent set. + */ +public final class BMPSet { + public static int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); + + /* + * One boolean ('true' or 'false') per Latin-1 character. + */ + private boolean[] latin1Contains; + + /* + * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points + * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6} + * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead) + * + * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) for faster validity checking at + * runtime. + */ + private int[] table7FF; + + /* + * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks + * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12} + * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit + * indicates if contains(c) for all code points in the 64-block. If the upper bit is 1, then the block is mixed + * and set.contains(c) must be called. + * + * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to the result of contains(FFFD) for faster + * validity checking at runtime. 
+ */ + private int[] bmpBlockBits; + + /* + * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000, + * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are + * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points. + */ + private int[] list4kStarts; + + /* + * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for + * supplementary code points. The list is terminated with list[listLength-1]=0x110000. + */ + private final int[] list; + private final int listLength; // length used; list may be longer to minimize reallocs + + public BMPSet(final int[] parentList, int parentListLength) { + list = parentList; + listLength = parentListLength; + latin1Contains = new boolean[0x100]; + table7FF = new int[64]; + bmpBlockBits = new int[64]; + list4kStarts = new int[18]; + + /* + * Set the list indexes for binary searches for U+0800, U+1000, U+2000, .., U+F000, U+10000. U+0800 is the + * first 3-byte-UTF-8 code point. Lower code points are looked up in the bit tables. The last pair of + * indexes is for finding supplementary code points. 
+ */ + list4kStarts[0] = findCodePoint(0x800, 0, listLength - 1); + int i; + for (i = 1; i <= 0x10; ++i) { + list4kStarts[i] = findCodePoint(i << 12, list4kStarts[i - 1], listLength - 1); + } + list4kStarts[0x11] = listLength - 1; + + initBits(); + } + + public BMPSet(final BMPSet otherBMPSet, final int[] newParentList, int newParentListLength) { + list = newParentList; + listLength = newParentListLength; + latin1Contains = otherBMPSet.latin1Contains.clone(); + table7FF = otherBMPSet.table7FF.clone(); + bmpBlockBits = otherBMPSet.bmpBlockBits.clone(); + list4kStarts = otherBMPSet.list4kStarts.clone(); + } + + public boolean contains(int c) { + if (c <= 0xff) { + return (latin1Contains[c]); + } else if (c <= 0x7ff) { + return ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0); + } else if (c < 0xd800 || (c >= 0xe000 && c <= 0xffff)) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + return (0 != twoBits); + } else { + // Look up the code point in its 4k block of code points. + return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]); + } + } else if (c <= 0x10ffff) { + // surrogate or supplementary code point + return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]); + } else { + // Out-of-range code points get false, consistent with long-standing + // behavior of UnicodeSet.contains(c). + return false; + } + } + + /* + * Span the initial substring for which each character c has spanCondition==contains(c). It must be + * spanCondition==0 or 1. + * + * @param start The start index + * @param end The end index + * @return The length of the span. + * + * NOTE: to reduce the overhead of function call to contains(c), it is manually inlined here. Check for + * sufficient length for trail unit for each surrogate pair. Handle single surrogates as surrogate code points + * as usual in ICU. 
+ */ + public final int span(CharSequence s, int start, int end, SpanCondition spanCondition) { + char c, c2; + int i = start; + int limit = Math.min(s.length(), end); + if (SpanCondition.NOT_CONTAINED != spanCondition) { + // span + while (i < limit) { + c = s.charAt(i); + if (c <= 0xff) { + if (!latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) { + break; + } + } else if (c < 0xd800 || + c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits == 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c, c2); + if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + ++i; + } + ++i; + } + } else { + // span not + while (i < limit) { + c = s.charAt(i); + if (c <= 0xff) { + if (latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) { + break; + } + } else if (c < 0xd800 || + c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits != 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. 
+ if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c, c2); + if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + ++i; + } + ++i; + } + } + return i - start; + } + + /* + * Symmetrical with span(). + * Span the trailing substring for which each character c has spanCondition==contains(c). It must be s.length >= + * limit and spanCondition==0 or 1. + * + * @return The string index which starts the span (i.e. inclusive). + */ + public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) { + char c, c2; + + limit = Math.min(s.length(), limit); + if (SpanCondition.NOT_CONTAINED != spanCondition) { + // span + for (;;) { + c = s.charAt(--limit); + if (c <= 0xff) { + if (!latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) { + break; + } + } else if (c < 0xd800 || + c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits == 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. 
+ if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c2, c); + if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + --limit; + } + if (0 == limit) { + return 0; + } + } + } else { + // span not + for (;;) { + c = s.charAt(--limit); + if (c <= 0xff) { + if (latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) { + break; + } + } else if (c < 0xd800 || + c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits != 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c2, c); + if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + --limit; + } + if (0 == limit) { + return 0; + } + } + } + return limit + 1; + } + + /* + * Set bits in a bit rectangle in "vertical" bit organization. start> 6; + int trail = start & 0x3f; + + // Set one bit indicating an all-one block. + int bits = 1 << lead; + if ((start + 1) == limit) { // Single-character shortcut. + table[trail] |= bits; + return; + } + + int limitLead = limit >> 6; + int limitTrail = limit & 0x3f; + + if (lead == limitLead) { + // Partial vertical bit column. + while (trail < limitTrail) { + table[trail++] |= bits; + } + } else { + // Partial vertical bit column, + // followed by a bit rectangle, + // followed by another partial vertical bit column. 
+ if (trail > 0) { + do { + table[trail++] |= bits; + } while (trail < 64); + ++lead; + } + if (lead < limitLead) { + bits = ~((1 << lead) - 1); + if (limitLead < 0x20) { + bits &= (1 << limitLead) - 1; + } + for (trail = 0; trail < 64; ++trail) { + table[trail] |= bits; + } + } + bits = 1 << limitLead; + for (trail = 0; trail < limitTrail; ++trail) { + table[trail] |= bits; + } + } + } + + private void initBits() { + int start, limit; + int listIndex = 0; + + // Set latin1Contains[]. + do { + start = list[listIndex++]; + if (listIndex < listLength) { + limit = list[listIndex++]; + } else { + limit = 0x110000; + } + if (start >= 0x100) { + break; + } + do { + latin1Contains[start++] = true; + } while (start < limit && start < 0x100); + } while (limit <= 0x100); + + // Set table7FF[]. + while (start < 0x800) { + set32x64Bits(table7FF, start, limit <= 0x800 ? limit : 0x800); + if (limit > 0x800) { + start = 0x800; + break; + } + + start = list[listIndex++]; + if (listIndex < listLength) { + limit = list[listIndex++]; + } else { + limit = 0x110000; + } + } + + // Set bmpBlockBits[]. + int minStart = 0x800; + while (start < 0x10000) { + if (limit > 0x10000) { + limit = 0x10000; + } + + if (start < minStart) { + start = minStart; + } + if (start < limit) { // Else: Another range entirely in a known mixed-value block. + if (0 != (start & 0x3f)) { + // Mixed-value block of 64 code points. + start >>= 6; + bmpBlockBits[start & 0x3f] |= 0x10001 << (start >> 6); + start = (start + 1) << 6; // Round up to the next block boundary. + minStart = start; // Ignore further ranges in this block. + } + if (start < limit) { + if (start < (limit & ~0x3f)) { + // Multiple all-ones blocks of 64 code points each. + set32x64Bits(bmpBlockBits, start >> 6, limit >> 6); + } + + if (0 != (limit & 0x3f)) { + // Mixed-value block of 64 code points. + limit >>= 6; + bmpBlockBits[limit & 0x3f] |= 0x10001 << (limit >> 6); + limit = (limit + 1) << 6; // Round up to the next block boundary. 
+ minStart = limit; // Ignore further ranges in this block. + } + } + } + + if (limit == 0x10000) { + break; + } + + start = list[listIndex++]; + if (listIndex < listLength) { + limit = list[listIndex++]; + } else { + limit = 0x110000; + } + } + } + + + /** + * Same as UnicodeSet.findCodePoint(int c) except that the binary search is restricted for finding code + * points in a certain range. + * + * For restricting the search for finding in the range start..end, pass in lo=findCodePoint(start) and + * hi=findCodePoint(end) with 0<=lo<=hi= hi || c >= list[hi - 1]) + return hi; + // invariant: c >= list[lo] + // invariant: c < list[hi] + for (;;) { + int i = (lo + hi) >> 1; + if (i == lo) { + break; // Found! + } else if (c < list[i]) { + hi = i; + } else { + lo = i; + } + } + return hi; + } + + private final boolean containsSlow(int c, int lo, int hi) { + return (0 != (findCodePoint(c, lo, hi) & 1)); + } +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/BOCU.java b/main/classes/core/src/com/ibm/icu/impl/BOCU.java new file mode 100644 index 00000000000..898eb47b846 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/BOCU.java @@ -0,0 +1,378 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import com.ibm.icu.text.UCharacterIterator; + +/** + *

    Binary Ordered Compression for Unicode

    + * + *

    Users are strongly encouraged to read the ICU paper on + * + * BOCU before attempting to use this class.

    + * + *

    BOCU is used to compress unicode text into a stream of unsigned + * bytes. For many kinds of text the compression compares favorably + * to UTF-8, and for some kinds of text (such as CJK) it does better. + * The resulting bytes will compare in the same order as the original + * code points. The byte stream does not contain the values 0, 1, or + * 2.

    + * + *

    One example of a use of BOCU is in + * com.ibm.icu.text.Collator#getCollationKey(String) for a RuleBasedCollator object with + * collation strength IDENTICAL. The result CollationKey will consist of the + * collation order of the source string followed by the BOCU result of the + * source string. + *

    + * + *

    Unlike a UTF encoding, BOCU-compressed text is not suitable for + * random access.

    + * + *

    Method: Slope Detection
    Remember the previous code point + * (initial 0). For each code point in the string, encode the + * difference with the previous one. Similar to a UTF, the length of + * the byte sequence is encoded in the lead bytes. Unlike a UTF, the + * trail byte values may overlap with lead/single byte values. The + * signedness of the difference must be encoded as the most + * significant part.

    + * + *

    We encode differences with few bytes if their absolute values + * are small. For correct ordering, we must treat the entire value + * range -10ffff..+10ffff in ascending order, which forbids encoding + * the sign and the absolute value separately. Instead, we split the + * lead byte range in the middle and encode non-negative values going + * up and negative values going down.

    + * + *

    For very small absolute values, the difference is added to a + * middle byte value for single-byte encoded differences. For + * somewhat larger absolute values, the difference is divided by the + * number of byte values available, the modulo is used for one trail + * byte, and the quotient is added to a lead byte avoiding the + * single-byte range. For large absolute values, the difference is + * similarly encoded in three bytes. (Syn Wee, I need examples + * here.)

    + * + *

    BOCU does not use byte values 0, 1, or 2, but uses all other + * byte values for lead and single bytes, so that the middle range of + * single bytes is as large as possible.

    + * + *

    Note that the lead byte ranges overlap some, but that the + * sequences as a whole are well ordered. I.e., even if the lead byte + * is the same for sequences of different lengths, the trail bytes + * establish correct order. It would be possible to encode slightly + * larger ranges for each length (>1) by subtracting the lower bound + * of the range. However, that would also slow down the calculation. + * (Syn Wee, need an example).

    + * + *

    For the actual string encoding, an optimization moves the + * previous code point value to the middle of its Unicode script block + * to minimize the differences in same-script text runs. (Syn Wee, + * need an example.)

    + * + * @author Syn Wee Quek + * @since release 2.2, May 3rd 2002 + */ +public class BOCU +{ + // public constructors -------------------------------------------------- + + // public methods ------------------------------------------------------- + + /** + *

    Encode the code points of a string as a sequence of bytes, + * preserving lexical order.

    + *

    The minimum size of buffer required for the compression can be + * preflighted by getCompressionLength(String).

    + * @param source text source + * @param buffer output buffer + * @param offset to start writing to + * @return end offset where the writing stopped + * @see #getCompressionLength(String) + * @exception ArrayIndexOutOfBoundsException thrown if size of buffer is + * too small for the output. + */ + public static int compress(String source, byte buffer[], int offset) + { + int prev = 0; + UCharacterIterator iterator = UCharacterIterator.getInstance(source); + int codepoint = iterator.nextCodePoint(); + while (codepoint != UCharacterIterator.DONE) { + if (prev < 0x4e00 || prev >= 0xa000) { + prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_; + } + else { + // Unihan U+4e00..U+9fa5: + // double-bytes down from the upper end + prev = 0x9fff - SLOPE_REACH_POS_2_; + } + + offset = writeDiff(codepoint - prev, buffer, offset); + prev = codepoint; + codepoint = iterator.nextCodePoint(); + } + return offset; + } + + /** + * Return the number of bytes that compress() would write; each code point is measured before the iterator advances, exactly as in compress(). + * @param source text source string + * @return the length of the BOCU result + * @see #compress(String, byte[], int) + */ + public static int getCompressionLength(String source) + { + int prev = 0; + int result = 0; + UCharacterIterator iterator = UCharacterIterator.getInstance(source); + int codepoint = iterator.nextCodePoint(); + while (codepoint != UCharacterIterator.DONE) { + if (prev < 0x4e00 || prev >= 0xa000) { + prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_; + } + else { + // Unihan U+4e00..U+9fa5: + // double-bytes down from the upper end + prev = 0x9fff - SLOPE_REACH_POS_2_; + } + + result += lengthOfDiff(codepoint - prev); + prev = codepoint; + codepoint = iterator.nextCodePoint(); + } + return result; + } + + // public setter methods ------------------------------------------------- + + // public getter methods ------------------------------------------------ + + // public other methods ------------------------------------------------- + + // protected constructor 
------------------------------------------------ + + // protected data members ------------------------------------------------ + + // protected methods ----------------------------------------------------- + + // private data members -------------------------------------------------- + + /** + * Do not use byte values 0, 1, 2 because they are separators in sort keys. + */ + private static final int SLOPE_MIN_ = 3; + private static final int SLOPE_MAX_ = 0xff; + private static final int SLOPE_MIDDLE_ = 0x81; + private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1; + //private static final int SLOPE_MAX_BYTES_ = 4; + + /** + * Number of lead bytes: + * 1 middle byte for 0 + * 2*80=160 single bytes for !=0 + * 2*42=84 for double-byte values + * 2*3=6 for 3-byte values + * 2*1=2 for 4-byte values + * + * The sum must be <=SLOPE_TAIL_COUNT. + * + * Why these numbers? + * - There should be >=128 single-byte values to cover 128-blocks + * with small scripts. + * - There should be >=20902 single/double-byte values to cover Unihan. + * - It helps CJK Extension B some if there are 3-byte values that cover + * the distance between them and Unihan. + * This also helps to jump among distant places in the BMP. + * - Four-byte values are necessary to cover the rest of Unicode. + * + * Symmetrical lead byte counts are for convenience. + * With an equal distribution of even and odd differences there is also + * no advantage to asymmetrical lead byte counts. + */ + private static final int SLOPE_SINGLE_ = 80; + private static final int SLOPE_LEAD_2_ = 42; + private static final int SLOPE_LEAD_3_ = 3; + //private static final int SLOPE_LEAD_4_ = 1; + + /** + * The difference value range for single-byters. + */ + private static final int SLOPE_REACH_POS_1_ = SLOPE_SINGLE_; + private static final int SLOPE_REACH_NEG_1_ = (-SLOPE_SINGLE_); + + /** + * The difference value range for double-byters. 
+ */ + private static final int SLOPE_REACH_POS_2_ = + SLOPE_LEAD_2_ * SLOPE_TAIL_COUNT_ + SLOPE_LEAD_2_ - 1; + private static final int SLOPE_REACH_NEG_2_ = (-SLOPE_REACH_POS_2_ - 1); + + /** + * The difference value range for 3-byters. + */ + private static final int SLOPE_REACH_POS_3_ = SLOPE_LEAD_3_ + * SLOPE_TAIL_COUNT_ + * SLOPE_TAIL_COUNT_ + + (SLOPE_LEAD_3_ - 1) + * SLOPE_TAIL_COUNT_ + + (SLOPE_TAIL_COUNT_ - 1); + private static final int SLOPE_REACH_NEG_3_ = (-SLOPE_REACH_POS_3_ - 1); + + /** + * The lead byte start values. + */ + private static final int SLOPE_START_POS_2_ = SLOPE_MIDDLE_ + + SLOPE_SINGLE_ + 1; + private static final int SLOPE_START_POS_3_ = SLOPE_START_POS_2_ + + SLOPE_LEAD_2_; + private static final int SLOPE_START_NEG_2_ = SLOPE_MIDDLE_ + + SLOPE_REACH_NEG_1_; + private static final int SLOPE_START_NEG_3_ = SLOPE_START_NEG_2_ + - SLOPE_LEAD_2_; + + // private constructor --------------------------------------------------- + + /** + * Constructor private to prevent initialization + */ + ///CLOVER:OFF + private BOCU() + { + } + ///CLOVER:ON + + // private methods ------------------------------------------------------- + + /** + * Integer division and modulo with negative numerators + * yields negative modulo results and quotients that are one more than + * what we need here. 
+ * @param number which operations are to be performed on + * @param factor the factor to use for division + * @return (result of division) << 32 | modulo + */ + private static final long getNegDivMod(int number, int factor) + { + int modulo = number % factor; + long result = number / factor; + if (modulo < 0) { + -- result; + modulo += factor; + } + return (result << 32) | modulo; + } + + /** + * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes, + * preserving lexical order + * @param diff difference value to encode + * @param buffer byte buffer to append to + * @param offset to the byte buffer to start appending + * @return end offset where the appending stops + */ + private static final int writeDiff(int diff, byte buffer[], int offset) + { + if (diff >= SLOPE_REACH_NEG_1_) { + if (diff <= SLOPE_REACH_POS_1_) { + buffer[offset ++] = (byte)(SLOPE_MIDDLE_ + diff); + } + else if (diff <= SLOPE_REACH_POS_2_) { + buffer[offset ++] = (byte)(SLOPE_START_POS_2_ + + (diff / SLOPE_TAIL_COUNT_)); + buffer[offset ++] = (byte)(SLOPE_MIN_ + + (diff % SLOPE_TAIL_COUNT_)); + } + else if (diff <= SLOPE_REACH_POS_3_) { + buffer[offset + 2] = (byte)(SLOPE_MIN_ + + (diff % SLOPE_TAIL_COUNT_)); + diff /= SLOPE_TAIL_COUNT_; + buffer[offset + 1] = (byte)(SLOPE_MIN_ + + (diff % SLOPE_TAIL_COUNT_)); + buffer[offset] = (byte)(SLOPE_START_POS_3_ + + (diff / SLOPE_TAIL_COUNT_)); + offset += 3; + } + else { + buffer[offset + 3] = (byte)(SLOPE_MIN_ + + diff % SLOPE_TAIL_COUNT_); + diff /= SLOPE_TAIL_COUNT_; + buffer[offset + 2] = (byte)(SLOPE_MIN_ + + diff % SLOPE_TAIL_COUNT_); + diff /= SLOPE_TAIL_COUNT_; + buffer[offset + 1] = (byte)(SLOPE_MIN_ + + diff % SLOPE_TAIL_COUNT_); + buffer[offset] = (byte)SLOPE_MAX_; + offset += 4; + } + } + else { + long division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); + int modulo = (int)division; + if (diff >= SLOPE_REACH_NEG_2_) { + diff = (int)(division >> 32); + buffer[offset ++] = (byte)(SLOPE_START_NEG_2_ + diff); + buffer[offset ++] = (byte)(SLOPE_MIN_ + modulo); + } + 
else if (diff >= SLOPE_REACH_NEG_3_) { + buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo); + diff = (int)(division >> 32); + division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); + modulo = (int)division; + diff = (int)(division >> 32); + buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo); + buffer[offset] = (byte)(SLOPE_START_NEG_3_ + diff); + offset += 3; + } + else { + buffer[offset + 3] = (byte)(SLOPE_MIN_ + modulo); + diff = (int)(division >> 32); + division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); + modulo = (int)division; + diff = (int)(division >> 32); + buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo); + division = getNegDivMod(diff, SLOPE_TAIL_COUNT_); + modulo = (int)division; + buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo); + buffer[offset] = SLOPE_MIN_; + offset += 4; + } + } + return offset; + } + + /** + * How many bytes would writeDiff() write? + * @param diff + */ + private static final int lengthOfDiff(int diff) + { + if (diff >= SLOPE_REACH_NEG_1_) { + if (diff <= SLOPE_REACH_POS_1_) { + return 1; + } + else if (diff <= SLOPE_REACH_POS_2_) { + return 2; + } + else if(diff <= SLOPE_REACH_POS_3_) { + return 3; + } + else { + return 4; + } + } + else { + if (diff >= SLOPE_REACH_NEG_2_) { + return 2; + } + else if (diff >= SLOPE_REACH_NEG_3_) { + return 3; + } + else { + return 4; + } + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CacheBase.java b/main/classes/core/src/com/ibm/icu/impl/CacheBase.java new file mode 100644 index 00000000000..11738a12a73 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CacheBase.java @@ -0,0 +1,39 @@ +/* +******************************************************************************* +* Copyright (C) 2010, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +*/ +package com.ibm.icu.impl; + +/** + * Base class for cache implementations. 
+ * To use, instantiate a subclass of a concrete implementation class, where the subclass + * implements the createInstance() method, and call getInstance() with the key and the data. + * The getInstance() call will use the data only if it needs to call createInstance(), + * otherwise the data is ignored. + * + * @param <K> Cache lookup key type + * @param <V> Cache instance value type + * @param <D> Data type for creating a new instance value + * + * @author Markus Scherer, Mark Davis + */ +public abstract class CacheBase<K, V, D> { + /** + * Retrieves an instance from the cache. Calls createInstance(key, data) if the cache + * does not already contain an instance with this key. + * Ignores data if the cache already contains an instance with this key. + * @param key Cache lookup key for the requested instance + * @param data Data for createInstance() if the instance is not already cached + * @return The requested instance + */ + public abstract V getInstance(K key, D data); + /** + * Creates an instance for the key and data. Must be overridden. + * @param key Cache lookup key for the requested instance + * @param data Data for the instance creation + * @return The requested instance + */ + protected abstract V createInstance(K key, D data); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java b/main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java new file mode 100644 index 00000000000..a6f3528db7b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java @@ -0,0 +1,1666 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl; + +import java.util.Date; +import java.util.TimeZone; + +/** + * CalendarAstronomer is a class that can perform the calculations to + * determine the positions of the sun and moon, the time of sunrise and + * sunset, and other astronomy-related data. The calculations it performs + * are in some cases quite complicated, and this utility class saves you + * the trouble of worrying about them. + *

    + * The measurement of time is a very important part of astronomy. Because + * astronomical bodies are constantly in motion, observations are only valid + * at a given moment in time. Accordingly, each CalendarAstronomer + * object has a time property that determines the date + * and time for which its calculations are performed. You can set and + * retrieve this property with {@link #setDate setDate}, {@link #getDate getDate} + * and related methods. + *

    + * Almost all of the calculations performed by this class, or by any + * astronomer, are approximations to various degrees of accuracy. The + * calculations in this class are mostly modelled after those described + * in the book + * + * Practical Astronomy With Your Calculator, by Peter J. + * Duffett-Smith, Cambridge University Press, 1990. This is an excellent + * book, and if you want a greater understanding of how these calculations + * are performed, it is a very good, readable starting point. + *

    + * WARNING: This class is very early in its development, and + * it is highly likely that its API will change to some degree in the future. + * At the moment, it basically does just enough to support {@link com.ibm.icu.util.IslamicCalendar} + * and {@link com.ibm.icu.util.ChineseCalendar}. + * + * @author Laura Werner + * @author Alan Liu + * @internal + */ +public class CalendarAstronomer { + + //------------------------------------------------------------------------- + // Astronomical constants + //------------------------------------------------------------------------- + + /** + * The number of standard hours in one sidereal day. + * Approximately 23.93. + * @internal + */ + public static final double SIDEREAL_DAY = 23.93446960027; + + /** + * The number of sidereal hours in one mean solar day. + * Approximately 24.07. + * @internal + */ + public static final double SOLAR_DAY = 24.065709816; + + /** + * The average number of solar days from one new moon to the next. This is the time + * it takes for the moon to return to the same ecliptic longitude as the sun. + * It is longer than the sidereal month because the sun's longitude increases + * during the year due to the revolution of the earth around the sun. + * Approximately 29.53. + * + * @see #SIDEREAL_MONTH + * @internal + */ + public static final double SYNODIC_MONTH = 29.530588853; + + /** + * The average number of days it takes + * for the moon to return to the same ecliptic longitude relative to the + * stellar background. This is referred to as the sidereal month. + * It is shorter than the synodic month due to + * the revolution of the earth around the sun. + * Approximately 27.32. + * + * @see #SYNODIC_MONTH + * @internal + */ + public static final double SIDEREAL_MONTH = 27.32166; + + /** + * The average number of days between successive vernal equinoxes. + * Due to the precession of the earth's + * axis, this is not precisely the same as the sidereal year. 
+ * Approximately 365.24. + * + * @see #SIDEREAL_YEAR + * @internal + */ + public static final double TROPICAL_YEAR = 365.242191; + + /** + * The average number of days it takes + * for the sun to return to the same position against the fixed stellar + * background. This is the duration of one orbit of the earth about the sun + * as it would appear to an outside observer. + * Due to the precession of the earth's + * axis, this is not precisely the same as the tropical year. + * Approximately 365.26. + * + * @see #TROPICAL_YEAR + * @internal + */ + public static final double SIDEREAL_YEAR = 365.25636; + + //------------------------------------------------------------------------- + // Time-related constants + //------------------------------------------------------------------------- + + /** + * The number of milliseconds in one second. + * @internal + */ + public static final int SECOND_MS = 1000; + + /** + * The number of milliseconds in one minute. + * @internal + */ + public static final int MINUTE_MS = 60*SECOND_MS; + + /** + * The number of milliseconds in one hour. + * @internal + */ + public static final int HOUR_MS = 60*MINUTE_MS; + + /** + * The number of milliseconds in one day. + * Unlike the other time constants here, this one is declared as a long. + * @internal + */ + public static final long DAY_MS = 24*HOUR_MS; + + /** + * The start of the julian day numbering scheme used by astronomers, which + * is 1/1/4713 BC (Julian), 12:00 GMT. This is given as the number of milliseconds + * since 1/1/1970 AD (Gregorian), a negative number. + * Note that julian day numbers and + * the Julian calendar are not the same thing. Also note that + * julian days start at noon, not midnight. 
+ * @internal + */ + public static final long JULIAN_EPOCH_MS = -210866760000000L; + +// static { +// Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT")); +// cal.clear(); +// cal.set(cal.ERA, 0); +// cal.set(cal.YEAR, 4713); +// cal.set(cal.MONTH, cal.JANUARY); +// cal.set(cal.DATE, 1); +// cal.set(cal.HOUR_OF_DAY, 12); +// System.out.println("1.5 Jan 4713 BC = " + cal.getTime().getTime()); + +// cal.clear(); +// cal.set(cal.YEAR, 2000); +// cal.set(cal.MONTH, cal.JANUARY); +// cal.set(cal.DATE, 1); +// cal.add(cal.DATE, -1); +// System.out.println("0.0 Jan 2000 = " + cal.getTime().getTime()); +// } + + /** + * Milliseconds value for 0.0 January 2000 AD, i.e. 0:00 GMT on 31 December 1999 + * (one day before 1 January 2000), expressed as milliseconds since 1/1/1970 AD (Gregorian). + */ + static final long EPOCH_2000_MS = 946598400000L; + + //------------------------------------------------------------------------- + // Assorted private data used for conversions + //------------------------------------------------------------------------- + + // My own copies of these so compilers are more likely to optimize them away + static private final double PI = 3.14159265358979323846; + static private final double PI2 = PI * 2.0; + + static private final double RAD_HOUR = 12 / PI; // radians -> hours + static private final double DEG_RAD = PI / 180; // degrees -> radians + static private final double RAD_DEG = 180 / PI; // radians -> degrees + + //------------------------------------------------------------------------- + // Constructors + //------------------------------------------------------------------------- + + /** + * Construct a new CalendarAstronomer object that is initialized to + * the current date and time. + * @internal + */ + public CalendarAstronomer() { + this(System.currentTimeMillis()); + } + + /** + * Construct a new CalendarAstronomer object that is initialized to + * the specified date and time. 
+ * + * @param d the date and time to initialize this object to; its {@link java.util.Date#getTime()} value is used + * @internal + */ + public CalendarAstronomer(Date d) { + this(d.getTime()); + } + + /** + * Construct a new CalendarAstronomer object that is initialized to + * the specified time. The time is expressed as a number of milliseconds since + * January 1, 1970 AD (Gregorian). + * + * @param aTime the time to initialize this object to, in milliseconds since + * January 1, 1970 AD (Gregorian) + * + * @see java.util.Date#getTime() + * @internal + */ + public CalendarAstronomer(long aTime) { + time = aTime; + } + + /** + * Construct a new CalendarAstronomer object with the given + * latitude and longitude. The object's time is set to the current + * date and time. + *

    + * @param longitude The desired longitude, in degrees east of + * the Greenwich meridian. + * + * @param latitude The desired latitude, in degrees. Positive + * values signify North, negative South. + * + * @see java.util.Date#getTime() + * @internal + */ + public CalendarAstronomer(double longitude, double latitude) { + this(); + /* Both angles are converted from degrees to radians before being stored. */ + fLongitude = normPI(longitude * DEG_RAD); + fLatitude = normPI(latitude * DEG_RAD); + /* Time offset implied by the longitude, in milliseconds: 24 hours per 2*PI radians. */ + fGmtOffset = (long)(fLongitude * 24 * HOUR_MS / PI2); + } + + + //------------------------------------------------------------------------- + // Time and date getters and setters + //------------------------------------------------------------------------- + + /** + * Set the current date and time of this CalendarAstronomer object. All + * astronomical calculations are performed based on this time setting. + * + * @param aTime the date and time, expressed as the number of milliseconds since + * 1/1/1970 0:00 GMT (Gregorian). + * + * @see #setDate + * @see #getTime + * @internal + */ + public void setTime(long aTime) { + time = aTime; + clearCache(); + } + + /** + * Set the current date and time of this CalendarAstronomer object. All + * astronomical calculations are performed based on this time setting. + * + * @param date the time and date, expressed as a Date object. + * + * @see #setTime + * @see #getDate + * @internal + */ + public void setDate(Date date) { + setTime(date.getTime()); + } + + /** + * Set the current date and time of this CalendarAstronomer object. All + * astronomical calculations are performed based on this time setting. + * + * @param jdn the desired time, expressed as a "julian day number", + * which is the number of elapsed days since + * 1/1/4713 BC (Julian), 12:00 GMT. Note that julian day + * numbers start at noon. To get the jdn for + * the corresponding midnight, subtract 0.5. 
+ * + * @see #getJulianDay + * @see #JULIAN_EPOCH_MS + * @internal + */ + public void setJulianDay(double jdn) { + time = (long)(jdn * DAY_MS) + JULIAN_EPOCH_MS; + clearCache(); + /* Assigned after clearCache() so the caller's exact jdn is kept instead of being recomputed from the truncated millisecond time; presumably clearCache() resets julianDay -- confirm. */ + julianDay = jdn; + } + + /** + * Get the current time of this CalendarAstronomer object, + * represented as the number of milliseconds since + * 1/1/1970 AD 0:00 GMT (Gregorian). + * + * @see #setTime + * @see #getDate + * @internal + */ + public long getTime() { + return time; + } + + /** + * Get the current time of this CalendarAstronomer object, + * represented as a Date object. + * + * @see #setDate + * @see #getTime + * @internal + */ + public Date getDate() { + return new Date(time); + } + + /** + * Get the current time of this CalendarAstronomer object, + * expressed as a "julian day number", which is the number of elapsed + * days since 1/1/4713 BC (Julian), 12:00 GMT. + * + * @see #setJulianDay + * @see #JULIAN_EPOCH_MS + * @internal + */ + public double getJulianDay() { + if (julianDay == INVALID) { + julianDay = (double)(time - JULIAN_EPOCH_MS) / (double)DAY_MS; + } + return julianDay; + } + + /** + * Return this object's time expressed in julian centuries: + * the number of 36525-day centuries after julian day 2415020.0, + * which is 12:00 GMT on 31 December 1899 (also written 1900 January 0.5). + * + * @see #getJulianDay + * @internal + */ + public double getJulianCentury() { + if (julianCentury == INVALID) { + julianCentury = (getJulianDay() - 2415020.0) / 36525; + } + return julianCentury; + } + + /** + * Returns the current Greenwich sidereal time, measured in hours + * @internal + */ + public double getGreenwichSidereal() { + if (siderealTime == INVALID) { + // See page 86 of "Practical Astronomy with your Calculator", + // by Peter Duffett-Smith, for details on the algorithm. 
+ + double UT = normalize((double)time/HOUR_MS, 24); + + siderealTime = normalize(getSiderealOffset() + UT*1.002737909, 24); + } + return siderealTime; + } + + /** + * Returns the sidereal-time offset, in hours (passed through normalize(..., 24)), + * evaluated for the most recent 0h UT at or before this object's time. + * T is measured in julian centuries from julian day 2451545.0. + * The value is cached in siderealT0 and recomputed only while that field is INVALID. + */ + private double getSiderealOffset() { + if (siderealT0 == INVALID) { + double JD = Math.floor(getJulianDay() - 0.5) + 0.5; + double S = JD - 2451545.0; + double T = S / 36525.0; + siderealT0 = normalize(6.697374558 + 2400.051336*T + 0.000025862*T*T, 24); + } + return siderealT0; + } + + /** + * Returns the current local sidereal time, measured in hours + * @internal + */ + public double getLocalSidereal() { + return normalize(getGreenwichSidereal() + (double)fGmtOffset/HOUR_MS, 24); + } + + /** + * Converts local sidereal time to Universal Time. + * + * @param lst The Local Sidereal Time, in hours since sidereal midnight + * on this object's current date. + * + * @return The corresponding Universal Time, in milliseconds since + * 1 Jan 1970, GMT. + */ + private long lstToUT(double lst) { + // Convert to local mean time + double lt = normalize((lst - getSiderealOffset()) * 0.9972695663, 24); + + // Then find local midnight on this day + long base = DAY_MS * ((time + fGmtOffset)/DAY_MS) - fGmtOffset; + + //out(" lt =" + lt + " hours"); + //out(" base=" + new Date(base)); + + return base + (long)(lt * HOUR_MS); + } + + + //------------------------------------------------------------------------- + // Coordinate transformations, all based on the current time of this object + //------------------------------------------------------------------------- + + /** + * Convert from ecliptic to equatorial coordinates. + * + * @param ecliptic A point in the sky in ecliptic coordinates. + * @return The corresponding point in equatorial coordinates. + * @internal + */ + public final Equatorial eclipticToEquatorial(Ecliptic ecliptic) + { + return eclipticToEquatorial(ecliptic.longitude, ecliptic.latitude); + } + + /** + * Convert from ecliptic to equatorial coordinates. 
+ * + * @param eclipLong The ecliptic longitude, in radians + * @param eclipLat The ecliptic latitude, in radians + * + * @return The corresponding point in equatorial coordinates. + * @internal + */ + public final Equatorial eclipticToEquatorial(double eclipLong, double eclipLat) + { + // See page 42 of "Practical Astronomy with your Calculator", + // by Peter Duffett-Smith, for details on the algorithm. + + double obliq = eclipticObliquity(); + double sinE = Math.sin(obliq); + double cosE = Math.cos(obliq); + + double sinL = Math.sin(eclipLong); + double cosL = Math.cos(eclipLong); + + double sinB = Math.sin(eclipLat); + double cosB = Math.cos(eclipLat); + double tanB = Math.tan(eclipLat); + + return new Equatorial(Math.atan2(sinL*cosE - tanB*sinE, cosL), + Math.asin(sinB*cosE + cosB*sinE*sinL) ); + } + + /** + * Convert from ecliptic longitude to equatorial coordinates, + * using an ecliptic latitude of 0. + * + * @param eclipLong The ecliptic longitude, in radians + * + * @return The corresponding point in equatorial coordinates. + * @internal + */ + public final Equatorial eclipticToEquatorial(double eclipLong) + { + return eclipticToEquatorial(eclipLong, 0); // TODO: optimize + } + + /** + * Convert a point on the ecliptic (ecliptic latitude 0) to horizon coordinates, + * using this object's local sidereal time and latitude. + * + * @param eclipLong The ecliptic longitude, in radians + * @return A Horizon built from the computed azimuth and altitude. + * @internal + */ + public Horizon eclipticToHorizon(double eclipLong) + { + Equatorial equatorial = eclipticToEquatorial(eclipLong); + + double H = getLocalSidereal()*PI/12 - equatorial.ascension; // Hour-angle + + double sinH = Math.sin(H); + double cosH = Math.cos(H); + double sinD = Math.sin(equatorial.declination); + double cosD = Math.cos(equatorial.declination); + double sinL = Math.sin(fLatitude); + double cosL = Math.cos(fLatitude); + + double altitude = Math.asin(sinD*sinL + cosD*cosL*cosH); + double azimuth = Math.atan2(-cosD*cosL*sinH, sinD - sinL * Math.sin(altitude)); + + return new Horizon(azimuth, altitude); + } + + + //------------------------------------------------------------------------- + // The Sun + //------------------------------------------------------------------------- + + // + // Parameters of the Sun's orbit as of the 
epoch Jan 0.0 1990 + // Angles are in radians (after multiplying by PI/180) + // + static final double JD_EPOCH = 2447891.5; // Julian day of epoch + + static final double SUN_ETA_G = 279.403303 * PI/180; // Ecliptic longitude at epoch + static final double SUN_OMEGA_G = 282.768422 * PI/180; // Ecliptic longitude of perigee + static final double SUN_E = 0.016713; // Eccentricity of orbit + //double sunR0 = 1.495585e8; // Semi-major axis in KM + //double sunTheta0 = 0.533128 * PI/180; // Angular diameter at R0 + + // The following three methods, which compute the sun parameters + // given above for an arbitrary epoch (whatever time the object is + // set to), make only a small difference as compared to using the + // above constants. E.g., Sunset times might differ by ~12 + // seconds. Furthermore, the eta-g computation is befuddled by + // Duffett-Smith's incorrect coefficients (p.86). I've corrected + // the first-order coefficient but the others may be off too - no + // way of knowing without consulting another source. + +// /** +// * Return the sun's ecliptic longitude at perigee for the current time. +// * See Duffett-Smith, p. 86. +// * @return radians +// */ +// private double getSunOmegaG() { +// double T = getJulianCentury(); +// return (281.2208444 + (1.719175 + 0.000452778*T)*T) * DEG_RAD; +// } + +// /** +// * Return the sun's ecliptic longitude for the current time. +// * See Duffett-Smith, p. 86. +// * @return radians +// */ +// private double getSunEtaG() { +// double T = getJulianCentury(); +// //return (279.6966778 + (36000.76892 + 0.0003025*T)*T) * DEG_RAD; +// // +// // The above line is from Duffett-Smith, and yields manifestly wrong +// // results. The below constant is derived empirically to match the +// // constant he gives for the 1990 EPOCH. +// // +// return (279.6966778 + (-0.3262541582718024 + 0.0003025*T)*T) * DEG_RAD; +// } + +// /** +// * Return the sun's eccentricity of orbit for the current time. +// * See Duffett-Smith, p. 86. 
+// * @return double +// */ +// private double getSunE() { +// double T = getJulianCentury(); +// return 0.01675104 - (0.0000418 + 0.000000126*T)*T; +// } + + /** + * The longitude of the sun at the time specified by this object. + * The longitude is measured in radians along the ecliptic + * from the "first point of Aries," the point at which the ecliptic + * crosses the earth's equatorial plane at the vernal equinox. + *

    + * Currently, this method uses an approximation of the two-body Kepler's + * equation for the earth and the sun. It does not take into account the + * perturbations caused by the other planets, the moon, etc. + * @internal + */ + public double getSunLongitude() + { + // See page 86 of "Practical Astronomy with your Calculator", + // by Peter Duffett-Smith, for details on the algorithm. + + if (sunLongitude == INVALID) { + double[] result = getSunLongitude(getJulianDay()); + sunLongitude = result[0]; + meanAnomalySun = result[1]; + } + return sunLongitude; + } + + /** + * Compute the sun's ecliptic longitude and mean anomaly for an arbitrary + * julian day, without reading or updating this object's cached values. + * + * @param julian the julian day to compute for + * @return a two-element array: element 0 is the solar longitude and + * element 1 is the mean anomaly + * + * TODO Make this public when the entire class is package-private. + */ + /*public*/ double[] getSunLongitude(double julian) + { + // See page 86 of "Practical Astronomy with your Calculator", + // by Peter Duffett-Smith, for details on the algorithm. + + double day = julian - JD_EPOCH; // Days since epoch + + // Find the angular distance the sun in a fictitious + // circular orbit has travelled since the epoch. + double epochAngle = norm2PI(PI2/TROPICAL_YEAR*day); + + // The epoch wasn't at the sun's perigee; find the angular distance + // since perigee, which is called the "mean anomaly" + double meanAnomaly = norm2PI(epochAngle + SUN_ETA_G - SUN_OMEGA_G); + + // Now find the "true anomaly" by solving Kepler's equation for an + // elliptical orbit; adding omega_g to it gives the real solar longitude. + // NOTE: The 3rd ed. of the book lists omega_g and eta_g in different + // equations; omega_g is taken to be correct. + return new double[] { + norm2PI(trueAnomaly(meanAnomaly, SUN_E) + SUN_OMEGA_G), + meanAnomaly + }; + } + + /** + * The position of the sun at this object's current date and time, + * in equatorial coordinates. + * @internal + */ + public Equatorial getSunPosition() { + return eclipticToEquatorial(getSunLongitude(), 0); + } + + /* Holder for a single ecliptic longitude value; see getSunTime(SolarLongitude, boolean). */ + private static class SolarLongitude { + double value; + SolarLongitude(double val) { value = val; } + } + + /** + * Constant representing the vernal equinox. 
+ * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. + * Note: In this case, "vernal" refers to the northern hemisphere's seasons. + * @internal + */ + public static final SolarLongitude VERNAL_EQUINOX = new SolarLongitude(0); + + /** + * Constant representing the summer solstice. + * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. + * Note: In this case, "summer" refers to the northern hemisphere's seasons. + * @internal + */ + public static final SolarLongitude SUMMER_SOLSTICE = new SolarLongitude(PI/2); + + /** + * Constant representing the autumnal equinox. + * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. + * Note: In this case, "autumn" refers to the northern hemisphere's seasons. + * @internal + */ + public static final SolarLongitude AUTUMN_EQUINOX = new SolarLongitude(PI); + + /** + * Constant representing the winter solstice. + * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. + * Note: In this case, "winter" refers to the northern hemisphere's seasons. + * @internal + */ + public static final SolarLongitude WINTER_SOLSTICE = new SolarLongitude((PI*3)/2); + + /** + * Find the next time at which the sun's ecliptic longitude will have + * the desired value. + * + * @param desired the target ecliptic longitude, in radians as returned by {@link #getSunLongitude()} + * @param next passed through to timeOfAngle; presumably true searches forward in time + * and false backward -- confirm against timeOfAngle + * @internal + */ + public long getSunTime(double desired, boolean next) + { + return timeOfAngle( new AngleFunc() { public double eval() { return getSunLongitude(); } }, + desired, + TROPICAL_YEAR, + MINUTE_MS, + next); + } + + /** + * Find the next time at which the sun's ecliptic longitude will have + * the desired value. + * + * @param desired a SolarLongitude such as {@link #VERNAL_EQUINOX}; its value is + * forwarded to {@link #getSunTime(double, boolean)} + * @param next forwarded unchanged to {@link #getSunTime(double, boolean)} + * @internal + */ + public long getSunTime(SolarLongitude desired, boolean next) { + return getSunTime(desired.value, next); + } + + /** + * Returns the time (GMT) of sunrise or sunset on the local date to which + * this calendar is currently set. + * + * NOTE: This method only works well if this object is set to a + * time near local noon. 
Because of variations between the local + * official time zone and the geographic longitude, the + * computation can flop over into an adjacent day if this object + * is set to a time near local midnight. + * + * @param rise true to find sunrise, false to find sunset + * @return the value produced by riseOrSet; presumably milliseconds since + * 1/1/1970 GMT, like {@link #getTime()} -- confirm against riseOrSet + * @internal + */ + public long getSunRiseSet(boolean rise) + { + /* Remember the current time: setTime() below moves this object, and the saved value is restored before returning. */ + long t0 = time; + + // Make a rough guess: 6am or 6pm local time on the current day + long noon = ((time + fGmtOffset)/DAY_MS)*DAY_MS - fGmtOffset + 12*HOUR_MS; + + setTime(noon + (long)((rise ? -6 : 6) * HOUR_MS)); + + long t = riseOrSet(new CoordFunc() { + public Equatorial eval() { return getSunPosition(); } + }, + rise, + .533 * DEG_RAD, // Angular Diameter + 34 /60.0 * DEG_RAD, // Refraction correction + MINUTE_MS / 12); // Desired accuracy (5000 ms) + + setTime(t0); + return t; + } + +// Commented out - currently unused. ICU 2.6, Alan +// //------------------------------------------------------------------------- +// // Alternate Sun Rise/Set +// // See Duffett-Smith p.93 +// //------------------------------------------------------------------------- +// +// // This yields worse results (as compared to USNO data) than getSunRiseSet(). +// /** +// * TODO Make this public when the entire class is package-private. +// */ +// /*public*/ long getSunRiseSet2(boolean rise) { +// // 1. Calculate coordinates of the sun's center for midnight +// double jd = Math.floor(getJulianDay() - 0.5) + 0.5; +// double[] sl = getSunLongitude(jd); +// double lambda1 = sl[0]; +// Equatorial pos1 = eclipticToEquatorial(lambda1, 0); +// +// // 2. Add ... to lambda to get position 24 hours later +// double lambda2 = lambda1 + 0.985647*DEG_RAD; +// Equatorial pos2 = eclipticToEquatorial(lambda2, 0); +// +// // 3. 
Calculate LSTs of rising and setting for these two positions +// double tanL = Math.tan(fLatitude); +// double H = Math.acos(-tanL * Math.tan(pos1.declination)); +// double lst1r = (PI2 + pos1.ascension - H) * 24 / PI2; +// double lst1s = (pos1.ascension + H) * 24 / PI2; +// H = Math.acos(-tanL * Math.tan(pos2.declination)); +// double lst2r = (PI2-H + pos2.ascension ) * 24 / PI2; +// double lst2s = (H + pos2.ascension ) * 24 / PI2; +// if (lst1r > 24) lst1r -= 24; +// if (lst1s > 24) lst1s -= 24; +// if (lst2r > 24) lst2r -= 24; +// if (lst2s > 24) lst2s -= 24; +// +// // 4. Convert LSTs to GSTs. If GST1 > GST2, add 24 to GST2. +// double gst1r = lstToGst(lst1r); +// double gst1s = lstToGst(lst1s); +// double gst2r = lstToGst(lst2r); +// double gst2s = lstToGst(lst2s); +// if (gst1r > gst2r) gst2r += 24; +// if (gst1s > gst2s) gst2s += 24; +// +// // 5. Calculate GST at 0h UT of this date +// double t00 = utToGst(0); +// +// // 6. Calculate GST at 0h on the observer's longitude +// double offset = Math.round(fLongitude*12/PI); // p.95 step 6; he _rounds_ to nearest 15 deg. +// double t00p = t00 - offset*1.002737909; +// if (t00p < 0) t00p += 24; // do NOT normalize +// +// // 7. Adjust +// if (gst1r < t00p) { +// gst1r += 24; +// gst2r += 24; +// } +// if (gst1s < t00p) { +// gst1s += 24; +// gst2s += 24; +// } +// +// // 8. +// double gstr = (24.07*gst1r-t00*(gst2r-gst1r))/(24.07+gst1r-gst2r); +// double gsts = (24.07*gst1s-t00*(gst2s-gst1s))/(24.07+gst1s-gst2s); +// +// // 9. Correct for parallax, refraction, and sun's diameter +// double dec = (pos1.declination + pos2.declination) / 2; +// double psi = Math.acos(Math.sin(fLatitude) / Math.cos(dec)); +// double x = 0.830725 * DEG_RAD; // parallax+refraction+diameter +// double y = Math.asin(Math.sin(x) / Math.sin(psi)) * RAD_DEG; +// double delta_t = 240 * y / Math.cos(dec) / 3600; // hours +// +// // 10. Add correction to GSTs, subtract from GSTr +// gstr -= delta_t; +// gsts += delta_t; +// +// // 11. 
Convert GST to UT and then to local civil time +// double ut = gstToUt(rise ? gstr : gsts); +// //System.out.println((rise?"rise=":"set=") + ut + ", delta_t=" + delta_t); +// long midnight = DAY_MS * (time / DAY_MS); // Find UT midnight on this day +// return midnight + (long) (ut * 3600000); +// } + +// Commented out - currently unused. ICU 2.6, Alan +// /** +// * Convert local sidereal time to Greenwich sidereal time. +// * Section 15. Duffett-Smith p.21 +// * @param lst in hours (0..24) +// * @return GST in hours (0..24) +// */ +// double lstToGst(double lst) { +// double delta = fLongitude * 24 / PI2; +// return normalize(lst - delta, 24); +// } + +// Commented out - currently unused. ICU 2.6, Alan +// /** +// * Convert UT to GST on this date. +// * Section 12. Duffett-Smith p.17 +// * @param ut in hours +// * @return GST in hours +// */ +// double utToGst(double ut) { +// return normalize(getT0() + ut*1.002737909, 24); +// } + +// Commented out - currently unused. ICU 2.6, Alan +// /** +// * Convert GST to UT on this date. +// * Section 13. Duffett-Smith p.18 +// * @param gst in hours +// * @return UT in hours +// */ +// double gstToUt(double gst) { +// return normalize(gst - getT0(), 24) * 0.9972695663; +// } + +// Commented out - currently unused. ICU 2.6, Alan +// double getT0() { +// // Common computation for UT <=> GST +// +// // Find JD for 0h UT +// double jd = Math.floor(getJulianDay() - 0.5) + 0.5; +// +// double s = jd - 2451545.0; +// double t = s / 36525.0; +// double t0 = 6.697374558 + (2400.051336 + 0.000025862*t)*t; +// return t0; +// } + +// Commented out - currently unused. 
ICU 2.6, Alan +// //------------------------------------------------------------------------- +// // Alternate Sun Rise/Set +// // See sci.astro FAQ +// // http://www.faqs.org/faqs/astronomy/faq/part3/section-5.html +// //------------------------------------------------------------------------- +// +// // Note: This method appears to produce inferior accuracy as +// // compared to getSunRiseSet(). +// +// /** +// * TODO Make this public when the entire class is package-private. +// */ +// /*public*/ long getSunRiseSet3(boolean rise) { +// +// // Compute day number for 0.0 Jan 2000 epoch +// double d = (double)(time - EPOCH_2000_MS) / DAY_MS; +// +// // Now compute the Local Sidereal Time, LST: +// // +// double LST = 98.9818 + 0.985647352 * d + /*UT*15 + long*/ +// fLongitude*RAD_DEG; +// // +// // (east long. positive). Note that LST is here expressed in degrees, +// // where 15 degrees corresponds to one hour. Since LST really is an angle, +// // it's convenient to use one unit---degrees---throughout. +// +// // COMPUTING THE SUN'S POSITION +// // ---------------------------- +// // +// // To be able to compute the Sun's rise/set times, you need to be able to +// // compute the Sun's position at any time. First compute the "day +// // number" d as outlined above, for the desired moment. Next compute: +// // +// double oblecl = 23.4393 - 3.563E-7 * d; +// // +// double w = 282.9404 + 4.70935E-5 * d; +// double M = 356.0470 + 0.9856002585 * d; +// double e = 0.016709 - 1.151E-9 * d; +// // +// // This is the obliquity of the ecliptic, plus some of the elements of +// // the Sun's apparent orbit (i.e., really the Earth's orbit): w = +// // argument of perihelion, M = mean anomaly, e = eccentricity. +// // Semi-major axis is here assumed to be exactly 1.0 (while not strictly +// // true, this is still an accurate approximation). 
Next compute E, the +// // eccentric anomaly: +// // +// double E = M + e*(180/PI) * Math.sin(M*DEG_RAD) * ( 1.0 + e*Math.cos(M*DEG_RAD) ); +// // +// // where E and M are in degrees. This is it---no further iterations are +// // needed because we know e has a sufficiently small value. Next compute +// // the true anomaly, v, and the distance, r: +// // +// /* r * cos(v) = */ double A = Math.cos(E*DEG_RAD) - e; +// /* r * sin(v) = */ double B = Math.sqrt(1 - e*e) * Math.sin(E*DEG_RAD); +// // +// // and +// // +// // r = sqrt( A*A + B*B ) +// double v = Math.atan2( B, A )*RAD_DEG; +// // +// // The Sun's true longitude, slon, can now be computed: +// // +// double slon = v + w; +// // +// // Since the Sun is always at the ecliptic (or at least very very close to +// // it), we can use simplified formulae to convert slon (the Sun's ecliptic +// // longitude) to sRA and sDec (the Sun's RA and Dec): +// // +// // sin(slon) * cos(oblecl) +// // tan(sRA) = ------------------------- +// // cos(slon) +// // +// // sin(sDec) = sin(oblecl) * sin(slon) +// // +// // As was the case when computing az, the Azimuth, if possible use an +// // atan2() function to compute sRA. +// +// double sRA = Math.atan2(Math.sin(slon*DEG_RAD) * Math.cos(oblecl*DEG_RAD), Math.cos(slon*DEG_RAD))*RAD_DEG; +// +// double sin_sDec = Math.sin(oblecl*DEG_RAD) * Math.sin(slon*DEG_RAD); +// double sDec = Math.asin(sin_sDec)*RAD_DEG; +// +// // COMPUTING RISE AND SET TIMES +// // ---------------------------- +// // +// // To compute when an object rises or sets, you must compute when it +// // passes the meridian and the HA of rise/set. Then the rise time is +// // the meridian time minus HA for rise/set, and the set time is the +// // meridian time plus the HA for rise/set. +// // +// // To find the meridian time, compute the Local Sidereal Time at 0h local +// // time (or 0h UT if you prefer to work in UT) as outlined above---name +// // that quantity LST0. 
The Meridian Time, MT, will now be: +// // +// // MT = RA - LST0 +// double MT = normalize(sRA - LST, 360); +// // +// // where "RA" is the object's Right Ascension (in degrees!). If negative, +// // add 360 deg to MT. If the object is the Sun, leave the time as it is, +// // but if it's stellar, multiply MT by 365.2422/366.2422, to convert from +// // sidereal to solar time. Now, compute HA for rise/set, name that +// // quantity HA0: +// // +// // sin(h0) - sin(lat) * sin(Dec) +// // cos(HA0) = --------------------------------- +// // cos(lat) * cos(Dec) +// // +// // where h0 is the altitude selected to represent rise/set. For a purely +// // mathematical horizon, set h0 = 0 and simplify to: +// // +// // cos(HA0) = - tan(lat) * tan(Dec) +// // +// // If you want to account for refraction on the atmosphere, set h0 = -35/60 +// // degrees (-35 arc minutes), and if you want to compute the rise/set times +// // for the Sun's upper limb, set h0 = -50/60 (-50 arc minutes). +// // +// double h0 = -50/60 * DEG_RAD; +// +// double HA0 = Math.acos( +// (Math.sin(h0) - Math.sin(fLatitude) * sin_sDec) / +// (Math.cos(fLatitude) * Math.cos(sDec*DEG_RAD)))*RAD_DEG; +// +// // When HA0 has been computed, leave it as it is for the Sun but multiply +// // by 365.2422/366.2422 for stellar objects, to convert from sidereal to +// // solar time. Finally compute: +// // +// // Rise time = MT - HA0 +// // Set time = MT + HA0 +// // +// // convert the times from degrees to hours by dividing by 15. +// // +// // If you'd like to check that your calculations are accurate or just +// // need a quick result, check the USNO's Sun or Moon Rise/Set Table, +// // . +// +// double result = MT + (rise ? 
-HA0 : HA0); // in degrees +// +// // Find UT midnight on this day +// long midnight = DAY_MS * (time / DAY_MS); +// +// return midnight + (long) (result * 3600000 / 15); +// } + + //------------------------------------------------------------------------- + // The Moon + //------------------------------------------------------------------------- + + static final double moonL0 = 318.351648 * PI/180; // Mean long. at epoch + static final double moonP0 = 36.340410 * PI/180; // Mean long. of perigee + static final double moonN0 = 318.510107 * PI/180; // Mean long. of node + static final double moonI = 5.145366 * PI/180; // Inclination of orbit + static final double moonE = 0.054900; // Eccentricity of orbit + + // These aren't used right now + static final double moonA = 3.84401e5; // semi-major axis (km) + static final double moonT0 = 0.5181 * PI/180; // Angular size at distance A + static final double moonPi = 0.9507 * PI/180; // Parallax at distance A + + /** + * The position of the moon at the time set on this + * object, in equatorial coordinates. + * The result is computed once and then reused for as long as the cached + * moonPosition field is non-null. + * @internal + */ + public Equatorial getMoonPosition() + { + // + // See page 142 of "Practical Astronomy with your Calculator", + // by Peter Duffett-Smith, for details on the algorithm. + // + if (moonPosition == null) { + // Calculate the solar longitude. Has the side effect of + // filling in "meanAnomalySun" as well. + double sunLong = getSunLongitude(); + + // + // Find the # of days since the epoch of our orbital parameters. + // TODO: Convert the time of day portion into ephemeris time + // + double day = getJulianDay() - JD_EPOCH; // Days since epoch + + // Calculate the mean longitude and anomaly of the moon, based on + // a circular orbit. Similar to the corresponding solar calculation. 
+ double meanLongitude = norm2PI(13.1763966*PI/180*day + moonL0); + double meanAnomalyMoon = norm2PI(meanLongitude - 0.1114041*PI/180 * day - moonP0); + + // + // Calculate the following corrections: + // Evection: the sun's gravity affects the moon's eccentricity + // Annual Eqn: variation in the effect due to earth-sun distance + // A3: correction factor (for ???) + // + double evection = 1.2739*PI/180 * Math.sin(2 * (meanLongitude - sunLong) + - meanAnomalyMoon); + double annual = 0.1858*PI/180 * Math.sin(meanAnomalySun); + double a3 = 0.3700*PI/180 * Math.sin(meanAnomalySun); + + meanAnomalyMoon += evection - annual - a3; + + // + // More correction factors: + // center equation of the center correction + // a4 yet another error correction (???) + // + // TODO: Skip the equation of the center correction and solve Kepler's eqn? + // + double center = 6.2886*PI/180 * Math.sin(meanAnomalyMoon); + double a4 = 0.2140*PI/180 * Math.sin(2 * meanAnomalyMoon); + + // Now find the moon's corrected longitude + moonLongitude = meanLongitude + evection + center - annual + a4; + + // + // And finally, find the variation, caused by the fact that the sun's + // gravitational pull on the moon varies depending on which side of + // the earth the moon is on + // + double variation = 0.6583*PI/180 * Math.sin(2*(moonLongitude - sunLong)); + + moonLongitude += variation; + + // + // What we've calculated so far is the moon's longitude in the plane + // of its own orbit. Now map to the ecliptic to get the latitude + // and longitude. First we need to find the longitude of the ascending + // node, the position on the ecliptic where it is crossed by the moon's + // orbit as it crosses from the southern to the northern hemisphere. 
+ // + double nodeLongitude = norm2PI(moonN0 - 0.0529539*PI/180 * day); + + nodeLongitude -= 0.16*PI/180 * Math.sin(meanAnomalySun); + + double y = Math.sin(moonLongitude - nodeLongitude); + double x = Math.cos(moonLongitude - nodeLongitude); + + moonEclipLong = Math.atan2(y*Math.cos(moonI), x) + nodeLongitude; + double moonEclipLat = Math.asin(y * Math.sin(moonI)); + + moonPosition = eclipticToEquatorial(moonEclipLong, moonEclipLat); + } + return moonPosition; + } + + /** + * The "age" of the moon at the time specified in this object. + * This is really the angle between the + * current ecliptic longitudes of the sun and the moon, + * measured in radians. + * + * @see #getMoonPhase + * @internal + */ + public double getMoonAge() { + // See page 147 of "Practical Astronomy with your Calculator", + // by Peter Duffett-Smith, for details on the algorithm. + // + // Force the moon's position to be calculated. We're going to use + // some of the intermediate results cached during that calculation. + // + getMoonPosition(); + + return norm2PI(moonEclipLong - sunLongitude); + } + + /** + * Calculate the phase of the moon at the time set in this object. + * The returned phase is a double in the range + * 0 <= phase < 1, interpreted as follows: + *

      + *
    • 0.00: New moon + *
    • 0.25: First quarter + *
    • 0.50: Full moon + *
    • 0.75: Last quarter + *
    + * + * @see #getMoonAge + * @internal + */ + public double getMoonPhase() { + // See page 147 of "Practial Astronomy with your Calculator", + // by Peter Duffet-Smith, for details on the algorithm. + return 0.5 * (1 - Math.cos(getMoonAge())); + } + + private static class MoonAge { + double value; + MoonAge(double val) { value = val; } + } + + /** + * Constant representing a new moon. + * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} + * @internal + */ + public static final MoonAge NEW_MOON = new MoonAge(0); + + /** + * Constant representing the moon's first quarter. + * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} + * @internal + */ + public static final MoonAge FIRST_QUARTER = new MoonAge(PI/2); + + /** + * Constant representing a full moon. + * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} + * @internal + */ + public static final MoonAge FULL_MOON = new MoonAge(PI); + + /** + * Constant representing the moon's last quarter. + * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} + * @internal + */ + public static final MoonAge LAST_QUARTER = new MoonAge((PI*3)/2); + + /** + * Find the next or previous time at which the Moon's ecliptic + * longitude will have the desired value. + *

    + * @param desired The desired longitude. + * @param next true if the next occurrance of the phase + * is desired, false for the previous occurrance. + * @internal + */ + public long getMoonTime(double desired, boolean next) + { + return timeOfAngle( new AngleFunc() { + public double eval() { return getMoonAge(); } }, + desired, + SYNODIC_MONTH, + MINUTE_MS, + next); + } + + /** + * Find the next or previous time at which the moon will be in the + * desired phase. + *

     * @param desired   The desired phase of the moon.
     * @param next      <tt>true</tt> if the next occurrence of the phase
     *                  is desired, <tt>false</tt> for the previous occurrence.
     * @internal
     */
    public long getMoonTime(MoonAge desired, boolean next) {
        return getMoonTime(desired.value, next);
    }

    /**
     * Returns the time (GMT) of moonrise or moonset on the local date to which
     * this calendar is currently set.
     *
     * @param rise  <tt>true</tt> to find the rising time, <tt>false</tt>
     *              to find the setting time.
     * @internal
     */
    public long getMoonRiseSet(boolean rise)
    {
        return riseOrSet(new CoordFunc() {
                            public Equatorial eval() { return getMoonPosition(); }
                         },
                         rise,
                         .533 * DEG_RAD,        // Angular Diameter
                         34 /60.0 * DEG_RAD,    // Refraction correction
                         MINUTE_MS);            // Desired accuracy
    }

    //-------------------------------------------------------------------------
    // Interpolation methods for finding the time at which a given event occurs
    //-------------------------------------------------------------------------

    /** Callback that evaluates an angle at this object's current time setting. */
    private interface AngleFunc {
        public double eval();
    }

    /**
     * Find a time at which <code>func</code> takes the value <code>desired</code>.
     * The method changes this object's time setting (via <code>setTime</code>)
     * while it searches and leaves it at the returned time.
     *
     * @param func        the angle to be evaluated at each candidate time
     * @param desired     the angle being searched for
     * @param periodDays  average period of the angle, in days; used for the
     *                    first estimate
     * @param epsilon     the loop stops once the last time correction is no
     *                    larger than this many milliseconds
     * @param next        <tt>true</tt> to search forward from the current time,
     *                    <tt>false</tt> to search backward
     * @return the time, in milliseconds, at which the iteration stopped
     */
    private long timeOfAngle(AngleFunc func, double desired,
                             double periodDays, long epsilon, boolean next)
    {
        // Find the value of the function at the current time
        double lastAngle = func.eval();

        // Find out how far we are from the desired angle
        double deltaAngle = norm2PI(desired - lastAngle) ;

        // Using the average period, estimate the next (or previous) time at
        // which the desired angle occurs.
        double deltaT =  (deltaAngle + (next ? 0 : -PI2)) * (periodDays*DAY_MS) / PI2;

        double lastDeltaT = deltaT; // Liu
        long startTime = time; // Liu

        setTime(time + (long)deltaT);

        // Now iterate until we get the error below epsilon.  Throughout
        // this loop we use normPI to get values in the range -Pi to Pi,
        // since we're using them as correction factors rather than absolute angles.
        do {
            // Evaluate the function at the time we've estimated
            double angle = func.eval();

            // Find the # of milliseconds per radian at this point on the curve
            double factor = Math.abs(deltaT / normPI(angle-lastAngle));

            // Correct the time estimate based on how far off the angle is
            deltaT = normPI(desired - angle) * factor;

            // HACK:
            //
            // If abs(deltaT) begins to diverge we need to quit this loop.
            // This only appears to happen when attempting to locate, for
            // example, a new moon on the day of the new moon.  E.g.:
            //
            // This result is correct:
            // newMoon(7508(Mon Jul 23 00:00:00 CST 1990,false))=
            //   Sun Jul 22 10:57:41 CST 1990
            //
            // But attempting to make the same call a day earlier causes deltaT
            // to diverge:
            // CalendarAstronomer.timeOfAngle() diverging: 1.348508727575625E9 ->
            //   1.3649828540224032E9
            // newMoon(7507(Sun Jul 22 00:00:00 CST 1990,false))=
            //   Sun Jul 08 13:56:15 CST 1990
            //
            // As a temporary solution, we catch this specific condition and
            // adjust our start time by one eighth period days (either forward
            // or backward) and try again.
            // Liu 11/9/00
            if (Math.abs(deltaT) > Math.abs(lastDeltaT)) {
                long delta = (long) (periodDays * DAY_MS / 8);
                setTime(startTime + (next ? delta : -delta));
                return timeOfAngle(func, desired, periodDays, epsilon, next);
            }

            lastDeltaT = deltaT;
            lastAngle = angle;

            setTime(time + (long)deltaT);
        }
        while (Math.abs(deltaT) > epsilon);

        return time;
    }

    /** Callback that evaluates an object's equatorial position at the current time setting. */
    private interface CoordFunc {
        public Equatorial eval();
    }

    /**
     * Estimate the time at which the object described by <code>func</code>
     * rises or sets.  The method changes this object's time setting (via
     * <code>setTime</code>) while it iterates.
     *
     * @param func        supplies the object's position at the current time
     * @param rise        <tt>true</tt> for the rising time, <tt>false</tt>
     *                    for the setting time
     * @param diameter    the object's angular diameter; passed to
     *                    <code>Math.sin</code>, so in radians
     * @param refraction  the refraction correction, in radians
     * @param epsilon     the loop stops once the change between two successive
     *                    estimates is no larger than this many milliseconds
     *                    (or after five passes)
     */
    private long riseOrSet(CoordFunc func, boolean rise,
                           double diameter, double refraction,
                           long epsilon)
    {
        Equatorial  pos = null;
        double      tanL   = Math.tan(fLatitude);
        long        deltaT = Long.MAX_VALUE;
        int         count = 0;

        //
        // Calculate the object's position at the current time, then use that
        // position to calculate the time of rising or setting.  The position
        // will be different at that time, so iterate until the error is allowable.
        //
        do {
            // See "Practical Astronomy With Your Calculator", section 33.
            pos = func.eval();
            double angle = Math.acos(-tanL * Math.tan(pos.declination));
            double lst = ((rise ? PI2-angle : angle) + pos.ascension ) * 24 / PI2;

            // Convert from LST to Universal Time.
            long newTime = lstToUT( lst );

            deltaT = newTime - time;
            setTime(newTime);
        }
        while (++ count < 5 && Math.abs(deltaT) > epsilon);

        // Calculate the correction due to refraction and the object's angular diameter
        double cosD  = Math.cos(pos.declination);
        double psi   = Math.acos(Math.sin(fLatitude) / cosD);
        double x     = diameter / 2 + refraction;
        double y     = Math.asin(Math.sin(x) / Math.sin(psi));
        long  delta  = (long)((240 * y * RAD_DEG / cosD)*SECOND_MS);

        // Rising happens earlier, setting later, than the uncorrected estimate
        return time + (rise ? -delta : delta);
    }

    //-------------------------------------------------------------------------
    // Other utility methods
    //-------------------------------------------------------------------------

    /**
     * Given 'value', add or subtract 'range' until 0 <= 'value' < range.
     * The modulus operator.
     */
    private static final double normalize(double value, double range)  {
        return value - range * Math.floor(value / range);
    }

    /**
     * Normalize an angle so that it's in the range 0 - 2pi.
     * For positive angles this is just (angle % 2pi), but the Java
     * mod operator doesn't work that way for negative numbers....
     */
    private static final double norm2PI(double angle)  {
        return normalize(angle, PI2);
    }

    /**
     * Normalize an angle into the range -PI - PI
     */
    private static final double normPI(double angle)  {
        return normalize(angle + PI, PI2) - PI;
    }

    /**
     * Find the "true anomaly" (longitude) of an object from
     * its mean anomaly and the eccentricity of its orbit.  This uses
     * an iterative solution to Kepler's equation.
+ * + * @param meanAnomaly The object's longitude calculated as if it were in + * a regular, circular orbit, measured in radians + * from the point of perigee. + * + * @param eccentricity The eccentricity of the orbit + * + * @return The true anomaly (longitude) measured in radians + */ + private double trueAnomaly(double meanAnomaly, double eccentricity) + { + // First, solve Kepler's equation iteratively + // Duffett-Smith, p.90 + double delta; + double E = meanAnomaly; + do { + delta = E - eccentricity * Math.sin(E) - meanAnomaly; + E = E - delta / (1 - eccentricity * Math.cos(E)); + } + while (Math.abs(delta) > 1e-5); // epsilon = 1e-5 rad + + return 2.0 * Math.atan( Math.tan(E/2) * Math.sqrt( (1+eccentricity) + /(1-eccentricity) ) ); + } + + /** + * Return the obliquity of the ecliptic (the angle between the ecliptic + * and the earth's equator) at the current time. This varies due to + * the precession of the earth's axis. + * + * @return the obliquity of the ecliptic relative to the equator, + * measured in radians. 
+ */ + private double eclipticObliquity() { + if (eclipObliquity == INVALID) { + final double epoch = 2451545.0; // 2000 AD, January 1.5 + + double T = (getJulianDay() - epoch) / 36525; + + eclipObliquity = 23.439292 + - 46.815/3600 * T + - 0.0006/3600 * T*T + + 0.00181/3600 * T*T*T; + + eclipObliquity *= DEG_RAD; + } + return eclipObliquity; + } + + + //------------------------------------------------------------------------- + // Private data + //------------------------------------------------------------------------- + + /** + * Current time in milliseconds since 1/1/1970 AD + * @see java.util.Date#getTime + */ + private long time; + + /* These aren't used yet, but they'll be needed for sunset calculations + * and equatorial to horizon coordinate conversions + */ + private double fLongitude = 0.0; + private double fLatitude = 0.0; + private long fGmtOffset = 0; + + // + // The following fields are used to cache calculated results for improved + // performance. These values all depend on the current time setting + // of this object, so the clearCache method is provided. 
+ // + static final private double INVALID = Double.MIN_VALUE; + + private transient double julianDay = INVALID; + private transient double julianCentury = INVALID; + private transient double sunLongitude = INVALID; + private transient double meanAnomalySun = INVALID; + private transient double moonLongitude = INVALID; + private transient double moonEclipLong = INVALID; + //private transient double meanAnomalyMoon = INVALID; + private transient double eclipObliquity = INVALID; + private transient double siderealT0 = INVALID; + private transient double siderealTime = INVALID; + + private transient Equatorial moonPosition = null; + + private void clearCache() { + julianDay = INVALID; + julianCentury = INVALID; + sunLongitude = INVALID; + meanAnomalySun = INVALID; + moonLongitude = INVALID; + moonEclipLong = INVALID; + //meanAnomalyMoon = INVALID; + eclipObliquity = INVALID; + siderealTime = INVALID; + siderealT0 = INVALID; + moonPosition = null; + } + + //private static void out(String s) { + // System.out.println(s); + //} + + //private static String deg(double rad) { + // return Double.toString(rad * RAD_DEG); + //} + + //private static String hours(long ms) { + // return Double.toString((double)ms / HOUR_MS) + " hours"; + //} + + /** + * @internal + */ + public String local(long localMillis) { + return new Date(localMillis - TimeZone.getDefault().getRawOffset()).toString(); + } + + + /** + * Represents the position of an object in the sky relative to the ecliptic, + * the plane of the earth's orbit around the Sun. + * This is a spherical coordinate system in which the latitude + * specifies the position north or south of the plane of the ecliptic. + * The longitude specifies the position along the ecliptic plane + * relative to the "First Point of Aries", which is the Sun's position in the sky + * at the Vernal Equinox. + *

    + * Note that Ecliptic objects are immutable and cannot be modified + * once they are constructed. This allows them to be passed and returned by + * value without worrying about whether other code will modify them. + * + * @see CalendarAstronomer.Equatorial + * @see CalendarAstronomer.Horizon + * @internal + */ + public static final class Ecliptic { + /** + * Constructs an Ecliptic coordinate object. + *

    + * @param lat The ecliptic latitude, measured in radians. + * @param lon The ecliptic longitude, measured in radians. + * @internal + */ + public Ecliptic(double lat, double lon) { + latitude = lat; + longitude = lon; + } + + /** + * Return a string representation of this object + * @internal + */ + public String toString() { + return Double.toString(longitude*RAD_DEG) + "," + (latitude*RAD_DEG); + } + + /** + * The ecliptic latitude, in radians. This specifies an object's + * position north or south of the plane of the ecliptic, + * with positive angles representing north. + * @internal + */ + public final double latitude; + + /** + * The ecliptic longitude, in radians. + * This specifies an object's position along the ecliptic plane + * relative to the "First Point of Aries", which is the Sun's position + * in the sky at the Vernal Equinox, + * with positive angles representing east. + *

    + * A bit of trivia: the first point of Aries is currently in the + * constellation Pisces, due to the precession of the earth's axis. + * @internal + */ + public final double longitude; + } + + /** + * Represents the position of an + * object in the sky relative to the plane of the earth's equator. + * The Right Ascension specifies the position east or west + * along the equator, relative to the sun's position at the vernal + * equinox. The Declination is the position north or south + * of the equatorial plane. + *

    + * Note that Equatorial objects are immutable and cannot be modified + * once they are constructed. This allows them to be passed and returned by + * value without worrying about whether other code will modify them. + * + * @see CalendarAstronomer.Ecliptic + * @see CalendarAstronomer.Horizon + * @internal + */ + public static final class Equatorial { + /** + * Constructs an Equatorial coordinate object. + *

    + * @param asc The right ascension, measured in radians. + * @param dec The declination, measured in radians. + * @internal + */ + public Equatorial(double asc, double dec) { + ascension = asc; + declination = dec; + } + + /** + * Return a string representation of this object, with the + * angles measured in degrees. + * @internal + */ + public String toString() { + return Double.toString(ascension*RAD_DEG) + "," + (declination*RAD_DEG); + } + + /** + * Return a string representation of this object with the right ascension + * measured in hours, minutes, and seconds. + * @internal + */ + public String toHmsString() { + return radToHms(ascension) + "," + radToDms(declination); + } + + /** + * The right ascension, in radians. + * This is the position east or west along the equator + * relative to the sun's position at the vernal equinox, + * with positive angles representing East. + * @internal + */ + public final double ascension; + + /** + * The declination, in radians. + * This is the position north or south of the equatorial plane, + * with positive angles representing north. + * @internal + */ + public final double declination; + } + + /** + * Represents the position of an object in the sky relative to + * the local horizon. + * The Altitude represents the object's elevation above the horizon, + * with objects below the horizon having a negative altitude. + * The Azimuth is the geographic direction of the object from the + * observer's position, with 0 representing north. The azimuth increases + * clockwise from north. + *

    + * Note that Horizon objects are immutable and cannot be modified + * once they are constructed. This allows them to be passed and returned by + * value without worrying about whether other code will modify them. + * + * @see CalendarAstronomer.Ecliptic + * @see CalendarAstronomer.Equatorial + * @internal + */ + public static final class Horizon { + /** + * Constructs a Horizon coordinate object. + *

    + * @param alt The altitude, measured in radians above the horizon. + * @param azim The azimuth, measured in radians clockwise from north. + * @internal + */ + public Horizon(double alt, double azim) { + altitude = alt; + azimuth = azim; + } + + /** + * Return a string representation of this object, with the + * angles measured in degrees. + * @internal + */ + public String toString() { + return Double.toString(altitude*RAD_DEG) + "," + (azimuth*RAD_DEG); + } + + /** + * The object's altitude above the horizon, in radians. + * @internal + */ + public final double altitude; + + /** + * The object's direction, in radians clockwise from north. + * @internal + */ + public final double azimuth; + } + + static private String radToHms(double angle) { + int hrs = (int) (angle*RAD_HOUR); + int min = (int)((angle*RAD_HOUR - hrs) * 60); + int sec = (int)((angle*RAD_HOUR - hrs - min/60.0) * 3600); + + return Integer.toString(hrs) + "h" + min + "m" + sec + "s"; + } + + static private String radToDms(double angle) { + int deg = (int) (angle*RAD_DEG); + int min = (int)((angle*RAD_DEG - deg) * 60); + int sec = (int)((angle*RAD_DEG - deg - min/60.0) * 3600); + + return Integer.toString(deg) + "\u00b0" + min + "'" + sec + "\""; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarCache.java b/main/classes/core/src/com/ibm/icu/impl/CalendarCache.java new file mode 100644 index 00000000000..c83562a9722 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CalendarCache.java @@ -0,0 +1,127 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2004, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +/** + * @internal + */ +public class CalendarCache +{ + /** + * @internal + */ + public CalendarCache() { + makeArrays(arraySize); + } + + private void makeArrays(int newSize) { + keys = new long[newSize]; + values = new long[newSize]; + + for (int i = 0; i < newSize; i++) { + values[i] = EMPTY; + } + arraySize = newSize; + threshold = (int)(arraySize * 0.75); + size = 0; + } + + /** + * @internal + */ + public synchronized long get(long key) { + return values[findIndex(key)]; + } + + /** + * @internal + */ + public synchronized void put(long key, long value) + { + if (size >= threshold) { + rehash(); + } + int index = findIndex(key); + + keys[index] = key; + values[index] = value; + size++; + } + + private final int findIndex(long key) { + int index = hash(key); + int delta = 0; + + while (values[index] != EMPTY && keys[index] != key) + { + if (delta == 0) { + delta = hash2(key); + } + index = (index + delta) % arraySize; + } + return index; + } + + private void rehash() + { + int oldSize = arraySize; + long[] oldKeys = keys; + long[] oldValues = values; + + if (pIndex < primes.length - 1) { + arraySize = primes[++pIndex]; + } else { + arraySize = arraySize * 2 + 1; + } + size = 0; + + makeArrays(arraySize); + for (int i = 0; i < oldSize; i++) { + if (oldValues[i] != EMPTY) { + put(oldKeys[i], oldValues[i]); + } + } + oldKeys = oldValues = null; // Help out the garbage collector + } + + + /** + * Produce a uniformly-distributed hash value from an integer key. + * This is essentially a linear congruential random number generator + * that uses the key as its seed value. 
+ */ + private final int hash(long key) + { + int h = (int)((key * 15821 + 1) % arraySize); + if (h < 0) { + h += arraySize; + } + return h; + } + + private final int hash2(long key) { + return arraySize - 2 - (int)(key % (arraySize-2) ); + } + + static private final int primes[] = { // 5, 17, 31, 47, // for testing + 61, 127, 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, + 131071, 262139, + }; + + private int pIndex = 0; + private int size = 0; + private int arraySize = primes[pIndex]; + private int threshold = (arraySize * 3) / 4; + + private long[] keys = new long[arraySize]; + private long[] values = new long[arraySize]; + + /** + * @internal + */ + static public long EMPTY = Long.MIN_VALUE; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarData.java b/main/classes/core/src/com/ibm/icu/impl/CalendarData.java new file mode 100644 index 00000000000..06a9c22a225 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CalendarData.java @@ -0,0 +1,167 @@ +/* + ******************************************************************************* + * Copyright (C) 2004-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.ArrayList; +import java.util.MissingResourceException; + +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.UResourceBundleIterator; + +/** + * This class abstracts access to calendar (Calendar and DateFormat) data. + * @internal ICU 3.0 + */ +public class CalendarData { + /** + * Construct a CalendarData from the given locale. + * @param loc locale to use. The 'calendar' keyword will be ignored. + * @param type calendar type. NULL indicates the gregorian calendar. + * No default lookup is done. 
+ */ + public CalendarData(ULocale loc, String type) { + this((ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, loc), type); + } + + public CalendarData(ICUResourceBundle b, String type) { + fBundle = b; + if((type == null) || (type.equals("")) || (type.equals("gregorian"))) { + fMainType = "gregorian"; + fFallbackType = null; + } else { + fMainType = type; + fFallbackType ="gregorian"; + } + } + + /** + * Load data for calendar. Note, this object owns the resources, do NOT call ures_close()! + * + * @param key Resource key to data + * @internal + */ + public ICUResourceBundle get(String key) { + try { + return fBundle.getWithFallback("calendar/" + fMainType + "/" + key); + } catch(MissingResourceException m) { + if(fFallbackType != null) { + return fBundle.getWithFallback("calendar/" + fFallbackType + "/" + key); + } + throw m; + + } + } + + /** + * Load data for calendar. Note, this object owns the resources, do NOT call ures_close()! + * There is an implicit key of 'format' + * data is located in: "calendar/key/format/subKey" + * for example, calendar/dayNames/format/abbreviated + * + * @param key Resource key to data + * @param subKey Resource key to data + * @internal + */ + public ICUResourceBundle get(String key, String subKey) { + try { + return fBundle.getWithFallback("calendar/" + fMainType + "/" + key + "/format/" + subKey); + } catch(MissingResourceException m) { + if(fFallbackType != null) { + return fBundle.getWithFallback("calendar/" + fFallbackType + "/" + key + "/format/" + subKey); + } + throw m; + + } + } + + /** + * Load data for calendar. Note, this object owns the resources, do NOT call ures_close()! 
+ * data is located in: "calendar/key/contextKey/subKey" + * for example, calendar/dayNames/stand-alone/narrow + * + * @param key Resource key to data + * @param contextKey Resource key to data + * @param subKey Resource key to data + * @internal + */ + public ICUResourceBundle get(String key, String contextKey, String subKey) { + try { + return fBundle.getWithFallback("calendar/" + fMainType + "/" + key + "/" + contextKey + "/" + subKey); + } catch(MissingResourceException m) { + if(fFallbackType != null) { + return fBundle.getWithFallback("calendar/" + fFallbackType + "/" + key + "/" + contextKey + "/" + subKey); + } + throw m; + + } + } + + public String[] getStringArray(String key) { + return get(key).getStringArray(); + } + + public String[] getStringArray(String key, String subKey) { + return get(key, subKey).getStringArray(); + } + + public String[] getStringArray(String key, String contextKey, String subKey) { + return get(key, contextKey, subKey).getStringArray(); + } + public String[] getEras(String subkey){ + ICUResourceBundle bundle = get("eras/"+subkey); + return bundle.getStringArray(); + } + public String[] getDateTimePatterns(){ + ICUResourceBundle bundle = get("DateTimePatterns"); + ArrayList list = new ArrayList(); + UResourceBundleIterator iter = bundle.getIterator(); + while (iter.hasNext()) { + UResourceBundle patResource = iter.next(); + int resourceType = patResource.getType(); + switch (resourceType) { + case UResourceBundle.STRING: + list.add(patResource.getString()); + break; + case UResourceBundle.ARRAY: + String[] items = patResource.getStringArray(); + list.add(items[0]); + break; + } + } + + return list.toArray(new String[list.size()]); + } + + public String[] getOverrides(){ + ICUResourceBundle bundle = get("DateTimePatterns"); + ArrayList list = new ArrayList(); + UResourceBundleIterator iter = bundle.getIterator(); + while (iter.hasNext()) { + UResourceBundle patResource = iter.next(); + int resourceType = patResource.getType(); + 
switch (resourceType) { + case UResourceBundle.STRING: + list.add(null); + break; + case UResourceBundle.ARRAY: + String[] items = patResource.getStringArray(); + list.add(items[1]); + break; + } + } + return list.toArray(new String[list.size()]); + } + + public ULocale getULocale() { + return fBundle.getULocale(); + } + + private ICUResourceBundle fBundle; + private String fMainType; + private String fFallbackType; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java b/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java new file mode 100644 index 00000000000..c43675be412 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java @@ -0,0 +1,100 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.MissingResourceException; + +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/** + * Calendar utilities. + * + * Date/time format service classes in com.ibm.icu.text packages + * sometimes need to access calendar internal APIs. But calendar + * classes are in com.ibm.icu.util package, so the package local + * cannot be used. This class is added in com.ibm.icu.impl + * package for sharing some calendar internal code for calendar + * and date format. + */ +public class CalendarUtil { + + private static ICUCache CALTYPE_CACHE = new SimpleCache(); + + private static final String CALKEY = "calendar"; + private static final String DEFCAL = "gregorian"; + + /** + * Returns a calendar type for the given locale. + * When the given locale has calendar keyword, the + * value of calendar keyword is returned. Otherwise, + * the default calendar type for the locale is returned. 
+ * @param loc The locale + * @return Calendar type string, such as "gregorian" + */ + public static String getCalendarType(ULocale loc) { + String calType = null; + + calType = loc.getKeywordValue(CALKEY); + if (calType != null) { + return calType; + } + + String baseLoc = loc.getBaseName(); + + // Check the cache + calType = CALTYPE_CACHE.get(baseLoc); + if (calType != null) { + return calType; + } + + // Canonicalize, so grandfathered variant will be transformed to keywords + ULocale canonical = ULocale.createCanonical(loc.toString()); + calType = canonical.getKeywordValue("calendar"); + + if (calType == null) { + // When calendar keyword is not available, use the locale's + // region to get the default calendar type + String region = canonical.getCountry(); + if (region.length() == 0) { + ULocale fullLoc = ULocale.addLikelySubtags(canonical); + region = fullLoc.getCountry(); + } + + // Read supplementalData to get the default calendar type for + // the locale's region + try { + UResourceBundle rb = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, + "supplementalData", + ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle calPref = rb.get("calendarPreferenceData"); + UResourceBundle order = null; + try { + order = calPref.get(region); + } catch (MissingResourceException mre) { + // use "001" as fallback + order = calPref.get("001"); + } + // the first calendar type is the default for the region + calType = order.getString(0); + } catch (MissingResourceException mre) { + // fall through + } + + if (calType == null) { + // Use "gregorian" as the last resort fallback. 
+ calType = DEFCAL; + } + } + + // Cache the resolved value for the next time + CALTYPE_CACHE.put(baseLoc, calType); + + return calType; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CharTrie.java b/main/classes/core/src/com/ibm/icu/impl/CharTrie.java new file mode 100644 index 00000000000..aee7264bb4f --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CharTrie.java @@ -0,0 +1,357 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.ibm.icu.text.UTF16; + +/** + * Trie implementation which stores data in char, 16 bits. + * @author synwee + * @see com.ibm.icu.impl.Trie + * @since release 2.1, Jan 01 2002 + */ + + // note that i need to handle the block calculations later, since chartrie + // in icu4c uses the same index array. +public class CharTrie extends Trie +{ + // public constructors --------------------------------------------- + + /** + *

    Creates a new Trie with the settings for the trie data.

    + *

    Unserialize the 32-bit-aligned input stream and use the data for the + * trie.

    + * @param inputStream file input stream to a ICU data file, containing + * the trie + * @param dataManipulate object which provides methods to parse the char + * data + * @throws IOException thrown when data reading fails + */ + public CharTrie(InputStream inputStream, + DataManipulate dataManipulate) throws IOException + { + super(inputStream, dataManipulate); + + if (!isCharTrie()) { + throw new IllegalArgumentException( + "Data given does not belong to a char trie."); + } + m_friendAgent_ = new FriendAgent(); + } + + /** + * Make a dummy CharTrie. + * A dummy trie is an empty runtime trie, used when a real data trie cannot + * be loaded. + * + * The trie always returns the initialValue, + * or the leadUnitValue for lead surrogate code points. + * The Latin-1 part is always set up to be linear. + * + * @param initialValue the initial value that is set for all code points + * @param leadUnitValue the value for lead surrogate code _units_ that do not + * have associated supplementary data + * @param dataManipulate object which provides methods to parse the char data + */ + @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770 + public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) { + super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate); + + int dataLength, latin1Length, i, limit; + char block; + + /* calculate the actual size of the dummy trie data */ + + /* max(Latin-1, block 0) */ + dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 
256 : DATA_BLOCK_LENGTH; + if(leadUnitValue!=initialValue) { + dataLength+=DATA_BLOCK_LENGTH; + } + m_data_=new char[dataLength]; + m_dataLength_=dataLength; + + m_initialValue_=(char)initialValue; + + /* fill the index and data arrays */ + + /* indexes are preset to 0 (block 0) */ + + /* Latin-1 data */ + for(i=0; i>INDEX_STAGE_2_SHIFT_); + i=0xd800>>INDEX_STAGE_1_SHIFT_; + limit=0xdc00>>INDEX_STAGE_1_SHIFT_; + for(; i> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + + (ch & INDEX_STAGE_3_MASK_); + return m_data_[offset]; + } + + // handle U+D800..U+10FFFF + offset = getCodePointOffset(ch); + + // return -1 if there is an error, in this case we return the default + // value: m_initialValue_ + return (offset >= 0) ? m_data_[offset] : m_initialValue_; + } + + /** + * Gets the value to the data which this lead surrogate character points + * to. + * Returned data may contain folding offset information for the next + * trailing surrogate character. + * This method does not guarantee correct results for trail surrogates. + * @param ch lead surrogate character + * @return data value + */ + public final char getLeadValue(char ch) + { + return m_data_[getLeadOffset(ch)]; + } + + /** + * Get the value associated with the BMP code point. + * Lead surrogate code points are treated as normal code points, with + * unfolded values that may differ from getLeadValue() results. + * @param ch the input BMP code point + * @return trie data value associated with the BMP codepoint + */ + public final char getBMPValue(char ch) + { + return m_data_[getBMPOffset(ch)]; + } + + /** + * Get the value associated with a pair of surrogates. + * @param lead a lead surrogate + * @param trail a trail surrogate + */ + public final char getSurrogateValue(char lead, char trail) + { + int offset = getSurrogateOffset(lead, trail); + if (offset > 0) { + return m_data_[offset]; + } + return m_initialValue_; + } + + /** + *

    Get a value from a folding offset (from the value of a lead surrogate) + * and a trail surrogate.

    + *

    If the + * @param leadvalue value associated with the lead surrogate which contains + * the folding offset + * @param trail surrogate + * @return trie data value associated with the trail character + */ + public final char getTrailValue(int leadvalue, char trail) + { + if (m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + int offset = m_dataManipulate_.getFoldingOffset(leadvalue); + if (offset > 0) { + return m_data_[getRawOffset(offset, + (char)(trail & SURROGATE_MASK_))]; + } + return m_initialValue_; + } + + /** + *

    Gets the latin 1 fast path value.

    + *

    Note this only works if latin 1 characters have their own linear + * array.

    + * @param ch latin 1 characters + * @return value associated with latin character + */ + public final char getLatin1LinearValue(char ch) + { + return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch]; + } + + /** + * Checks if the argument Trie has the same data as this Trie + * @param other Trie to check + * @return true if the argument Trie has the same data as this Trie, false + * otherwise + */ + ///CLOVER:OFF + public boolean equals(Object other) + { + boolean result = super.equals(other); + if (result && other instanceof CharTrie) { + CharTrie othertrie = (CharTrie)other; + return m_initialValue_ == othertrie.m_initialValue_; + } + return false; + } + ///CLOVER:ON + + // protected methods ----------------------------------------------- + + /** + *

    Parses the input stream and stores its trie content into a index and + * data array

    + * @param inputStream data input stream containing trie data + * @exception IOException thrown when data reading fails + */ + protected final void unserialize(InputStream inputStream) + throws IOException + { + DataInputStream input = new DataInputStream(inputStream); + int indexDataLength = m_dataOffset_ + m_dataLength_; + m_index_ = new char[indexDataLength]; + for (int i = 0; i < indexDataLength; i ++) { + m_index_[i] = input.readChar(); + } + m_data_ = m_index_; + m_initialValue_ = m_data_[m_dataOffset_]; + } + + /** + * Gets the offset to the data which the surrogate pair points to. + * @param lead lead surrogate + * @param trail trailing surrogate + * @return offset to data + */ + protected final int getSurrogateOffset(char lead, char trail) + { + if (m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + + // get fold position for the next trail surrogate + int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); + + // get the real data from the folded lead/trail units + if (offset > 0) { + return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); + } + + // return -1 if there is an error, in this case we return the default + // value: m_initialValue_ + return -1; + } + + /** + * Gets the value at the argument index. + * For use internally in TrieIterator. 
+ * @param index value at index will be retrieved + * @return 32 bit value + * @see com.ibm.icu.impl.TrieIterator + */ + protected final int getValue(int index) + { + return m_data_[index]; + } + + /** + * Gets the default initial value + * @return 32 bit value + */ + protected final int getInitialValue() + { + return m_initialValue_; + } + + // private data members -------------------------------------------- + + /** + * Default value + */ + private char m_initialValue_; + /** + * Array of char data + */ + private char m_data_[]; + /** + * Agent for friends + */ + private FriendAgent m_friendAgent_; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java b/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java new file mode 100644 index 00000000000..3e0dcd0f97c --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java @@ -0,0 +1,148 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.text.CharacterIterator; + +import com.ibm.icu.text.UCharacterIterator; + +/** + * This class is a wrapper around CharacterIterator and implements the + * UCharacterIterator protocol + * @author ram + */ + +public class CharacterIteratorWrapper extends UCharacterIterator { + + private CharacterIterator iterator; + + + public CharacterIteratorWrapper(CharacterIterator iter){ + if(iter==null){ + throw new IllegalArgumentException(); + } + iterator = iter; + } + + /** + * @see UCharacterIterator#current() + */ + public int current() { + int c = iterator.current(); + if(c==CharacterIterator.DONE){ + return DONE; + } + return c; + } + + /** + * @see UCharacterIterator#getLength() + */ + public int getLength() { + return (iterator.getEndIndex() - iterator.getBeginIndex()); + } + + /** + * @see UCharacterIterator#getIndex() + */ + public int getIndex() { + return iterator.getIndex(); + } + + /** + * @see UCharacterIterator#next() + */ + public int next() { + int i = iterator.current(); + iterator.next(); + if(i==CharacterIterator.DONE){ + return DONE; + } + return i; + } + + /** + * @see UCharacterIterator#previous() + */ + public int previous() { + int i = iterator.previous(); + if(i==CharacterIterator.DONE){ + return DONE; + } + return i; + } + + /** + * @see UCharacterIterator#setIndex(int) + */ + public void setIndex(int index) { + try{ + iterator.setIndex(index); + }catch(IllegalArgumentException e){ + throw new IndexOutOfBoundsException(); + } + } + + /** + * @see UCharacterIterator#setToLimit() + */ + public void setToLimit() { + iterator.setIndex(iterator.getEndIndex()); + } + + /** + * @see UCharacterIterator#getText(char[]) + */ + public int getText(char[] fillIn, int offset){ + int length =iterator.getEndIndex() - iterator.getBeginIndex(); + int currentIndex = iterator.getIndex(); + if(offset < 0 || offset + length > fillIn.length){ 
+ throw new IndexOutOfBoundsException(Integer.toString(length)); + } + + for (char ch = iterator.first(); ch != CharacterIterator.DONE; ch = iterator.next()) { + fillIn[offset++] = ch; + } + iterator.setIndex(currentIndex); + + return length; + } + + /** + * Creates a clone of this iterator. Clones the underlying character iterator. + * @see UCharacterIterator#clone() + */ + public Object clone(){ + try { + CharacterIteratorWrapper result = (CharacterIteratorWrapper) super.clone(); + result.iterator = (CharacterIterator)this.iterator.clone(); + return result; + } catch (CloneNotSupportedException e) { + return null; // only invoked if bad underlying character iterator + } + } + + + public int moveIndex(int delta){ + int length = iterator.getEndIndex() - iterator.getBeginIndex(); + int idx = iterator.getIndex()+delta; + + if(idx < 0) { + idx = 0; + } else if(idx > length) { + idx = length; + } + return iterator.setIndex(idx); + } + + /** + * @see UCharacterIterator#getCharacterIterator() + */ + public CharacterIterator getCharacterIterator(){ + return (CharacterIterator)iterator.clone(); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/CurrencyData.java b/main/classes/core/src/com/ibm/icu/impl/CurrencyData.java new file mode 100644 index 00000000000..c1af2ef9269 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/CurrencyData.java @@ -0,0 +1,152 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.Collections; +import java.util.Map; + +import com.ibm.icu.text.CurrencyDisplayNames; +import com.ibm.icu.util.ULocale; + +public class CurrencyData { + public static final CurrencyDisplayInfoProvider provider; + + public static interface CurrencyDisplayInfoProvider { + CurrencyDisplayInfo getInstance(ULocale locale, boolean withFallback); + boolean hasData(); + } + + public static abstract class CurrencyDisplayInfo extends CurrencyDisplayNames { + public abstract Map getUnitPatterns(); + public abstract CurrencyFormatInfo getFormatInfo(String isoCode); + public abstract CurrencySpacingInfo getSpacingInfo(); + } + + public static final class CurrencyFormatInfo { + public final String currencyPattern; + public final char monetarySeparator; + public final char monetaryGroupingSeparator; + + public CurrencyFormatInfo(String currencyPattern, char monetarySeparator, + char monetaryGroupingSeparator) { + this.currencyPattern = currencyPattern; + this.monetarySeparator = monetarySeparator; + this.monetaryGroupingSeparator = monetaryGroupingSeparator; + } + } + + public static final class CurrencySpacingInfo { + public final String beforeCurrencyMatch; + public final String beforeContextMatch; + public final String beforeInsert; + public final String afterCurrencyMatch; + public final String afterContextMatch; + public final String afterInsert; + + public CurrencySpacingInfo( + String beforeCurrencyMatch, String beforeContextMatch, String beforeInsert, + String afterCurrencyMatch, String afterContextMatch, String afterInsert) { + this.beforeCurrencyMatch = beforeCurrencyMatch; + this.beforeContextMatch = beforeContextMatch; + this.beforeInsert = beforeInsert; + this.afterCurrencyMatch = afterCurrencyMatch; + this.afterContextMatch = afterContextMatch; + this.afterInsert = afterInsert; + } + + + private static final String DEFAULT_CUR_MATCH 
= "[:letter:]"; + private static final String DEFAULT_CTX_MATCH = "[:digit:]"; + private static final String DEFAULT_INSERT = " "; + + public static final CurrencySpacingInfo DEFAULT = new CurrencySpacingInfo( + DEFAULT_CUR_MATCH, DEFAULT_CTX_MATCH, DEFAULT_INSERT, + DEFAULT_CUR_MATCH, DEFAULT_CTX_MATCH, DEFAULT_INSERT); + } + + static { + CurrencyDisplayInfoProvider temp = null; + try { + Class clzz = Class.forName("com.ibm.icu.impl.ICUCurrencyDisplayInfoProvider"); + temp = (CurrencyDisplayInfoProvider) clzz.newInstance(); + } catch (Throwable t) { + temp = new CurrencyDisplayInfoProvider() { + public CurrencyDisplayInfo getInstance(ULocale locale, boolean withFallback) { + return DefaultInfo.getWithFallback(withFallback); + } + + public boolean hasData() { + return false; + } + }; + } + provider = temp; + } + + public static class DefaultInfo extends CurrencyDisplayInfo { + private final boolean fallback; + + private DefaultInfo(boolean fallback) { + this.fallback = fallback; + } + + public static final CurrencyDisplayInfo getWithFallback(boolean fallback) { + return fallback ? FALLBACK_INSTANCE : NO_FALLBACK_INSTANCE; + } + + @Override + public String getName(String isoCode) { + return fallback ? isoCode : null; + } + + @Override + public String getPluralName(String isoCode, String pluralType) { + return fallback ? isoCode : null; + } + + @Override + public String getSymbol(String isoCode) { + return fallback ? isoCode : null; + } + + @Override + public Map symbolMap() { + return Collections.emptyMap(); + } + + @Override + public Map nameMap() { + return Collections.emptyMap(); + } + + @Override + public ULocale getLocale() { + return ULocale.ROOT; + } + + @Override + public Map getUnitPatterns() { + if (fallback) { + return Collections.emptyMap(); + } + return null; + } + + @Override + public CurrencyFormatInfo getFormatInfo(String isoCode) { + return null; + } + + @Override + public CurrencySpacingInfo getSpacingInfo() { + return fallback ? 
CurrencySpacingInfo.DEFAULT : null; + } + + private static final CurrencyDisplayInfo FALLBACK_INSTANCE = new DefaultInfo(true); + private static final CurrencyDisplayInfo NO_FALLBACK_INSTANCE = new DefaultInfo(false); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java b/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java new file mode 100644 index 00000000000..a6e2b8201da --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java @@ -0,0 +1,209 @@ +/* +******************************************************************************* +* Copyright (C) 2007-2009, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.math.BigInteger; +import java.text.FieldPosition; +import java.text.ParsePosition; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.math.BigDecimal; +import com.ibm.icu.text.NumberFormat; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/* + * NumberFormat implementation dedicated/optimized for DateFormat, + * used by SimpleDateFormat implementation. 
+ */ +public final class DateNumberFormat extends NumberFormat { + + private static final long serialVersionUID = -6315692826916346953L; + + private char zeroDigit; + private char minusSign; + private boolean positiveOnly = false; + + private transient char[] decimalBuf = new char[20]; // 20 digits is good enough to store Long.MAX_VALUE + + private static SimpleCache CACHE = new SimpleCache(); + + private int maxIntDigits; + private int minIntDigits; + + public DateNumberFormat(ULocale loc, char zeroDigitIn) { + initialize(loc,zeroDigitIn); + } + +/* public DateNumberFormat(char zeroDigit, char minusSign) { + this.zeroDigit = zeroDigit; + this.minusSign = minusSign; + } +*/ + + private void initialize(ULocale loc,char zeroDigitIn) { + char[] elems = CACHE.get(loc); + if (elems == null) { + // Missed cache + ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, loc); + String[] numberElements = rb.getStringArray("NumberElements"); + elems = new char[2]; + elems[0] = zeroDigitIn; + elems[1] = numberElements[6].charAt(0); + CACHE.put(loc, elems); + } + zeroDigit = elems[0]; + minusSign = elems[1]; + } + + public void setMaximumIntegerDigits(int newValue) { + maxIntDigits = newValue; + } + + public int getMaximumIntegerDigits() { + return maxIntDigits; + } + + public void setMinimumIntegerDigits(int newValue) { + minIntDigits = newValue; + } + + public int getMinimumIntegerDigits() { + return minIntDigits; + } + + /* For supporting SimpleDateFormat.parseInt */ + public void setParsePositiveOnly(boolean isPositiveOnly) { + positiveOnly = isPositiveOnly; + } + + public char getZeroDigit() { + return zeroDigit; + } + + public void setZeroDigit(char zero) { + zeroDigit = zero; + } + + public StringBuffer format(double number, StringBuffer toAppendTo, + FieldPosition pos) { + throw new UnsupportedOperationException("StringBuffer format(double, StringBuffer, FieldPostion) is not implemented"); + } + + public StringBuffer 
format(long numberL, StringBuffer toAppendTo, + FieldPosition pos) { + + if (numberL < 0) { + // negative + toAppendTo.append(minusSign); + } + + // Note: NumberFormat used by DateFormat only uses int numbers. + // Remainder operation on 32bit platform using long is significantly slower + // than int. So, this method casts long number into int. + int number = (int)numberL; + + int limit = decimalBuf.length < maxIntDigits ? decimalBuf.length : maxIntDigits; + int index = limit - 1; + while (true) { + decimalBuf[index] = (char)((number % 10) + zeroDigit); + number /= 10; + if (index == 0 || number == 0) { + break; + } + index--; + } + int padding = minIntDigits - (limit - index); + for (; padding > 0; padding--) { + decimalBuf[--index] = zeroDigit; + } + int length = limit - index; + toAppendTo.append(decimalBuf, index, length); + pos.setBeginIndex(0); + if (pos.getField() == NumberFormat.INTEGER_FIELD) { + pos.setEndIndex(length); + } else { + pos.setEndIndex(0); + } + return toAppendTo; + } + + public StringBuffer format(BigInteger number, StringBuffer toAppendTo, + FieldPosition pos) { + throw new UnsupportedOperationException("StringBuffer format(BigInteger, StringBuffer, FieldPostion) is not implemented"); + } + + public StringBuffer format(java.math.BigDecimal number, StringBuffer toAppendTo, + FieldPosition pos) { + throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented"); + } + + public StringBuffer format(BigDecimal number, + StringBuffer toAppendTo, FieldPosition pos) { + throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented"); + } + + /* + * Note: This method only parse integer numbers which can be represented by long + */ + public Number parse(String text, ParsePosition parsePosition) { + long num = 0; + boolean sawNumber = false; + boolean negative = false; + int base = parsePosition.getIndex(); + int offset = 0; + for (; 
base + offset < text.length(); offset++) { + char ch = text.charAt(base + offset); + if (offset == 0 && ch == minusSign) { + if (positiveOnly) { + break; + } + negative = true; + } else { + int digit = ch - zeroDigit; + if (digit < 0 || 9 < digit) { + digit = UCharacter.digit(ch); + } + if (0 <= digit && digit <= 9) { + sawNumber = true; + num = num * 10 + digit; + } else { + break; + } + } + } + Number result = null; + if (sawNumber) { + num = negative ? num * (-1) : num; + result = new Long(num); + parsePosition.setIndex(base + offset); + } + return result; + } + + public boolean equals(Object obj) { + if (obj == null || !super.equals(obj) || !(obj instanceof DateNumberFormat)) { + return false; + } + DateNumberFormat other = (DateNumberFormat)obj; + return (this.maxIntDigits == other.maxIntDigits + && this.minIntDigits == other.minIntDigits + && this.zeroDigit == other.zeroDigit + && this.minusSign == other.minusSign + && this.positiveOnly == other.positiveOnly); + } + + private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { + stream.defaultReadObject(); + // re-allocate the work buffer + decimalBuf = new char[20]; + } +} + +//eof diff --git a/main/classes/core/src/com/ibm/icu/impl/Differ.java b/main/classes/core/src/com/ibm/icu/impl/Differ.java new file mode 100644 index 00000000000..a4a29f3b4dc --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Differ.java @@ -0,0 +1,171 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.impl; + +/** VERY Basic Diff program. Compares two sequences of objects fed into it, and + * lets you know where they are different. 
+ * @author Mark Davis + * @version 1.0 + */ + +final public class Differ { +// public static final String copyright = +// "Copyright (C) 2000, International Business Machines Corporation and others. All Rights Reserved."; + + /** + * @param stackSize The size of the largest difference you expect. + * @param matchCount The number of items that have to be the same to count as a match + */ + public Differ(int stackSize, int matchCount) { + this.STACKSIZE = stackSize; + this.EQUALSIZE = matchCount; + a = new Object[stackSize+matchCount]; + b = new Object[stackSize+matchCount]; + } + + public void add (Object aStr, Object bStr) { + addA(aStr); + addB(bStr); + } + + public void addA (Object aStr) { + flush(); + a[aCount++] = aStr; + } + + public void addB (Object bStr) { + flush(); + b[bCount++] = bStr; + } + + public int getALine(int offset) { + return aLine + maxSame + offset; + } + + public Object getA(int offset) { + if (offset < 0) return last; + if (offset > aTop-maxSame) return next; + return a[offset]; + } + + public int getACount() { + return aTop-maxSame; + } + + public int getBCount() { + return bTop-maxSame; + } + + public int getBLine(int offset) { + return bLine + maxSame + offset; + } + + public Object getB(int offset) { + if (offset < 0) return last; + if (offset > bTop-maxSame) return next; + return b[offset]; + } + + public void checkMatch(boolean finalPass) { + // find the initial strings that are the same + int max = aCount; + if (max > bCount) max = bCount; + int i; + for (i = 0; i < max; ++i) { + if (!a[i].equals(b[i])) break; + } + // at this point, all items up to i are equal + maxSame = i; + aTop = bTop = maxSame; + if (maxSame > 0) last = a[maxSame-1]; + next = ""; + + if (finalPass) { + aTop = aCount; + bTop = bCount; + next = ""; + return; + } + + if (aCount - maxSame < EQUALSIZE || bCount - maxSame < EQUALSIZE) return; + + // now see if the last few a's occur anywhere in the b's, or vice versa + int match = find (a, aCount-EQUALSIZE, aCount, 
b, maxSame, bCount); + if (match != -1) { + aTop = aCount-EQUALSIZE; + bTop = match; + next = a[aTop]; + return; + } + match = find (b, bCount-EQUALSIZE, bCount, a, maxSame, aCount); + if (match != -1) { + bTop = bCount-EQUALSIZE; + aTop = match; + next = b[bTop]; + return; + } + if (aCount >= STACKSIZE || bCount >= STACKSIZE) { + // flush some of them + aCount = (aCount + maxSame) / 2; + bCount = (bCount + maxSame) / 2; + next = ""; + } + } + + /** Convenient utility + * finds a segment of the first array in the second array. + * @return -1 if not found, otherwise start position in b + */ + + public int find (Object[] aArr, int aStart, int aEnd, Object[] bArr, int bStart, int bEnd) { + int len = aEnd - aStart; + int bEndMinus = bEnd - len; + tryA: + for (int i = bStart; i <= bEndMinus; ++i) { + for (int j = 0; j < len; ++j) { + if (!bArr[i + j].equals(aArr[aStart + j])) continue tryA; + } + return i; // we have a match! + } + return -1; + } + + // ====================== PRIVATES ====================== + + private void flush() { + if (aTop != 0) { + int newCount = aCount-aTop; + System.arraycopy(a, aTop, a, 0, newCount); + aCount = newCount; + aLine += aTop; + aTop = 0; + } + + if (bTop != 0) { + int newCount = bCount-bTop; + System.arraycopy(b, bTop, b, 0, newCount); + bCount = newCount; + bLine += bTop; + bTop = 0; + } + } + + private int STACKSIZE; + private int EQUALSIZE; + + private Object [] a; + private Object [] b; + private Object last = ""; + private Object next = ""; + private int aCount = 0; + private int bCount = 0; + private int aLine = 1; + private int bLine = 1; + private int maxSame = 0, aTop = 0, bTop = 0; + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Grego.java b/main/classes/core/src/com/ibm/icu/impl/Grego.java new file mode 100644 index 00000000000..e5fbaccee88 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Grego.java @@ -0,0 +1,213 @@ +/** + ******************************************************************************* 
+ * Copyright (C) 2003-2008, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + * Partial port from ICU4C's Grego class in i18n/gregoimp.h. + * + * Methods ported, or moved here from OlsonTimeZone, initially + * for work on Jitterbug 5470: + * tzdata2006n Brazil incorrect fall-back date 2009-mar-01 + * Only the methods necessary for that work are provided - this is not a full + * port of ICU4C's Grego class (yet). + * + * These utilities are used by both OlsonTimeZone and SimpleTimeZone. + */ + +package com.ibm.icu.impl; + +import com.ibm.icu.util.Calendar; + +/** + * A utility class providing proleptic Gregorian calendar functions + * used by time zone and calendar code. Do not instantiate. + * + * Note: Unlike GregorianCalendar, all computations performed by this + * class occur in the pure proleptic GregorianCalendar. + */ +public class Grego { + + // Max/min milliseconds + public static final long MIN_MILLIS = -184303902528000000L; + public static final long MAX_MILLIS = 183882168921600000L; + + public static final int MILLIS_PER_SECOND = 1000; + public static final int MILLIS_PER_MINUTE = 60*MILLIS_PER_SECOND; + public static final int MILLIS_PER_HOUR = 60*MILLIS_PER_MINUTE; + public static final int MILLIS_PER_DAY = 24*MILLIS_PER_HOUR; + + // January 1, 1 CE Gregorian + private static final int JULIAN_1_CE = 1721426; + + // January 1, 1970 CE Gregorian + private static final int JULIAN_1970_CE = 2440588; + + private static final int[] MONTH_LENGTH = new int[] { + 31,28,31,30,31,30,31,31,30,31,30,31, + 31,29,31,30,31,30,31,31,30,31,30,31 + }; + + private static final int[] DAYS_BEFORE = new int[] { + 0,31,59,90,120,151,181,212,243,273,304,334, + 0,31,60,91,121,152,182,213,244,274,305,335 }; + + /** + * Return true if the given year is a leap year. + * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. 
+ * @return true if the year is a leap year + */ + public static final boolean isLeapYear(int year) { + // year&0x3 == year%4 + return ((year&0x3) == 0) && ((year%100 != 0) || (year%400 == 0)); + } + + /** + * Return the number of days in the given month. + * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. + * @param month 0-based month, with 0==Jan + * @return the number of days in the given month + */ + public static final int monthLength(int year, int month) { + return MONTH_LENGTH[month + (isLeapYear(year) ? 12 : 0)]; + } + + /** + * Return the length of a previous month of the Gregorian calendar. + * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. + * @param month 0-based month, with 0==Jan + * @return the number of days in the month previous to the given month + */ + public static final int previousMonthLength(int year, int month) { + return (month > 0) ? monthLength(year, month-1) : 31; + } + + /** + * Convert a year, month, and day-of-month, given in the proleptic + * Gregorian calendar, to 1970 epoch days. + * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. + * @param month 0-based month, with 0==Jan + * @param dom 1-based day of month + * @return the day number, with day 0 == Jan 1 1970 + */ + public static long fieldsToDay(int year, int month, int dom) { + int y = year - 1; + long julian = + 365 * y + floorDivide(y, 4) + (JULIAN_1_CE - 3) + // Julian cal + floorDivide(y, 400) - floorDivide(y, 100) + 2 + // => Gregorian cal + DAYS_BEFORE[month + (isLeapYear(year) ? 12 : 0)] + dom; // => month/dom + return julian - JULIAN_1970_CE; // JD => epoch day + } + + /** + * Return the day of week on the 1970-epoch day + * @param day the 1970-epoch day (integral value) + * @return the day of week + */ + public static int dayOfWeek(long day) { + long[] remainder = new long[1]; + floorDivide(day + Calendar.THURSDAY, 7, remainder); + int dayOfWeek = (int)remainder[0]; + dayOfWeek = (dayOfWeek == 0) ? 
7 : dayOfWeek; + return dayOfWeek; + } + + public static int[] dayToFields(long day, int[] fields) { + if (fields == null || fields.length < 5) { + fields = new int[5]; + } + // Convert from 1970 CE epoch to 1 CE epoch (Gregorian calendar) + day += JULIAN_1970_CE - JULIAN_1_CE; + + long[] rem = new long[1]; + long n400 = floorDivide(day, 146097, rem); + long n100 = floorDivide(rem[0], 36524, rem); + long n4 = floorDivide(rem[0], 1461, rem); + long n1 = floorDivide(rem[0], 365, rem); + + int year = (int)(400 * n400 + 100 * n100 + 4 * n4 + n1); + int dayOfYear = (int)rem[0]; + if (n100 == 4 || n1 == 4) { + dayOfYear = 365; // Dec 31 at end of 4- or 400-yr cycle + } + else { + ++year; + } + + boolean isLeap = isLeapYear(year); + int correction = 0; + int march1 = isLeap ? 60 : 59; // zero-based DOY for March 1 + if (dayOfYear >= march1) { + correction = isLeap ? 1 : 2; + } + int month = (12 * (dayOfYear + correction) + 6) / 367; // zero-based month + int dayOfMonth = dayOfYear - DAYS_BEFORE[isLeap ? 
month + 12 : month] + 1; // one-based DOM + int dayOfWeek = (int)((day + 2) % 7); // day 0 is Monday(2) + if (dayOfWeek < 1 /* Sunday */) { + dayOfWeek += 7; + } + dayOfYear++; // 1-based day of year + + fields[0] = year; + fields[1] = month; + fields[2] = dayOfMonth; + fields[3] = dayOfWeek; + fields[4] = dayOfYear; + + return fields; + } + + /* + * Convert long time to date/time fields + * + * result[0] : year + * result[1] : month + * result[2] : dayOfMonth + * result[3] : dayOfWeek + * result[4] : dayOfYear + * result[5] : millisecond in day + */ + public static int[] timeToFields(long time, int[] fields) { + if (fields == null || fields.length < 6) { + fields = new int[6]; + } + long[] remainder = new long[1]; + long day = floorDivide(time, 24*60*60*1000 /* milliseconds per day */, remainder); + dayToFields(day, fields); + fields[5] = (int)remainder[0]; + return fields; + } + + public static long floorDivide(long numerator, long denominator) { + // We do this computation in order to handle + // a numerator of Long.MIN_VALUE correctly + return (numerator >= 0) ? + numerator / denominator : + ((numerator + 1) / denominator) - 1; + } + + private static long floorDivide(long numerator, long denominator, long[] remainder) { + if (numerator >= 0) { + remainder[0] = numerator % denominator; + return numerator / denominator; + } + long quotient = ((numerator + 1) / denominator) - 1; + remainder[0] = numerator - (quotient * denominator); + return quotient; + } + + /* + * Returns the ordinal number for the specified day of week in the month. + * The valid return value is 1, 2, 3, 4 or -1. 
+ */ + public static int getDayOfWeekInMonth(int year, int month, int dayOfMonth) { + int weekInMonth = (dayOfMonth + 6)/7; + if (weekInMonth == 4) { + if (dayOfMonth + 7 > monthLength(year, month)) { + weekInMonth = -1; + } + } else if (weekInMonth == 5) { + weekInMonth = -1; + } + return weekInMonth; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java new file mode 100644 index 00000000000..ee231f25936 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -0,0 +1,157 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +public final class ICUBinary +{ + // public inner interface ------------------------------------------------ + + /** + * Special interface for data authentication + */ + public static interface Authenticate + { + /** + * Method used in ICUBinary.readHeader() to provide data format + * authentication. + * @param version version of the current data + * @return true if dataformat is an acceptable version, false otherwise + */ + public boolean isDataVersionAcceptable(byte version[]); + } + + // public methods -------------------------------------------------------- + + /** + *

    ICU data header reader method. + * Takes an ICU-generated big-endian input stream, parses the ICU standard + * file header and authenticates it.

    + *

    Header format: + *

      + *
    • Header size (char) + *
    • Magic number 1 (byte) + *
    • Magic number 2 (byte) + *
    • Rest of the header size (char) + *
    • Reserved word (char) + *
    • Big endian indicator (byte) + *
    • Character set family indicator (byte) + *
    • Size of a char (byte) for c++ and c use + *
    • Reserved byte (byte) + *
    • Data format identifier (4 bytes), each ICU data has its own + * identifier to distinguish them. [0] major [1] minor + * [2] milli [3] micro + *
    • Data version (4 bytes), the change version of the ICU data + * [0] major [1] minor [2] milli [3] micro + *
    • Unicode version (4 bytes) this ICU is based on. + *
    + *

    + *

    + * Example of use:
    + *

    +    * try {
    +    *    FileInputStream input = new FileInputStream(filename);
    +    *    if (Utility.readICUDataHeader(input, dataformat, dataversion,
    +    *                                  unicode)) {
    +    *        System.out.println("Verified file header, this is an ICU data file");
    +    *    }
    +    * } catch (IOException e) {
    +    *    System.out.println("This is not an ICU data file");
    +    * }
    +    * 
    + *

    + * @param inputStream input stream that contains the ICU data header + * @param dataFormatIDExpected Data format expected. An array of 4 bytes + * information about the data format. + * E.g. data format ID 1.2.3.4. will became an array of + * {1, 2, 3, 4} + * @param authenticate user defined extra data authentication. This value + * can be null, if no extra authentication is needed. + * @exception IOException thrown if there is a read error or + * when header authentication fails. + */ + public static final byte[] readHeader(InputStream inputStream, + byte dataFormatIDExpected[], + Authenticate authenticate) + throws IOException + { + DataInputStream input = new DataInputStream(inputStream); + char headersize = input.readChar(); + int readcount = 2; + //reading the header format + byte magic1 = input.readByte(); + readcount ++; + byte magic2 = input.readByte(); + readcount ++; + if (magic1 != MAGIC1 || magic2 != MAGIC2) { + throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_); + } + + input.readChar(); // reading size + readcount += 2; + input.readChar(); // reading reserved word + readcount += 2; + byte bigendian = input.readByte(); + readcount ++; + byte charset = input.readByte(); + readcount ++; + byte charsize = input.readByte(); + readcount ++; + input.readByte(); // reading reserved byte + readcount ++; + + byte dataFormatID[] = new byte[4]; + input.readFully(dataFormatID); + readcount += 4; + byte dataVersion[] = new byte[4]; + input.readFully(dataVersion); + readcount += 4; + byte unicodeVersion[] = new byte[4]; + input.readFully(unicodeVersion); + readcount += 4; + if (headersize < readcount) { + throw new IOException("Internal Error: Header size error"); + } + input.skipBytes(headersize - readcount); + + if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_ + || charsize != CHAR_SIZE_ + || !Arrays.equals(dataFormatIDExpected, dataFormatID) + || (authenticate != null + && !authenticate.isDataVersionAcceptable(dataVersion))) { + throw new 
IOException(HEADER_AUTHENTICATION_FAILED_); + } + return unicodeVersion; + } + + // private variables ------------------------------------------------- + + /** + * Magic numbers to authenticate the data file + */ + private static final byte MAGIC1 = (byte)0xda; + private static final byte MAGIC2 = (byte)0x27; + + /** + * File format authentication values + */ + private static final byte BIG_ENDIAN_ = 1; + private static final byte CHAR_SET_ = 0; + private static final byte CHAR_SIZE_ = 2; + + /** + * Error messages + */ + private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ = + "ICU data file error: Not an ICU data file"; + private static final String HEADER_AUTHENTICATION_FAILED_ = + "ICU data file error: Header authentication failed, please check if you have a valid ICU data file"; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java b/main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java new file mode 100644 index 00000000000..f19da11900a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java @@ -0,0 +1,61 @@ +/* +********************************************************************** +* Copyright (c) 2002-2010, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: November 5 2002 +* Since: ICU 2.4 +********************************************************************** +*/ +package com.ibm.icu.impl; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A DataInputStream that implements random-access seeking. For this + * to work, the size of the data stream must be known in advance, or + * the data must be supplied as a raw byte[] array. + * + * Seeking doesn't work directly on all streams. 
If a given stream + * doesn't support seeking, extract the bytes into a byte[] array and + * use the byte[] constructor. + */ +class ICUBinaryStream extends DataInputStream { + + /** + * Construct a stream from the given stream and size. + * @param stream the stream of data + * @param size the number of bytes that should be made available + * for seeking. Bytes beyond this may be read, but seeking will + * not work for offset >= size. + */ + public ICUBinaryStream(InputStream stream, int size) { + super(stream); + mark(size); + } + + /** + * Construct a stream from the given raw bytes. + */ + public ICUBinaryStream(byte[] raw) { + this(new ByteArrayInputStream(raw), raw.length); + } + + /** + * Seek to the given offset. Offset is from the position of the + * stream passed to the constructor, or from the start of the + * byte[] array. + */ + public void seek(int offset) throws IOException { + reset(); + int actual = skipBytes(offset); + if (actual != offset) { + throw new IllegalStateException("Skip(" + offset + ") only skipped " + + actual + " bytes"); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUCache.java b/main/classes/core/src/com/ibm/icu/impl/ICUCache.java new file mode 100644 index 00000000000..5e8a08c2e43 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUCache.java @@ -0,0 +1,21 @@ +/* + *************************************************************************** + * Copyright (c) 2007-2009 International Business Machines Corporation and * + * others. All rights reserved. 
* + *************************************************************************** +*/ + +package com.ibm.icu.impl; + +public interface ICUCache { + // Type of reference holding the Map instance + public static final int SOFT = 0; + public static final int WEAK = 1; + + // NULL object, which may be used for a cache key + public static final Object NULL = new Object(); + + public void clear(); + public void put(K key, V value); + public V get(Object key); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java b/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java new file mode 100644 index 00000000000..b875286fffe --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java @@ -0,0 +1,77 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.security.AccessControlException; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.MissingResourceException; +import java.util.Properties; + +/** + * ICUConfig is a class used for accessing ICU4J runtime configuration. + */ +public class ICUConfig { + public static final String CONFIG_PROPS_FILE = "/com/ibm/icu/ICUConfig.properties"; + private static final Properties CONFIG_PROPS; + + static { + CONFIG_PROPS = new Properties(); + try { + InputStream is = ICUData.getStream(CONFIG_PROPS_FILE); + if (is != null) { + CONFIG_PROPS.load(is); + } + } catch (MissingResourceException mre) { + // If it does not exist, ignore. + } catch (IOException ioe) { + // Any IO errors, ignore + } + } + + /** + * Get ICU configuration property value for the given name. 
+ * @param name The configuration property name + * @return The configuration property value, or null if it does not exist. + */ + public static String get(String name) { + return get(name, null); + } + + /** + * Get ICU configuration property value for the given name. + * @param name The configuration property name + * @param def The default value + * @return The configuration property value. If the property does not + * exist, def is returned. + */ + public static String get(String name, String def) { + String val = null; + final String fname = name; + if (System.getSecurityManager() != null) { + try { + val = AccessController.doPrivileged(new PrivilegedAction() { + public String run() { + return System.getProperty(fname); + } + }); + } catch (AccessControlException e) { + // ignore + // TODO log this message + } + } else { + val = System.getProperty(name); + } + + if (val == null) { + val = CONFIG_PROPS.getProperty(name, def); + } + return val; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUData.java b/main/classes/core/src/com/ibm/icu/impl/ICUData.java new file mode 100644 index 00000000000..b47b278db85 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUData.java @@ -0,0 +1,113 @@ +/* + ******************************************************************************* + * Copyright (C) 2004-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + * Created on Feb 4, 2004 + * + */ +package com.ibm.icu.impl; + +import java.io.InputStream; +import java.net.URL; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.MissingResourceException; + +/** + * Provides access to ICU data files as InputStreams. Implements security checking. + */ +public final class ICUData { + /* + * Return a URL to the ICU resource names resourceName. 
The + * resource name should either be an absolute path, or a path relative to + * com.ibm.icu.impl (e.g., most likely it is 'data/foo'). If required + * is true, throw an MissingResourceException instead of returning a null result. + */ + public static boolean exists(final String resourceName) { + URL i = null; + if (System.getSecurityManager() != null) { + i = AccessController.doPrivileged(new PrivilegedAction() { + public URL run() { + return ICUData.class.getResource(resourceName); + } + }); + } else { + i = ICUData.class.getResource(resourceName); + } + return i != null; + } + + private static InputStream getStream(final Class root, final String resourceName, boolean required) { + InputStream i = null; + + if (System.getSecurityManager() != null) { + i = AccessController.doPrivileged(new PrivilegedAction() { + public InputStream run() { + return root.getResourceAsStream(resourceName); + } + }); + } else { + i = root.getResourceAsStream(resourceName); + } + + if (i == null && required) { + throw new MissingResourceException("could not locate data " +resourceName, root.getPackage().getName(), resourceName); + } + return i; + } + + private static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) { + InputStream i = null; + if (System.getSecurityManager() != null) { + i = AccessController.doPrivileged(new PrivilegedAction() { + public InputStream run() { + return loader.getResourceAsStream(resourceName); + } + }); + } else { + i = loader.getResourceAsStream(resourceName); + } + if (i == null && required) { + throw new MissingResourceException("could not locate data", loader.toString(), resourceName); + } + return i; + } + + public static InputStream getStream(ClassLoader loader, String resourceName){ + return getStream(loader,resourceName, false); + } + + public static InputStream getRequiredStream(ClassLoader loader, String resourceName){ + return getStream(loader, resourceName, true); + } + + /* + * Convenience override 
that calls getStream(ICUData.class, resourceName, false); + */ + public static InputStream getStream(String resourceName) { + return getStream(ICUData.class, resourceName, false); + } + + /* + * Convenience method that calls getStream(ICUData.class, resourceName, true). + */ + public static InputStream getRequiredStream(String resourceName) { + return getStream(ICUData.class, resourceName, true); + } + + /* + * Convenience override that calls getStream(root, resourceName, false); + */ + public static InputStream getStream(Class root, String resourceName) { + return getStream(root, resourceName, false); + } + + /* + * Convenience method that calls getStream(root, resourceName, true). + */ + public static InputStream getRequiredStream(Class root, String resourceName) { + return getStream(root, resourceName, true); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java b/main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java new file mode 100644 index 00000000000..5c1f02ac44b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java @@ -0,0 +1,89 @@ +/* +******************************************************************************* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +*/ + +package com.ibm.icu.impl; + +import java.util.MissingResourceException; + +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.VersionInfo; + +public final class ICUDataVersion { + private static final String U_ICU_VERSION_BUNDLE = "icuver"; + private static final String U_ICU_STD_BUNDLE = "icustd"; + + private static final String U_ICU_DATA_KEY = "DataVersion"; + + /** + * This function loads up icuver and compares the data version to the wired-in ICU_DATA_VERSION. + * If icuver shows something less than ICU_DATA_VERSION it returns TRUE, else FALSE. 
The version + * found will be returned in the first fillin parameter (if non-null), and *isModified will be set + * to TRUE if "icustd" is NOT found. Thus, if the data has been repackaged or modified, "icustd" + * (standard ICU) will be missing, and the function will alert the caller that the data is not standard. + * + * @param dataVersionFillin icuver data version information to be filled in if not-null + * @return TRUE if ICU_DATA_VERSION is newer than icuver, else FALSE + */ + public static boolean isDataOlder(VersionInfo dataVersionFillin) { + boolean result = true; + + VersionInfo dataVersion = getDataVersion(); + + if (dataVersion!= null) { + if (dataVersion.compareTo(VersionInfo.ICU_DATA_VERSION) != -1) { + result = false; + } + + if (dataVersionFillin != null) { + dataVersionFillin = VersionInfo.getInstance(dataVersion.toString()); + } + } + + return result; + } + + /** + * This function tests whether "icustd" is available in the data. If the data has been repackaged or modified, "icustd" + * (standard ICU) will be missing, and the function will alert the caller that the data is not standard. + * + * @return TRUE if data has been modified, else FALSE + */ + public static boolean isDataModified() { + if (hasICUSTDBundle()) { + return false; + } + return true; + } + + /** + * This function retrieves the data version from icuver and returns a VersionInfo object with that version information. 
+ * + * @return Current icu data version + */ + public static VersionInfo getDataVersion() { + UResourceBundle icudatares = null; + try { + icudatares = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, ICUDataVersion.U_ICU_VERSION_BUNDLE, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + icudatares = icudatares.get(ICUDataVersion.U_ICU_DATA_KEY); + } catch (MissingResourceException ex) { + return null; + } + + return VersionInfo.getInstance(icudatares.getString()); + } + + private static boolean hasICUSTDBundle() { + try { + UResourceBundle.getBundleInstance(ICUDataVersion.U_ICU_STD_BUNDLE); + } catch (MissingResourceException ex) { + return false; + } + + return true; + } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUDebug.java b/main/classes/core/src/com/ibm/icu/impl/ICUDebug.java new file mode 100644 index 00000000000..0076ee75c32 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUDebug.java @@ -0,0 +1,129 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import com.ibm.icu.util.VersionInfo; + +public final class ICUDebug { + private static String params; + static { + try { + params = System.getProperty("ICUDebug"); + } + catch (SecurityException e) { + } + } + private static boolean debug = params != null; + private static boolean help = debug && (params.equals("") || params.indexOf("help") != -1); + + static { + if (debug) { + System.out.println("\nICUDebug=" + params); + } + } + + public static final String javaVersionString = System.getProperty("java.version", "0"); + public static final boolean isJDK14OrHigher; + public static final VersionInfo javaVersion; + + public static VersionInfo getInstanceLenient(String s) { + // Extracting ASCII numbers up to 4 delimited by + // any non digit characters + int[] ver = new int[4]; + boolean numeric = false; + int i = 0, vidx = 0; + while (i < s.length()) { + char c = s.charAt(i++); + if (c < '0' || c > '9') { + if (numeric) { + if (vidx == 3) { + // up to 4 numbers + break; + } + numeric = false; + vidx++; + } + } else { + if (numeric) { + ver[vidx] = ver[vidx] * 10 + (c - '0'); + if (ver[vidx] > 255) { + // VersionInfo does not support numbers + // greater than 255. 
In such case, we + // ignore the number and the rest + ver[vidx] = 0; + break; + } + } else { + numeric = true; + ver[vidx] = c - '0'; + } + } + } + + return VersionInfo.getInstance(ver[0], ver[1], ver[2], ver[3]); + } + + static { + javaVersion = getInstanceLenient(javaVersionString); + + VersionInfo java14Version = VersionInfo.getInstance("1.4.0"); + + isJDK14OrHigher = javaVersion.compareTo(java14Version) >= 0; + } + + public static boolean enabled() { + return debug; + } + + public static boolean enabled(String arg) { + if (debug) { + boolean result = params.indexOf(arg) != -1; + if (help) System.out.println("\nICUDebug.enabled(" + arg + ") = " + result); + return result; + } + return false; + } + + public static String value(String arg) { + String result = "false"; + if (debug) { + int index = params.indexOf(arg); + if (index != -1) { + index += arg.length(); + if (params.length() > index && params.charAt(index) == '=') { + index += 1; + int limit = params.indexOf(",", index); + result = params.substring(index, limit == -1 ? 
params.length() : limit); + } else { + result = "true"; + } + } + + if (help) System.out.println("\nICUDebug.value(" + arg + ") = " + result); + } + return result; + } + +// static public void main(String[] args) { +// // test +// String[] tests = { +// "1.3.0", +// "1.3.0_02", +// "1.3.1ea", +// "1.4.1b43", +// "___41___5", +// "x1.4.51xx89ea.7f", +// "1.6_2009", +// "10-100-1000-10000", +// "beta", +// "0", +// }; +// for (int i = 0; i < tests.length; ++i) { +// System.out.println(tests[i] + " => " + getInstanceLenient(tests[i])); +// } +// } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java b/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java new file mode 100644 index 00000000000..22bbbdbb641 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java @@ -0,0 +1,615 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.Collections; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +import com.ibm.icu.util.ULocale; + +public class ICULocaleService extends ICUService { + private ULocale fallbackLocale; + private String fallbackLocaleName; + + /** + * Construct an ICULocaleService. + */ + public ICULocaleService() { + } + + /** + * Construct an ICULocaleService with a name (useful for debugging). + */ + public ICULocaleService(String name) { + super(name); + } + + /** + * Convenience override for callers using locales. This calls + * get(ULocale, int, ULocale[]) with KIND_ANY for kind and null for + * actualReturn. + */ + public Object get(ULocale locale) { + return get(locale, LocaleKey.KIND_ANY, null); + } + + /** + * Convenience override for callers using locales. 
This calls + * get(ULocale, int, ULocale[]) with a null actualReturn. + */ + public Object get(ULocale locale, int kind) { + return get(locale, kind, null); + } + + /** + * Convenience override for callers using locales. This calls + * get(ULocale, int, ULocale[]) with KIND_ANY for kind. + */ + public Object get(ULocale locale, ULocale[] actualReturn) { + return get(locale, LocaleKey.KIND_ANY, actualReturn); + } + + /** + * Convenience override for callers using locales. This uses + * createKey(ULocale.toString(), kind) to create a key, calls getKey, and then + * if actualReturn is not null, returns the actualResult from + * getKey (stripping any prefix) into a ULocale. + */ + public Object get(ULocale locale, int kind, ULocale[] actualReturn) { + Key key = createKey(locale, kind); + if (actualReturn == null) { + return getKey(key); + } + + String[] temp = new String[1]; + Object result = getKey(key, temp); + if (result != null) { + int n = temp[0].indexOf("/"); + if (n >= 0) { + temp[0] = temp[0].substring(n+1); + } + actualReturn[0] = new ULocale(temp[0]); + } + return result; + } + + /** + * Convenience override for callers using locales. This calls + * registerObject(Object, ULocale, int kind, boolean visible) + * passing KIND_ANY for the kind, and true for the visibility. + */ + public Factory registerObject(Object obj, ULocale locale) { + return registerObject(obj, locale, LocaleKey.KIND_ANY, true); + } + + /** + * Convenience override for callers using locales. This calls + * registerObject(Object, ULocale, int kind, boolean visible) + * passing KIND_ANY for the kind. + */ + public Factory registerObject(Object obj, ULocale locale, boolean visible) { + return registerObject(obj, locale, LocaleKey.KIND_ANY, visible); + } + + /** + * Convenience function for callers using locales. This calls + * registerObject(Object, ULocale, int kind, boolean visible) + * passing true for the visibility. 
+ */ + public Factory registerObject(Object obj, ULocale locale, int kind) { + return registerObject(obj, locale, kind, true); + } + + /** + * Convenience function for callers using locales. This instantiates + * a SimpleLocaleKeyFactory, and registers the factory. + */ + public Factory registerObject(Object obj, ULocale locale, int kind, boolean visible) { + Factory factory = new SimpleLocaleKeyFactory(obj, locale, kind, visible); + return registerFactory(factory); + } + + /** + * Convenience method for callers using locales. This returns the standard + * Locale list, built from the Set of visible ids. + */ + public Locale[] getAvailableLocales() { + // TODO make this wrap getAvailableULocales later + Set visIDs = getVisibleIDs(); + Locale[] locales = new Locale[visIDs.size()]; + int n = 0; + for (String id : visIDs) { + Locale loc = LocaleUtility.getLocaleFromName(id); + locales[n++] = loc; + } + return locales; + } + + /** + * Convenience method for callers using locales. This returns the standard + * ULocale list, built from the Set of visible ids. + */ + public ULocale[] getAvailableULocales() { + Set visIDs = getVisibleIDs(); + ULocale[] locales = new ULocale[visIDs.size()]; + int n = 0; + for (String id : visIDs) { + locales[n++] = new ULocale(id); + } + return locales; + } + + /** + * A subclass of Key that implements a locale fallback mechanism. + * The first locale to search for is the locale provided by the + * client, and the fallback locale to search for is the current + * default locale. If a prefix is present, the currentDescriptor + * includes it before the locale proper, separated by "/". This + * is the default key instantiated by ICULocaleService.

    + * + *

    Canonicalization adjusts the locale string so that the + * section before the first understore is in lower case, and the rest + * is in upper case, with no trailing underscores.

    + */ + public static class LocaleKey extends ICUService.Key { + private int kind; + private int varstart; + private String primaryID; + private String fallbackID; + private String currentID; + + public static final int KIND_ANY = -1; + + /** + * Create a LocaleKey with canonical primary and fallback IDs. + */ + public static LocaleKey createWithCanonicalFallback(String primaryID, String canonicalFallbackID) { + return createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY); + } + + /** + * Create a LocaleKey with canonical primary and fallback IDs. + */ + public static LocaleKey createWithCanonicalFallback(String primaryID, String canonicalFallbackID, int kind) { + if (primaryID == null) { + return null; + } + String canonicalPrimaryID = ULocale.getName(primaryID); + return new LocaleKey(primaryID, canonicalPrimaryID, canonicalFallbackID, kind); + } + + /** + * Create a LocaleKey with canonical primary and fallback IDs. + */ + public static LocaleKey createWithCanonical(ULocale locale, String canonicalFallbackID, int kind) { + if (locale == null) { + return null; + } + String canonicalPrimaryID = locale.getName(); + return new LocaleKey(canonicalPrimaryID, canonicalPrimaryID, canonicalFallbackID, kind); + } + + /** + * PrimaryID is the user's requested locale string, + * canonicalPrimaryID is this string in canonical form, + * fallbackID is the current default locale's string in + * canonical form. 
+ */ + protected LocaleKey(String primaryID, String canonicalPrimaryID, String canonicalFallbackID, int kind) { + super(primaryID); + this.kind = kind; + + if (canonicalPrimaryID == null || canonicalPrimaryID.equalsIgnoreCase("root")) { + this.primaryID = ""; + this.fallbackID = null; + } else { + int idx = canonicalPrimaryID.indexOf('@'); + if (idx == 4 && canonicalPrimaryID.regionMatches(true, 0, "root", 0, 4)) { + this.primaryID = canonicalPrimaryID.substring(4); + this.varstart = 0; + this.fallbackID = null; + } else { + this.primaryID = canonicalPrimaryID; + this.varstart = idx; + + if (canonicalFallbackID == null || this.primaryID.equals(canonicalFallbackID)) { + this.fallbackID = ""; + } else { + this.fallbackID = canonicalFallbackID; + } + } + } + + this.currentID = varstart == -1 ? this.primaryID : this.primaryID.substring(0, varstart); + } + + /** + * Return the prefix associated with the kind, or null if the kind is KIND_ANY. + */ + public String prefix() { + return kind == KIND_ANY ? null : Integer.toString(kind()); + } + + /** + * Return the kind code associated with this key. + */ + public int kind() { + return kind; + } + + /** + * Return the (canonical) original ID. + */ + public String canonicalID() { + return primaryID; + } + + /** + * Return the (canonical) current ID, or null if no current id. + */ + public String currentID() { + return currentID; + } + + /** + * Return the (canonical) current descriptor, or null if no current id. + * Includes the keywords, whereas the ID does not include keywords. 
+ */ + public String currentDescriptor() { + String result = currentID(); + if (result != null) { + StringBuilder buf = new StringBuilder(); // default capacity 16 is usually good enough + if (kind != KIND_ANY) { + buf.append(prefix()); + } + buf.append('/'); + buf.append(result); + if (varstart != -1) { + buf.append(primaryID.substring(varstart, primaryID.length())); + } + result = buf.toString(); + } + return result; + } + + /** + * Convenience method to return the locale corresponding to the (canonical) original ID. + */ + public ULocale canonicalLocale() { + return new ULocale(primaryID); + } + + /** + * Convenience method to return the ulocale corresponding to the (canonical) currentID. + */ + public ULocale currentLocale() { + if (varstart == -1) { + return new ULocale(currentID); + } else { + return new ULocale(currentID + primaryID.substring(varstart)); + } + } + + /** + * If the key has a fallback, modify the key and return true, + * otherwise return false.

    + * + *

    First falls back through the primary ID, then through + * the fallbackID. The final fallback is "" (root) + * unless the primary id was "" (root), in which case + * there is no fallback. + */ + public boolean fallback() { + int x = currentID.lastIndexOf('_'); + if (x != -1) { + while (--x >= 0 && currentID.charAt(x) == '_') { // handle zh__PINYIN + } + currentID = currentID.substring(0, x+1); + return true; + } + if (fallbackID != null) { + currentID = fallbackID; + if (fallbackID.length() == 0) { + fallbackID = null; + } else { + fallbackID = ""; + } + return true; + } + currentID = null; + return false; + } + + /** + * If a key created from id would eventually fallback to match the + * canonical ID of this key, return true. + */ + public boolean isFallbackOf(String id) { + return LocaleUtility.isFallbackOf(canonicalID(), id); + } + } + + /** + * A subclass of Factory that uses LocaleKeys. If 'visible' the + * factory reports its IDs. + */ + public static abstract class LocaleKeyFactory implements Factory { + protected final String name; + protected final boolean visible; + + public static final boolean VISIBLE = true; + public static final boolean INVISIBLE = false; + + /** + * Constructor used by subclasses. + */ + protected LocaleKeyFactory(boolean visible) { + this.visible = visible; + this.name = null; + } + + /** + * Constructor used by subclasses. + */ + protected LocaleKeyFactory(boolean visible, String name) { + this.visible = visible; + this.name = name; + } + + /** + * Implement superclass abstract method. This checks the currentID of + * the key against the supported IDs, and passes the canonicalLocale and + * kind off to handleCreate (which subclasses must implement). 
+ */ + public Object create(Key key, ICUService service) { + if (handlesKey(key)) { + LocaleKey lkey = (LocaleKey)key; + int kind = lkey.kind(); + + ULocale uloc = lkey.currentLocale(); + return handleCreate(uloc, kind, service); + } else { + // System.out.println("factory: " + this + " did not support id: " + key.currentID()); + // System.out.println("supported ids: " + getSupportedIDs()); + } + return null; + } + + protected boolean handlesKey(Key key) { + if (key != null) { + String id = key.currentID(); + Set supported = getSupportedIDs(); + return supported.contains(id); + } + return false; + } + + /** + * Override of superclass method. + */ + public void updateVisibleIDs(Map result) { + Set cache = getSupportedIDs(); + for (String id : cache) { + if (visible) { + result.put(id, this); + } else { + result.remove(id); + } + } + } + + /** + * Return a localized name for the locale represented by id. + */ + public String getDisplayName(String id, ULocale locale) { + // assume if the user called this on us, we must have handled some fallback of this id + // if (isSupportedID(id)) { + if (locale == null) { + return id; + } + ULocale loc = new ULocale(id); + return loc.getDisplayName(locale); + // } + // return null; + } + + ///CLOVER:OFF + /** + * Utility method used by create(Key, ICUService). Subclasses can + * implement this instead of create. + */ + protected Object handleCreate(ULocale loc, int kind, ICUService service) { + return null; + } + ///CLOVER:ON + + /** + * Return true if this id is one the factory supports (visible or + * otherwise). + */ + protected boolean isSupportedID(String id) { + return getSupportedIDs().contains(id); + } + + /** + * Return the set of ids that this factory supports (visible or + * otherwise). This can be called often and might need to be + * cached if it is expensive to create. + */ + protected Set getSupportedIDs() { + return Collections.emptySet(); + } + + /** + * For debugging. 
+ */ + public String toString() { + StringBuilder buf = new StringBuilder(super.toString()); + if (name != null) { + buf.append(", name: "); + buf.append(name); + } + buf.append(", visible: "); + buf.append(visible); + return buf.toString(); + } + } + + /** + * A LocaleKeyFactory that just returns a single object for a kind/locale. + */ + public static class SimpleLocaleKeyFactory extends LocaleKeyFactory { + private final Object obj; + private final String id; + private final int kind; + + // TODO: remove when we no longer need this + public SimpleLocaleKeyFactory(Object obj, ULocale locale, int kind, boolean visible) { + this(obj, locale, kind, visible, null); + } + + public SimpleLocaleKeyFactory(Object obj, ULocale locale, int kind, boolean visible, String name) { + super(visible, name); + + this.obj = obj; + this.id = locale.getBaseName(); + this.kind = kind; + } + + /** + * Returns the service object if kind/locale match. Service is not used. + */ + public Object create(Key key, ICUService service) { + LocaleKey lkey = (LocaleKey)key; + if (kind == LocaleKey.KIND_ANY || kind == lkey.kind()) { + String keyID = lkey.currentID(); + if (id.equals(keyID)) { + return obj; + } + } + return null; + } + + protected boolean isSupportedID(String idToCheck) { + return this.id.equals(idToCheck); + } + + public void updateVisibleIDs(Map result) { + if (visible) { + result.put(id, this); + } else { + result.remove(id); + } + } + + public String toString() { + StringBuilder buf = new StringBuilder(super.toString()); + buf.append(", id: "); + buf.append(id); + buf.append(", kind: "); + buf.append(kind); + return buf.toString(); + } + } + + /** + * A LocaleKeyFactory that creates a service based on the ICU locale data. + * This is a base class for most ICU factories. Subclasses instantiate it + * with a constructor that takes a bundle name, which determines the supported + * IDs. Subclasses then override handleCreate to create the actual service + * object. 
The default implementation returns a resource bundle. + */ + public static class ICUResourceBundleFactory extends LocaleKeyFactory { + protected final String bundleName; + + /** + * Convenience constructor that uses the main ICU bundle name. + */ + public ICUResourceBundleFactory() { + this(ICUResourceBundle.ICU_BASE_NAME); + } + + /** + * A service factory based on ICU resource data in resources + * with the given name. + */ + public ICUResourceBundleFactory(String bundleName) { + super(true); + + this.bundleName = bundleName; + } + + /** + * Return the supported IDs. This is the set of all locale names for the bundleName. + */ + protected Set getSupportedIDs() { + return ICUResourceBundle.getFullLocaleNameSet(bundleName, loader()); + } + + /** + * Override of superclass method. + */ + public void updateVisibleIDs(Map result) { + Set visibleIDs = ICUResourceBundle.getAvailableLocaleNameSet(bundleName, loader()); // only visible ids + for (String id : visibleIDs) { + result.put(id, this); + } + } + + /** + * Create the service. The default implementation returns the resource bundle + * for the locale, ignoring kind, and service. + */ + protected Object handleCreate(ULocale loc, int kind, ICUService service) { + return ICUResourceBundle.getBundleInstance(bundleName, loc, loader()); + } + + protected ClassLoader loader() { + ClassLoader cl = getClass().getClassLoader(); + if (cl == null) { + cl = Utility.getFallbackClassLoader(); + } + return cl; + } + + public String toString() { + return super.toString() + ", bundle: " + bundleName; + } + } + + /** + * Return the name of the current fallback locale. If it has changed since this was + * last accessed, the service cache is cleared. 
+ */ + public String validateFallbackLocale() { + ULocale loc = ULocale.getDefault(); + if (loc != fallbackLocale) { + synchronized (this) { + if (loc != fallbackLocale) { + fallbackLocale = loc; + fallbackLocaleName = loc.getBaseName(); + clearServiceCache(); + } + } + } + return fallbackLocaleName; + } + + public Key createKey(String id) { + return LocaleKey.createWithCanonicalFallback(id, validateFallbackLocale()); + } + + public Key createKey(String id, int kind) { + return LocaleKey.createWithCanonicalFallback(id, validateFallbackLocale(), kind); + } + + public Key createKey(ULocale l, int kind) { + return LocaleKey.createWithCanonical(l, validateFallbackLocale(), kind); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICULogger.java b/main/classes/core/src/com/ibm/icu/impl/ICULogger.java new file mode 100644 index 00000000000..4ccd8067421 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICULogger.java @@ -0,0 +1,190 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.logging.ConsoleHandler; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * + * Extends the Java Logger class adding a method to turn off/on logging. + * Classes where logging is wanted contains a static ICULogger object + * with logging turned off by default unless the system property + * "icu4j.debug.logging" is set to "all" + * + * If "icu4j.debug.logging" is not set to "all", then the individual loggers needs + * to be turned on manually. (e.g. TimeZone.TimeZoneLogger.turnLoggingOn()) + *

    + * To use logging, the system property "icu4j.debug.logging" must be set (for example to "on" or "all"); + * otherwise the static ICULogger object will be null. This helps avoid unnecessary + * resource usage when logging is not desired. + *

    + * Examples:

    + * Usage in code + *

    + *
    + * public class Class {
    + *     // Create logger object (usually with the class name)
    + *     public static ICULogger ClassLogger = ICULogger.getICULogger(Class.class.getName());
    + *     
    + *     // Method that will use logger.
    + *     public boolean hasSomething(Object obj) {
    + *         if (obj == null) {
    + *              // Log that obj is null.
    + *              // Note: check both that the logger is non-null and that logging is turned on, to minimize resource usage when logging is not needed.
    + *              if (ClassLogger != null && ClassLogger.isLoggingOn()) {
    + *                  ClassLogger.warning("obj is null so false was returned by default.");
    + *              }
    + *             return false;
    + *         }
    + *         
    + *         ...
    + *         
    + *     }
    + * }
    + * 
    + *
    + * Turning on logging (using the default settings) + *
    + *
    + * java -Dicu4j.debug.logging=all program
    + * 
    + *
    + */ + +public class ICULogger extends Logger { + private static enum LOGGER_STATUS { ON, OFF, NULL }; + private static final String GLOBAL_FLAG_TURN_ON_LOGGING = "all"; + private static final String SYSTEM_PROP_LOGGER = "icu4j.debug.logging"; + + private LOGGER_STATUS currentStatus; + + /** + * ICULogger constructor that calls the parent constructor with the desired parameters. + */ + private ICULogger(String name, String resourceBundleName) { + super(name, resourceBundleName); + } + + /** + * Set the status to either on or off. Switching to off sets the logger level to OFF; + * switching from off back to on resets the logger level to INFO. + * Does nothing if the status is unchanged. + */ + private void setStatus(LOGGER_STATUS newStatus) { + if (currentStatus != newStatus) { + /* Default to level INFO */ + if (currentStatus == LOGGER_STATUS.OFF && newStatus == LOGGER_STATUS.ON) { + this.setLevel(Level.INFO); + } + + currentStatus = newStatus; + + if (currentStatus == LOGGER_STATUS.OFF){ + this.setLevel(Level.OFF); + } + } + } + + /** + * Check the system property SYSTEM_PROP_LOGGER. + * Returns ON if it is set to "all", OFF if it is set to any other value, and NULL + * if it is not set or cannot be read because of a SecurityException. + */ + private static LOGGER_STATUS checkGlobalLoggingFlag() { + try { + String prop = System.getProperty(SYSTEM_PROP_LOGGER); + + if (prop != null) { + if (prop.equals(GLOBAL_FLAG_TURN_ON_LOGGING)) { + return LOGGER_STATUS.ON; + } + return LOGGER_STATUS.OFF; + } + } catch (SecurityException e) { + // Ignore the security exception and fall-through + } + + return LOGGER_STATUS.NULL; + } + + /** + * Instantiates a new ICULogger object with logging turned off by default + * unless the system property "icu4j.debug.logging" is set to "all". + * + * @param name the name to be used by the logger (usually the class name) + * @return a new ICULogger object, or null if the system property "icu4j.debug.logging" is not set + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release.
+ */ + public static ICULogger getICULogger(String name) { + return getICULogger(name, null); + } + + /** + * Instantiates a new ICULogger object with logging turned off by default + * unless the system property "icu4j.debug.logging" is set to "all". + * A ConsoleHandler is added to every logger that is created. + * + * @param name the name to be used by the logger (usually the class name) + * @param resourceBundleName name to localize messages (can be null) + * @return a new ICULogger object, or null if the system property "icu4j.debug.logging" is not set + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public static ICULogger getICULogger(String name, String resourceBundleName) { + LOGGER_STATUS flag = checkGlobalLoggingFlag(); + if (flag != LOGGER_STATUS.NULL) { + ICULogger logger = new ICULogger(name, resourceBundleName); + + /* Add a default handler to logger*/ + logger.addHandler(new ConsoleHandler()); + + /* Turn off logging by default unless SYSTEM_PROP_LOGGER property is set to "all" */ + if (flag == LOGGER_STATUS.ON) { + logger.turnOnLogging(); + } else { + logger.turnOffLogging(); + } + + return logger; + } + return null; + } + + /** + * Determines whether logging is turned on. + * + * @return true if logging is turned on, false otherwise. + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public boolean isLoggingOn() { + if (currentStatus == LOGGER_STATUS.ON) { + return true; + } else { + return false; + } + } + + /** + * Turn logging on. + * + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public void turnOnLogging() { + setStatus(LOGGER_STATUS.ON); + } + + /** + * Turn logging off. + * + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release.
+ */ + public void turnOffLogging() { + setStatus(LOGGER_STATUS.OFF); + } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java b/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java new file mode 100644 index 00000000000..90c9206c102 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java @@ -0,0 +1,169 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.ArrayList; +import java.util.EventListener; +import java.util.Iterator; +import java.util.List; + +/** + *

    Abstract implementation of a notification facility. Clients add + * EventListeners with addListener and remove them with removeListener. + * Notifiers call notifyChanged when they wish to notify listeners. + * This queues the listener list on the notification thread, which + * eventually dequeues the list and calls notifyListener on each + * listener in the list.

    + * + *

    Subclasses override acceptsListener and notifyListener + * to add type-safe notification. AcceptsListener should return + * true if the listener is of the appropriate type; ICUNotifier + * itself will ensure the listener is non-null and that the + * identical listener is not already registered with the Notifier. + * NotifyListener should cast the listener to the appropriate + * type and call the appropriate method on the listener. + */ +public abstract class ICUNotifier { + private final Object notifyLock = new Object(); + private NotifyThread notifyThread; + private List listeners; + + /** + * Add a listener to be notified when notifyChanged is called. + * The listener must not be null. AcceptsListener must return + * true for the listener. Attempts to concurrently + * register the identical listener more than once will be + * silently ignored. + * + * @throws NullPointerException if the listener is null + * @throws IllegalStateException if acceptsListener returns false for the listener + */ + public void addListener(EventListener l) { + if (l == null) { + throw new NullPointerException(); + } + + if (acceptsListener(l)) { + synchronized (notifyLock) { + if (listeners == null) { + listeners = new ArrayList(); + } else { + // identity equality check + for (EventListener ll : listeners) { + if (ll == l) { + return; + } + } + } + + listeners.add(l); + } + } else { + throw new IllegalStateException("Listener invalid for this notifier."); + } + } + + /** + * Stop notifying this listener. The listener must + * not be null. Attempts to remove a listener that is + * not registered will be silently ignored. + * + * @throws NullPointerException if the listener is null + */ + public void removeListener(EventListener l) { + if (l == null) { + throw new NullPointerException(); + } + synchronized (notifyLock) { + if (listeners != null) { + // identity equality check + Iterator iter = listeners.iterator(); + while (iter.hasNext()) { + if (iter.next() == l) { + iter.remove(); + if (listeners.size() == 0) { + listeners = null; + } + return; + } + } + } + } + } + + /** + * Queue a notification on the notification thread for the current + * listeners.
When the thread dequeues the notification, notifyListener + * is called on each listener from the notification thread. + * The notification thread is created and started as a daemon thread + * the first time a notification is queued. Does nothing if no listeners are registered. + */ + public void notifyChanged() { + if (listeners != null) { + synchronized (notifyLock) { + if (listeners != null) { + if (notifyThread == null) { + notifyThread = new NotifyThread(this); + notifyThread.setDaemon(true); + notifyThread.start(); + } + notifyThread.queue(listeners.toArray(new EventListener[listeners.size()])); + } + } + } + } + + /** + * The notification thread. + */ + private static class NotifyThread extends Thread { + private final ICUNotifier notifier; + private final List queue = new ArrayList(); + + NotifyThread(ICUNotifier notifier) { + this.notifier = notifier; + } + + /** + * Queue the notification on the thread. + */ + public void queue(EventListener[] list) { + synchronized (this) { + queue.add(list); + notify(); + } + } + + /** + * Wait for a notification to be queued, then notify all + * listeners listed in the notification. This loop never exits; + * an InterruptedException is ignored and waiting resumes. + */ + public void run() { + EventListener[] list; + while (true) { + try { + synchronized (this) { + while (queue.isEmpty()) { + wait(); + } + list = queue.remove(0); + } + + for (int i = 0; i < list.length; ++i) { + notifier.notifyListener(list[i]); + } + } + catch (InterruptedException e) { + // ignored: keep servicing the queue + } + } + } + } + + /** + * Subclasses implement this to return true if the listener is + * of the appropriate type. + */ + protected abstract boolean acceptsListener(EventListener l); + + /** + * Subclasses implement this to notify the listener.
+ */ + protected abstract void notifyListener(EventListener l); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java b/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java new file mode 100644 index 00000000000..ec150708983 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java @@ -0,0 +1,297 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2006, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +// See Allan Holub's 1999 column in JavaWorld, and Doug Lea's code for RWLocks with writer preference. + + +/** + *

    A simple Reader/Writer lock. This assumes that there will + * be little writing contention. It also doesn't allow + * active readers to acquire and release a write lock, or + * deal with priority inversion issues.

    + * + *

    Access to the lock should be enclosed in a try/finally block + * in order to ensure that the lock is always released in case of + * exceptions:

    + * try {
    + *     lock.acquireRead();
    + *     // use service protected by the lock
    + * }
    + * finally {
    + *     lock.releaseRead();
    + * }
    + * 

    + * + *

    The lock provides utility methods getStats and clearStats + * to return statistics on the use of the lock.

    + */ +public class ICURWLock { + private Object writeLock = new Object(); + private Object readLock = new Object(); + private int wwc; // waiting writers + private int rc; // active readers, -1 if there's an active writer + private int wrc; // waiting readers + + private Stats stats = new Stats(); // maybe don't init to start... + + /** + * Internal class used to gather statistics on the RWLock. + */ + public final static class Stats { + /** + * Number of times read access granted (read count). + */ + public int _rc; + + /** + * Number of times concurrent read access granted (multiple read count). + */ + public int _mrc; + + /** + * Number of times blocked for read (waiting reader count). + */ + public int _wrc; // wait for read + + /** + * Number of times write access granted (writer count). + */ + public int _wc; + + /** + * Number of times blocked for write (waiting writer count). + */ + public int _wwc; + + private Stats() { + } + + private Stats(int rc, int mrc, int wrc, int wc, int wwc) { + this._rc = rc; + this._mrc = mrc; + this._wrc = wrc; + this._wc = wc; + this._wwc = wwc; + } + + private Stats(Stats rhs) { + this(rhs._rc, rhs._mrc, rhs._wrc, rhs._wc, rhs._wwc); + } + + /** + * Return a string listing all the stats. + */ + public String toString() { + return " rc: " + _rc + + " mrc: " + _mrc + + " wrc: " + _wrc + + " wc: " + _wc + + " wwc: " + _wwc; + } + } + + /** + * Reset the stats. Returns existing stats, if any. + */ + public synchronized Stats resetStats() { + Stats result = stats; + stats = new Stats(); + return result; + } + + /** + * Clear the stats (stop collecting stats). Returns existing stats, if any. + */ + public synchronized Stats clearStats() { + Stats result = stats; + stats = null; + return result; + } + + /** + * Return a snapshot of the current stats. This does not reset the stats. + */ + public synchronized Stats getStats() { + return stats == null ? 
null : new Stats(stats); + } + + // utilities + + private synchronized boolean gotRead() { + ++rc; + if (stats != null) { + ++stats._rc; + if (rc > 1) ++stats._mrc; + } + return true; + } + + private synchronized boolean getRead() { + if (rc >= 0 && wwc == 0) { + return gotRead(); + } + ++wrc; + return false; + } + + private synchronized boolean retryRead() { + if (stats != null) ++stats._wrc; + if (rc >= 0 && wwc == 0) { + --wrc; + return gotRead(); + } + return false; + } + + private synchronized boolean finishRead() { + if (rc > 0) { + return (0 == --rc && wwc > 0); + } + throw new IllegalStateException("no current reader to release"); + } + + private synchronized boolean gotWrite() { + rc = -1; + if (stats != null) { + ++stats._wc; + } + return true; + } + + private synchronized boolean getWrite() { + if (rc == 0) { + return gotWrite(); + } + ++wwc; + return false; + } + + private synchronized boolean retryWrite() { + if (stats != null) ++stats._wwc; + if (rc == 0) { + --wwc; + return gotWrite(); + } + return false; + } + + private static final int NOTIFY_NONE = 0; + private static final int NOTIFY_WRITERS = 1; + private static final int NOTIFY_READERS = 2; + + private synchronized int finishWrite() { + if (rc < 0) { + rc = 0; + if (wwc > 0) { + return NOTIFY_WRITERS; + } else if (wrc > 0) { + return NOTIFY_READERS; + } else { + return NOTIFY_NONE; + } + } + throw new IllegalStateException("no current writer to release"); + } + + /** + *

    Acquire a read lock, blocking until a read lock is + * available. Multiple readers can concurrently hold the read + * lock.

    + * + *

    If there's an active writer, or a waiting writer, increment the + * waiting reader count and block on the internal read monitor. Otherwise + * increment the active reader count and return. Caller must call + * releaseRead when done (for example, in a finally block).

    + */ + public void acquireRead() { + if (!getRead()) { + for (;;) { + try { + synchronized (readLock) { + readLock.wait(); + } + // woken up: re-check whether a read is now allowed + if (retryRead()) { + return; + } + } + catch (InterruptedException e) { + // interruption is ignored: keep waiting until the read lock is obtained + } + } + } + } + + /** + *

    Release a read lock and return. An IllegalStateException will be thrown + * if a read lock is not currently held.

    + * + *

    If this is the last active reader and writers are waiting, notify one + * waiting writer. Call when finished with work + * controlled by acquireRead.

    + */ + public void releaseRead() { + if (finishRead()) { + synchronized (writeLock) { + writeLock.notify(); + } + } + } + + /** + *

    Acquire the write lock, blocking until the write lock is + * available. Only one writer can acquire the write lock, and + * when held, no readers can acquire the read lock.

    + * + *

    If there are no active readers and no active writer, mark as + * having an active writer and return. Otherwise, increment the + * waiting writer count and block on the internal write monitor. Caller + * must call releaseWrite when done (for example, in a finally + * block).

    + */ + public void acquireWrite() { + if (!getWrite()) { + for (;;) { + try { + synchronized (writeLock) { + writeLock.wait(); + } + // woken up: re-check whether the write lock can now be taken + if (retryWrite()) { + return; + } + } + catch (InterruptedException e) { + // interruption is ignored: keep waiting until the write lock is obtained + } + } + } + } + + /** + *

    Release the write lock and return. An IllegalStateException will be thrown + * if the write lock is not currently held.

    + * + *

    If there are waiting writers, notify one of them. Otherwise, if + * there are waiting readers, notify all of them so that they can + * retry acquiring the read lock. Call when finished with work controlled by + * acquireWrite.

    + */ + public void releaseWrite() { + switch (finishWrite()) { + case NOTIFY_WRITERS: + synchronized (writeLock) { + writeLock.notify(); + } + break; + case NOTIFY_READERS: + synchronized (readLock) { + readLock.notifyAll(); + } + break; + case NOTIFY_NONE: + break; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java new file mode 100644 index 00000000000..5c2219c3483 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java @@ -0,0 +1,1422 @@ +/* + * ***************************************************************************** + * Copyright (C) 2005-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + * ***************************************************************************** + */ + +package com.ibm.icu.impl; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.lang.ref.SoftReference; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.MissingResourceException; +import java.util.ResourceBundle; +import java.util.Set; +import java.util.Vector; +import java.util.concurrent.ConcurrentHashMap; + +import com.ibm.icu.impl.URLHandler.URLVisitor; +import com.ibm.icu.util.StringTokenizer; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.UResourceBundleIterator; +import com.ibm.icu.util.VersionInfo; + +public class ICUResourceBundle extends UResourceBundle { + /** + * The data path to be used with getBundleInstance API + */ + protected static final String ICU_DATA_PATH = "com/ibm/icu/impl/"; + /** + * The data path to be used with 
getBundleInstance API + */ + public static final String ICU_BUNDLE = "data/icudt" + VersionInfo.ICU_DATA_VERSION_PATH; + + /** + * The base name of ICU data to be used with getBundleInstance API + */ + public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE; + + /** + * The base name of collation data to be used with getBundleInstance API + */ + public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll"; + + /** + * The base name of rbbi data to be used with getData API + */ + public static final String ICU_BRKITR_NAME = "/brkitr"; + + /** + * The base name of rbbi data to be used with getBundleInstance API + */ + public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + ICU_BRKITR_NAME; + + /** + * The base name of rbnf data to be used with getBundleInstance API + */ + public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf"; + + /** + * The base name of transliterator data to be used with getBundleInstance API + */ + public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit"; + + public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang"; + public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr"; + public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region"; + public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone"; + + /** + * The actual path of the resource + */ + protected String resPath; + + /** + * The class loader constant to be used with getBundleInstance API + */ + public static final ClassLoader ICU_DATA_CLASS_LOADER; + static { + ClassLoader loader = ICUData.class.getClassLoader(); + if (loader == null) { + loader = Utility.getFallbackClassLoader(); + } + ICU_DATA_CLASS_LOADER = loader; + } + + /** + * The name of the resource containing the installed locales + */ + protected static final String INSTALLED_LOCALES = "InstalledLocales"; + + public static final int FROM_FALLBACK = 1, FROM_ROOT = 2, FROM_DEFAULT = 3, 
FROM_LOCALE = 4; + + private int loadingStatus = -1; + + /** + * Sets the loading status of this resource to the given value. + */ + public void setLoadingStatus(int newStatus) { + loadingStatus = newStatus; + } + /** + * Returns the loading status of a particular resource. + * + * @return FROM_FALLBACK if the resource is fetched from fallback bundle + * FROM_ROOT if the resource is fetched from root bundle. + * FROM_DEFAULT if the resource is fetched from the default locale. + * FROM_LOCALE if the resource is fetched from the requested locale. + * -1 if no loading status has been set. + */ + public int getLoadingStatus() { + return loadingStatus; + } + + /** + * Sets the loading status by comparing this bundle's locale ID with the requested locale: + * FROM_ROOT if this bundle's locale ID is "root", FROM_LOCALE if it equals + * requestedLocale, and FROM_FALLBACK otherwise. + */ + public void setLoadingStatus(String requestedLocale){ + String locale = getLocaleID(); + if(locale.equals("root")) { + setLoadingStatus(FROM_ROOT); + } else if(locale.equals(requestedLocale)) { + setLoadingStatus(FROM_LOCALE); + } else { + setLoadingStatus(FROM_FALLBACK); + } + } + + /** + * Returns the respath of this bundle + * @return the respath of the bundle + */ + public String getResPath(){ + return resPath; + } + + /** + * Returns a functionally equivalent locale, considering keywords as well, for the specified keyword. + * @param baseName resource specifier + * @param loader class loader used to look up the available locales when isAvailable is non-null + * @param resName top level resource to consider (such as "collations") + * @param keyword a particular keyword to consider (such as "collation" ) + * @param locID The requested locale + * @param isAvailable If non-null, 1-element array of fillin parameter that indicates whether the + * requested locale was available. The locale is defined as 'available' if it physically + * exists within the specified tree and included in 'InstalledLocales'. + * @param omitDefault if true, omit keyword and value if default.
+ * 'de_DE\@collation=standard' -> 'de_DE' + * @return the locale + * @internal ICU 3.0 + */ + public static final ULocale getFunctionalEquivalent(String baseName, ClassLoader loader, + String resName, String keyword, ULocale locID, + boolean isAvailable[], boolean omitDefault) { + String kwVal = locID.getKeywordValue(keyword); + String baseLoc = locID.getBaseName(); + String defStr = null; + ULocale parent = new ULocale(baseLoc); + ULocale defLoc = null; // locale where default (found) resource is + boolean lookForDefault = false; // true if kwVal needs to be set + ULocale fullBase = null; // base locale of found (target) resource + int defDepth = 0; // depth of 'default' marker + int resDepth = 0; // depth of found resource; + + if ((kwVal == null) || (kwVal.length() == 0) + || kwVal.equals(DEFAULT_TAG)) { + kwVal = ""; // default tag is treated as no keyword + lookForDefault = true; + } + + // Check top level locale first + ICUResourceBundle r = null; + + r = (ICUResourceBundle) UResourceBundle.getBundleInstance(baseName, parent); + if (isAvailable != null) { + isAvailable[0] = false; + ULocale[] availableULocales = getAvailEntry(baseName, loader).getULocaleList(); + for (int i = 0; i < availableULocales.length; i++) { + if (parent.equals(availableULocales[i])) { + isAvailable[0] = true; + break; + } + } + } + // determine in which locale (if any) the currently relevant 'default' is + do { + try { + ICUResourceBundle irb = (ICUResourceBundle) r.get(resName); + defStr = irb.getString(DEFAULT_TAG); + if (lookForDefault == true) { + kwVal = defStr; + lookForDefault = false; + } + defLoc = r.getULocale(); + } catch (MissingResourceException t) { + // Ignore error and continue search. 
+ } + if (defLoc == null) { + r = (ICUResourceBundle) r.getParent(); + defDepth++; + } + } while ((r != null) && (defLoc == null)); + + // Now, search for the named resource + parent = new ULocale(baseLoc); + r = (ICUResourceBundle) UResourceBundle.getBundleInstance(baseName, parent); + // determine in which locale (if any) the named resource is located + do { + try { + ICUResourceBundle irb = (ICUResourceBundle)r.get(resName); + /* UResourceBundle urb = */irb.get(kwVal); + fullBase = irb.getULocale(); + // If the get() completed, we have the full base locale + // If we fell back to an ancestor of the old 'default', + // we need to re calculate the "default" keyword. + if ((fullBase != null) && ((resDepth) > defDepth)) { + defStr = irb.getString(DEFAULT_TAG); + defLoc = r.getULocale(); + defDepth = resDepth; + } + } catch (MissingResourceException t) { + // Ignore error, + } + if (fullBase == null) { + r = (ICUResourceBundle) r.getParent(); + resDepth++; + } + } while ((r != null) && (fullBase == null)); + + if (fullBase == null && // Could not find resource 'kwVal' + (defStr != null) && // default was defined + !defStr.equals(kwVal)) { // kwVal is not default + // couldn't find requested resource. Fall back to default. + kwVal = defStr; // Fall back to default. + parent = new ULocale(baseLoc); + r = (ICUResourceBundle) UResourceBundle.getBundleInstance(baseName, parent); + resDepth = 0; + // determine in which locale (if any) the named resource is located + do { + try { + ICUResourceBundle irb = (ICUResourceBundle)r.get(resName); + UResourceBundle urb = irb.get(kwVal); + + // if we didn't fail before this.. + fullBase = r.getULocale(); + + // If the fetched item (urb) is in a different locale than our outer locale (r/fullBase) + // then we are in a 'fallback' situation. treat as a missing resource situation. + if(!fullBase.toString().equals(urb.getLocale().toString())) { + fullBase = null; // fallback condition. Loop and try again. 
+ } + + // If we fell back to an ancestor of the old 'default', + // we need to re calculate the "default" keyword. + if ((fullBase != null) && ((resDepth) > defDepth)) { + defStr = irb.getString(DEFAULT_TAG); + defLoc = r.getULocale(); + defDepth = resDepth; + } + } catch (MissingResourceException t) { + // Ignore error, continue search. + } + if (fullBase == null) { + r = (ICUResourceBundle) r.getParent(); + resDepth++; + } + } while ((r != null) && (fullBase == null)); + } + + if (fullBase == null) { + throw new MissingResourceException( + "Could not find locale containing requested or default keyword.", + baseName, keyword + "=" + kwVal); + } + + if (omitDefault + && defStr.equals(kwVal) // if default was requested and + && resDepth <= defDepth) { // default was set in same locale or child + return fullBase; // Keyword value is default - no keyword needed in locale + } else { + return new ULocale(fullBase.toString() + "@" + keyword + "=" + kwVal); + } + } + + /** + * Given a tree path and keyword, return a string enumeration of all possible values for that keyword. + * @param baseName resource specifier + * @param keyword a particular keyword to consider, must match a top level resource name + * within the tree. (i.e. "collations") + * @internal ICU 3.0 + */ + public static final String[] getKeywordValues(String baseName, String keyword) { + Set keywords = new HashSet(); + ULocale locales[] = createULocaleList(baseName, ICU_DATA_CLASS_LOADER); + int i; + + for (i = 0; i < locales.length; i++) { + try { + UResourceBundle b = UResourceBundle.getBundleInstance(baseName, locales[i]); + // downcast to ICUResourceBundle? 
+ ICUResourceBundle irb = (ICUResourceBundle) (b.getObject(keyword)); + Enumeration e = irb.getKeys(); + while (e.hasMoreElements()) { + String s = e.nextElement(); + if (!DEFAULT_TAG.equals(s)) { + // don't add 'default' items + keywords.add(s); + } + } + } catch (Throwable t) { + //System.err.println("Error in - " + new Integer(i).toString() + // + " - " + t.toString()); + // ignore the err - just skip that resource + } + } + return keywords.toArray(new String[0]); + } + + /** + * This method performs multilevel fallback for fetching items from the + * bundle e.g: If resource is in the form de__PHONEBOOK{ collations{ + * default{ "phonebook"} } } If the value of "default" key needs to be + * accessed, then do: + * UResourceBundle bundle = UResourceBundle.getBundleInstance("de__PHONEBOOK"); + * ICUResourceBundle result = null; + * if(bundle instanceof ICUResourceBundle){ + * result = ((ICUResourceBundle) bundle).getWithFallback("collations/default"); + * } + * + * + * @param path The path to the required resource key + * @return resource represented by the key + * @exception MissingResourceException If a resource was not found. + */ + public ICUResourceBundle getWithFallback(String path) throws MissingResourceException { + ICUResourceBundle result = null; + ICUResourceBundle actualBundle = this; + + // now recurse to pick up sub levels of the items + result = findResourceWithFallback(path, actualBundle, null); + + if (result == null) { + throw new MissingResourceException( + "Can't find resource for bundle " + + this.getClass().getName() + ", key " + getType(), + path, getKey()); + } + return result; + } + + public ICUResourceBundle at(int index) { + return (ICUResourceBundle) handleGet(index, null, this); + } + + public ICUResourceBundle at(String key) { + // don't ever presume the key is an int in disguise, like ResourceArray does. 
+ if (this instanceof ICUResourceBundleImpl.ResourceTable) { + return (ICUResourceBundle) handleGet(key, null, this); + } + return null; + } + + @Override + public ICUResourceBundle findTopLevel(int index) { + return (ICUResourceBundle) super.findTopLevel(index); + } + + @Override + public ICUResourceBundle findTopLevel(String aKey) { + return (ICUResourceBundle) super.findTopLevel(aKey); + } + + /** + * Like getWithFallback, but returns null if the resource is not found instead of + * throwing an exception. + * @param path the path to the resource + * @return the resource, or null + */ + public ICUResourceBundle findWithFallback(String path) { + return findResourceWithFallback(path, this, null); + } + + // will throw type mismatch exception if the resource is not a string + public String getStringWithFallback(String path) throws MissingResourceException { + return getWithFallback(path).getString(); + } + + /** + * Return a set of the locale names supported by a collection of resource + * bundles. + * + * @param bundlePrefix the prefix of the resource bundles to use. + */ + public static Set getAvailableLocaleNameSet(String bundlePrefix, ClassLoader loader) { + return getAvailEntry(bundlePrefix, loader).getLocaleNameSet(); + } + + /** + * Return a set of all the locale names supported by a collection of + * resource bundles. + */ + public static Set getFullLocaleNameSet() { + return getFullLocaleNameSet(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER); + } + + /** + * Return a set of all the locale names supported by a collection of + * resource bundles. + * + * @param bundlePrefix the prefix of the resource bundles to use. + */ + public static Set getFullLocaleNameSet(String bundlePrefix, ClassLoader loader) { + return getAvailEntry(bundlePrefix, loader).getFullLocaleNameSet(); + } + + /** + * Return a set of the locale names supported by a collection of resource + * bundles. 
+ */ + public static Set getAvailableLocaleNameSet() { + return getAvailableLocaleNameSet(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER); + } + + /** + * Get the set of Locales installed in the specified bundles. + * @return the list of available locales + */ + public static final ULocale[] getAvailableULocales(String baseName, ClassLoader loader) { + return getAvailEntry(baseName, loader).getULocaleList(); + } + + /** + * Get the set of ULocales installed the base bundle. + * @return the list of available locales + */ + public static final ULocale[] getAvailableULocales() { + return getAvailableULocales(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER); + } + + /** + * Get the set of Locales installed in the specified bundles. + * @return the list of available locales + */ + public static final Locale[] getAvailableLocales(String baseName, ClassLoader loader) { + return getAvailEntry(baseName, loader).getLocaleList(); + } + + /** + * Get the set of Locales installed the base bundle. + * @return the list of available locales + */ + public static final Locale[] getAvailableLocales() { + return getAvailEntry(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER).getLocaleList(); + } + + /** + * Convert a list of ULocales to a list of Locales. ULocales with a script code will not be converted + * since they cannot be represented as a Locale. This means that the two lists will not match + * one-to-one, and that the returned list might be shorter than the input list. + * @param ulocales a list of ULocales to convert to a list of Locales. + * @return the list of converted ULocales + */ + public static final Locale[] getLocaleList(ULocale[] ulocales) { + ArrayList list = new ArrayList(ulocales.length); + HashSet uniqueSet = new HashSet(); + for (int i = 0; i < ulocales.length; i++) { + Locale loc = ulocales[i].toLocale(); + if (!uniqueSet.contains(loc)) { + list.add(loc); + uniqueSet.add(loc); + } + } + return list.toArray(new Locale[list.size()]); + } + + /** + * Returns the locale of this resource bundle. 
This method can be used after + * a call to getBundle() to determine whether the resource bundle returned + * really corresponds to the requested locale or is a fallback. + * + * @return the locale of this resource bundle + */ + public Locale getLocale() { + return getULocale().toLocale(); + } + + + // ========== privates ========== + private static final String ICU_RESOURCE_INDEX = "res_index"; + + private static final String DEFAULT_TAG = "default"; + + // Flag for enabling/disabling debugging code + private static final boolean DEBUG = ICUDebug.enabled("localedata"); + + // Cache for getAvailableLocales + private static SoftReference> GET_AVAILABLE_CACHE; + private static final ULocale[] createULocaleList(String baseName, + ClassLoader root) { + // the canned list is a subset of all the available .res files, the idea + // is we don't export them + // all. gotta be a better way to do this, since to add a locale you have + // to update this list, + // and it's embedded in our binary resources. 
+ ICUResourceBundle bundle = (ICUResourceBundle) UResourceBundle.instantiateBundle(baseName, ICU_RESOURCE_INDEX, root, true); + + bundle = (ICUResourceBundle)bundle.get(INSTALLED_LOCALES); + int length = bundle.getSize(); + int i = 0; + ULocale[] locales = new ULocale[length]; + UResourceBundleIterator iter = bundle.getIterator(); + iter.reset(); + while (iter.hasNext()) { + String locstr = iter.next().getKey(); + if (locstr.equals("root")) { + locales[i++] = ULocale.ROOT; + } else { + locales[i++] = new ULocale(locstr); + } + } + bundle = null; + return locales; + } + + private static final Locale[] createLocaleList(String baseName, ClassLoader loader) { + ULocale[] ulocales = getAvailEntry(baseName, loader).getULocaleList(); + return getLocaleList(ulocales); + } + + private static final String[] createLocaleNameArray(String baseName, + ClassLoader root) { + ICUResourceBundle bundle = (ICUResourceBundle) UResourceBundle.instantiateBundle( baseName, ICU_RESOURCE_INDEX, root, true); + bundle = (ICUResourceBundle)bundle.get(INSTALLED_LOCALES); + int length = bundle.getSize(); + int i = 0; + String[] locales = new String[length]; + UResourceBundleIterator iter = bundle.getIterator(); + iter.reset(); + while (iter.hasNext()) { + String locstr = iter.next(). getKey(); + if (locstr.equals("root")) { + locales[i++] = ULocale.ROOT.toString(); + } else { + locales[i++] = locstr; + } + } + bundle = null; + return locales; + } + + private static final List createFullLocaleNameArray( + final String baseName, final ClassLoader root) { + + List list = java.security.AccessController + .doPrivileged(new java.security.PrivilegedAction>() { + public List run() { + // WebSphere class loader will return null for a raw + // directory name without trailing slash + String bn = baseName.endsWith("/") + ? 
baseName + : baseName + "/"; + + // look for prebuilt indices first + try { + InputStream s = root.getResourceAsStream(bn + ICU_RESOURCE_INDEX + ".txt"); + if (s != null) { + List lst = new ArrayList(); + BufferedReader br = new BufferedReader(new InputStreamReader(s, "ASCII")); + String line; + while ((line = br.readLine()) != null) { + if (line.length() != 0 && !line.startsWith("#")) { + if (line.equalsIgnoreCase("root")) { + lst.add(ULocale.ROOT.toString()); + } else { + lst.add(line); + } + } + } + return lst; + } + } catch (IOException e) { + // swallow it + } + + try { + Enumeration urls = root.getResources(bn); + final List lst = new ArrayList(); + while (urls.hasMoreElements()) { + URL url = urls.nextElement(); + URLHandler handler = URLHandler.get(url); + if (handler != null) { + URLVisitor v = new URLVisitor() { + public void visit(String s) { + if (s.endsWith(".res") && !"res_index.res".equals(s)) { + String locstr = s.substring(0, s.length() - 4); + if (locstr.equalsIgnoreCase("root")) { + lst.add(ULocale.ROOT.toString()); + } else { + lst.add(locstr); + } + } + } + }; + handler.guide(v, false); + } else { + System.out.println("handler for " + url + " is null"); + } + } + return lst; + } catch (IOException e) { + System.out.println("ouch: " + e.getMessage()); + } + + return null; + } + }); + + return list; + } + + private static Set createFullLocaleNameSet(String baseName, ClassLoader loader) { + List list = createFullLocaleNameArray(baseName, loader); + HashSet set = new HashSet(); + if(list == null){ + throw new MissingResourceException("Could not find "+ ICU_RESOURCE_INDEX, "", ""); + } + set.addAll(list); + return Collections.unmodifiableSet(set); + } + + private static Set createLocaleNameSet(String baseName, ClassLoader loader) { + try { + String[] locales = createLocaleNameArray(baseName, loader); + + HashSet set = new HashSet(); + set.addAll(Arrays.asList(locales)); + return Collections.unmodifiableSet(set); + } catch (MissingResourceException 
e) { + if (DEBUG) { + System.out.println("couldn't find index for bundleName: " + baseName); + Thread.dumpStack(); + } + } + return Collections.emptySet(); + } + + /** + * Holds the prefix, and lazily creates the Locale[] list or the locale name + * Set as needed. + */ + private static final class AvailEntry { + private String prefix; + private ClassLoader loader; + private ULocale[] ulocales; + private Locale[] locales; + private Set nameSet; + private Set fullNameSet; + + AvailEntry(String prefix, ClassLoader loader) { + this.prefix = prefix; + this.loader = loader; + } + + ULocale[] getULocaleList() { + if (ulocales == null) { + ulocales = createULocaleList(prefix, loader); + } + return ulocales; + } + Locale[] getLocaleList() { + if (locales == null) { + locales = createLocaleList(prefix, loader); + } + return locales; + } + Set getLocaleNameSet() { + if (nameSet == null) { + nameSet = createLocaleNameSet(prefix, loader); + } + return nameSet; + } + Set getFullLocaleNameSet() { + if (fullNameSet == null) { + fullNameSet = createFullLocaleNameSet(prefix, loader); + } + return fullNameSet; + } + } + + /** + * Stores the locale information in a cache accessed by key (bundle prefix). + * The cached objects are AvailEntries. The cache is held by a SoftReference + * so it can be GC'd. 
+ */ + private static AvailEntry getAvailEntry(String key, ClassLoader loader) { + AvailEntry ae = null; + Map lcache = null; + if (GET_AVAILABLE_CACHE != null) { + lcache = GET_AVAILABLE_CACHE.get(); + if (lcache != null) { + ae = lcache.get(key); + } + } + + if (ae == null) { + ae = new AvailEntry(key, loader); + if (lcache == null) { + lcache = new HashMap(); + lcache.put(key, ae); + GET_AVAILABLE_CACHE = new SoftReference>(lcache); + } else { + lcache.put(key, ae); + } + } + + return ae; + } + + protected static final ICUResourceBundle findResourceWithFallback(String path, + UResourceBundle actualBundle, UResourceBundle requested) { + ICUResourceBundle sub = null; + if (requested == null) { + requested = actualBundle; + } + while (actualBundle != null) { + ICUResourceBundle current = (ICUResourceBundle) actualBundle; + if (path.indexOf('/') == -1) { // skip the tokenizer + sub = (ICUResourceBundle) current.handleGet(path, null, requested); + if (sub != null) { + current = sub; + break; + } + } else { + StringTokenizer st = new StringTokenizer(path, "/"); + while (st.hasMoreTokens()) { + String subKey = st.nextToken(); + sub = (ICUResourceBundle) current.handleGet(subKey, null, requested); + if (sub == null) { + break; + } + current = sub; + } + if (sub != null) { + //we found it + break; + } + } + if (((ICUResourceBundle)actualBundle).resPath.length() != 0) { + path = ((ICUResourceBundle)actualBundle).resPath + "/" + path; + } + // if not try the parent bundle + actualBundle = ((ICUResourceBundle) actualBundle).getParent(); + + } + if(sub != null){ + sub.setLoadingStatus(((ICUResourceBundle)requested).getLocaleID()); + } + return sub; + } + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other instanceof ICUResourceBundle) { + ICUResourceBundle o = (ICUResourceBundle) other; + if (getBaseName().equals(o.getBaseName()) + && getLocaleID().equals(o.getLocaleID())) { + return true; + } + } + return false; + } + // This method 
is for super class's instantiateBundle method + public static UResourceBundle getBundleInstance(String baseName, String localeID, + ClassLoader root, boolean disableFallback){ + UResourceBundle b = instantiateBundle(baseName, localeID, root, disableFallback); + if(b==null){ + throw new MissingResourceException("Could not find the bundle "+ baseName+"/"+ localeID+".res","",""); + } + return b; + } + // recursively build bundle .. over-ride super class method. + protected synchronized static UResourceBundle instantiateBundle(String baseName, String localeID, + ClassLoader root, boolean disableFallback){ + ULocale defaultLocale = ULocale.getDefault(); + String localeName = localeID; + if(localeName.indexOf('@')>0){ + localeName = ULocale.getBaseName(localeID); + } + String fullName = getFullName(baseName, localeName); + ICUResourceBundle b = (ICUResourceBundle)loadFromCache(root, fullName, defaultLocale); + + // here we assume that java type resource bundle organization + // is required then the base name contains '.' else + // the resource organization is of ICU type + // so clients can instantiate resources of the type + // com.mycompany.data.MyLocaleElements_en.res and + // com.mycompany.data.MyLocaleElements.res + // + final String rootLocale = (baseName.indexOf('.')==-1) ? 
"root" : ""; + final String defaultID = defaultLocale.toString(); + + if(localeName.equals("")){ + localeName = rootLocale; + } + if(DEBUG) System.out.println("Creating "+fullName+ " currently b is "+b); + if (b == null) { + b = ICUResourceBundle.createBundle(baseName, localeName, root); + + if(DEBUG)System.out.println("The bundle created is: "+b+" and disableFallback="+disableFallback+" and bundle.getNoFallback="+(b!=null && b.getNoFallback())); + if(disableFallback || (b!=null && b.getNoFallback())){ + // no fallback because the caller said so or because the bundle says so + return addToCache(root, fullName, defaultLocale, b); + } + + // fallback to locale ID parent + if(b == null){ + int i = localeName.lastIndexOf('_'); + if (i != -1) { + String temp = localeName.substring(0, i); + b = (ICUResourceBundle)instantiateBundle(baseName, temp, root, disableFallback); + if(b!=null && b.getULocale().equals(temp)){ + b.setLoadingStatus(ICUResourceBundle.FROM_FALLBACK); + } + }else{ + if(defaultID.indexOf(localeName)==-1){ + b = (ICUResourceBundle)instantiateBundle(baseName, defaultID, root, disableFallback); + if(b!=null){ + b.setLoadingStatus(ICUResourceBundle.FROM_DEFAULT); + } + }else if(rootLocale.length()!=0){ + b = ICUResourceBundle.createBundle(baseName, rootLocale, root); + if(b!=null){ + b.setLoadingStatus(ICUResourceBundle.FROM_ROOT); + } + } + } + }else{ + UResourceBundle parent = null; + localeName = b.getLocaleID(); + int i = localeName.lastIndexOf('_'); + + b = (ICUResourceBundle)addToCache(root, fullName, defaultLocale, b); + + boolean ParentIsRoot = false; + if (b.getTableResource("%%ParentIsRoot") != RES_BOGUS) { + ParentIsRoot = true; + } + + if (i != -1 && !ParentIsRoot) { + parent = instantiateBundle(baseName, localeName.substring(0, i), root, disableFallback); + } else if (!localeName.equals(rootLocale)){ + parent = instantiateBundle(baseName, rootLocale, root, true); + } + + if (!b.equals(parent)){ + b.setParent(parent); + } + } + } + return b; + } 
+ UResourceBundle get(String aKey, HashMap table, UResourceBundle requested) { + ICUResourceBundle obj = (ICUResourceBundle)handleGet(aKey, table, requested); + if (obj == null) { + obj = (ICUResourceBundle)getParent(); + if (obj != null) { + //call the get method to recursively fetch the resource + obj = (ICUResourceBundle)obj.get(aKey, table, requested); + } + if (obj == null) { + String fullName = getFullName(getBaseName(), getLocaleID()); + throw new MissingResourceException( + "Can't find resource for bundle " + fullName + ", key " + + aKey, this.getClass().getName(), aKey); + } + } + obj.setLoadingStatus(((ICUResourceBundle)requested).getLocaleID()); + return obj; + } + + private static final String ICU_RESOURCE_SUFFIX = ".res"; + /** + * Gets the full name of the resource with suffix. + */ + public static String getFullName(String baseName, String localeName){ + if(baseName==null || baseName.length()==0){ + if(localeName.length()==0){ + return localeName=ULocale.getDefault().toString(); + } + return localeName+ICU_RESOURCE_SUFFIX; + }else{ + if(baseName.indexOf('.')==-1){ + if(baseName.charAt(baseName.length()-1)!= '/'){ + return baseName+"/"+localeName+ICU_RESOURCE_SUFFIX; + }else{ + return baseName+localeName+ICU_RESOURCE_SUFFIX; + } + }else{ + baseName = baseName.replace('.','/'); + if(localeName.length()==0){ + return baseName+ICU_RESOURCE_SUFFIX; + }else{ + return baseName+"_"+localeName+ICU_RESOURCE_SUFFIX; + } + } + } + } + + protected String localeID; + protected String baseName; + protected ULocale ulocale; + protected ClassLoader loader; + + /** + * Access to the bits and bytes of the resource bundle. + * Hides low-level details. + */ + protected ICUResourceBundleReader reader; + /** Data member where the subclasses store the key. */ + protected String key; + /** Data member where the subclasses store the offset within resource data. */ + protected int resource; + + /** + * A resource word value that means "no resource". 
+ * Note: 0xffffffff == -1 + * This has the same value as UResourceBundle.NONE, but they are semantically + * different and should be used appropriately according to context: + * NONE means "no type". + * (The type of RES_BOGUS is RES_RESERVED=15 which was defined in ICU4C ures.h.) + */ + public static final int RES_BOGUS = 0xffffffff; + + /** + * Resource type constant for aliases; + * internally stores a string which identifies the actual resource + * storing the data (can be in a different resource bundle). + * Resolved internally before delivering the actual resource through the API. + */ + public static final int ALIAS = 3; + + /** Resource type constant for tables with 32-bit count, key offsets and values. */ + public static final int TABLE32 = 4; + + /** + * Resource type constant for tables with 16-bit count, key offsets and values. + * All values are STRING_V2 strings. + */ + public static final int TABLE16 = 5; + + /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */ + public static final int STRING_V2 = 6; + + /** + * Resource type constant for arrays with 16-bit count and values. + * All values are STRING_V2 strings. + */ + public static final int ARRAY16 = 9; + + private static final ConcurrentHashMap cache = + new ConcurrentHashMap(); + private static final ICUResourceBundle NULL_BUNDLE = + new ICUResourceBundle(null, null, null, 0, null) { + public int hashCode() { + return 0; + } + public boolean equals(Object rhs) { + return this == rhs; + } + }; + + /** + * + * @param baseName The name for the bundle. + * @param localeID The locale identification. + * @param root The ClassLoader object root. 
+ * @return the new bundle + */ + public static ICUResourceBundle createBundle(String baseName, String localeID, + ClassLoader root) { + + String resKey = Integer.toHexString(root.hashCode()) + baseName + localeID; + ICUResourceBundle b = cache.get(resKey); + if (b == null) { + String resolvedName = getFullName(baseName, localeID); + ICUResourceBundleReader reader = ICUResourceBundleReader.getReader(resolvedName, root); + // could not open the .res file so return null + if (reader == null) { + b = NULL_BUNDLE; + } else { + b = getBundle(reader, baseName, localeID, root); + } + cache.put(resKey, b); + } + return b == NULL_BUNDLE ? null : b; + } + + protected String getLocaleID() { + return localeID; + } + + protected String getBaseName() { + return baseName; + } + + public ULocale getULocale() { + return ulocale; + } + + public UResourceBundle getParent() { + return (UResourceBundle) parent; + } + + protected void setParent(ResourceBundle parent) { + this.parent = parent; + } + + public String getKey() { + return key; + } + + private static final int[] gPublicTypes = new int[] { + STRING, + BINARY, + TABLE, + ALIAS, + + TABLE, /* TABLE32 */ + TABLE, /* TABLE16 */ + STRING, /* STRING_V2 */ + INT, + + ARRAY, + ARRAY, /* ARRAY16 */ + NONE, + NONE, + + NONE, + NONE, + INT_VECTOR, + NONE + }; + + public int getType() { + return gPublicTypes[ICUResourceBundleReader.RES_GET_TYPE(resource)]; + } + + /** + * Get the noFallback flag specified in the loaded bundle. + * @return The noFallback flag. 
+ */ + private boolean getNoFallback() { + return reader.getNoFallback(); + } + + private static ICUResourceBundle getBundle(ICUResourceBundleReader reader, + String baseName, String localeID, + ClassLoader loader) { + ICUResourceBundleImpl bundle; + int rootRes = reader.getRootResource(); + if(gPublicTypes[ICUResourceBundleReader.RES_GET_TYPE(rootRes)] == TABLE) { + bundle = new ICUResourceBundleImpl.ResourceTable(reader, null, "", rootRes, null); + } else { + throw new IllegalStateException("Invalid format error"); + } + bundle.baseName = baseName; + bundle.localeID = localeID; + bundle.ulocale = new ULocale(localeID); + bundle.loader = loader; + if(bundle.reader.getUsesPoolBundle()) { + bundle.reader.setPoolBundleKeys( + ((ICUResourceBundleImpl)getBundleInstance(baseName, "pool", loader, true)).reader); + } + UResourceBundle alias = bundle.handleGetImpl("%%ALIAS", null, bundle, null, null); // handleGet will cache the bundle with no parent set + if(alias != null) { + return (ICUResourceBundle)UResourceBundle.getBundleInstance(baseName, alias.getString()); + } else { + return bundle; + } + } + // constructor for inner classes + protected ICUResourceBundle(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundle container) { + this.reader = reader; + this.key = key; + this.resPath = resPath; + this.resource = resource; + if(container != null) { + baseName = container.baseName; + localeID = container.localeID; + ulocale = container.ulocale; + loader = container.loader; + this.parent = container.parent; + } + } + + private String getAliasValue(int res) { + String result = reader.getAlias(res); + return result != null ? 
result : ""; + } + private static final char RES_PATH_SEP_CHAR = '/'; + private static final String RES_PATH_SEP_STR = "/"; + private static final String ICUDATA = "ICUDATA"; + private static final char HYPHEN = '-'; + private static final String LOCALE = "LOCALE"; + + protected ICUResourceBundle findResource(String _key, int _resource, + HashMap table, + UResourceBundle requested) { + ClassLoader loaderToUse = loader; + String locale = null, keyPath = null; + String bundleName; + String rpath = getAliasValue(_resource); + if (table == null) { + table = new HashMap(); + } + if (table.get(rpath) != null) { + throw new IllegalArgumentException( + "Circular references in the resource bundles"); + } + table.put(rpath, ""); + if (rpath.indexOf(RES_PATH_SEP_CHAR) == 0) { + int i = rpath.indexOf(RES_PATH_SEP_CHAR, 1); + int j = rpath.indexOf(RES_PATH_SEP_CHAR, i + 1); + bundleName = rpath.substring(1, i); + if (j < 0) { + locale = rpath.substring(i + 1); + } else { + locale = rpath.substring(i + 1, j); + keyPath = rpath.substring(j + 1, rpath.length()); + } + //there is a path included + if (bundleName.equals(ICUDATA)) { + bundleName = ICU_BASE_NAME; + loaderToUse = ICU_DATA_CLASS_LOADER; + }else if(bundleName.indexOf(ICUDATA)>-1){ + int idx = bundleName.indexOf(HYPHEN); + if(idx>-1){ + bundleName = ICU_BASE_NAME+RES_PATH_SEP_STR+bundleName.substring(idx+1,bundleName.length()); + loaderToUse = ICU_DATA_CLASS_LOADER; + } + } + } else { + //no path start with locale + int i = rpath.indexOf(RES_PATH_SEP_CHAR); + keyPath = rpath.substring(i + 1); + if (i != -1) { + locale = rpath.substring(0, i); + } else { + locale = keyPath; + keyPath = null;//keyPath.substring(i, keyPath.length()); + } + bundleName = baseName; + } + ICUResourceBundle bundle = null; + ICUResourceBundle sub = null; + if(bundleName.equals(LOCALE)){ + bundleName = baseName; + bundle = (ICUResourceBundle)requested; + keyPath = rpath.substring(LOCALE.length() + 2/* prepending and appending / */, rpath.length()); 
+ locale = ((ICUResourceBundle)requested).getLocaleID(); + sub = ICUResourceBundle.findResourceWithFallback(keyPath, requested, null); + if (sub != null) { + sub.resPath = "/" + sub.getLocaleID() + "/" + keyPath; + } + }else{ + if (locale == null) { + // {dlf} must use requestor's class loader to get resources from same jar + bundle = (ICUResourceBundle) getBundleInstance(bundleName, "", + loaderToUse, false); + } else { + bundle = (ICUResourceBundle) getBundleInstance(bundleName, locale, + loaderToUse, false); + } + if (keyPath != null) { + StringTokenizer st = new StringTokenizer(keyPath, "/"); + ICUResourceBundle current = bundle; + while (st.hasMoreTokens()) { + String subKey = st.nextToken(); + sub = (ICUResourceBundle)current.get(subKey, table, requested); + if (sub == null) { + break; + } + current = sub; + } + } else { + // if the sub resource is not found + // try fetching the sub resource with + // the key of this alias resource + sub = (ICUResourceBundle)bundle.get(_key); + } + if (sub != null) { + sub.resPath = rpath; + } + } + if (sub == null) { + throw new MissingResourceException(localeID, baseName, _key); + } + return sub; + } + + // Resource bundle lookup cache, which may be used by subclasses + // which have nested resources + protected ICUCache lookup; + private static final int MAX_INITIAL_LOOKUP_SIZE = 64; + + protected void createLookupCache() { + lookup = new SimpleCache(ICUCache.WEAK, Math.max(getSize()*2, MAX_INITIAL_LOOKUP_SIZE)); + } + + protected UResourceBundle handleGet(String resKey, HashMap table, UResourceBundle requested) { + UResourceBundle res = null; + if (lookup != null) { + res = lookup.get(resKey); + } + if (res == null) { + int[] index = new int[1]; + boolean[] alias = new boolean[1]; + res = handleGetImpl(resKey, table, requested, index, alias); + if (res != null && lookup != null && !alias[0]) { + // We do not want to cache a result from alias entry + lookup.put(resKey, res); + lookup.put(Integer.valueOf(index[0]), res); + 
} + } + return res; + } + + protected UResourceBundle handleGet(int index, HashMap table, UResourceBundle requested) { + UResourceBundle res = null; + Integer indexKey = null; + if (lookup != null) { + indexKey = Integer.valueOf(index); + res = lookup.get(indexKey); + } + if (res == null) { + boolean[] alias = new boolean[1]; + res = handleGetImpl(index, table, requested, alias); + if (res != null && lookup != null && !alias[0]) { + // We do not want to cache a result from alias entry + lookup.put(res.getKey(), res); + lookup.put(indexKey, res); + } + } + return res; + } + + // Subclass which supports key based resource access to implement this method + protected UResourceBundle handleGetImpl(String resKey, HashMap table, UResourceBundle requested, + int[] index, boolean[] isAlias) { + return null; + } + + // Subclass which supports index based resource access to implement this method + protected UResourceBundle handleGetImpl(int index, HashMap table, UResourceBundle requested, + boolean[] isAlias) { + return null; + } + + + // TODO Below is a set of workarounds created for org.unicode.cldr.icu.ICU2LDMLWriter + /* + * Calling getKeys() on a table that has alias's can throw a NullPointerException if parent is not set, + * see trac bug: 6514 + * -Brian Rower - IBM - Sept. 2008 + */ + + /** + * Returns the resource handle for the given key within the calling resource table. + * + * @internal + * @deprecated This API is ICU internal only and a workaround see ticket #6514. + * @author Brian Rower + */ + protected int getTableResource(String resKey) { + return RES_BOGUS; + } + protected int getTableResource(int index) { + return RES_BOGUS; + } + + /** + * Determines if the object at the specified index of the calling resource table + * is an alias. If it is, returns true + * + * @param index The index of the resource to check + * @returns True if the resource at 'index' is an alias, false otherwise. 
+ * + * @internal + * @deprecated This API is ICU internal only and part of a work around see ticket #6514 + * @author Brian Rower + */ + public boolean isAlias(int index) + { + //TODO this is part of a workaround for ticket #6514 + //if index is out of the resource, return false. + return ICUResourceBundleReader.RES_GET_TYPE(getTableResource(index)) == ALIAS; + } + + /** + * + * @internal + * @deprecated This API is ICU internal only and part of a workaround see ticket #6514. + * @author Brian Rower + */ + public boolean isAlias() + { + //TODO this is part of a workaround for ticket #6514 + return ICUResourceBundleReader.RES_GET_TYPE(resource) == ALIAS; + } + + /** + * Determines if the object with the specified key + * is an alias. If it is, returns true + * + * @returns True if the resource with 'key' is an alias, false otherwise. + * + * @internal + * @deprecated This API is ICU internal only and part of a workaround see ticket #6514. + * @author Brian Rower + */ + public boolean isAlias(String k) + { + //TODO this is part of a workaround for ticket #6514 + //this only applies to tables + return ICUResourceBundleReader.RES_GET_TYPE(getTableResource(k)) == ALIAS; + } + + /** + * This method can be used to retrieve the underlying alias path (aka where the alias points to) + * This method was written to allow conversion from ICU back to LDML format. + * + * @param index The index where the alias path points to. + * @return The alias path. + * @author Brian Rower + * @internal + * @deprecated This API is ICU internal only. 
+ * @author Brian Rower + */ + public String getAliasPath(int index) + { + return getAliasValue(getTableResource(index)); + } + + /** + * + * @internal + * @deprecated This API is ICU internal only + * @author Brian Rower + */ + public String getAliasPath() + { + //TODO cannot allow alias path to end up in public API + return getAliasValue(resource); + } + + /** + * + * @internal + * @deprecated This API is ICU internal only + * @author Brian Rower + */ + public String getAliasPath(String k) + { + //TODO cannot allow alias path to end up in public API + return getAliasValue(getTableResource(k)); + } + + /* + * Helper method for getKeysSafe + */ + protected String getKey(int index) { + return null; + } + + /** + * Returns an Enumeration of the keys belonging to this table or array. + * This method differs from the getKeys() method by not following alias paths. This method exposes + * underlying alias's. For all general purposes of the ICU resource bundle please use getKeys(). + * + * @return Keys in this table or array. + * @internal + * @deprecated This API is ICU internal only and a workaround see ticket #6514. + * @author Brian Rower + */ + public Enumeration getKeysSafe() + { + //TODO this is part of a workaround for ticket #6514 + //the safeness only applies to tables, so use the other method if it's not a table + if(!ICUResourceBundleReader.URES_IS_TABLE(resource)) + { + return getKeys(); + } + Vector v = new Vector(); + int size = getSize(); + for(int index = 0; index < size; index++) + { + String curKey = getKey(index); + v.add(curKey); + } + return v.elements(); + } + + // This is the worker function for the public getKeys(). + // TODO: Now that UResourceBundle uses handleKeySet(), this function is obsolete. + // It is also not inherited from ResourceBundle, and it is not implemented + // by ResourceBundleWrapper despite its documentation requiring all subclasses to + // implement it. 
+ // Consider deprecating UResourceBundle.handleGetKeys(), and consider making it always return null. + protected Enumeration handleGetKeys() { + return Collections.enumeration(handleKeySet()); + } + + protected boolean isTopLevelResource() { + return resPath.length() == 0; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java new file mode 100644 index 00000000000..fe40d438780 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java @@ -0,0 +1,213 @@ +/* + ******************************************************************************* + * Copyright (C) 2004-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Set; +import java.util.TreeSet; + +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.UResourceBundleIterator; +import com.ibm.icu.util.UResourceTypeMismatchException; + +class ICUResourceBundleImpl extends ICUResourceBundle { + protected ICUResourceBundleImpl(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + } + protected final ICUResourceBundle createBundleObject(String _key, + int _resource, + HashMap table, + UResourceBundle requested, + boolean[] isAlias) { + if (isAlias != null) { + isAlias[0] = false; + } + String _resPath = resPath + "/" + _key; + switch(ICUResourceBundleReader.RES_GET_TYPE(_resource)) { + case STRING : + case STRING_V2: + return new ICUResourceBundleImpl.ResourceString(reader, _key, _resPath, _resource, this); + case BINARY: + return new ICUResourceBundleImpl.ResourceBinary(reader, _key, _resPath, _resource, this); + case ALIAS: + if (isAlias != null) { + 
isAlias[0] = true; + } + return findResource(_key, _resource, table, requested); + case INT: + return new ICUResourceBundleImpl.ResourceInt(reader, _key, _resPath, _resource, this); + case INT_VECTOR: + return new ICUResourceBundleImpl.ResourceIntVector(reader, _key, _resPath, _resource, this); + case ARRAY: + case ARRAY16: + return new ICUResourceBundleImpl.ResourceArray(reader, _key, _resPath, _resource, this); + case TABLE: + case TABLE16: + case TABLE32: + return new ICUResourceBundleImpl.ResourceTable(reader, _key, _resPath, _resource, this); + default : + throw new IllegalStateException("The resource type is unknown"); + } + } + + // Scalar values ------------------------------------------------------- *** + + private static final class ResourceBinary extends ICUResourceBundleImpl { + public ByteBuffer getBinary() { + return reader.getBinary(resource); + } + public byte [] getBinary(byte []ba) { + return reader.getBinary(resource, ba); + } + ResourceBinary(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + } + } + private static final class ResourceInt extends ICUResourceBundleImpl { + public int getInt() { + return ICUResourceBundleReader.RES_GET_INT(resource); + } + public int getUInt() { + return ICUResourceBundleReader.RES_GET_UINT(resource); + } + ResourceInt(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + } + } + private static final class ResourceString extends ICUResourceBundleImpl { + private String value; + public String getString() { + return value; + } + ResourceString(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + value = reader.getString(resource); + } + } + private static final class ResourceIntVector 
extends ICUResourceBundleImpl { + private int[] value; + public int[] getIntVector() { + return value; + } + ResourceIntVector(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + value = reader.getIntVector(resource); + } + } + + // Container values ---------------------------------------------------- *** + + private static class ResourceContainer extends ICUResourceBundleImpl { + protected ICUResourceBundleReader.Container value; + + public int getSize() { + return value.getSize(); + } + protected int getContainerResource(int index) { + return value.getContainerResource(index); + } + protected UResourceBundle createBundleObject(int index, String resKey, HashMap table, + UResourceBundle requested, boolean[] isAlias) { + int item = getContainerResource(index); + if (item == RES_BOGUS) { + throw new IndexOutOfBoundsException(); + } + return createBundleObject(resKey, item, table, requested, isAlias); + } + ResourceContainer(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + } + } + private static class ResourceArray extends ResourceContainer { + protected String[] handleGetStringArray() { + String[] strings = new String[value.getSize()]; + UResourceBundleIterator iter = getIterator(); + int i = 0; + while (iter.hasNext()) { + strings[i++] = iter.next().getString(); + } + return strings; + } + public String[] getStringArray() { + return handleGetStringArray(); + } + protected UResourceBundle handleGetImpl(String indexStr, HashMap table, + UResourceBundle requested, + int[] index, boolean[] isAlias) { + int i = indexStr.length() > 0 ? 
Integer.valueOf(indexStr).intValue() : -1; + if(index != null) { + index[0] = i; + } + if (i < 0) { + throw new UResourceTypeMismatchException("Could not get the correct value for index: "+ index); + } + return createBundleObject(i, indexStr, table, requested, isAlias); + } + protected UResourceBundle handleGetImpl(int index, HashMap table, + UResourceBundle requested, boolean[] isAlias) { + return createBundleObject(index, Integer.toString(index), table, requested, isAlias); + } + ResourceArray(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + value = reader.getArray(resource); + createLookupCache(); // Use bundle cache to access array entries + } + } + static class ResourceTable extends ResourceContainer { + protected String getKey(int index) { + return ((ICUResourceBundleReader.Table)value).getKey(index); + } + protected Set handleKeySet() { + TreeSet keySet = new TreeSet(); + ICUResourceBundleReader.Table table = (ICUResourceBundleReader.Table)value; + for (int i = 0; i < table.getSize(); ++i) { + keySet.add(table.getKey(i)); + } + return keySet; + } + protected int getTableResource(String resKey) { + return ((ICUResourceBundleReader.Table)value).getTableResource(resKey); + } + protected int getTableResource(int index) { + return getContainerResource(index); + } + protected UResourceBundle handleGetImpl(String resKey, HashMap table, + UResourceBundle requested, + int[] index, boolean[] isAlias) { + int i = ((ICUResourceBundleReader.Table)value).findTableItem(resKey); + if(index != null) { + index[0] = i; + } + if (i < 0) { + return null; + } + return createBundleObject(i, resKey, table, requested, isAlias); + } + protected UResourceBundle handleGetImpl(int index, HashMap table, + UResourceBundle requested, boolean[] isAlias) { + String itemKey = ((ICUResourceBundleReader.Table)value).getKey(index); + if (itemKey == null) { + throw new 
IndexOutOfBoundsException(); + } + return createBundleObject(index, itemKey, table, requested, isAlias); + } + ResourceTable(ICUResourceBundleReader reader, String key, String resPath, int resource, + ICUResourceBundleImpl container) { + super(reader, key, resPath, resource, container); + value = reader.getTable(resource); + createLookupCache(); // Use bundle cache to access table entries + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java new file mode 100644 index 00000000000..1de1cba5173 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java @@ -0,0 +1,857 @@ +/* + ******************************************************************************* + * Copyright (C) 2004-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.VersionInfo; + +/** + * This class reads the *.res resource bundle format + * + * (For the latest version of the file format documentation see + * ICU4C's source/common/uresdata.h file.) + * + * File format for .res resource bundle files (formatVersion=2, ICU 4.4) + * + * New in formatVersion 2 compared with 1.3: ------------- + * + * Three new resource types -- String-v2, Table16 and Array16 -- have their + * values stored in a new array of 16-bit units between the table key strings + * and the start of the other resources. + * + * genrb eliminates duplicates among Unicode string-v2 values. + * Multiple Unicode strings may use the same offset and string data, + * or a short string may point to the suffix of a longer string. 
("Suffix sharing") + * For example, one string "abc" may be reused for another string "bc" by pointing + * to the second character. (Short strings-v2 are NUL-terminated + * and not preceded by an explicit length value.) + * + * It is allowed for all resource types to share values. + * The swapper code (ures_swap()) has been modified so that it swaps each item + * exactly once. + * + * A resource bundle may use a special pool bundle. Some or all of the table key strings + * of the using-bundle are omitted, and the key string offsets for such key strings refer + * to offsets in the pool bundle. + * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values + * must match. + * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle + * is or uses a pool bundle. + * + * Table key strings must be compared in ASCII order, even if they are not + * stored in ASCII. + * + * New in formatVersion 1.3 compared with 1.2: ------------- + * + * genrb eliminates duplicates among key strings. + * Multiple table items may share one key string, or one item may point + * to the suffix of another's key string. ("Suffix sharing") + * For example, one key "abc" may be reused for another key "bc" by pointing + * to the second character. (Key strings are NUL-terminated.) + * + * ------------- + * + * An ICU4C resource bundle file (.res) is a binary, memory-mappable file + * with nested, hierarchical data structures. 
+ * It physically contains the following: + * + * Resource root; -- 32-bit Resource item, root item for this bundle's tree; + * currently, the root item must be a table or table32 resource item + * int32_t indexes[indexes[0]]; -- array of indexes for friendly + * reading and swapping; see URES_INDEX_* above + * new in formatVersion 1.1 (ICU 2.8) + * char keys[]; -- characters for key strings + * (formatVersion 1.0: up to 65k of characters; 1.1: <2G) + * (minus the space for root and indexes[]), + * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated; + * padded to multiple of 4 bytes for 4-alignment of the following data + * uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units + * (new in formatVersion 2/ICU 4.4) + * data is indexed by the offset values in 16-bit resource types, + * with offset 0 pointing to the beginning of this array; + * there is a 0 at offset 0, for empty resources; + * padded to multiple of 4 bytes for 4-alignment of the following data + * data; -- data directly and indirectly indexed by the root item; + * the structure is determined by walking the tree + * + * Each resource bundle item has a 32-bit Resource handle (see typedef above) + * which contains the item type number in its upper 4 bits (31..28) and either + * an offset or a direct value in its lower 28 bits (27..0). + * The order of items is undefined and only determined by walking the tree. + * Leaves of the tree may be stored first or last or anywhere in between, + * and it is in theory possible to have unreferenced holes in the file. + * + * 16-bit-unit values: + * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special + * array of 16-bit units. Each resource value is a sequence of 16-bit units, + * with no per-resource padding to a 4-byte boundary. + * 16-bit container types (Table16 and Array16) contain Resource16 values + * which are offsets to String-v2 resources in the same 16-bit-units array. 
+ * + * Direct values: + * - Empty Unicode strings have an offset value of 0 in the Resource handle itself. + * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for + * _any_ resource type indicates an empty value. + * - Integer values are 28-bit values stored in the Resource handle itself; + * the interpretation of unsigned vs. signed integers is up to the application. + * + * All other types and values use 28-bit offsets to point to the item's data. + * The offset is an index to the first 32-bit word of the value, relative to the + * start of the resource data (i.e., the root item handle is at offset 0). + * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits). + * All resource item values are 4-aligned. + * + * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array, + * indexing 16-bit units in that array. + * + * The structures (memory layouts) for the values for each item type are listed + * in the table below. + * + * Nested, hierarchical structures: ------------- + * + * Table items contain key-value pairs where the keys are offsets to char * key strings. + * The values of these pairs are either Resource handles or + * offsets into the 16-bit-units array, depending on the table type. + * + * Array items are simple vectors of Resource handles, + * or of offsets into the 16-bit-units array, depending on the array type. + * + * Table key string offsets: ------- + * + * Key string offsets are relative to the start of the resource data (of the root handle), + * i.e., the first string has an offset of 4+sizeof(indexes). + * (After the 4-byte root handle and after the indexes array.) + * + * If the resource bundle uses a pool bundle, then some key strings are stored + * in the pool bundle rather than in the local bundle itself. + * - In a Table or Table16, the 16-bit key string offset is local if it is + * less than indexes[URES_INDEX_KEYS_TOP]<<2. 
+ * Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into + * the pool bundle key strings. + * - In a Table32, the 32-bit key string offset is local if it is non-negative. + * Otherwise, reset bit 31 to get the pool key string offset. + * + * Unlike the local offset, the pool key offset is relative to + * the start of the key strings, not to the start of the bundle. + * + * An alias item is special (and new in ICU 2.4): -------------- + * + * Its memory layout is just like for a UnicodeString, but at runtime it resolves to + * another resource bundle's item according to the path in the string. + * This is used to share items across bundles that are in different lookup/fallback + * chains (e.g., large collation data among zh_TW and zh_HK). + * This saves space (for large items) and maintenance effort (less duplication of data). + * + * -------------------------------------------------------------------------- + * + * Resource types: + * + * Most resources have their values stored at four-byte offsets from the start + * of the resource data. These values are at least 4-aligned. + * Some resource values are stored directly in the offset field of the Resource itself. + * See UResType in unicode/ures.h for enumeration constants for Resource types. + * + * Some resources have their values stored as sequences of 16-bit units, + * at 2-byte offsets from the start of a contiguous 16-bit-unit array between + * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4) + * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources. + * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2 + * resources, with the offsets relative to the start of the 16-bit-units array. 
+ * + * Type Name Memory layout of values + * (in parentheses: scalar, non-offset values) + * + * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding) + * or (empty string ("") if offset==0) + * 1 Binary: int32_t length, uint8_t[length], (padding) + * - the start of the bytes is 16-aligned - + * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count] + * 3 Alias: (physically same value layout as string, new in ICU 2.4) + * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count] + * (new in formatVersion 1.1/ICU 2.8) + * 5 Table16: uint16_t count, uint16_t keyStringOffsets[count], Resource16[count] + * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4) + * 6 Unicode String-v2:UChar[length], (UChar)0; length determined by the first UChar: + * - if first is not a trail surrogate, then the length is implicit + * and u_strlen() needs to be called + * - if first<0xdfef then length=first&0x3ff (and skip first) + * - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar + * - if first==0xdfff then length=((second UChar)<<16) | third UChar + * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4) + * 7 Integer: (28-bit offset is integer value) + * 8 Array: int32_t count, Resource[count] + * 9 Array16: uint16_t count, Resource16[count] + * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4) + * 14 Integer Vector: int32_t length, int32_t[length] + * 15 Reserved: This value denotes special purpose resources and is for internal use. + * + * Note that there are 3 types with data vector values: + * - Vectors of 8-bit bytes stored as type Binary. + * - Vectors of 16-bit words stored as type Unicode String or Unicode String-v2 + * (no value restrictions, all values 0..ffff allowed!). + * - Vectors of 32-bit words stored as type Integer Vector. 
+ */ +public final class ICUResourceBundleReader implements ICUBinary.Authenticate { + /** + * File format version that this class understands. + * "ResB" + */ + private static final byte DATA_FORMAT_ID[] = {(byte)0x52, (byte)0x65, + (byte)0x73, (byte)0x42}; + + /* indexes[] value names; indexes are generally 32-bit (Resource) indexes */ + private static final int URES_INDEX_LENGTH = 0; /* contains URES_INDEX_TOP==the length of indexes[]; + * formatVersion==1: all bits contain the length of indexes[] + * but the length is much less than 0xff; + * formatVersion>1: + * only bits 7..0 contain the length of indexes[], + * bits 31..8 are reserved and set to 0 */ + private static final int URES_INDEX_KEYS_TOP = 1; /* contains the top of the key strings, */ + /* same as the bottom of resources or UTF-16 strings, rounded up */ + //ivate static final int URES_INDEX_RESOURCES_TOP = 2; /* contains the top of all resources */ + private static final int URES_INDEX_BUNDLE_TOP = 3; /* contains the top of the bundle, */ + /* in case it were ever different from [2] */ + //ivate static final int URES_INDEX_MAX_TABLE_LENGTH = 4; /* max. length of any table */ + private static final int URES_INDEX_ATTRIBUTES = 5; /* attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */ + private static final int URES_INDEX_16BIT_TOP = 6; /* top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16), + * rounded up (new in formatVersion 2.0, ICU 4.4) */ + private static final int URES_INDEX_POOL_CHECKSUM = 7; /* checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */ + //ivate static final int URES_INDEX_TOP = 8; + + /* + * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES]. + * New in formatVersion 1.2 (ICU 3.6). + * + * If set, then this resource bundle is a standalone bundle. + * If not set, then the bundle participates in locale fallback, eventually + * all the way to the root bundle. 
+ * If indexes[] is missing or too short, then the attribute cannot be determined + * reliably. Dependency checking should ignore such bundles, and loading should + * use fallbacks. + */ + private static final int URES_ATT_NO_FALLBACK = 1; + + /* + * Attributes for bundles that are, or use, a pool bundle. + * A pool bundle provides key strings that are shared among several other bundles + * to reduce their total size. + * New in formatVersion 2 (ICU 4.4). + */ + private static final int URES_ATT_IS_POOL_BUNDLE = 2; + private static final int URES_ATT_USES_POOL_BUNDLE = 4; + + private static final boolean DEBUG = false; + + private byte[] /* formatVersion, */ dataVersion; + + // See the ResourceData struct in ICU4C/source/common/uresdata.h. + private String s16BitUnits; + private byte[] poolBundleKeys; + private String poolBundleKeysAsString; + private int rootRes; + private int localKeyLimit; + private boolean noFallback; /* see URES_ATT_NO_FALLBACK */ + private boolean isPoolBundle; + private boolean usesPoolBundle; + + // Fields specific to the Java port. + private int[] indexes; + private byte[] keyStrings; + private String keyStringsAsString; // null except if isPoolBundle + private byte[] resourceBytes; + private int resourceBottom; // File offset where the mixed-type resources start. 
+ + private ICUResourceBundleReader(InputStream stream, String resolvedName){ + BufferedInputStream bs = new BufferedInputStream(stream); + try{ + if(DEBUG) System.out.println("The InputStream class is: " + stream.getClass().getName()); + if(DEBUG) System.out.println("The BufferedInputStream class is: " + bs.getClass().getName()); + if(DEBUG) System.out.println("The bytes avialable in stream before reading the header: " + bs.available()); + + dataVersion = ICUBinary.readHeader(bs,DATA_FORMAT_ID,this); + + if(DEBUG) System.out.println("The bytes available in stream after reading the header: " + bs.available()); + + readData(bs); + stream.close(); + }catch(IOException ex){ + throw new RuntimeException("Data file "+ resolvedName+ " is corrupt - " + ex.getMessage()); + } + } + static ICUResourceBundleReader getReader(String resolvedName, ClassLoader root) { + InputStream stream = ICUData.getStream(root,resolvedName); + + if(stream==null){ + return null; + } + ICUResourceBundleReader reader = new ICUResourceBundleReader(stream, resolvedName); + return reader; + } + + void setPoolBundleKeys(ICUResourceBundleReader poolBundleReader) { + if(!poolBundleReader.isPoolBundle) { + throw new IllegalStateException("pool.res is not a pool bundle"); + } + if(poolBundleReader.indexes[URES_INDEX_POOL_CHECKSUM] != indexes[URES_INDEX_POOL_CHECKSUM]) { + throw new IllegalStateException("pool.res has a different checksum than this bundle"); + } + poolBundleKeys = poolBundleReader.keyStrings; + poolBundleKeysAsString = poolBundleReader.keyStringsAsString; + } + + // See res_init() in ICU4C/source/common/uresdata.c. 
+ private void readData(InputStream stream) throws IOException { + DataInputStream ds = new DataInputStream(stream); + + if(DEBUG) System.out.println("The DataInputStream class is: " + ds.getClass().getName()); + if(DEBUG) System.out.println("The available bytes in the stream before reading the data: "+ds.available()); + + rootRes = ds.readInt(); + + // read the variable-length indexes[] array + int indexes0 = ds.readInt(); + int indexLength = indexes0 & 0xff; + indexes = new int[indexLength]; + indexes[URES_INDEX_LENGTH] = indexes0; + for(int i=1; i URES_INDEX_ATTRIBUTES) { + // determine if this resource bundle falls back to a parent bundle + // along normal locale ID fallback + int att = indexes[URES_INDEX_ATTRIBUTES]; + noFallback = (att & URES_ATT_NO_FALLBACK) != 0; + isPoolBundle = (att & URES_ATT_IS_POOL_BUNDLE) != 0; + usesPoolBundle = (att & URES_ATT_USES_POOL_BUNDLE) != 0; + } + + int length = indexes[URES_INDEX_BUNDLE_TOP]*4; + if(DEBUG) System.out.println("The number of bytes in the bundle: "+length); + + // Read the local key strings. + // The keyStrings include NUL characters corresponding to the bytes + // up to the end of the indexes. + if(indexes[URES_INDEX_KEYS_TOP] > (1 + indexLength)) { + int keysBottom = (1 + indexLength) << 2; + int keysTop = indexes[URES_INDEX_KEYS_TOP] << 2; + resourceBottom = keysTop; + if(isPoolBundle) { + // Shift the key strings down: + // Pool bundle key strings are used with a 0-based index, + // unlike regular bundles' key strings for which indexes + // are based on the start of the bundle data. + keysTop -= keysBottom; + keysBottom = 0; + } else { + localKeyLimit = keysTop; + } + keyStrings = new byte[keysTop]; + ds.readFully(keyStrings, keysBottom, keysTop - keysBottom); + if(isPoolBundle) { + // Overwrite trailing padding bytes so that the conversion works. 
+ while(keysBottom < keysTop && keyStrings[keysTop - 1] == (byte)0xaa) { + keyStrings[--keysTop] = 0; + } + keyStringsAsString = new String(keyStrings, "US-ASCII"); + } + } + + // Read the array of 16-bit units. + // We are not using + // new String(keys, "UTF-16BE") + // because the 16-bit units may not be well-formed Unicode. + if( indexLength > URES_INDEX_16BIT_TOP && + indexes[URES_INDEX_16BIT_TOP] > indexes[URES_INDEX_KEYS_TOP] + ) { + int num16BitUnits = (indexes[URES_INDEX_16BIT_TOP] - + indexes[URES_INDEX_KEYS_TOP]) * 2; + char[] c16BitUnits = new char[num16BitUnits]; + for(int i = 0; i < num16BitUnits; ++i) { + c16BitUnits[i] = ds.readChar(); + } + s16BitUnits = new String(c16BitUnits); + resourceBottom = indexes[URES_INDEX_16BIT_TOP] << 2; + } else { + s16BitUnits = "\0"; + } + + // Read the block of bytes for the mixed-type resources. + resourceBytes = new byte[length - resourceBottom]; + ds.readFully(resourceBytes); + } + + VersionInfo getVersion(){ + return VersionInfo.getInstance(dataVersion[0],dataVersion[1],dataVersion[2],dataVersion[3]); + } + public boolean isDataVersionAcceptable(byte version[]){ + // while ICU4C can read formatVersion 1.0 and up, + // ICU4J requires 1.1 as a minimum + // formatVersion = version; + return ((version[0] == 1 && version[1] >= 1) || version[0] == 2); + } + + int getRootResource() { + return rootRes; + } + boolean getNoFallback() { + return noFallback; + } + boolean getUsesPoolBundle() { + return usesPoolBundle; + } + + static int RES_GET_TYPE(int res) { + return res >>> 28; + } + private static int RES_GET_OFFSET(int res) { + return res & 0x0fffffff; + } + private int getResourceByteOffset(int offset) { + return (offset << 2) - resourceBottom; + } + /* get signed and unsigned integer values directly from the Resource handle */ + static int RES_GET_INT(int res) { + return (res << 4) >> 4; + } + static int RES_GET_UINT(int res) { + return res & 0x0fffffff; + } + static boolean URES_IS_TABLE(int type) { + return 
type==UResourceBundle.TABLE || type==ICUResourceBundle.TABLE16 || type==ICUResourceBundle.TABLE32; + } + + private static byte[] emptyBytes = new byte[0]; + private static ByteBuffer emptyByteBuffer = ByteBuffer.allocate(0).asReadOnlyBuffer(); + private static char[] emptyChars = new char[0]; + private static int[] emptyInts = new int[0]; + private static String emptyString = ""; + + private char getChar(int offset) { + return (char)((resourceBytes[offset] << 8) | (resourceBytes[offset + 1] & 0xff)); + } + private char[] getChars(int offset, int count) { + char[] chars = new char[count]; + for(int i = 0; i < count; offset += 2, ++i) { + chars[i] = (char)(((int)resourceBytes[offset] << 8) | (resourceBytes[offset + 1] & 0xff)); + } + return chars; + } + private int getInt(int offset) { + return (resourceBytes[offset] << 24) | + ((resourceBytes[offset+1] & 0xff) << 16) | + ((resourceBytes[offset+2] & 0xff) << 8) | + ((resourceBytes[offset+3] & 0xff)); + } + private int[] getInts(int offset, int count) { + int[] ints = new int[count]; + for(int i = 0; i < count; offset += 4, ++i) { + ints[i] = (resourceBytes[offset] << 24) | + ((resourceBytes[offset+1] & 0xff) << 16) | + ((resourceBytes[offset+2] & 0xff) << 8) | + ((resourceBytes[offset+3] & 0xff)); + } + return ints; + } + private char[] getTable16KeyOffsets(int offset) { + int length = s16BitUnits.charAt(offset++); + if(length > 0) { + return s16BitUnits.substring(offset, offset + length).toCharArray(); + } else { + return emptyChars; + } + } + private char[] getTableKeyOffsets(int offset) { + int length = getChar(offset); + if(length > 0) { + return getChars(offset + 2, length); + } else { + return emptyChars; + } + } + private int[] getTable32KeyOffsets(int offset) { + int length = getInt(offset); + if(length > 0) { + return getInts(offset + 4, length); + } else { + return emptyInts; + } + } + + /** Refers to ASCII key string bytes, for key string matching. 
*/ + private static final class ByteSequence { + private byte[] bytes; + private int offset; + public ByteSequence(byte[] bytes, int offset) { + this.bytes = bytes; + this.offset = offset; + } + public byte charAt(int index) { + return bytes[offset + index]; + } + } + private String makeKeyStringFromBytes(int keyOffset) { + StringBuilder sb = new StringBuilder(); + byte b; + while((b = keyStrings[keyOffset++]) != 0) { + sb.append((char)b); + } + return sb.toString(); + } + private String makeKeyStringFromString(int keyOffset) { + int endOffset = keyOffset; + while(poolBundleKeysAsString.charAt(endOffset) != 0) { + ++endOffset; + } + return poolBundleKeysAsString.substring(keyOffset, endOffset); + } + private ByteSequence RES_GET_KEY16(char keyOffset) { + if(keyOffset < localKeyLimit) { + return new ByteSequence(keyStrings, keyOffset); + } else { + return new ByteSequence(poolBundleKeys, keyOffset - localKeyLimit); + } + } + private String getKey16String(int keyOffset) { + if(keyOffset < localKeyLimit) { + return makeKeyStringFromBytes(keyOffset); + } else { + return makeKeyStringFromString(keyOffset - localKeyLimit); + } + } + private ByteSequence RES_GET_KEY32(int keyOffset) { + if(keyOffset >= 0) { + return new ByteSequence(keyStrings, keyOffset); + } else { + return new ByteSequence(poolBundleKeys, keyOffset & 0x7fffffff); + } + } + private String getKey32String(int keyOffset) { + if(keyOffset >= 0) { + return makeKeyStringFromBytes(keyOffset); + } else { + return makeKeyStringFromString(keyOffset & 0x7fffffff); + } + } + // Compare the length-specified input key with the + // NUL-terminated tableKey. + private static int compareKeys(CharSequence key, ByteSequence tableKey) { + int i; + for(i = 0; i < key.length(); ++i) { + int c2 = tableKey.charAt(i); + if(c2 == 0) { + return 1; // key > tableKey because key is longer. 
+ } + int diff = (int)key.charAt(i) - c2; + if(diff != 0) { + return diff; + } + } + return -(int)tableKey.charAt(i); + } + private int compareKeys(CharSequence key, char keyOffset) { + return compareKeys(key, RES_GET_KEY16(keyOffset)); + } + private int compareKeys32(CharSequence key, int keyOffset) { + return compareKeys(key, RES_GET_KEY32(keyOffset)); + } + + String getString(int res) { + int offset=RES_GET_OFFSET(res); + int length; + if(RES_GET_TYPE(res)==ICUResourceBundle.STRING_V2) { + int first = s16BitUnits.charAt(offset); + if((first&0xfffffc00)!=0xdc00) { // C: if(!U16_IS_TRAIL(first)) { + if(first==0) { + return emptyString; + } + int endOffset; + for(endOffset=offset+1; s16BitUnits.charAt(endOffset)!=0; ++endOffset) {} + return s16BitUnits.substring(offset, endOffset); + } else if(first<0xdfef) { + length=first&0x3ff; + ++offset; + } else if(first<0xdfff) { + length=((first-0xdfef)<<16)|s16BitUnits.charAt(offset+1); + offset+=2; + } else { + length=((int)s16BitUnits.charAt(offset+1)<<16)|s16BitUnits.charAt(offset+2); + offset+=3; + } + return s16BitUnits.substring(offset, offset+length); + } else if(res==offset) /* RES_GET_TYPE(res)==URES_STRING */ { + if(res==0) { + return emptyString; + } else { + offset=getResourceByteOffset(offset); + length=getInt(offset); + return new String(getChars(offset+4, length)); + } + } else { + return null; + } + } + + String getAlias(int res) { + int offset=RES_GET_OFFSET(res); + int length; + if(RES_GET_TYPE(res)==ICUResourceBundle.ALIAS) { + if(offset==0) { + return emptyString; + } else { + offset=getResourceByteOffset(offset); + length=getInt(offset); + return new String(getChars(offset+4, length)); + } + } else { + return null; + } + } + + byte[] getBinary(int res, byte[] ba) { + int offset=RES_GET_OFFSET(res); + int length; + if(RES_GET_TYPE(res)==UResourceBundle.BINARY) { + if(offset==0) { + return emptyBytes; + } else { + offset=getResourceByteOffset(offset); + length=getInt(offset); + if(ba==null || 
ba.length!=length) { + ba=new byte[length]; + } + System.arraycopy(resourceBytes, offset+4, ba, 0, length); + return ba; + } + } else { + return null; + } + } + + ByteBuffer getBinary(int res) { + int offset=RES_GET_OFFSET(res); + int length; + if(RES_GET_TYPE(res)==UResourceBundle.BINARY) { + if(offset==0) { + // Don't just + // return emptyByteBuffer; + // in case it matters whether the buffer's mark is defined or undefined. + return emptyByteBuffer.duplicate(); + } else { + offset=getResourceByteOffset(offset); + length=getInt(offset); + return ByteBuffer.wrap(resourceBytes, offset+4, length).slice().asReadOnlyBuffer(); + } + } else { + return null; + } + } + + int[] getIntVector(int res) { + int offset=RES_GET_OFFSET(res); + int length; + if(RES_GET_TYPE(res)==UResourceBundle.INT_VECTOR) { + if(offset==0) { + return emptyInts; + } else { + offset=getResourceByteOffset(offset); + length=getInt(offset); + return getInts(offset+4, length); + } + } else { + return null; + } + } + + Container getArray(int res) { + int type=RES_GET_TYPE(res); + int offset=RES_GET_OFFSET(res); + switch(type) { + case UResourceBundle.ARRAY: + case ICUResourceBundle.ARRAY16: + if(offset==0) { + return new Container(this); + } + break; + default: + return null; + } + switch(type) { + case UResourceBundle.ARRAY: + return new Array(this, offset); + case ICUResourceBundle.ARRAY16: + return new Array16(this, offset); + default: + return null; + } + } + + Table getTable(int res) { + int type=RES_GET_TYPE(res); + int offset=RES_GET_OFFSET(res); + switch(type) { + case UResourceBundle.TABLE: + case ICUResourceBundle.TABLE16: + case ICUResourceBundle.TABLE32: + if(offset==0) { + return new Table(this); + } + break; + default: + return null; + } + switch(type) { + case UResourceBundle.TABLE: + return new Table1632(this, offset); + case ICUResourceBundle.TABLE16: + return new Table16(this, offset); + case ICUResourceBundle.TABLE32: + return new Table32(this, offset); + default: + return null; + } 
+ } + + // Container value classes --------------------------------------------- *** + + static class Container { + protected ICUResourceBundleReader reader; + protected int size; + protected int itemsOffset; + + int getSize() { + return size; + } + int getContainerResource(int index) { + return ICUResourceBundle.RES_BOGUS; + } + protected int getContainer16Resource(int index) { + if (index < 0 || size <= index) { + return ICUResourceBundle.RES_BOGUS; + } + return (ICUResourceBundle.STRING_V2 << 28) | + reader.s16BitUnits.charAt(itemsOffset + index); + } + protected int getContainer32Resource(int index) { + if (index < 0 || size <= index) { + return ICUResourceBundle.RES_BOGUS; + } + return reader.getInt(itemsOffset + 4 * index); + } + Container(ICUResourceBundleReader reader) { + this.reader = reader; + } + } + private static final class Array extends Container { + int getContainerResource(int index) { + return getContainer32Resource(index); + } + Array(ICUResourceBundleReader reader, int offset) { + super(reader); + offset = reader.getResourceByteOffset(offset); + size = reader.getInt(offset); + itemsOffset = offset + 4; + } + } + private static final class Array16 extends Container { + int getContainerResource(int index) { + return getContainer16Resource(index); + } + Array16(ICUResourceBundleReader reader, int offset) { + super(reader); + size = reader.s16BitUnits.charAt(offset); + itemsOffset = offset + 1; + } + } + static class Table extends Container { + protected char[] keyOffsets; + protected int[] key32Offsets; + + String getKey(int index) { + if (index < 0 || size <= index) { + return null; + } + return keyOffsets != null ? 
+ reader.getKey16String(keyOffsets[index]) : + reader.getKey32String(key32Offsets[index]); + } + private static final int URESDATA_ITEM_NOT_FOUND = -1; + int findTableItem(CharSequence key) { + int mid, start, limit; + int result; + + /* do a binary search for the key */ + start=0; + limit=size; + while(start 0) { + start = mid + 1; + } else { + /* We found it! */ + return mid; + } + } + return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */ + } + int getTableResource(String resKey) { + return getContainerResource(findTableItem(resKey)); + } + Table(ICUResourceBundleReader reader) { + super(reader); + } + } + private static final class Table1632 extends Table { + int getContainerResource(int index) { + return getContainer32Resource(index); + } + Table1632(ICUResourceBundleReader reader, int offset) { + super(reader); + offset = reader.getResourceByteOffset(offset); + keyOffsets = reader.getTableKeyOffsets(offset); + size = keyOffsets.length; + itemsOffset = offset + 2 * ((size + 2) & ~1); // Skip padding for 4-alignment. 
+ } + } + private static final class Table16 extends Table { + int getContainerResource(int index) { + return getContainer16Resource(index); + } + Table16(ICUResourceBundleReader reader, int offset) { + super(reader); + keyOffsets = reader.getTable16KeyOffsets(offset); + size = keyOffsets.length; + itemsOffset = offset + 1 + size; + } + } + private static final class Table32 extends Table { + int getContainerResource(int index) { + return getContainer32Resource(index); + } + Table32(ICUResourceBundleReader reader, int offset) { + super(reader); + offset = reader.getResourceByteOffset(offset); + key32Offsets = reader.getTable32KeyOffsets(offset); + size = key32Offsets.length; + itemsOffset = offset + 4 * (1 + size); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java new file mode 100644 index 00000000000..561124ec295 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java @@ -0,0 +1,103 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/** + * Static utility functions for probing resource tables, used by ULocale and + * LocaleDisplayNames. + */ +public class ICUResourceTableAccess { + /** + * Utility to fetch locale display data from resource bundle tables. Convenience + * wrapper for {@link #getTableString(ICUResourceBundle, String, String, String)}. + */ + public static String getTableString(String path, ULocale locale, String tableName, + String itemName) { + ICUResourceBundle bundle = (ICUResourceBundle) UResourceBundle. 
+ getBundleInstance(path, locale.getBaseName()); + return getTableString(bundle, tableName, null, itemName); + } + + /** + * Utility to fetch locale display data from resource bundle tables. Uses fallback + * through the "Fallback" resource if available. + */ + public static String getTableString(ICUResourceBundle bundle, String tableName, + String subtableName, String item) { + try { + for (;;) { + // special case currency + if ("currency".equals(subtableName)) { + ICUResourceBundle table = bundle.getWithFallback("Currencies"); + table = table.getWithFallback(item); + return table.getString(1); + } else { + ICUResourceBundle table = lookup(bundle, tableName); + if (table == null) { + return item; + } + ICUResourceBundle stable = table; + if (subtableName != null) { + stable = lookup(table, subtableName); + } + if (stable != null) { + ICUResourceBundle sbundle = lookup(stable, item); + if (sbundle != null) { + return sbundle.getString(); // possible real exception + } + } + + // if we get here, stable was null, or sbundle was null + if (subtableName == null) { + // may be a deprecated code + String currentName = null; + if (tableName.equals("Countries")) { + currentName = LocaleIDs.getCurrentCountryID(item); + } else if (tableName.equals("Languages")) { + currentName = LocaleIDs.getCurrentLanguageID(item); + } + ICUResourceBundle sbundle = lookup(table, currentName); + if (sbundle != null) { + return sbundle.getString(); // possible real exception + } + } + + // still can't figure it out? 
try the fallback mechanism + ICUResourceBundle fbundle = lookup(table, "Fallback"); + if (fbundle == null) { + return item; + } + + String fallbackLocale = fbundle.getString(); // again, possible exception + if (fallbackLocale.length() == 0) { + fallbackLocale = "root"; + } + + if (fallbackLocale.equals(table.getULocale().getName())) { + return item; + } + + bundle = (ICUResourceBundle) UResourceBundle.getBundleInstance( + bundle.getBaseName(), fallbackLocale); + } + } + } catch (Exception e) { + // If something is seriously wrong, we might call getString on a resource that is + // not a string. That will throw an exception, which we catch and ignore here. + } + + return item; + } + + // utility to make the call sites in the above code cleaner + private static ICUResourceBundle lookup(ICUResourceBundle bundle, String resName) { + return ICUResourceBundle.findResourceWithFallback(resName, bundle, null); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUService.java b/main/classes/core/src/com/ibm/icu/impl/ICUService.java new file mode 100644 index 00000000000..a0c18159ac3 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ICUService.java @@ -0,0 +1,985 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.lang.ref.SoftReference; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.EventListener; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.Map.Entry; + +import com.ibm.icu.util.ULocale; + +/** + *

    A Service provides access to service objects that implement a + * particular service, e.g. transliterators. Users provide a String + * id (for example, a locale string) to the service, and get back an + * object for that id. Service objects can be any kind of object. + * The service object is cached and returned for later queries, so + * generally it should not be mutable, or the caller should clone the + * object before modifying it.

    + * + *

    Services 'canonicalize' the query id and use the canonical id to + * query for the service. The service also defines a mechanism to + * 'fallback' the id multiple times. Clients can optionally request + * the actual id that was matched by a query when they use an id to + * retrieve a service object.

    + * + *

    Service objects are instantiated by Factory objects registered with + * the service. The service queries each Factory in turn, from most recently + * registered to earliest registered, until one returns a service object. + * If none responds with a service object, a fallback id is generated, + * and the process repeats until a service object is returned or until + * the id has no further fallbacks.

    + * + *

    Factories can be dynamically registered and unregistered with the + * service. When registered, a Factory is installed at the head of + * the factory list, and so gets 'first crack' at any keys or fallback + * keys. When unregistered, it is removed from the service and can no + * longer be located through it. Service objects generated by this + * factory and held by the client are unaffected.

    + * + *

    ICUService uses Keys to query factories and perform + * fallback. The Key defines the canonical form of the id, and + * implements the fallback strategy. Custom Keys can be defined that + * parse complex IDs into components that Factories can more easily + * use. The Key can cache the results of this parsing to save + * repeated effort. ICUService provides convenience APIs that + * take Strings and generate default Keys for use in querying.

    + * + *

    ICUService provides API to get the list of ids publicly + * supported by the service (although queries aren't restricted to + * this list). This list contains only 'simple' IDs, and not fully + * unique ids. Factories are associated with each simple ID and + * the responsible factory can also return a human-readable localized + * version of the simple ID, for use in user interfaces. ICUService + * can also provide a sorted collection of all the localized visible + * ids.

    + * + *

    ICUService extends ICUNotifier, so that clients can register + * to receive notification when factories are added or removed from + * the service. ICUService provides a default EventListener subinterface, + * ServiceListener, which can be registered with the service. When + * the service changes, the ServiceListener's serviceChanged method + * is called, with the service as the only argument.

    + * + *

    The ICUService API is both rich and generic, and it is expected + * that most implementations will statically 'wrap' ICUService to + * present a more appropriate API-- for example, to declare the type + * of the objects returned from get, to limit the factories that can + * be registered with the service, or to define their own listener + * interface with a custom callback method. They might also customize + * ICUService by overriding it, for example, to customize the Key and + * fallback strategy. ICULocaleService is a customized service that + * uses Locale names as ids and uses Keys that implement the standard + * resource bundle fallback strategy.

    + */ +public class ICUService extends ICUNotifier { + /** + * Name used for debugging. + */ + protected final String name; + + /** + * Constructor. + */ + public ICUService() { + name = ""; + } + + private static final boolean DEBUG = ICUDebug.enabled("service"); + /** + * Construct with a name (useful for debugging). + */ + public ICUService(String name) { + this.name = name; + } + + /** + * Access to factories is protected by a read-write lock. This is + * to allow multiple threads to read concurrently, but keep + * changes to the factory list atomic with respect to all readers. + */ + private final ICURWLock factoryLock = new ICURWLock(); + + /** + * All the factories registered with this service. + */ + private final List factories = new ArrayList(); + + /** + * Record the default number of factories for this service. + * Can be set by markDefault. + */ + private int defaultSize = 0; + + /** + * Keys are used to communicate with factories to generate an + * instance of the service. Keys define how ids are + * canonicalized, provide both a current id and a current + * descriptor to use in querying the cache and factories, and + * determine the fallback strategy.

    + * + *

    Keys provide both a currentDescriptor and a currentID. + * The descriptor contains an optional prefix, followed by '/' + * and the currentID. Factories that handle complex keys, + * for example number format factories that generate multiple + * kinds of formatters for the same locale, use the descriptor + * to provide a fully unique identifier for the service object, + * while using the currentID (in this case, the locale string), + * as the visible IDs that can be localized. + * + *

    The default implementation of Key has no fallbacks and + * has no custom descriptors.

    + */ + public static class Key { + private final String id; + + /** + * Construct a key from an id. + */ + public Key(String id) { + this.id = id; + } + + /** + * Return the original ID used to construct this key. + */ + public final String id() { + return id; + } + + /** + * Return the canonical version of the original ID. This implementation + * returns the original ID unchanged. + */ + public String canonicalID() { + return id; + } + + /** + * Return the (canonical) current ID. This implementation + * returns the canonical ID. + */ + public String currentID() { + return canonicalID(); + } + + /** + * Return the current descriptor. This implementation returns + * the current ID. The current descriptor is used to fully + * identify an instance of the service in the cache. A + * factory may handle all descriptors for an ID, or just a + * particular descriptor. The factory can either parse the + * descriptor or use custom API on the key in order to + * instantiate the service. + */ + public String currentDescriptor() { + return "/" + currentID(); + } + + /** + * If the key has a fallback, modify the key and return true, + * otherwise return false. The current ID will change if there + * is a fallback. No currentIDs should be repeated, and fallback + * must eventually return false. This implmentation has no fallbacks + * and always returns false. + */ + public boolean fallback() { + return false; + } + + /** + * If a key created from id would eventually fallback to match the + * canonical ID of this key, return true. + */ + public boolean isFallbackOf(String idToCheck) { + return canonicalID().equals(idToCheck); + } + } + + /** + * Factories generate the service objects maintained by the + * service. A factory generates a service object from a key, + * updates id->factory mappings, and returns the display name for + * a supported id. + */ + public static interface Factory { + + /** + * Create a service object from the key, if this factory + * supports the key. 
Otherwise, return null. + * + *

    If the factory supports the key, then it can call + * the service's getKey(Key, String[], Factory) method + * passing itself as the factory to get the object that + * the service would have created prior to the factory's + * registration with the service. This can change the + * key, so any information required from the key should + * be extracted before making such a callback. + */ + public Object create(Key key, ICUService service); + + /** + * Update the result IDs (not descriptors) to reflect the IDs + * this factory handles. This function and getDisplayName are + * used to support ICUService.getDisplayNames. Basically, the + * factory has to determine which IDs it will permit to be + * available, and of those, which it will provide localized + * display names for. In most cases this reflects the IDs that + * the factory directly supports. + */ + public void updateVisibleIDs(Map result); + + /** + * Return the display name for this id in the provided locale. + * This is an localized id, not a descriptor. If the id is + * not visible or not defined by the factory, return null. + * If locale is null, return id unchanged. + */ + public String getDisplayName(String id, ULocale locale); + } + + /** + * A default implementation of factory. This provides default + * implementations for subclasses, and implements a singleton + * factory that matches a single id and returns a single + * (possibly deferred-initialized) instance. This implements + * updateVisibleIDs to add a mapping from its ID to itself + * if visible is true, or to remove any existing mapping + * for its ID if visible is false. + */ + public static class SimpleFactory implements Factory { + protected Object instance; + protected String id; + protected boolean visible; + + /** + * Convenience constructor that calls SimpleFactory(Object, String, boolean) + * with visible true. 
+ */ + public SimpleFactory(Object instance, String id) { + this(instance, id, true); + } + + /** + * Construct a simple factory that maps a single id to a single + * service instance. If visible is true, the id will be visible. + * Neither the instance nor the id can be null. + */ + public SimpleFactory(Object instance, String id, boolean visible) { + if (instance == null || id == null) { + throw new IllegalArgumentException("Instance or id is null"); + } + this.instance = instance; + this.id = id; + this.visible = visible; + } + + /** + * Return the service instance if the factory's id is equal to + * the key's currentID. Service is ignored. + */ + public Object create(Key key, ICUService service) { + if (id.equals(key.currentID())) { + return instance; + } + return null; + } + + /** + * If visible, adds a mapping from id -> this to the result, + * otherwise removes id from result. + */ + public void updateVisibleIDs(Map result) { + if (visible) { + result.put(id, this); + } else { + result.remove(id); + } + } + + /** + * If this.id equals id, returns id regardless of locale, + * otherwise returns null. (This default implementation has + * no localized id information.) + */ + public String getDisplayName(String identifier, ULocale locale) { + return (visible && id.equals(identifier)) ? identifier : null; + } + + /** + * For debugging. + */ + public String toString() { + StringBuilder buf = new StringBuilder(super.toString()); + buf.append(", id: "); + buf.append(id); + buf.append(", visible: "); + buf.append(visible); + return buf.toString(); + } + } + + /** + * Convenience override for get(String, String[]). This uses + * createKey to create a key for the provided descriptor. + */ + public Object get(String descriptor) { + return getKey(createKey(descriptor), null); + } + + /** + * Convenience override for get(Key, String[]). This uses + * createKey to create a key from the provided descriptor. 
+     */
+    public Object get(String descriptor, String[] actualReturn) {
+        if (descriptor == null) {
+            throw new NullPointerException("descriptor must not be null");
+        }
+        return getKey(createKey(descriptor), actualReturn);
+    }
+
+    /**
+     * Convenience override for getKey(Key, String[]) that passes null for actualReturn.
+     */
+    public Object getKey(Key key) {
+        return getKey(key, null);
+    }
+
+    /**
+     * 

    Given a key, return a service object, and, if actualReturn + * is not null, the descriptor with which it was found in the + * first element of actualReturn. If no service object matches + * this key, return null, and leave actualReturn unchanged.

    + * + *

    This queries the cache using the key's descriptor, and if no + * object in the cache matches it, tries the key on each + * registered factory, in order. If none generates a service + * object for the key, repeats the process with each fallback of + * the key, until either one returns a service object, or the key + * has no fallback.

    + * + *

    If key is null, just returns null.

    + */ + public Object getKey(Key key, String[] actualReturn) { + return getKey(key, actualReturn, null); + } + + // debugging + // Map hardRef; + + public Object getKey(Key key, String[] actualReturn, Factory factory) { + if (factories.size() == 0) { + return handleDefault(key, actualReturn); + } + + if (DEBUG) System.out.println("Service: " + name + " key: " + key.canonicalID()); + + CacheEntry result = null; + if (key != null) { + try { + // The factory list can't be modified until we're done, + // otherwise we might update the cache with an invalid result. + // The cache has to stay in synch with the factory list. + factoryLock.acquireRead(); + + Map cache = null; + SoftReference> cref = cacheref; // copy so we don't need to sync on this + if (cref != null) { + if (DEBUG) System.out.println("Service " + name + " ref exists"); + cache = cref.get(); + } + if (cache == null) { + if (DEBUG) System.out.println("Service " + name + " cache was empty"); + // synchronized since additions and queries on the cache must be atomic + // they can be interleaved, though + cache = Collections.synchronizedMap(new HashMap()); +// hardRef = cache; // debug + cref = new SoftReference>(cache); + } + + String currentDescriptor = null; + ArrayList cacheDescriptorList = null; + boolean putInCache = false; + + int NDebug = 0; + + int startIndex = 0; + int limit = factories.size(); + boolean cacheResult = true; + if (factory != null) { + for (int i = 0; i < limit; ++i) { + if (factory == factories.get(i)) { + startIndex = i + 1; + break; + } + } + if (startIndex == 0) { + throw new IllegalStateException("Factory " + factory + "not registered with service: " + this); + } + cacheResult = false; + } + + outer: + do { + currentDescriptor = key.currentDescriptor(); + if (DEBUG) System.out.println(name + "[" + NDebug++ + "] looking for: " + currentDescriptor); + result = cache.get(currentDescriptor); + if (result != null) { + if (DEBUG) System.out.println(name + " found with descriptor: " + 
currentDescriptor); + break outer; + } else { + if (DEBUG) System.out.println("did not find: " + currentDescriptor + " in cache"); + } + + // first test of cache failed, so we'll have to update + // the cache if we eventually succeed-- that is, if we're + // going to update the cache at all. + putInCache = cacheResult; + + // int n = 0; + int index = startIndex; + while (index < limit) { + Factory f = factories.get(index++); + if (DEBUG) System.out.println("trying factory[" + (index-1) + "] " + f.toString()); + Object service = f.create(key, this); + if (service != null) { + result = new CacheEntry(currentDescriptor, service); + if (DEBUG) System.out.println(name + " factory supported: " + currentDescriptor + ", caching"); + break outer; + } else { + if (DEBUG) System.out.println("factory did not support: " + currentDescriptor); + } + } + + // prepare to load the cache with all additional ids that + // will resolve to result, assuming we'll succeed. We + // don't want to keep querying on an id that's going to + // fallback to the one that succeeded, we want to hit the + // cache the first time next goaround. + if (cacheDescriptorList == null) { + cacheDescriptorList = new ArrayList(5); + } + cacheDescriptorList.add(currentDescriptor); + + } while (key.fallback()); + + if (result != null) { + if (putInCache) { + if (DEBUG) System.out.println("caching '" + result.actualDescriptor + "'"); + cache.put(result.actualDescriptor, result); + if (cacheDescriptorList != null) { + for (String desc : cacheDescriptorList) { + if (DEBUG) System.out.println(name + " adding descriptor: '" + desc + "' for actual: '" + result.actualDescriptor + "'"); + + cache.put(desc, result); + } + } + // Atomic update. We held the read lock all this time + // so we know our cache is consistent with the factory list. + // We might stomp over a cache that some other thread + // rebuilt, but that's the breaks. They're both good. 
+ cacheref = cref; + } + + if (actualReturn != null) { + // strip null prefix + if (result.actualDescriptor.indexOf("/") == 0) { + actualReturn[0] = result.actualDescriptor.substring(1); + } else { + actualReturn[0] = result.actualDescriptor; + } + } + + if (DEBUG) System.out.println("found in service: " + name); + + return result.service; + } + } + finally { + factoryLock.releaseRead(); + } + } + + if (DEBUG) System.out.println("not found in service: " + name); + + return handleDefault(key, actualReturn); + } + private SoftReference> cacheref; + + // Record the actual id for this service in the cache, so we can return it + // even if we succeed later with a different id. + private static final class CacheEntry { + final String actualDescriptor; + final Object service; + CacheEntry(String actualDescriptor, Object service) { + this.actualDescriptor = actualDescriptor; + this.service = service; + } + } + + + /** + * Default handler for this service if no factory in the list + * handled the key. + */ + protected Object handleDefault(Key key, String[] actualIDReturn) { + return null; + } + + /** + * Convenience override for getVisibleIDs(String) that passes null + * as the fallback, thus returning all visible IDs. + */ + public Set getVisibleIDs() { + return getVisibleIDs(null); + } + + /** + *

    Return a snapshot of the visible IDs for this service. This + * set will not change as Factories are added or removed, but the + * supported ids will, so there is no guarantee that all and only + * the ids in the returned set are visible and supported by the + * service in subsequent calls.

    + * + *

    matchID is passed to createKey to create a key. If the + * key is not null, it is used to filter out ids that don't have + * the key as a fallback. + */ + public Set getVisibleIDs(String matchID) { + Set result = getVisibleIDMap().keySet(); + + Key fallbackKey = createKey(matchID); + + if (fallbackKey != null) { + Set temp = new HashSet(result.size()); + for (String id : result) { + if (fallbackKey.isFallbackOf(id)) { + temp.add(id); + } + } + result = temp; + } + return result; + } + + /** + * Return a map from visible ids to factories. + */ + private Map getVisibleIDMap() { + Map idcache = null; + SoftReference> ref = idref; + if (ref != null) { + idcache = ref.get(); + } + while (idcache == null) { + synchronized (this) { // or idref-only lock? + if (ref == idref || idref == null) { + // no other thread updated idref before we got the lock, so + // grab the factory list and update it ourselves + try { + factoryLock.acquireRead(); + idcache = new HashMap(); + ListIterator lIter = factories.listIterator(factories.size()); + while (lIter.hasPrevious()) { + Factory f = lIter.previous(); + f.updateVisibleIDs(idcache); + } + idcache = Collections.unmodifiableMap(idcache); + idref = new SoftReference>(idcache); + } + finally { + factoryLock.releaseRead(); + } + } else { + // another thread updated idref, but gc may have stepped + // in and undone its work, leaving idcache null. If so, + // retry. + ref = idref; + idcache = ref.get(); + } + } + } + + return idcache; + } + private SoftReference> idref; + + /** + * Convenience override for getDisplayName(String, ULocale) that + * uses the current default locale. + */ + public String getDisplayName(String id) { + return getDisplayName(id, ULocale.getDefault()); + } + + /** + * Given a visible id, return the display name in the requested locale. + * If there is no directly supported id corresponding to this id, return + * null. 
+ */ + public String getDisplayName(String id, ULocale locale) { + Map m = getVisibleIDMap(); + Factory f = m.get(id); + if (f != null) { + return f.getDisplayName(id, locale); + } + + Key key = createKey(id); + while (key.fallback()) { + f = m.get(key.currentID()); + if (f != null) { + return f.getDisplayName(id, locale); + } + } + + return null; + } + + /** + * Convenience override of getDisplayNames(ULocale, Comparator, String) that + * uses the current default Locale as the locale, null as + * the comparator, and null for the matchID. + */ + public SortedMap getDisplayNames() { + ULocale locale = ULocale.getDefault(); + return getDisplayNames(locale, null, null); + } + + /** + * Convenience override of getDisplayNames(ULocale, Comparator, String) that + * uses null for the comparator, and null for the matchID. + */ + public SortedMap getDisplayNames(ULocale locale) { + return getDisplayNames(locale, null, null); + } + + /** + * Convenience override of getDisplayNames(ULocale, Comparator, String) that + * uses null for the matchID, thus returning all display names. + */ + public SortedMap getDisplayNames(ULocale locale, Comparator com) { + return getDisplayNames(locale, com, null); + } + + /** + * Convenience override of getDisplayNames(ULocale, Comparator, String) that + * uses null for the comparator. + */ + public SortedMap getDisplayNames(ULocale locale, String matchID) { + return getDisplayNames(locale, null, matchID); + } + + /** + * Return a snapshot of the mapping from display names to visible + * IDs for this service. This set will not change as factories + * are added or removed, but the supported ids will, so there is + * no guarantee that all and only the ids in the returned map will + * be visible and supported by the service in subsequent calls, + * nor is there any guarantee that the current display names match + * those in the set. The display names are sorted based on the + * comparator provided. 
+ */ + public SortedMap getDisplayNames(ULocale locale, Comparator com, String matchID) { + SortedMap dncache = null; + LocaleRef ref = dnref; + + if (ref != null) { + dncache = ref.get(locale, com); + } + + while (dncache == null) { + synchronized (this) { + if (ref == dnref || dnref == null) { + dncache = new TreeMap(com); // sorted + + Map m = getVisibleIDMap(); + Iterator> ei = m.entrySet().iterator(); + while (ei.hasNext()) { + Entry e = ei.next(); + String id = e.getKey(); + Factory f = e.getValue(); + dncache.put(f.getDisplayName(id, locale), id); + } + + dncache = Collections.unmodifiableSortedMap(dncache); + dnref = new LocaleRef(dncache, locale, com); + } else { + ref = dnref; + dncache = ref.get(locale, com); + } + } + } + + Key matchKey = createKey(matchID); + if (matchKey == null) { + return dncache; + } + + SortedMap result = new TreeMap(dncache); + Iterator> iter = result.entrySet().iterator(); + while (iter.hasNext()) { + Entry e = iter.next(); + if (!matchKey.isFallbackOf(e.getValue())) { + iter.remove(); + } + } + return result; + } + + // we define a class so we get atomic simultaneous access to the + // locale, comparator, and corresponding map. + private static class LocaleRef { + private final ULocale locale; + private SoftReference> ref; + private Comparator com; + + LocaleRef(SortedMap dnCache, ULocale locale, Comparator com) { + this.locale = locale; + this.com = com; + this.ref = new SoftReference>(dnCache); + } + + + SortedMap get(ULocale loc, Comparator comp) { + SortedMap m = ref.get(); + if (m != null && + this.locale.equals(loc) && + (this.com == comp || (this.com != null && this.com.equals(comp)))) { + + return m; + } + return null; + } + } + private LocaleRef dnref; + + /** + * Return a snapshot of the currently registered factories. There + * is no guarantee that the list will still match the current + * factory list of the service subsequent to this call. 
+ */ + public final List factories() { + try { + factoryLock.acquireRead(); + return new ArrayList(factories); + } + finally{ + factoryLock.releaseRead(); + } + } + + /** + * A convenience override of registerObject(Object, String, boolean) + * that defaults visible to true. + */ + public Factory registerObject(Object obj, String id) { + return registerObject(obj, id, true); + } + + /** + * Register an object with the provided id. The id will be + * canonicalized. The canonicalized ID will be returned by + * getVisibleIDs if visible is true. + */ + public Factory registerObject(Object obj, String id, boolean visible) { + String canonicalID = createKey(id).canonicalID(); + return registerFactory(new SimpleFactory(obj, canonicalID, visible)); + } + + /** + * Register a Factory. Returns the factory if the service accepts + * the factory, otherwise returns null. The default implementation + * accepts all factories. + */ + public final Factory registerFactory(Factory factory) { + if (factory == null) { + throw new NullPointerException(); + } + try { + factoryLock.acquireWrite(); + factories.add(0, factory); + clearCaches(); + } + finally { + factoryLock.releaseWrite(); + } + notifyChanged(); + return factory; + } + + /** + * Unregister a factory. The first matching registered factory will + * be removed from the list. Returns true if a matching factory was + * removed. + */ + public final boolean unregisterFactory(Factory factory) { + if (factory == null) { + throw new NullPointerException(); + } + + boolean result = false; + try { + factoryLock.acquireWrite(); + if (factories.remove(factory)) { + result = true; + clearCaches(); + } + } + finally { + factoryLock.releaseWrite(); + } + + if (result) { + notifyChanged(); + } + return result; + } + + /** + * Reset the service to the default factories. The factory + * lock is acquired and then reInitializeFactories is called. 
+ */ + public final void reset() { + try { + factoryLock.acquireWrite(); + reInitializeFactories(); + clearCaches(); + } + finally { + factoryLock.releaseWrite(); + } + notifyChanged(); + } + + /** + * Reinitialize the factory list to its default state. By default + * this clears the list. Subclasses can override to provide other + * default initialization of the factory list. Subclasses must + * not call this method directly, as it must only be called while + * holding write access to the factory list. + */ + protected void reInitializeFactories() { + factories.clear(); + } + + /** + * Return true if the service is in its default state. The default + * implementation returns true if there are no factories registered. + */ + public boolean isDefault() { + return factories.size() == defaultSize; + } + + /** + * Set the default size to the current number of registered factories. + * Used by subclasses to customize the behavior of isDefault. + */ + protected void markDefault() { + defaultSize = factories.size(); + } + + /** + * Create a key from an id. This creates a Key instance. + * Subclasses can override to define more useful keys appropriate + * to the factories they accept. If id is null, returns null. + */ + public Key createKey(String id) { + return id == null ? null : new Key(id); + } + + /** + * Clear caches maintained by this service. Subclasses can + * override if they implement additional that need to be cleared + * when the service changes. Subclasses should generally not call + * this method directly, as it must only be called while + * synchronized on this. + */ + protected void clearCaches() { + // we don't synchronize on these because methods that use them + // copy before use, and check for changes if they modify the + // caches. + cacheref = null; + idref = null; + dnref = null; + } + + /** + * Clears only the service cache. 
+ * This can be called by subclasses when a change affects the service + * cache but not the id caches, e.g., when the default locale changes + * the resolution of ids changes, but not the visible ids themselves. + */ + protected void clearServiceCache() { + cacheref = null; + } + + /** + * ServiceListener is the listener that ICUService provides by default. + * ICUService will notifiy this listener when factories are added to + * or removed from the service. Subclasses can provide + * different listener interfaces that extend EventListener, and modify + * acceptsListener and notifyListener as appropriate. + */ + public static interface ServiceListener extends EventListener { + public void serviceChanged(ICUService service); + } + + /** + * Return true if the listener is accepted; by default this + * requires a ServiceListener. Subclasses can override to accept + * different listeners. + */ + protected boolean acceptsListener(EventListener l) { + return l instanceof ServiceListener; + } + + /** + * Notify the listener, which by default is a ServiceListener. + * Subclasses can override to use a different listener. + */ + protected void notifyListener(EventListener l) { + ((ServiceListener)l).serviceChanged(this); + } + + /** + * Return a string describing the statistics for this service. + * This also resets the statistics. Used for debugging purposes. + */ + public String stats() { + ICURWLock.Stats stats = factoryLock.resetStats(); + if (stats != null) { + return stats.toString(); + } + return "no stats"; + } + + /** + * Return the name of this service. This will be the empty string if none was assigned. + */ + public String getName() { + return name; + } + + /** + * Returns the result of super.toString, appending the name in curly braces. 
+ */ + public String toString() { + return super.toString() + "{" + name + "}"; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java b/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java new file mode 100644 index 00000000000..204a26477fb --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java @@ -0,0 +1,32 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, Google, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +/** + * @author markdavis + * + */ +public class IllegalIcuArgumentException extends IllegalArgumentException { + private static final long serialVersionUID = 3789261542830211225L; + + public IllegalIcuArgumentException(String errorMessage) { + super(errorMessage); + } + + public IllegalIcuArgumentException(Throwable cause) { + super(cause); + } + + public IllegalIcuArgumentException(String errorMessage, Throwable cause) { + super(errorMessage, cause); + } + + public synchronized IllegalIcuArgumentException initCause(Throwable cause) { + return (IllegalIcuArgumentException) super.initCause(cause); + } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java b/main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java new file mode 100644 index 00000000000..345adcb6a39 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java @@ -0,0 +1,389 @@ +/** + ******************************************************************************* + * Copyright (C) 2004-2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +/** + * For generation of Implicit CEs + * @author Mark Davis + * + * Cleaned up so that changes can be made more easily. + * Old values: +# First Implicit: E26A792D +# Last Implicit: E3DC70C0 +# First CJK: E0030300 +# Last CJK: E0A9DD00 +# First CJK_A: E0A9DF00 +# Last CJK_A: E0DE3100 +@internal + */ +public class ImplicitCEGenerator { + + /** + * constants + */ + static final boolean DEBUG = false; + + static final long topByte = 0xFF000000L; + static final long bottomByte = 0xFFL; + static final long fourBytes = 0xFFFFFFFFL; + + static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2 + + public static final int CJK_BASE = 0x4E00; + public static final int CJK_LIMIT = 0x9FFF+1; + public static final int CJK_COMPAT_USED_BASE = 0xFA0E; + public static final int CJK_COMPAT_USED_LIMIT = 0xFA2F+1; + public static final int CJK_A_BASE = 0x3400; + public static final int CJK_A_LIMIT = 0x4DBF+1; + public static final int CJK_B_BASE = 0x20000; + public static final int CJK_B_LIMIT = 0x2A6DF+1; + +// private void throwError(String title, int cp) { +// throw new IllegalArgumentException(title + "\t" + Utility.hex(cp, 6) + "\t" + +// Utility.hex(getImplicitFromRaw(cp) & fourBytes)); +// } +// +// private void throwError(String title, long ce) { +// throw new IllegalArgumentException(title + "\t" + Utility.hex(ce & fourBytes)); +// } +// +// private void show(int i) { +// if (i >= 0 && i <= MAX_INPUT) { +// System.out.println(Utility.hex(i) + "\t" + Utility.hex(getImplicitFromRaw(i) & fourBytes)); +// } +// } + + /** + * Precomputed by constructor + */ + int final3Multiplier; + int final4Multiplier; + int final3Count; + int final4Count; + int medialCount; + int min3Primary; + int min4Primary; + int max4Primary; + int minTrail; + int maxTrail; + int max3Trail; + int max4Trail; + int min4Boundary; + + public int getGap4() { + return final4Multiplier - 
1; + } + + public int getGap3() { + return final3Multiplier - 1; + } + + // old comment + // we must skip all 00, 01, 02, FF bytes, so most bytes have 252 values + // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case) + // we shift so that HAN all has the same first primary, for compression. + // for the 4 byte case, we make the gap as large as we can fit. + + /** + * Supply parameters for generating implicit CEs + */ + public ImplicitCEGenerator(int minPrimary, int maxPrimary) { + // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms. + this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1); + } + + /** + * Set up to generate implicits. + * @param minPrimary The minimum primary value. + * @param maxPrimary The maximum primary value. + * @param minTrail final byte + * @param maxTrail final byte + * @param gap3 the gap we leave for tailoring for 3-byte forms + * @param primaries3count number of 3-byte primarys we can use (normally 1) + */ + public ImplicitCEGenerator(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int primaries3count) { + // some simple parameter checks + if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) { + throw new IllegalArgumentException("bad lead bytes"); + } + if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) { + throw new IllegalArgumentException("bad trail bytes"); + } + if (primaries3count < 1) { + throw new IllegalArgumentException("bad three-byte primaries"); + } + + this.minTrail = minTrail; + this.maxTrail = maxTrail; + + min3Primary = minPrimary; + max4Primary = maxPrimary; + // compute constants for use later. + // number of values we can use in trailing bytes + // leave room for empty values between AND above, e.g. 
if gap = 2 + // range 3..7 => +3 -4 -5 -6 -7: so 1 value + // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values + // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values + final3Multiplier = gap3 + 1; + final3Count = (maxTrail - minTrail + 1) / final3Multiplier; + max3Trail = minTrail + (final3Count - 1) * final3Multiplier; + + // medials can use full range + medialCount = (maxTrail - minTrail + 1); + // find out how many values fit in each form + int threeByteCount = medialCount * final3Count; + // now determine where the 3/4 boundary is. + // we use 3 bytes below the boundary, and 4 above + int primariesAvailable = maxPrimary - minPrimary + 1; + int primaries4count = primariesAvailable - primaries3count; + + int min3ByteCoverage = primaries3count * threeByteCount; + min4Primary = minPrimary + primaries3count; + min4Boundary = min3ByteCoverage; + // Now expand out the multiplier for the 4 bytes, and redo. + + int totalNeeded = MAX_INPUT - min4Boundary; + int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count); + if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte); + + int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount); + if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte); + + int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte; + if (DEBUG) System.out.println("expandedGap: " + gap4); + if (gap4 < 1) throw new IllegalArgumentException("must have larger gap4s"); + + final4Multiplier = gap4 + 1; + final4Count = neededPerFinalByte; + max4Trail = minTrail + (final4Count - 1) * final4Multiplier; + + if (primaries4count * medialCount * medialCount * final4Count < MAX_INPUT) { + throw new IllegalArgumentException("internal error"); + } + if (DEBUG) { + System.out.println("final4Count: " + final4Count); + for (int counter = 0; counter < final4Count; ++counter) { + int value = minTrail + (1 + counter)*final4Multiplier; + System.out.println(counter + "\t" + value + "\t" + 
Utility.hex(value)); + } + } + } + + static public int divideAndRoundUp(int a, int b) { + return 1 + (a-1)/b; + } + + /** + * Converts implicit CE into raw integer + * @param implicit The implicit value passed. + * @return -1 if illegal format + */ + public int getRawFromImplicit(int implicit) { + int result; + int b3 = implicit & 0xFF; + implicit >>= 8; + int b2 = implicit & 0xFF; + implicit >>= 8; + int b1 = implicit & 0xFF; + implicit >>= 8; + int b0 = implicit & 0xFF; + + // simple parameter checks + if (b0 < min3Primary || b0 > max4Primary + || b1 < minTrail || b1 > maxTrail) return -1; + // normal offsets + b1 -= minTrail; + + // take care of the final values, and compose + if (b0 < min4Primary) { + if (b2 < minTrail || b2 > max3Trail || b3 != 0) return -1; + b2 -= minTrail; + int remainder = b2 % final3Multiplier; + if (remainder != 0) return -1; + b0 -= min3Primary; + b2 /= final3Multiplier; + result = ((b0 * medialCount) + b1) * final3Count + b2; + } else { + if (b2 < minTrail || b2 > maxTrail + || b3 < minTrail || b3 > max4Trail) return -1; + b2 -= minTrail; + b3 -= minTrail; + int remainder = b3 % final4Multiplier; + if (remainder != 0) return -1; + b3 /= final4Multiplier; + b0 -= min4Primary; + result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary; + } + // final check + if (result < 0 || result > MAX_INPUT) return -1; + return result; + } + + /** + * Generate the implicit CE, from raw integer. + * Left shifted to put the first byte at the top of an int. 
+ * @param cp code point + * @return Primary implicit weight + */ + public int getImplicitFromRaw(int cp) { + if (cp < 0 || cp > MAX_INPUT) { + throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp)); + } + int last0 = cp - min4Boundary; + if (last0 < 0) { + int last1 = cp / final3Count; + last0 = cp % final3Count; + + int last2 = last1 / medialCount; + last1 %= medialCount; + + last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start + last1 = minTrail + last1; // offset + last2 = min3Primary + last2; // offset + + if (last2 >= min4Primary) { + throw new IllegalArgumentException("4-byte out of range: " + + Utility.hex(cp) + ", " + Utility.hex(last2)); + } + + return (last2 << 24) + (last1 << 16) + (last0 << 8); + } else { + int last1 = last0 / final4Count; + last0 %= final4Count; + + int last2 = last1 / medialCount; + last1 %= medialCount; + + int last3 = last2 / medialCount; + last2 %= medialCount; + + last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start + last1 = minTrail + last1; // offset + last2 = minTrail + last2; // offset + last3 = min4Primary + last3; // offset + + if (last3 > max4Primary) { + throw new IllegalArgumentException("4-byte out of range: " + + Utility.hex(cp) + ", " + Utility.hex(last3)); + } + + return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0; + } + } + + /** + * Gets an Implicit from a code point. Internally, + * swaps (which produces a raw value 0..220000, + * then converts raw to implicit. + * @param cp The code point to convert to implicit. + * @return Primary implicit weight + */ + public int getImplicitFromCodePoint(int cp) { + if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp)); + + // Produce Raw value + // note, we add 1 so that the first value is always empty!! + cp = ImplicitCEGenerator.swapCJK(cp) + 1; + // we now have a range of numbers from 0 to 220000. 
+ + if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp)); + + return getImplicitFromRaw(cp); + } + + /** + * Function used to: + * a) collapse the 2 different Han ranges from UCA into one (in the right order), and + * b) bump any non-CJK characters by 10FFFF. + * The relevant blocks are: + * A: 4E00..9FFF; CJK Unified Ideographs + * F900..FAFF; CJK Compatibility Ideographs + * B: 3400..4DBF; CJK Unified Ideographs Extension A + * 20000..XX; CJK Unified Ideographs Extension B (and others later on) + * As long as + * no new B characters are allocated between 4E00 and FAFF, and + * no new A characters are outside of this range, + * (very high probability) this simple code will work. + * The reordered blocks are: + * Block1 is CJK + * Block2 is CJK_COMPAT_USED + * Block3 is CJK_A + * (all contiguous) + * Any other CJK gets its normal code point + * Any non-CJK gets +10FFFF + * When we reorder Block1, we make sure that it is at the very start, + * so that it will use a 3-byte form. + * Warning: the we only pick up the compatibility characters that are + * NOT decomposed, so that block is smaller! 
+ */ + + static int NON_CJK_OFFSET = 0x110000; + + static int swapCJK(int i) { + + if (i >= CJK_BASE) { + if (i < CJK_LIMIT) return i - CJK_BASE; + + if (i < CJK_COMPAT_USED_BASE) return i + NON_CJK_OFFSET; + + if (i < CJK_COMPAT_USED_LIMIT) return i - CJK_COMPAT_USED_BASE + + (CJK_LIMIT - CJK_BASE); + if (i < CJK_B_BASE) return i + NON_CJK_OFFSET; + + if (i < CJK_B_LIMIT) return i; // non-BMP-CJK + + return i + NON_CJK_OFFSET; // non-CJK + } + if (i < CJK_A_BASE) return i + NON_CJK_OFFSET; + + if (i < CJK_A_LIMIT) return i - CJK_A_BASE + + (CJK_LIMIT - CJK_BASE) + + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); + return i + NON_CJK_OFFSET; // non-CJK + } + + + /** + * @return Minimal trail value + */ + public int getMinTrail() { + return minTrail; + } + + /** + * @return Maximal trail value + */ + public int getMaxTrail() { + return maxTrail; + } + + public int getCodePointFromRaw(int i) { + i--; + int result = 0; + if(i >= NON_CJK_OFFSET) { + result = i - NON_CJK_OFFSET; + } else if(i >= CJK_B_BASE) { + result = i; + } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { + // rest of CJKs, compacted + if(i < CJK_LIMIT - CJK_BASE) { + result = i + CJK_BASE; + } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { + result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE); + } else { + result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE); + } + } else { + result = -1; + } + return result; + } + + public int getRawFromCodePoint(int i) { + return swapCJK(i)+1; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/IntTrie.java b/main/classes/core/src/com/ibm/icu/impl/IntTrie.java new file mode 100644 index 00000000000..edcf19e7364 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/IntTrie.java @@ -0,0 +1,333 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, 
International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import com.ibm.icu.text.UTF16; + +/** + * Trie implementation which stores data in int, 32 bits. + * @author synwee + * @see com.ibm.icu.impl.Trie + * @since release 2.1, Jan 01 2002 + */ +public class IntTrie extends Trie +{ + // public constructors --------------------------------------------- + + /** + *

    Creates a new Trie with the settings for the trie data.

    + *

    Unserialize the 32-bit-aligned input stream and use the data for the + * trie.

    + * @param inputStream file input stream to a ICU data file, containing + * the trie + * @param dataManipulate object which provides methods to parse the char + * data + * @throws IOException thrown when data reading fails + */ + public IntTrie(InputStream inputStream, DataManipulate dataManipulate) + throws IOException + { + super(inputStream, dataManipulate); + if (!isIntTrie()) { + throw new IllegalArgumentException( + "Data given does not belong to a int trie."); + } + } + + /** + * Make a dummy IntTrie. + * A dummy trie is an empty runtime trie, used when a real data trie cannot + * be loaded. + * + * The trie always returns the initialValue, + * or the leadUnitValue for lead surrogate code points. + * The Latin-1 part is always set up to be linear. + * + * @param initialValue the initial value that is set for all code points + * @param leadUnitValue the value for lead surrogate code _units_ that do not + * have associated supplementary data + * @param dataManipulate object which provides methods to parse the char data + */ + @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770 + public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) { + super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate); + + int dataLength, latin1Length, i, limit; + char block; + + /* calculate the actual size of the dummy trie data */ + + /* max(Latin-1, block 0) */ + dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 
256 : DATA_BLOCK_LENGTH; + if(leadUnitValue!=initialValue) { + dataLength+=DATA_BLOCK_LENGTH; + } + m_data_=new int[dataLength]; + m_dataLength_=dataLength; + + m_initialValue_=initialValue; + + /* fill the index and data arrays */ + + /* indexes are preset to 0 (block 0) */ + + /* Latin-1 data */ + for(i=0; i>INDEX_STAGE_2_SHIFT_); + i=0xd800>>INDEX_STAGE_1_SHIFT_; + limit=0xdc00>>INDEX_STAGE_1_SHIFT_; + for(; i> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + + (ch & INDEX_STAGE_3_MASK_); + return m_data_[offset]; + } + + // handle U+D800..U+10FFFF + offset = getCodePointOffset(ch); + return (offset >= 0) ? m_data_[offset] : m_initialValue_; + } + + /** + * Gets the value to the data which this lead surrogate character points + * to. + * Returned data may contain folding offset information for the next + * trailing surrogate character. + * This method does not guarantee correct results for trail surrogates. + * @param ch lead surrogate character + * @return data value + */ + public final int getLeadValue(char ch) + { + return m_data_[getLeadOffset(ch)]; + } + + /** + * Get the value associated with the BMP code point. + * Lead surrogate code points are treated as normal code points, with + * unfolded values that may differ from getLeadValue() results. + * @param ch the input BMP code point + * @return trie data value associated with the BMP codepoint + */ + public final int getBMPValue(char ch) + { + return m_data_[getBMPOffset(ch)]; + } + + /** + * Get the value associated with a pair of surrogates. 
+ * @param lead a lead surrogate + * @param trail a trail surrogate + */ + public final int getSurrogateValue(char lead, char trail) + { + if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) { + throw new IllegalArgumentException( + "Argument characters do not form a supplementary character"); + } + // get fold position for the next trail surrogate + int offset = getSurrogateOffset(lead, trail); + + // get the real data from the folded lead/trail units + if (offset > 0) { + return m_data_[offset]; + } + + // return m_initialValue_ if there is an error + return m_initialValue_; + } + + /** + * Get a value from a folding offset (from the value of a lead surrogate) + * and a trail surrogate. + * @param leadvalue the value of a lead surrogate that contains the + * folding offset + * @param trail surrogate + * @return trie data value associated with the trail character + */ + public final int getTrailValue(int leadvalue, char trail) + { + if (m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + int offset = m_dataManipulate_.getFoldingOffset(leadvalue); + if (offset > 0) { + return m_data_[getRawOffset(offset, + (char)(trail & SURROGATE_MASK_))]; + } + return m_initialValue_; + } + + /** + *

    Gets the latin 1 fast path value.

    + *

    Note this only works if latin 1 characters have their own linear + * array.

    + * @param ch latin 1 characters + * @return value associated with latin character + */ + public final int getLatin1LinearValue(char ch) + { + return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch]; + } + + /** + * Checks if the argument Trie has the same data as this Trie + * @param other Trie to check + * @return true if the argument Trie has the same data as this Trie, false + * otherwise + */ + ///CLOVER:OFF + public boolean equals(Object other) + { + boolean result = super.equals(other); + if (result && other instanceof IntTrie) { + IntTrie othertrie = (IntTrie)other; + if (m_initialValue_ != othertrie.m_initialValue_ + || !Arrays.equals(m_data_, othertrie.m_data_)) { + return false; + } + return true; + } + return false; + } + ///CLOVER:ON + + // protected methods ----------------------------------------------- + + /** + *

    Parses the input stream and stores its trie content into a index and + * data array

    + * @param inputStream data input stream containing trie data + * @exception IOException thrown when data reading fails + */ + protected final void unserialize(InputStream inputStream) + throws IOException + { + super.unserialize(inputStream); + // one used for initial value + m_data_ = new int[m_dataLength_]; + DataInputStream input = new DataInputStream(inputStream); + for (int i = 0; i < m_dataLength_; i ++) { + m_data_[i] = input.readInt(); + } + m_initialValue_ = m_data_[0]; + } + + /** + * Gets the offset to the data which the surrogate pair points to. + * @param lead lead surrogate + * @param trail trailing surrogate + * @return offset to data + */ + protected final int getSurrogateOffset(char lead, char trail) + { + if (m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + // get fold position for the next trail surrogate + int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); + + // get the real data from the folded lead/trail units + if (offset > 0) { + return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); + } + + // return -1 if there is an error, in this case we return the default + // value: m_initialValue_ + return -1; + } + + /** + * Gets the value at the argument index. 
+ * For use internally in TrieIterator + * @param index value at index will be retrieved + * @return 32 bit value + * @see com.ibm.icu.impl.TrieIterator + */ + protected final int getValue(int index) + { + return m_data_[index]; + } + + /** + * Gets the default initial value + * @return 32 bit value + */ + protected final int getInitialValue() + { + return m_initialValue_; + } + + // package private methods ----------------------------------------- + + /** + * Internal constructor for builder use + * @param index the index array to be slotted into this trie + * @param data the data array to be slotted into this trie + * @param initialvalue the initial value for this trie + * @param options trie options to use + * @param datamanipulate folding implementation + */ + IntTrie(char index[], int data[], int initialvalue, int options, + DataManipulate datamanipulate) + { + super(index, options, datamanipulate); + m_data_ = data; + m_dataLength_ = m_data_.length; + m_initialValue_ = initialvalue; + } + + // private data members -------------------------------------------- + + /** + * Default value + */ + private int m_initialValue_; + /** + * Array of char data + */ + private int m_data_[]; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java b/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java new file mode 100644 index 00000000000..ad5a77485dd --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java @@ -0,0 +1,792 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; + +/** + * Builder class to manipulate and generate a trie. + * This is useful for ICU data in primitive types. + * Provides a compact way to store information that is indexed by Unicode + * values, such as character properties, types, keyboard values, etc. This is + * very useful when you have a block of Unicode data that contains significant + * values while the rest of the Unicode data is unused in the application or + * when you have a lot of redundance, such as where all 21,000 Han ideographs + * have the same value. However, lookup is much faster than a hash table. + * A trie of any primitive data type serves two purposes: + *
      + *
    • Fast access of the indexed values. + *
    • Smaller memory footprint. + *
    + * This is a direct port from the ICU4C version + * @author Syn Wee Quek + */ +public class IntTrieBuilder extends TrieBuilder +{ + // public constructor ---------------------------------------------- + + /** + * Copy constructor + */ + public IntTrieBuilder(IntTrieBuilder table) + { + super(table); + m_data_ = new int[m_dataCapacity_]; + System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_); + m_initialValue_ = table.m_initialValue_; + m_leadUnitValue_ = table.m_leadUnitValue_; + } + + /** + * Constructs a build table + * @param aliasdata data to be filled into table + * @param maxdatalength maximum data length allowed in table + * @param initialvalue inital data value + * @param latin1linear is latin 1 to be linear + */ + public IntTrieBuilder(int aliasdata[], int maxdatalength, + int initialvalue, int leadunitvalue, + boolean latin1linear) + { + super(); + if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear + && maxdatalength < 1024)) { + throw new IllegalArgumentException( + "Argument maxdatalength is too small"); + } + + if (aliasdata != null) { + m_data_ = aliasdata; + } + else { + m_data_ = new int[maxdatalength]; + } + + // preallocate and reset the first data block (block index 0) + int j = DATA_BLOCK_LENGTH; + + if (latin1linear) { + // preallocate and reset the first block (number 0) and Latin-1 + // (U+0000..U+00ff) after that made sure above that + // maxDataLength >= 1024 + // set indexes to point to consecutive data blocks + int i = 0; + do { + // do this at least for trie->index[0] even if that block is + // only partly used for Latin-1 + m_index_[i ++] = j; + j += DATA_BLOCK_LENGTH; + } while (i < (256 >> SHIFT_)); + } + + m_dataLength_ = j; + // reset the initially allocated blocks to the initial value + Arrays.fill(m_data_, 0, m_dataLength_, initialvalue); + m_initialValue_ = initialvalue; + m_leadUnitValue_ = leadunitvalue; + m_dataCapacity_ = maxdatalength; + m_isLatin1Linear_ = latin1linear; + m_isCompacted_ = false; + } + + // 
public methods ------------------------------------------------------- + + /*public final void print() + { + int i = 0; + int oldvalue = m_index_[i]; + int count = 0; + System.out.println("index length " + m_indexLength_ + + " --------------------------"); + while (i < m_indexLength_) { + if (m_index_[i] != oldvalue) { + System.out.println("index has " + count + " counts of " + + Integer.toHexString(oldvalue)); + count = 0; + oldvalue = m_index_[i]; + } + count ++; + i ++; + } + System.out.println("index has " + count + " counts of " + + Integer.toHexString(oldvalue)); + i = 0; + oldvalue = m_data_[i]; + count = 0; + System.out.println("data length " + m_dataLength_ + + " --------------------------"); + while (i < m_dataLength_) { + if (m_data_[i] != oldvalue) { + if ((oldvalue & 0xf1000000) == 0xf1000000) { + int temp = oldvalue & 0xffffff; + temp += 0x320; + oldvalue = 0xf1000000 | temp; + } + if ((oldvalue & 0xf2000000) == 0xf2000000) { + int temp = oldvalue & 0xffffff; + temp += 0x14a; + oldvalue = 0xf2000000 | temp; + } + System.out.println("data has " + count + " counts of " + + Integer.toHexString(oldvalue)); + count = 0; + oldvalue = m_data_[i]; + } + count ++; + i ++; + } + if ((oldvalue & 0xf1000000) == 0xf1000000) { + int temp = oldvalue & 0xffffff; + temp += 0x320; + oldvalue = 0xf1000000 | temp; + } + if ((oldvalue & 0xf2000000) == 0xf2000000) { + int temp = oldvalue & 0xffffff; + temp += 0x14a; + oldvalue = 0xf2000000 | temp; + } + System.out.println("data has " + count + " counts of " + + Integer.toHexString(oldvalue)); + } + */ + /** + * Gets a 32 bit data from the table data + * @param ch codepoint which data is to be retrieved + * @return the 32 bit data + */ + public int getValue(int ch) + { + // valid, uncompacted trie and valid c? 
+ if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { + return 0; + } + + int block = m_index_[ch >> SHIFT_]; + return m_data_[Math.abs(block) + (ch & MASK_)]; + } + + /** + * Get a 32 bit data from the table data + * @param ch code point for which data is to be retrieved. + * @param inBlockZero Output parameter, inBlockZero[0] returns true if the + * char maps into block zero, otherwise false. + * @return the 32 bit data value. + */ + public int getValue(int ch, boolean [] inBlockZero) + { + // valid, uncompacted trie and valid c? + if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { + if (inBlockZero != null) { + inBlockZero[0] = true; + } + return 0; + } + + int block = m_index_[ch >> SHIFT_]; + if (inBlockZero != null) { + inBlockZero[0] = (block == 0); + } + return m_data_[Math.abs(block) + (ch & MASK_)]; + } + + + /** + * Sets a 32 bit data in the table data + * @param ch codepoint which data is to be set + * @param value to set + * @return true if the set is successful, otherwise + * if the table has been compacted return false + */ + public boolean setValue(int ch, int value) + { + // valid, uncompacted trie and valid c? 
+ if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { + return false; + } + + int block = getDataBlock(ch); + if (block < 0) { + return false; + } + + m_data_[block + (ch & MASK_)] = value; + return true; + } + + /** + * Serializes the build table with 32 bit data. + * The table is compacted and folded first if that has not been done yet. + * @param datamanipulate builder raw fold method implementation + * @param triedatamanipulate result trie fold method + * @return a new trie + * @exception IllegalArgumentException if datamanipulate is null + * @exception ArrayIndexOutOfBoundsException if the compacted data length + * reaches MAX_DATA_LENGTH_ + */ + public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate, + Trie.DataManipulate triedatamanipulate) + { + if (datamanipulate == null) { + throw new IllegalArgumentException("Parameters can not be null"); + } + // fold and compact if necessary, also checks that indexLength is + // within limits + if (!m_isCompacted_) { + // compact once without overlap to improve folding + compact(false); + // fold the supplementary part of the index array + fold(datamanipulate); + // compact again with overlap for minimum data array length + compact(true); + m_isCompacted_ = true; + } + // is dataLength within limits? the check fails when the data is too + // long, so the message says "too large" + if (m_dataLength_ >= MAX_DATA_LENGTH_) { + throw new ArrayIndexOutOfBoundsException("Data length too large"); + } + + char index[] = new char[m_indexLength_]; + int data[] = new int[m_dataLength_]; + // write the index (stage 1) array and the 32-bit data (stage 2) array + // write 16-bit index values shifted right by INDEX_SHIFT_ + for (int i = 0; i < m_indexLength_; i ++) { + index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_); + } + // write 32-bit data values + System.arraycopy(m_data_, 0, data, 0, m_dataLength_); + + int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_); + options |= OPTIONS_DATA_IS_32_BIT_; + if (m_isLatin1Linear_) { + options |= OPTIONS_LATIN1_IS_LINEAR_; + } + return new IntTrie(index, data, m_initialValue_, options, + triedatamanipulate); + } + + + /** + * Serializes the build table to an output stream. 
+ * + * Compacts the build-time trie after all values are set, and then + * writes the serialized form onto an output stream. + * + * After this, this build-time Trie can only be serialized again and/or closed; + * no further values can be added. + * + * This function is the rough equivalent of utrie_serialize() in ICU4C. + * + * @param os the output stream to which the serialized trie will be written. + * If null, the function still returns the size of the serialized Trie. + * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting + * serialized form can then be used to create a CharTrie. + * @param datamanipulate builder raw fold method implementation + * @return the size of the serialized trie in bytes; this is also returned + * when os is null and nothing is written. + * @exception IllegalArgumentException if datamanipulate is null + * @exception ArrayIndexOutOfBoundsException if the compacted length reaches + * MAX_DATA_LENGTH_ + */ + public int serialize(OutputStream os, boolean reduceTo16Bits, + TrieBuilder.DataManipulate datamanipulate) throws IOException { + if (datamanipulate == null) { + throw new IllegalArgumentException("Parameters can not be null"); + } + + // fold and compact if necessary, also checks that indexLength is + // within limits + if (!m_isCompacted_) { + // compact once without overlap to improve folding + compact(false); + // fold the supplementary part of the index array + fold(datamanipulate); + // compact again with overlap for minimum data array length + compact(true); + m_isCompacted_ = true; + } + + // is dataLength within limits? + int length; + if (reduceTo16Bits) { + length = m_dataLength_ + m_indexLength_; + } else { + length = m_dataLength_; + } + // the check fails when the data is too long, so the message says "too large" + if (length >= MAX_DATA_LENGTH_) { + throw new ArrayIndexOutOfBoundsException("Data length too large"); + } + + // struct UTrieHeader { + // int32_t signature; + // int32_t options (a bit field) + // int32_t indexLength + // int32_t dataLength + length = Trie.HEADER_LENGTH_ + 2*m_indexLength_; + if(reduceTo16Bits) { + length+=2*m_dataLength_; + } else { + length+=4*m_dataLength_; + } + + if (os == null) { + // No output stream. Just return the length of the serialized Trie, in bytes. 
+ return length; + } + + DataOutputStream dos = new DataOutputStream(os); + dos.writeInt(Trie.HEADER_SIGNATURE_); + + int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<>> Trie.INDEX_STAGE_2_SHIFT_; + dos.writeChar(v); + } + + /* write 16-bit data values */ + for(int i=0; i>> Trie.INDEX_STAGE_2_SHIFT_; + dos.writeChar(v); + } + + /* write 32-bit data values */ + for(int i=0; i UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE + || limit > (UCharacter.MAX_VALUE + 1) || start > limit) { + return false; + } + + if (start == limit) { + return true; // nothing to do + } + + if ((start & MASK_) != 0) { + // set partial block at [start..following block boundary[ + int block = getDataBlock(start); + if (block < 0) { + return false; + } + + int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_; + if (nextStart <= limit) { + fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH, + value, overwrite); + start = nextStart; + } + else { + fillBlock(block, start & MASK_, limit & MASK_, + value, overwrite); + return true; + } + } + + // number of positions in the last, partial block + int rest = limit & MASK_; + + // round down limit to a block boundary + limit &= ~MASK_; + + // iterate over all-value blocks + int repeatBlock = 0; + if (value == m_initialValue_) { + // repeatBlock = 0; assigned above + } + else { + repeatBlock = -1; + } + while (start < limit) { + // get index value + int block = m_index_[start >> SHIFT_]; + if (block > 0) { + // already allocated, fill in value + fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite); + } + else if (m_data_[-block] != value && (block == 0 || overwrite)) { + // set the repeatBlock instead of the current block 0 or range + // block + if (repeatBlock >= 0) { + m_index_[start >> SHIFT_] = -repeatBlock; + } + else { + // create and set and fill the repeatBlock + repeatBlock = getDataBlock(start); + if (repeatBlock < 0) { + return false; + } + + // set the negative block number to indicate that it is a + // repeat 
block + m_index_[start >> SHIFT_] = -repeatBlock; + fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true); + } + } + + start += DATA_BLOCK_LENGTH; + } + + if (rest > 0) { + // set partial block at [last block boundary..limit[ + int block = getDataBlock(start); + if (block < 0) { + return false; + } + fillBlock(block, 0, rest, value, overwrite); + } + + return true; + } + + // protected data member ------------------------------------------------ + + protected int m_data_[]; + protected int m_initialValue_; + + // private data member ------------------------------------------------ + + private int m_leadUnitValue_; + + // private methods ------------------------------------------------------ + + private int allocDataBlock() + { + int newBlock = m_dataLength_; + int newTop = newBlock + DATA_BLOCK_LENGTH; + if (newTop > m_dataCapacity_) { + // out of memory in the data array + return -1; + } + m_dataLength_ = newTop; + return newBlock; + } + + /** + * No error checking for illegal arguments. + * @param ch codepoint to look for + * @return -1 if no new data block available (out of memory in data array) + */ + private int getDataBlock(int ch) + { + ch >>= SHIFT_; + int indexValue = m_index_[ch]; + if (indexValue > 0) { + return indexValue; + } + + // allocate a new data block + int newBlock = allocDataBlock(); + if (newBlock < 0) { + // out of memory in the data array + return -1; + } + m_index_[ch] = newBlock; + + // copy-on-write for a block from a setRange() + System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock, + DATA_BLOCK_LENGTH << 2); + return newBlock; + } + + /** + * Compact a folded build-time trie. 
+ * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps adjacent blocks as much as possible (if overlap == true) + * - moves blocks in steps of the data granularity + * - moves and overlaps blocks that overlap with multiple values in the overlap region + * + * It does not + * - try to move and overlap blocks that are not already adjacent + * @param overlap flag + */ + private void compact(boolean overlap) + { + if (m_isCompacted_) { + return; // nothing left to do + } + + // compaction + // initialize the index map with "block is used/unused" flags + findUnusedBlocks(); + + // if Latin-1 is preallocated and linear, then do not compact Latin-1 + // data + int overlapStart = DATA_BLOCK_LENGTH; + if (m_isLatin1Linear_ && SHIFT_ <= 8) { + overlapStart += 256; + } + + int newStart = DATA_BLOCK_LENGTH; + int i; + for (int start = newStart; start < m_dataLength_;) { + // start: index of first entry of current block + // newStart: index where the current block is to be moved + // (right after current end of already-compacted data) + // skip blocks that are not used + if (m_map_[start >>> SHIFT_] < 0) { + // advance start to the next block + start += DATA_BLOCK_LENGTH; + // leave newStart with the previous block! + continue; + } + // search for an identical block + if (start >= overlapStart) { + i = findSameDataBlock(m_data_, newStart, start, + overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH); + if (i >= 0) { + // found an identical block, set the other block's index + // value for the current block + m_map_[start >>> SHIFT_] = i; + // advance start to the next block + start += DATA_BLOCK_LENGTH; + // leave newStart with the previous block! 
+ continue; + } + } + // see if the beginning of this block can be overlapped with the + // end of the previous block + if(overlap && start>=overlapStart) { + /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ + for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_; + i>0 && !equal_int(m_data_, newStart-i, start, i); + i-=DATA_GRANULARITY_) {} + } else { + i=0; + } + if (i > 0) { + // some overlap + m_map_[start >>> SHIFT_] = newStart - i; + // move the non-overlapping indexes to their new positions + start += i; + for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) { + m_data_[newStart ++] = m_data_[start ++]; + } + } + else if (newStart < start) { + // no overlap, just move the indexes to their new positions + m_map_[start >>> SHIFT_] = newStart; + for (i = DATA_BLOCK_LENGTH; i > 0; -- i) { + m_data_[newStart ++] = m_data_[start ++]; + } + } + else { // no overlap && newStart==start + m_map_[start >>> SHIFT_] = start; + newStart += DATA_BLOCK_LENGTH; + start = newStart; + } + } + // now adjust the index (stage 1) table + for (i = 0; i < m_indexLength_; ++ i) { + m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_]; + } + m_dataLength_ = newStart; + } + + /** + * Find the same data block + * @param data array + * @param dataLength + * @param otherBlock + * @param step + */ + private static final int findSameDataBlock(int data[], int dataLength, + int otherBlock, int step) + { + // ensure that we do not even partially get past dataLength + dataLength -= DATA_BLOCK_LENGTH; + + for (int block = 0; block <= dataLength; block += step) { + if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) { + return block; + } + } + return -1; + } + + /** + * Fold the normalization data for supplementary code points into + * a compact area on top of the BMP-part of the trie index, + * with the lead surrogates indexing this compact area. 
+ * + * Duplicate the index values for lead surrogates: + * From inside the BMP area, where some may be overridden with folded values, + * to just after the BMP area, where they can be retrieved for + * code point lookups. + * @param manipulate fold implementation + */ + private final void fold(DataManipulate manipulate) + { + int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_]; + int index[] = m_index_; + // copy the lead surrogate indexes into a temporary array + System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0, + SURROGATE_BLOCK_COUNT_); + + // set all values for lead surrogate code *units* to leadUnitValue + // so that by default runtime lookups will find no data for associated + // supplementary code points, unless there is data for such code points + // which will result in a non-zero folding value below that is set for + // the respective lead units + // the above saved the indexes for surrogate code *points* + // fill the indexes with simplified code from utrie_setRange32() + int block = 0; + if (m_leadUnitValue_ == m_initialValue_) { + // leadUnitValue == initialValue, use all-initial-value block + // block = 0; if block here left empty + } + else { + // create and fill the repeatBlock + block = allocDataBlock(); + if (block < 0) { + // data table overflow + throw new IllegalStateException("Internal error: Out of memory space"); + } + fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true); + // negative block number to indicate that it is a repeat block + block = -block; + } + for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) { + m_index_[c] = block; + } + + // Fold significant index values into the area just after the BMP + // indexes. + // In case the first lead surrogate has significant data, + // its index block must be used first (in which case the folding is a + // no-op). + // Later all folded index blocks are moved up one to insert the copied + // lead surrogate indexes. 
+ int indexLength = BMP_INDEX_LENGTH_; + // search for any index (stage 1) entries for supplementary code points + for (int c = 0x10000; c < 0x110000;) { + if (index[c >> SHIFT_] != 0) { + // there is data, treat the full block for a lead surrogate + c &= ~0x3ff; + // is there an identical index block? + block = findSameIndexBlock(index, indexLength, c >> SHIFT_); + + // get a folded value for [c..c+0x400[ and, + // if different from the value for the lead surrogate code + // point, set it for the lead surrogate code unit + + int value = manipulate.getFoldedValue(c, + block + SURROGATE_BLOCK_COUNT_); + if (value != getValue(UTF16.getLeadSurrogate(c))) { + if (!setValue(UTF16.getLeadSurrogate(c), value)) { + // data table overflow + throw new ArrayIndexOutOfBoundsException( + "Data table overflow"); + } + // if we did not find an identical index block... + if (block == indexLength) { + // move the actual index (stage 1) entries from the + // supplementary position to the new one + System.arraycopy(index, c >> SHIFT_, index, indexLength, + SURROGATE_BLOCK_COUNT_); + indexLength += SURROGATE_BLOCK_COUNT_; + } + } + c += 0x400; + } + else { + c += DATA_BLOCK_LENGTH; + } + } + + // index array overflow? + // This is to guarantee that a folding offset is of the form + // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. + // If the index is too large, then n>=1024 and more than 10 bits are + // necessary. + // In fact, it can only ever become n==1024 with completely unfoldable + // data and the additional block of duplicated values for lead + // surrogates. 
+ if (indexLength >= MAX_INDEX_LENGTH_) { + throw new ArrayIndexOutOfBoundsException("Index table overflow"); + } + // make space for the lead surrogate index block and insert it between + // the BMP indexes and the folded ones + System.arraycopy(index, BMP_INDEX_LENGTH_, index, + BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, + indexLength - BMP_INDEX_LENGTH_); + System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_, + SURROGATE_BLOCK_COUNT_); + indexLength += SURROGATE_BLOCK_COUNT_; + m_indexLength_ = indexLength; + } + + /** + * @internal + */ + private void fillBlock(int block, int start, int limit, int value, + boolean overwrite) + { + limit += block; + block += start; + if (overwrite) { + while (block < limit) { + m_data_[block ++] = value; + } + } + else { + while (block < limit) { + if (m_data_[block] == m_initialValue_) { + m_data_[block] = value; + } + ++ block; + } + } + } +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java b/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java new file mode 100644 index 00000000000..6f7c2a93c9b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java @@ -0,0 +1,21 @@ +/** +******************************************************************************* +* Copyright (C) 2006, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +******************************************************************************* +*/ +package com.ibm.icu.impl; + +public class InvalidFormatException extends Exception { + + // Generated by serialver from JDK 1.4.1_01 + static final long serialVersionUID = 8883328905089345791L; + + public InvalidFormatException(){} + + public InvalidFormatException(String message){ + super(message); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java b/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java new file mode 100644 index 00000000000..a3061ce9da9 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java @@ -0,0 +1,59 @@ +/* + ******************************************************************************* + * Copyright (C) 2007-2009, Google Inc, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.Comparator; +import java.util.Iterator; + +public class IterableComparator implements Comparator> { + private final Comparator comparator; + private final int shorterFirst; // = 1 for shorter first, -1 otherwise + + public IterableComparator() { + this(null,true); + } + + public IterableComparator(Comparator comparator) { + this(comparator,true); + } + + public IterableComparator(Comparator comparator, boolean shorterFirst) { + this.comparator = comparator; + this.shorterFirst = shorterFirst ? 1 : -1; + } + + @SuppressWarnings("unchecked") + public int compare(Iterable a, Iterable b) { + if (a == null) { + return b == null ? 0 : -shorterFirst; + } else if (b == null) { + return shorterFirst; + } + Iterator ai = a.iterator(); + Iterator bi = b.iterator(); + while (true) { + if (!ai.hasNext()) { + return bi.hasNext() ? 
-shorterFirst : 0; + } + if (!bi.hasNext()) { + return shorterFirst; + } + T aItem = ai.next(); + T bItem = bi.next(); + int result = comparator != null ? comparator.compare(aItem, bItem) : ((Comparable) aItem).compareTo(bItem); + if (result != 0) { + return result; + } + } + } + @SuppressWarnings("unchecked") + public static int compareIterables(Iterable a, Iterable b) { + return NOCOMPARATOR.compare(a, b); + } + @SuppressWarnings("unchecked") + private static final IterableComparator NOCOMPARATOR = new IterableComparator(); +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java b/main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java new file mode 100644 index 00000000000..bcdcc2e6b0e --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java @@ -0,0 +1,193 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.TreeSet; + +import com.ibm.icu.util.TimeZone; + +/** + * JavaTimeZone inherits com.ibm.icu.util.TimeZone and wraps java.util.TimeZone. + * We used to have JDKTimeZone which wrapped Java TimeZone and used it as primary + * TimeZone implementation until ICU4J 3.4.1. This class works exactly like + * JDKTimeZone and allows ICU users who use ICU4J and JDK date/time/calendar + * services in mix to maintain only JDK timezone rules. + * + * This TimeZone subclass is returned by the TimeZone factory method getTimeZone(String) + * when the default timezone type in TimeZone class is TimeZone.TIMEZONE_JDK. 
+ */ +public class JavaTimeZone extends TimeZone { + + private static final long serialVersionUID = 6977448185543929364L; + + private static final TreeSet AVAILABLESET; + + private java.util.TimeZone javatz; + + static { + AVAILABLESET = new TreeSet(); + String[] availableIds = java.util.TimeZone.getAvailableIDs(); + for (int i = 0; i < availableIds.length; i++) { + AVAILABLESET.add(availableIds[i]); + } + } + + /** + * Constructs a JavaTimeZone with the default Java TimeZone + */ + public JavaTimeZone() { + javatz = java.util.TimeZone.getDefault(); + setID(javatz.getID()); + } + + /** + * Constructs a JavaTimeZone with the given timezone ID. + * @param id A timezone ID, either a system ID or a custom ID. + */ + public JavaTimeZone(String id) { + if (AVAILABLESET.contains(id)) { + javatz = java.util.TimeZone.getTimeZone(id); + } + if (javatz == null) { + // Use ICU's canonical ID mapping + boolean[] isSystemID = new boolean[1]; + String canonicalID = TimeZone.getCanonicalID(id, isSystemID); + if (isSystemID[0] && AVAILABLESET.contains(canonicalID)) { + javatz = java.util.TimeZone.getTimeZone(canonicalID); + } + } + + if (javatz == null){ + int[] fields = new int[4]; + if (ZoneMeta.parseCustomID(id, fields)) { + // JDK does not support offset seconds. + // If custom ID, we create java.util.SimpleTimeZone here. 
+ id = ZoneMeta.formatCustomID(fields[1], fields[2], fields[3], fields[0] < 0); + int offset = fields[0] * ((fields[1] * 60 + fields[2]) * 60 + fields[3]) * 1000; + javatz = new java.util.SimpleTimeZone(offset, id); + } + } + if (javatz == null) { + // Final fallback + id = "GMT"; + javatz = java.util.TimeZone.getTimeZone(id); + } + setID(id); + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#getOffset(int, int, int, int, int, int) + */ + public int getOffset(int era, int year, int month, int day, int dayOfWeek, int milliseconds) { + return javatz.getOffset(era, year, month, day, dayOfWeek, milliseconds); + } + + /* (non-Javadoc) + * Fills offsets[0] with the total offset minus the DST amount and offsets[1] + * with the DST amount, both obtained from the wrapped java.util.TimeZone. + * @see com.ibm.icu.util.TimeZone#getOffset(long, boolean, int[]) + */ + public void getOffset(long date, boolean local, int[] offsets) { + int offset; + int dstOffset = 0; + + if (local) { + // break the time down into fields and use the field-based JDK lookup + int fields[] = new int[6]; + Grego.timeToFields(date, fields); + + int era = GregorianCalendar.AD; + int year = fields[0]; + if (year <= 0) { + // map a non-positive year to a BC era year + era = GregorianCalendar.BC; + year = 1 - year; + } + + offset = javatz.getOffset(era, year, fields[1], fields[2], fields[3], fields[5]); + + } else { + offset = javatz.getOffset(date); + } + + // NOTE(review): date is passed to inDaylightTime() as an instant even when + // local is true; presumably date is local wall time in that case, so the + // DST decision may be off near transitions -- confirm against the caller contract + if (javatz.inDaylightTime(new Date(date))) { + dstOffset = javatz.getDSTSavings(); + } + + offsets[0] = offset - dstOffset; + offsets[1] = dstOffset; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#getRawOffset() + */ + public int getRawOffset() { + return javatz.getRawOffset(); + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#inDaylightTime(java.util.Date) + */ + public boolean inDaylightTime(Date date) { + return javatz.inDaylightTime(date); + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#setRawOffset(int) + */ + public void setRawOffset(int offsetMillis) { + javatz.setRawOffset(offsetMillis); + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#useDaylightTime() + */ + public boolean useDaylightTime() { + return javatz.useDaylightTime(); + } + + /* (non-Javadoc) + * 
@see com.ibm.icu.util.TimeZone#getDSTSavings() + */ + public int getDSTSavings() { + int dstSavings = super.getDSTSavings(); + try { + // hack so test compiles and runs in both JDK 1.3 and JDK 1.4+ + final Object[] args = new Object[0]; + final Class[] argtypes = new Class[0]; + java.lang.reflect.Method m = javatz.getClass().getMethod("getDSTSavings", argtypes); + dstSavings = ((Integer) m.invoke(javatz, args)).intValue(); + } catch (Exception e) { + // just use the result returned by super.getDSTSavings() + } + return dstSavings; + } + + public java.util.TimeZone unwrap() { + return javatz; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#clone() + */ + public Object clone() { + JavaTimeZone other = (JavaTimeZone)super.clone(); + other.javatz = (java.util.TimeZone)javatz.clone(); + return other; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#hashCode() + */ + public int hashCode() { + return super.hashCode() + javatz.hashCode(); + } + + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException { + s.defaultReadObject(); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java b/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java new file mode 100644 index 00000000000..0ee5cb65024 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java @@ -0,0 +1,332 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
*
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.util.Iterator;
import java.util.Locale;

import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.LocaleDisplayNames;
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;

/**
 * LocaleDisplayNames implementation backed by two pluggable data tables, one
 * for language-related names and one for region names. The tables are loaded
 * reflectively by class name; when a table class cannot be loaded a no-op
 * table is substituted whose lookups return the code that was asked for.
 */
public class LocaleDisplayNamesImpl extends LocaleDisplayNames {
    private final ULocale locale;
    private final DialectHandling dialectHandling;
    private final DataTable langData;
    private final DataTable regionData;
    private final Appender appender;
    private final MessageFormat format;

    private static final Cache cache = new Cache();

    /**
     * Returns an instance for the given locale and dialect handling. The most
     * recently requested instance is kept in a single-entry cache; access to
     * that cache is serialized on the cache object.
     */
    public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) {
        synchronized (cache) {
            return cache.get(locale, dialectHandling);
        }
    }

    /**
     * Loads the language and region tables for <code>locale</code> and reads
     * the separator and pattern used to combine the parts of a display name.
     *
     * @param locale the display locale requested
     * @param dialectHandling how dialect names such as lang_COUNTRY are looked up
     */
    public LocaleDisplayNamesImpl(ULocale locale, DialectHandling dialectHandling) {
        this.dialectHandling = dialectHandling;
        this.langData = LangDataTables.impl.get(locale);
        this.regionData = RegionDataTables.impl.get(locale);
        // report the locale of the language data, unless that is root, in
        // which case report the locale of the region data
        this.locale = ULocale.ROOT.equals(langData.getLocale()) ? regionData.getLocale() :
            langData.getLocale();

        // Note, by going through DataTable, this uses table lookup rather than straight lookup.
        // That should get us the same data, I think.  This way we don't have to explicitly
        // load the bundle again.  Using direct lookup didn't seem to make an appreciable
        // difference in performance.
        String sep = langData.get("localeDisplayPattern", "separator");
        // a lookup that echoes the key back is treated as "no data": use the default
        if ("separator".equals(sep)) {
            sep = ", ";
        }
        this.appender = new Appender(sep);

        String pattern = langData.get("localeDisplayPattern", "pattern");
        if ("pattern".equals(pattern)) {
            pattern = "{0} ({1})";
        }
        this.format = new MessageFormat(pattern);
    }

    @Override
    public ULocale getLocale() {
        return locale;
    }

    @Override
    public DialectHandling getDialectHandling() {
        return dialectHandling;
    }

    @Override
    public String localeDisplayName(ULocale locale) {
        return localeDisplayNameInternal(locale);
    }

    @Override
    public String localeDisplayName(Locale locale) {
        return localeDisplayNameInternal(ULocale.forLocale(locale));
    }

    @Override
    public String localeDisplayName(String localeId) {
        return localeDisplayNameInternal(new ULocale(localeId));
    }

    /**
     * Builds the display name of <code>locale</code>. The result is the
     * language (or dialect) name, followed, when any remain, by the script,
     * country, variant and keyword=value parts combined through the display
     * pattern.
     */
    private String localeDisplayNameInternal(ULocale locale) {
        // lang
        // lang (script, country, variant, keyword=value, ...)
        // script, country, variant, keyword=value, ...

        String resultName = null;

        String lang = locale.getLanguage();

        // Empty basename indicates root locale (keywords are ignored for this).
        // Our data uses 'root' to access display names for the root locale in the
        // "Languages" table.
        if (locale.getBaseName().length() == 0) {
            lang = "root";
        }
        String script = locale.getScript();
        String country = locale.getCountry();
        String variant = locale.getVariant();

        boolean hasScript = script.length() > 0;
        boolean hasCountry = country.length() > 0;
        boolean hasVariant = variant.length() > 0;

        // always have a value for lang
        if (dialectHandling == DialectHandling.DIALECT_NAMES) {
            // Try the most specific dialect id first. A lookup that returns
            // something other than the id itself is a hit; the parts it
            // covered are then excluded from the remainder.
            do { // loop construct is so we can break early out of search
                if (hasScript && hasCountry) {
                    String langScriptCountry = lang + '_' + script + '_' + country;
                    String result = localeIdName(langScriptCountry);
                    if (!result.equals(langScriptCountry)) {
                        resultName = result;
                        hasScript = false;
                        hasCountry = false;
                        break;
                    }
                }
                if (hasScript) {
                    String langScript = lang + '_' + script;
                    String result = localeIdName(langScript);
                    if (!result.equals(langScript)) {
                        resultName = result;
                        hasScript = false;
                        break;
                    }
                }
                if (hasCountry) {
                    String langCountry = lang + '_' + country;
                    String result = localeIdName(langCountry);
                    if (!result.equals(langCountry)) {
                        resultName = result;
                        hasCountry = false;
                        break;
                    }
                }
            } while (false);
        }

        if (resultName == null) {
            resultName = localeIdName(lang);
        }

        StringBuilder buf = new StringBuilder();
        if (hasScript) {
            // first element, don't need appender
            buf.append(scriptDisplayName(script));
        }
        if (hasCountry) {
            appender.append(regionDisplayName(country), buf);
        }
        if (hasVariant) {
            appender.append(variantDisplayName(variant), buf);
        }

        // The element type must be String: each key is assigned to a String below.
        Iterator<String> keys = locale.getKeywords();
        if (keys != null) {
            while (keys.hasNext()) {
                String key = keys.next();
                String value = locale.getKeywordValue(key);
                appender.append(keyDisplayName(key), buf)
                    .append("=")
                    .append(keyValueDisplayName(key, value));
            }
        }

        String resultRemainder = null;
        if (buf.length() > 0) {
            resultRemainder = buf.toString();
        }

        if (resultRemainder != null) {
            return format.format(new Object[] {resultName, resultRemainder});
        }

        return resultName;
    }

    /**
     * Looks up a locale id in the "Languages" table without the filtering
     * that languageDisplayName applies.
     */
    private String localeIdName(String localeId) {
        return langData.get("Languages", localeId);
    }

    @Override
    public String languageDisplayName(String lang) {
        // Special case to eliminate non-languages, which pollute our data.
        if (lang.equals("root") || lang.indexOf('_') != -1) {
            return lang;
        }
        return langData.get("Languages", lang);
    }

    @Override
    public String scriptDisplayName(String script) {
        return langData.get("Scripts", script);
    }

    @Override
    public String scriptDisplayName(int scriptCode) {
        return scriptDisplayName(UScript.getShortName(scriptCode));
    }

    @Override
    public String regionDisplayName(String region) {
        return regionData.get("Countries", region);
    }

    @Override
    public String variantDisplayName(String variant) {
        return langData.get("Variants", variant);
    }

    @Override
    public String keyDisplayName(String key) {
        return langData.get("Keys", key);
    }

    @Override
    public String keyValueDisplayName(String key, String value) {
        return langData.get("Types", key, value);
    }

    /**
     * No-op data table: reports the root locale and answers every lookup
     * with the code that was passed in.
     */
    public static class DataTable {
        ULocale getLocale() {
            return ULocale.ROOT;
        }

        String get(String tableName, String code) {
            return get(tableName, null, code);
        }

        String get(String tableName, String subTableName, String code) {
            return code;
        }
    }

    /**
     * Data table backed by an ICU resource bundle.
     */
    static class ICUDataTable extends DataTable {
        private final ICUResourceBundle bundle;

        public ICUDataTable(String path, ULocale locale) {
            this.bundle = (ICUResourceBundle) UResourceBundle.getBundleInstance(
                path, locale.getBaseName());
        }

        @Override
        public ULocale getLocale() {
            return bundle.getULocale();
        }

        // NOTE(review): callers above treat a result equal to the requested
        // code as "not found"; presumably getTableString returns the code on
        // a miss -- confirm against ICUResourceTableAccess.
        @Override
        public String get(String tableName, String subTableName, String code) {
            return ICUResourceTableAccess.getTableString(bundle, tableName, subTableName,
                                                         code);
        }
    }

    /**
     * Factory of DataTable instances per locale.
     */
    static abstract class DataTables {
        public abstract DataTable get(ULocale locale);

        /**
         * Instantiates the named DataTables class reflectively. Any failure
         * (class missing, not instantiable, wrong type, ...) is deliberately
         * absorbed and a factory returning a shared no-op DataTable is
         * returned instead.
         */
        public static DataTables load(String className) {
            try {
                return (DataTables) Class.forName(className).newInstance();
            } catch (Throwable t) {
                // best effort: fall back to the no-op table when the data classes are unavailable
                final DataTable NO_OP = new DataTable();
                return new DataTables() {
                    @Override
                    public DataTable get(ULocale locale) {
                        return NO_OP;
                    }
                };
            }
        }
    }

    /**
     * DataTables whose tables are ICU resource bundles found under a fixed path.
     */
    static abstract class ICUDataTables extends DataTables {
        private final String path;

        protected ICUDataTables(String path) {
            this.path = path;
        }

        @Override
        public DataTable get(ULocale locale) {
            return new ICUDataTable(path, locale);
        }
    }

    // Holder classes: each table factory is loaded when its holder is first used.
    static class LangDataTables {
        static final DataTables impl = DataTables.load("com.ibm.icu.impl.ICULangDataTables");
    }

    static class RegionDataTables {
        static final DataTables impl = DataTables.load("com.ibm.icu.impl.ICURegionDataTables");
    }

    public static enum DataTableType {
        LANG, REGION;
    }

    /**
     * Returns true when the table of the given type was loaded as an
     * ICUDataTables, i.e. the no-op fallback is not in use.
     *
     * @throws IllegalArgumentException if the type is not a known constant
     */
    public static boolean haveData(DataTableType type) {
        switch (type) {
        case LANG: return LangDataTables.impl instanceof ICUDataTables;
        case REGION: return RegionDataTables.impl instanceof ICUDataTables;
        default:
            throw new IllegalArgumentException("unknown type: " + type);
        }
    }

    /**
     * Appends strings to a StringBuilder, inserting the separator before
     * every element except the first.
     */
    static class Appender {
        private final String sep;

        Appender(String sep) {
            this.sep = sep;
        }
        StringBuilder append(String s, StringBuilder b) {
            if (b.length() > 0) {
                b.append(sep);
            }
            b.append(s);
            return b;
        }
    }

    /**
     * Single-entry cache holding the instance created for the last
     * (locale, dialectHandling) pair requested. It does no locking of its
     * own; getInstance synchronizes on the cache object around the call.
     */
    private static class Cache {
        private ULocale locale;
        private DialectHandling dialectHandling;
        private LocaleDisplayNames cache;
        public LocaleDisplayNames get(ULocale locale, DialectHandling dialectHandling) {
            if (!(dialectHandling == this.dialectHandling && locale.equals(this.locale))) {
                this.locale = locale;
                this.dialectHandling = dialectHandling;
                this.cache = new LocaleDisplayNamesImpl(locale, dialectHandling);
            }
            return cache;
        }
    }
}
diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java b/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java
new file 
mode 100644 index 00000000000..6a4d392fff8 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java @@ -0,0 +1,741 @@ +/* +****************************************************************************** +* Copyright (C) 2003-2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + +import com.ibm.icu.impl.locale.AsciiUtil; + +/** + * Utility class to parse and normalize locale ids (including POSIX style) + */ +public final class LocaleIDParser { + private char[] id; + private int index; + private char[] buffer; + private int blen; + // um, don't handle POSIX ids unless we request it. why not? well... because. + private boolean canonicalize; + private boolean hadCountry; + + // used when canonicalizing + Map keywords; + String baseName; + + /** + * Parsing constants. + */ + private static final char KEYWORD_SEPARATOR = '@'; + private static final char HYPHEN = '-'; + private static final char KEYWORD_ASSIGN = '='; + private static final char COMMA = ','; + private static final char ITEM_SEPARATOR = ';'; + private static final char DOT = '.'; + private static final char UNDERSCORE = '_'; + + public LocaleIDParser(String localeID) { + this(localeID, false); + } + + public LocaleIDParser(String localeID, boolean canonicalize) { + id = localeID.toCharArray(); + index = 0; + buffer = new char[id.length + 5]; + blen = 0; + this.canonicalize = canonicalize; + } + + private void reset() { + index = blen = 0; + } + + // utilities for working on text in the buffer + + /** + * Append c to the buffer. 
+ */ + private void append(char c) { + try { + buffer[blen] = c; + } + catch (IndexOutOfBoundsException e) { + if (buffer.length > 512) { + // something is seriously wrong, let this go + throw e; + } + char[] nbuffer = new char[buffer.length * 2]; + System.arraycopy(buffer, 0, nbuffer, 0, buffer.length); + nbuffer[blen] = c; + buffer = nbuffer; + } + ++blen; + } + + private void addSeparator() { + append(UNDERSCORE); + } + + /** + * Returns the text in the buffer from start to blen as a String. + */ + private String getString(int start) { + if (start == blen) { + return ""; + } + return new String(buffer, start, blen-start); + } + + /** + * Set the length of the buffer to pos, then append the string. + */ + private void set(int pos, String s) { + this.blen = pos; // no safety + append(s); + } + + /** + * Append the string to the buffer. + */ + private void append(String s) { + for (int i = 0; i < s.length(); ++i) { + append(s.charAt(i)); + } + } + + // utilities for parsing text out of the id + + /** + * Character to indicate no more text is available in the id. + */ + private static final char DONE = '\uffff'; + + /** + * Returns the character at index in the id, and advance index. The returned character + * is DONE if index was at the limit of the buffer. The index is advanced regardless + * so that decrementing the index will always 'unget' the last character returned. + */ + private char next() { + if (index == id.length) { + index++; + return DONE; + } + + return id[index++]; + } + + /** + * Advance index until the next terminator or id separator, and leave it there. + */ + private void skipUntilTerminatorOrIDSeparator() { + while (!isTerminatorOrIDSeparator(next())) { + } + --index; + } + + /** + * Returns true if the character at index in the id is a terminator. + */ + private boolean atTerminator() { + return index >= id.length || isTerminator(id[index]); + } + + /* + * Returns true if the character is an id separator (underscore or hyphen). 
+ */ + /* private boolean isIDSeparator(char c) { + return c == UNDERSCORE || c == HYPHEN; + }*/ + + /** + * Returns true if the character is a terminator (keyword separator, dot, or DONE). + * Dot is a terminator because of the POSIX form, where dot precedes the codepage. + */ + private boolean isTerminator(char c) { + // always terminate at DOT, even if not handling POSIX. It's an error... + return c == KEYWORD_SEPARATOR || c == DONE || c == DOT; + } + + /** + * Returns true if the character is a terminator or id separator. + */ + private boolean isTerminatorOrIDSeparator(char c) { + return c == KEYWORD_SEPARATOR || c == UNDERSCORE || c == HYPHEN || + c == DONE || c == DOT; + } + + /** + * Returns true if the start of the buffer has an experimental or private language + * prefix, the pattern '[ixIX][-_].' shows the syntax checked. + */ + private boolean haveExperimentalLanguagePrefix() { + if (id.length > 2) { + char c = id[1]; + if (c == HYPHEN || c == UNDERSCORE) { + c = id[0]; + return c == 'x' || c == 'X' || c == 'i' || c == 'I'; + } + } + return false; + } + + /** + * Returns true if a value separator occurs at or after index. + */ + private boolean haveKeywordAssign() { + // assume it is safe to start from index + for (int i = index; i < id.length; ++i) { + if (id[i] == KEYWORD_ASSIGN) { + return true; + } + } + return false; + } + + /** + * Advance index past language, and accumulate normalized language code in buffer. + * Index must be at 0 when this is called. Index is left at a terminator or id + * separator. Returns the start of the language code in the buffer. 
+ */ + private int parseLanguage() { + if (haveExperimentalLanguagePrefix()) { + append(Character.toLowerCase(id[0])); + append(HYPHEN); + index = 2; + } + + char c; + while(!isTerminatorOrIDSeparator(c = next())) { + append(Character.toLowerCase(c)); + } + --index; // unget + + if (blen == 3) { + String lang = LocaleIDs.threeToTwoLetterLanguage(getString(0)); + if (lang != null) { + set(0, lang); + } + } + + return 0; + } + + /** + * Advance index past language. Index must be at 0 when this is called. Index + * is left at a terminator or id separator. + */ + private void skipLanguage() { + if (haveExperimentalLanguagePrefix()) { + index = 2; + } + skipUntilTerminatorOrIDSeparator(); + } + + /** + * Advance index past script, and accumulate normalized script in buffer. + * Index must be immediately after the language. + * If the item at this position is not a script (is not four characters + * long) leave index and buffer unchanged. Otherwise index is left at + * a terminator or id separator. Returns the start of the script code + * in the buffer (this may be equal to the buffer length, if there is no + * script). + */ + private int parseScript() { + if (!atTerminator()) { + int oldIndex = index; // save original index + ++index; + + int oldBlen = blen; // get before append hyphen, if we truncate everything is undone + char c; + while(!isTerminatorOrIDSeparator(c = next())) { + if (blen == oldBlen) { // first pass + addSeparator(); + append(Character.toUpperCase(c)); + } else { + append(Character.toLowerCase(c)); + } + } + --index; // unget + + /* If it's not exactly 4 characters long, then it's not a script. */ + if (index - oldIndex != 5) { // +1 to account for separator + index = oldIndex; + blen = oldBlen; + } else { + oldBlen++; // index past hyphen, for clients who want to extract just the script + } + + return oldBlen; + } + return blen; + } + + /** + * Advance index past script. + * Index must be immediately after the language and IDSeparator. 
+ * If the item at this position is not a script (is not four characters + * long) leave index. Otherwise index is left at a terminator or + * id separator. + */ + private void skipScript() { + if (!atTerminator()) { + int oldIndex = index; + ++index; + + skipUntilTerminatorOrIDSeparator(); + if (index - oldIndex != 5) { // +1 to account for separator + index = oldIndex; + } + } + } + + /** + * Advance index past country, and accumulate normalized country in buffer. + * Index must be immediately after the script (if there is one, else language) + * and IDSeparator. Return the start of the country code in the buffer. + */ + private int parseCountry() { + if (!atTerminator()) { + int oldIndex = index; + ++index; + + int oldBlen = blen; + char c; + while (!isTerminatorOrIDSeparator(c = next())) { + if (oldBlen == blen) { // first, add hyphen + hadCountry = true; // we have a country, let variant parsing know + addSeparator(); + ++oldBlen; // increment past hyphen + } + append(Character.toUpperCase(c)); + } + --index; // unget + + int charsAppended = blen - oldBlen; + + if (charsAppended == 0) { + // Do nothing. + } + else if (charsAppended < 2 || charsAppended > 3) { + // It's not a country, so return index and blen to + // their previous values. + index = oldIndex; + --oldBlen; + blen = oldBlen; + hadCountry = false; + } + else if (charsAppended == 3) { + String region = LocaleIDs.threeToTwoLetterRegion(getString(oldBlen)); + if (region != null) { + set(oldBlen, region); + } + } + + return oldBlen; + } + + return blen; + } + + /** + * Advance index past country. + * Index must be immediately after the script (if there is one, else language) + * and IDSeparator. + */ + private void skipCountry() { + if (!atTerminator()) { + ++index; + /* + * Save the index point after the separator, since the format + * requires two separators if the country is not present. 
+ */ + int oldIndex = index; + + skipUntilTerminatorOrIDSeparator(); + int charsSkipped = index - oldIndex; + if (charsSkipped < 2 || charsSkipped > 3) { + index = oldIndex; + } + } + } + + /** + * Advance index past variant, and accumulate normalized variant in buffer. This ignores + * the codepage information from POSIX ids. Index must be immediately after the country + * or script. Index is left at the keyword separator or at the end of the text. Return + * the start of the variant code in the buffer. + * + * In standard form, we can have the following forms: + * ll__VVVV + * ll_CC_VVVV + * ll_Ssss_VVVV + * ll_Ssss_CC_VVVV + * + * This also handles POSIX ids, which can have the following forms (pppp is code page id): + * ll_CC.pppp --> ll_CC + * ll_CC.pppp@VVVV --> ll_CC_VVVV + * ll_CC@VVVV --> ll_CC_VVVV + * + * We identify this use of '@' in POSIX ids by looking for an '=' following + * the '@'. If there is one, we consider '@' to start a keyword list, instead of + * being part of a POSIX id. + * + * Note: since it was decided that we want an option to not handle POSIX ids, this + * becomes a bit more complex. 
+ */ + private int parseVariant() { + int oldBlen = blen; + + boolean start = true; + boolean needSeparator = true; + boolean skipping = false; + char c; + while ((c = next()) != DONE) { + if (c == DOT) { + start = false; + skipping = true; + } else if (c == KEYWORD_SEPARATOR) { + if (haveKeywordAssign()) { + break; + } + skipping = false; + start = false; + needSeparator = true; // add another underscore if we have more text + } else if (start) { + start = false; + } else if (!skipping) { + if (needSeparator) { + boolean incOldBlen = blen == oldBlen; // need to skip separators + needSeparator = false; + if (incOldBlen && !hadCountry) { // no country, we'll need two + addSeparator(); + ++oldBlen; // for sure + } + addSeparator(); + if (incOldBlen) { // only for the first separator + ++oldBlen; + } + } + c = Character.toUpperCase(c); + if (c == HYPHEN || c == COMMA) { + c = UNDERSCORE; + } + append(c); + } + } + --index; // unget + + return oldBlen; + } + + // no need for skipvariant, to get the keywords we'll just scan directly for + // the keyword separator + + /** + * Returns the normalized language id, or the empty string. + */ + public String getLanguage() { + reset(); + return getString(parseLanguage()); + } + + /** + * Returns the normalized script id, or the empty string. + */ + public String getScript() { + reset(); + skipLanguage(); + return getString(parseScript()); + } + + /** + * return the normalized country id, or the empty string. + */ + public String getCountry() { + reset(); + skipLanguage(); + skipScript(); + return getString(parseCountry()); + } + + /** + * Returns the normalized variant id, or the empty string. + */ + public String getVariant() { + reset(); + skipLanguage(); + skipScript(); + skipCountry(); + return getString(parseVariant()); + } + + /** + * Returns the language, script, country, and variant as separate strings. 
+ */ + public String[] getLanguageScriptCountryVariant() { + reset(); + return new String[] { + getString(parseLanguage()), + getString(parseScript()), + getString(parseCountry()), + getString(parseVariant()) + }; + } + + public void setBaseName(String baseName) { + this.baseName = baseName; + } + + public void parseBaseName() { + if (baseName != null) { + set(0, baseName); + } else { + reset(); + parseLanguage(); + parseScript(); + parseCountry(); + parseVariant(); + + // catch unwanted trailing underscore after country if there was no variant + if (blen > 1 && buffer[blen-1] == UNDERSCORE) { + --blen; + } + } + } + + /** + * Returns the normalized base form of the locale id. The base + * form does not include keywords. + */ + public String getBaseName() { + if (baseName != null) { + return baseName; + } + parseBaseName(); + return getString(0); + } + + /** + * Returns the normalized full form of the locale id. The full + * form includes keywords if they are present. + */ + public String getName() { + parseBaseName(); + parseKeywords(); + return getString(0); + } + + // keyword utilities + + /** + * If we have keywords, advance index to the start of the keywords and return true, + * otherwise return false. 
+ */ + private boolean setToKeywordStart() { + for (int i = index; i < id.length; ++i) { + if (id[i] == KEYWORD_SEPARATOR) { + if (canonicalize) { + for (int j = ++i; j < id.length; ++j) { // increment i past separator for return + if (id[j] == KEYWORD_ASSIGN) { + index = i; + return true; + } + } + } else { + if (++i < id.length) { + index = i; + return true; + } + } + break; + } + } + return false; + } + + private static boolean isDoneOrKeywordAssign(char c) { + return c == DONE || c == KEYWORD_ASSIGN; + } + + private static boolean isDoneOrItemSeparator(char c) { + return c == DONE || c == ITEM_SEPARATOR; + } + + private String getKeyword() { + int start = index; + while (!isDoneOrKeywordAssign(next())) { + } + --index; + return AsciiUtil.toLowerString(new String(id, start, index-start).trim()); + } + + private String getValue() { + int start = index; + while (!isDoneOrItemSeparator(next())) { + } + --index; + return new String(id, start, index-start).trim(); // leave case alone + } + + private Comparator getKeyComparator() { + final Comparator comp = new Comparator() { + public int compare(String lhs, String rhs) { + return lhs.compareTo(rhs); + } + }; + return comp; + } + + /** + * Returns a map of the keywords and values, or null if there are none. + */ + public Map getKeywordMap() { + if (keywords == null) { + TreeMap m = null; + if (setToKeywordStart()) { + // trim spaces and convert to lower case, both keywords and values. 
+ do { + String key = getKeyword(); + if (key.length() == 0) { + break; + } + char c = next(); + if (c != KEYWORD_ASSIGN) { + // throw new IllegalArgumentException("key '" + key + "' missing a value."); + if (c == DONE) { + break; + } else { + continue; + } + } + String value = getValue(); + if (value.length() == 0) { + // throw new IllegalArgumentException("key '" + key + "' missing a value."); + continue; + } + if (m == null) { + m = new TreeMap(getKeyComparator()); + } else if (m.containsKey(key)) { + // throw new IllegalArgumentException("key '" + key + "' already has a value."); + continue; + } + m.put(key, value); + } while (next() == ITEM_SEPARATOR); + } + keywords = m != null ? m : Collections.emptyMap(); + } + + return keywords; + } + + + /** + * Parse the keywords and return start of the string in the buffer. + */ + private int parseKeywords() { + int oldBlen = blen; + Map m = getKeywordMap(); + if (!m.isEmpty()) { + boolean first = true; + for (Map.Entry e : m.entrySet()) { + append(first ? KEYWORD_SEPARATOR : ITEM_SEPARATOR); + first = false; + append(e.getKey()); + append(KEYWORD_ASSIGN); + append(e.getValue()); + } + if (blen != oldBlen) { + ++oldBlen; + } + } + return oldBlen; + } + + /** + * Returns an iterator over the keywords, or null if we have an empty map. + */ + public Iterator getKeywords() { + Map m = getKeywordMap(); + return m.isEmpty() ? null : m.keySet().iterator(); + } + + /** + * Returns the value for the named keyword, or null if the keyword is not + * present. + */ + public String getKeywordValue(String keywordName) { + Map m = getKeywordMap(); + return m.isEmpty() ? null : m.get(AsciiUtil.toLowerString(keywordName.trim())); + } + + /** + * Set the keyword value only if it is not already set to something else. + */ + public void defaultKeywordValue(String keywordName, String value) { + setKeywordValue(keywordName, value, false); + } + + /** + * Set the value for the named keyword, or unset it if value is null. 
If + * keywordName itself is null, unset all keywords. If keywordName is not null, + * value must not be null. + */ + public void setKeywordValue(String keywordName, String value) { + setKeywordValue(keywordName, value, true); + } + + /** + * Set the value for the named keyword, or unset it if value is null. If + * keywordName itself is null, unset all keywords. If keywordName is not null, + * value must not be null. If reset is true, ignore any previous value for + * the keyword, otherwise do not change the keyword (including removal of + * one or all keywords). + */ + private void setKeywordValue(String keywordName, String value, boolean reset) { + if (keywordName == null) { + if (reset) { + // force new map, ignore value + keywords = Collections.emptyMap(); + } + } else { + keywordName = AsciiUtil.toLowerString(keywordName.trim()); + if (keywordName.length() == 0) { + throw new IllegalArgumentException("keyword must not be empty"); + } + if (value != null) { + value = value.trim(); + if (value.length() == 0) { + throw new IllegalArgumentException("value must not be empty"); + } + } + Map m = getKeywordMap(); + if (m.isEmpty()) { // it is EMPTY_MAP + if (value != null) { + // force new map + keywords = new TreeMap(getKeyComparator()); + keywords.put(keywordName, value.trim()); + } + } else { + if (reset || !m.containsKey(keywordName)) { + if (value != null) { + m.put(keywordName, value); + } else { + m.remove(keywordName); + if (m.isEmpty()) { + // force new map + keywords = Collections.emptyMap(); + } + } + } + } + } + } +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java b/main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java new file mode 100644 index 00000000000..02f633e2294 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java @@ -0,0 +1,536 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines 
Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.MissingResourceException; + +import com.ibm.icu.util.ULocale; + + +/** + * Utilities for mapping between old and new language, country, and other + * locale ID related names. + */ +public class LocaleIDs { + + /** + * Returns a list of all 2-letter country codes defined in ISO 3166. + * Can be used to create Locales. + * @stable ICU 3.0 + */ + public static String[] getISOCountries() { + initCountryTables(); + return _countries.clone(); + } + + /** + * Returns a list of all 2-letter language codes defined in ISO 639. + * Can be used to create Locales. + * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed. + * The list this function returns includes both the new and the old codes for the + * languages whose codes have changed.] + * @stable ICU 3.0 + */ + public static String[] getISOLanguages() { + initLanguageTables(); + return _languages.clone(); + } + + /** + * Returns a three-letter abbreviation for the provided country. If the provided + * country is empty, returns the empty string. Otherwise, returns + * an uppercase ISO 3166 3-letter country code. + * @exception MissingResourceException Throws MissingResourceException if the + * three-letter country abbreviation is not available for this locale. + * @stable ICU 3.0 + */ + public static String getISO3Country(String country){ + initCountryTables(); + + int offset = findIndex(_countries, country); + if(offset>=0){ + return _countries3[offset]; + }else{ + offset = findIndex(_obsoleteCountries, country); + if(offset>=0){ + return _obsoleteCountries3[offset]; + } + } + return ""; + } + /** + * Returns a three-letter abbreviation for the language. If language is + * empty, returns the empty string. Otherwise, returns + * a lowercase ISO 639-2/T language code. 
+ * The ISO 639-2 language codes can be found on-line at + * ftp://dkuug.dk/i18n/iso-639-2.txt + * @exception MissingResourceException Throws MissingResourceException if the + * three-letter language abbreviation is not available for this locale. + * @stable ICU 3.0 + */ + public static String getISO3Language(String language) { + initLanguageTables(); + + int offset = findIndex(_languages, language); + if(offset>=0){ + return _languages3[offset]; + } else { + offset = findIndex(_obsoleteLanguages, language); + if (offset >= 0) { + return _obsoleteLanguages3[offset]; + } + } + return ""; + } + + public static String threeToTwoLetterLanguage(String lang) { + initLanguageTables(); + + /* convert 3 character code to 2 character code if possible *CWB*/ + int offset = findIndex(_languages3, lang); + if (offset >= 0) { + return _languages[offset]; + } + + offset = findIndex(_obsoleteLanguages3, lang); + if (offset >= 0) { + return _obsoleteLanguages[offset]; + } + + return null; + } + + public static String threeToTwoLetterRegion(String region) { + initCountryTables(); + + /* convert 3 character code to 2 character code if possible *CWB*/ + int offset = findIndex(_countries3, region); + if (offset >= 0) { + return _countries[offset]; + } + + offset = findIndex(_obsoleteCountries3, region); + if (offset >= 0) { + return _obsoleteCountries[offset]; + } + + return null; + } + + /** + * linear search of the string array. the arrays are unfortunately ordered by the + * two-letter target code, not the three-letter search code, which seems backwards. + */ + private static int findIndex(String[] array, String target){ + for (int i = 0; i < array.length; i++) { + if (target.equals(array[i])) { + return i; + } + } + return -1; + } + + + /** + * Tables used in normalizing portions of the id. + */ + /* tables updated per http://lcweb.loc.gov/standards/iso639-2/ + to include the revisions up to 2001/7/27 *CWB*/ + /* The 3 character codes are the terminology codes like RFC 3066. 
+ This is compatible with prior ICU codes */ + /* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in + the table but now at the end of the table because + 3 character codes are duplicates. This avoids bad searches + going from 3 to 2 character codes.*/ + /* The range qaa-qtz is reserved for local use. */ + + private static String[] _languages; + private static String[] _replacementLanguages; + private static String[] _obsoleteLanguages; + private static String[] _languages3; + private static String[] _obsoleteLanguages3; + + // Avoid initializing languages tables unless we have to. + private static void initLanguageTables() { + if (_languages == null) { + + /* This list MUST be in sorted order, and MUST contain the two-letter codes + if one exists otherwise use the three letter code */ + String[] tempLanguages = { + "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", + "afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa", + "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", + "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", + "bai", "bal", "ban", "bas", "bat", "be", "bej", + "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", + "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", + "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", + "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", + "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", + "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", + "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", + "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", + "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", + "enm", "eo", "es", "et", "eu", "ewo", "fa", + "fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon", + "fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay", + "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", + "goh", "gon", "gor", "got", "grb", "grc", "gu", "gv", + "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", + "hit", "hmn", "ho", 
"hr", "hsb", "ht", "hu", "hup", "hy", "hz", + "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", + "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", + "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", + "kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi", + "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", + "ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks", + "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", + "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", + "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", + "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", + "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min", + "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", + "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", + "mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", + "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", + "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub", + "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", + "om", "or", "os", "osa", "ota", "oto", "pa", "paa", + "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", + "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", + "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", + "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", + "sas", "sat", "sc", "sco", "sd", "se", "sel", "sem", + "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", + "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", + "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", + "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", + "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter", + "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", + "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", + "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", + "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", + "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", + "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", + "yi", "yo", 
"ypk", "za", "zap", "zen", "zh", "znd", + "zu", "zun", + }; + + String[] tempReplacementLanguages = { + "id", "he", "yi", "jv", "sr", "nb",/* replacement language codes */ + }; + + String[] tempObsoleteLanguages = { + "in", "iw", "ji", "jw", "sh", "no", /* obsolete language codes */ + }; + + /* This list MUST contain a three-letter code for every two-letter code in the + list above, and they MUST ne in the same order (i.e., the same language must + be in the same place in both lists)! */ + String[] tempLanguages3 = { + /*"aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */ + "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa", + /*"afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa", */ + "afh", "aka", "akk", "ale", "alg", "amh", "arg", "ang", "apa", + /*"ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */ + "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast", + /*"ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */ + "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad", + /*"bai", "bal", "ban", "bas", "bat", "be", "bej", */ + "bai", "bal", "ban", "bas", "bat", "bel", "bej", + /*"bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */ + "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin", + /*"bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */ + "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos", + /*"btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */ + "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau", + /*"ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */ + "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm", + /*"chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */ + "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop", + /*"cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */ + "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus", + /*"cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */ + "chv", "cym", "dan", "dak", "dar", "day", "deu", 
"del", "den", + /*"dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */ + "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu", + /*"dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */ + "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng", + /*"enm", "eo", "es", "et", "eu", "ewo", "fa", */ + "enm", "epo", "spa", "est", "eus", "ewo", "fas", + /*"fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon", */ + "fan", "fat", "ful", "fin", "fiu", "fij", "fao", "fon", + /*"fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay", */ + "fra", "frm", "fro", "fur", "fry", "gle", "gaa", "gay", + /*"gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */ + "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn", + /*"goh", "gon", "gor", "got", "grb", "grc", "gu", "gv", */ + "goh", "gon", "gor", "got", "grb", "grc", "guj", "glv", + /*"gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */ + "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him", + /*"hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */ + "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her", + /*"ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */ + "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk", + /*"ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */ + "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita", + /*"iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */ + "iku", "jpn", "jbo", "jpr", "jrb", "jaw", "kat", "kaa", "kab", + /*"kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi", */ + "kac", "kam", "kar", "kaw", "kbd", "kon", "kha", "khi", + /*"kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */ + "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan", + /*"ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks", */ + "kor", "kok", "kos", "kpe", "kau", "krc", "kro", "kru", "kas", + /*"ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */ + "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad", + /*"lah", "lam", "lb", "lez", "lg", 
"li", "ln", "lo", "lol", */ + "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol", + /*"loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */ + "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus", + /*"lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */ + "lav", "mad", "mag", "mai", "mak", "man", "map", "mas", + /*"mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min", */ + "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min", + /*"mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */ + "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno", + /*"mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */ + "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun", + /*"mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */ + "mus", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap", + /*"nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */ + "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic", + /*"niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub", */ + "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub", + /*"nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */ + "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji", + /*"om", "or", "os", "osa", "ota", "oto", "pa", "paa", */ + "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa", + /*"pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */ + "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", + /*"pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */ + "pli", "pol", "pon", "pra", "pro", "pus", "por", "que", + /*"raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */ + "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom", + /*"ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */ + "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam", + /*"sas", "sat", "sc", "sco", "sd", "se", "sel", "sem", */ + "sas", "sat", "srd", "sco", "snd", "sme", "sel", "sem", + /*"sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */ + 
"sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit", + /*"sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */ + "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn", + /*"sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */ + "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp", + /*"srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */ + "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux", + /*"sv", "sw", "syr", "ta", "tai", "te", "tem", "ter", */ + "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter", + /*"tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */ + "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl", + /*"tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", */ + "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", + /*"ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */ + "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", + /*"ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */ + "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd", + /*"uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */ + "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak", + /*"wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */ + "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap", + /*"yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd", */ + "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd", + /*"zu", "zun", */ + "zul", "zun", + }; + + String[] tempObsoleteLanguages3 = { + /* "in", "iw", "ji", "jw", "sh", */ + "ind", "heb", "yid", "jaw", "srp", + }; + + synchronized (ULocale.class) { + if (_languages == null) { + _languages = tempLanguages; + _replacementLanguages = tempReplacementLanguages; + _obsoleteLanguages = tempObsoleteLanguages; + _languages3 = tempLanguages3; + _obsoleteLanguages3 = tempObsoleteLanguages3; + } + } + } + } + + private static String[] _countries; + private static String[] _deprecatedCountries; + private static String[] _replacementCountries; + private static String[] 
_obsoleteCountries; + private static String[] _countries3; + private static String[] _obsoleteCountries3; + + // Avoid initializing country tables unless we have to. + private static void initCountryTables() { + if (_countries == null) { + /* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per + http://www.evertype.com/standards/iso3166/iso3166-1-en.html + added new codes keeping the old ones for compatibility + updated to include 1999/12/03 revisions *CWB*/ + + /* RO(ROM) is now RO(ROU) according to + http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html + */ + + /* This list MUST be in sorted order, and MUST contain only two-letter codes! */ + String[] tempCountries = { + "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", + "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", + "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", + "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", + "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", + "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", + "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", + "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", + "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", + "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", + "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", + "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", + "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", + "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", + "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", + "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", + "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", + "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", + "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", + "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", + "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", + "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", + "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", + "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", + "SK", "SL", "SM", "SN", "SO", 
"SR", "ST", "SV", + "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", + "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", + "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", + "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", + "WS", "YE", "YT", "ZA", "ZM", "ZW", + }; + + /* this table is used for 3 letter codes */ + String[] tempObsoleteCountries = { + "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */ + }; + + String[] tempDeprecatedCountries = { + "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" /* deprecated country list */ + }; + String[] tempReplacementCountries = { + /* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */ + "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", /* replacement country codes */ + }; + + /* This list MUST contain a three-letter code for every two-letter code in + the above list, and they MUST be listed in the same order! */ + String[] tempCountries3 = { + /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */ + "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT", + /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ + "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", + /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ + "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", + /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */ + "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT", + /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ + "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", + /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ + "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", + /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */ + "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", + /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ + "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", + /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ + "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", + /* "GA", 
"GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ + "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", + /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ + "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", + /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ + "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", + /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ + "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", + /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ + "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", + /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ + "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", + /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ + "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", + /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ + "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", + /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ + "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", + /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ + "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", + /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ + "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", + /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ + "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", + /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ + "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", + /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ + "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", + /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ + "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", + /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */ + "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV", + /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ + "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", 
"TJK", + /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ + "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", + /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ + "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", + /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ + "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", + /* "WS", "YE", "YT", "ZA", "ZM", "ZW" */ + "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", + }; + + String[] tempObsoleteCountries3 = { + /*"FX", "CS", "RO", "TP", "YU", "ZR", */ + "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR", + }; + + synchronized (ULocale.class) { + if (_countries == null) { + _countries = tempCountries; + _deprecatedCountries = tempDeprecatedCountries; + _replacementCountries = tempReplacementCountries; + _obsoleteCountries = tempObsoleteCountries; + _countries3 = tempCountries3; + _obsoleteCountries3 = tempObsoleteCountries3; + } + } + } + } + + public static String getCurrentCountryID(String oldID){ + initCountryTables(); + int offset = findIndex(_deprecatedCountries, oldID); + if (offset >= 0) { + return _replacementCountries[offset]; + } + return oldID; + } + + public static String getCurrentLanguageID(String oldID){ + initLanguageTables(); + int offset = findIndex(_obsoleteLanguages, oldID); + if (offset >= 0) { + return _replacementLanguages[offset]; + } + return oldID; + } + + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java b/main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java new file mode 100644 index 00000000000..06337386474 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java @@ -0,0 +1,132 @@ +/* + ****************************************************************************** + * Copyright (C) 1996-2007, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ****************************************************************************** + * + ****************************************************************************** + */ + +package com.ibm.icu.impl; + +import java.util.Locale; + +/** + * A class to hold utility functions missing from java.util.Locale. + */ +public class LocaleUtility { + + /** + * A helper function to convert a string of the form + * aa_BB_CC to a locale object. Why isn't this in Locale? + */ + public static Locale getLocaleFromName(String name) { + String language = ""; + String country = ""; + String variant = ""; + + int i1 = name.indexOf('_'); + if (i1 < 0) { + language = name; + } else { + language = name.substring(0, i1); + ++i1; + int i2 = name.indexOf('_', i1); + if (i2 < 0) { + country = name.substring(i1); + } else { + country = name.substring(i1, i2); + variant = name.substring(i2+1); + } + } + + return new Locale(language, country, variant); + } + + /** + * Compare two locale strings of the form aa_BB_CC, and + * return true if parent is a 'strict' fallback of child, that is, + * if child =~ "^parent(_.+)*" (roughly). + */ + public static boolean isFallbackOf(String parent, String child) { + if (!child.startsWith(parent)) { + return false; + } + int i = parent.length(); + return (i == child.length() || + child.charAt(i) == '_'); + } + + /** + * Compare two locales, and return true if the parent is a + * 'strict' fallback of the child (parent string is a fallback + * of child string). + */ + public static boolean isFallbackOf(Locale parent, Locale child) { + return isFallbackOf(parent.toString(), child.toString()); + } + + + /* + * Convenience method that calls canonicalLocaleString(String) with + * locale.toString(); + */ + /*public static String canonicalLocaleString(Locale locale) { + return canonicalLocaleString(locale.toString()); + }*/ + + /* + * You'd think that Locale canonicalizes, since it munges the + * renamed languages, but it doesn't quite. 
It forces the region + * to be upper case but doesn't do anything about the language or + * variant. Our canonical form is 'lower_UPPER_UPPER'. + */ + /*public static String canonicalLocaleString(String id) { + if (id != null) { + int x = id.indexOf("_"); + if (x == -1) { + id = id.toLowerCase(Locale.ENGLISH); + } else { + StringBuffer buf = new StringBuffer(); + buf.append(id.substring(0, x).toLowerCase(Locale.ENGLISH)); + buf.append(id.substring(x).toUpperCase(Locale.ENGLISH)); + + int len = buf.length(); + int n = len; + while (--n >= 0 && buf.charAt(n) == '_') { + } + if (++n != len) { + buf.delete(n, len); + } + id = buf.toString(); + } + } + return id; + }*/ + + /** + * Fallback from the given locale name by removing the rightmost _-delimited + * element. If there is none, return the root locale ("", "", ""). If this + * is the root locale, return null. NOTE: The string "root" is not + * recognized; do not use it. + * + * @return a new Locale that is a fallback from the given locale, or null. + */ + public static Locale fallback(Locale loc) { + + // Split the locale into parts and remove the rightmost part + String[] parts = new String[] + { loc.getLanguage(), loc.getCountry(), loc.getVariant() }; + int i; + for (i=2; i>=0; --i) { + if (parts[i].length() != 0) { + parts[i] = ""; + break; + } + } + if (i<0) { + return null; // All parts were empty + } + return new Locale(parts[0], parts[1], parts[2]); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/MultiComparator.java b/main/classes/core/src/com/ibm/icu/impl/MultiComparator.java new file mode 100644 index 00000000000..01ade38fa4d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/MultiComparator.java @@ -0,0 +1,36 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.Comparator; + +public class MultiComparator implements Comparator { + private Comparator[] comparators; + + public MultiComparator (Comparator... comparators) { + this.comparators = comparators; + } + + /* Lexigraphic compare. Returns the first difference + * @return zero if equal. Otherwise +/- (i+1) + * where i is the index of the first comparator finding a difference + * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) + */ + public int compare(T arg0, T arg1) { + for (int i = 0; i < comparators.length; ++i) { + int result = comparators[i].compare(arg0, arg1); + if (result == 0) { + continue; + } + if (result > 0) { + return i + 1; + } + return -(i + 1); + } + return 0; + } +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java b/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java new file mode 100644 index 00000000000..e7bf63f1c1b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java @@ -0,0 +1,367 @@ +/* +******************************************************************************* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.util.MissingResourceException; + +import com.ibm.icu.text.Normalizer; +import com.ibm.icu.text.Normalizer2; + +public final class Norm2AllModes { + // Public API dispatch via Normalizer2 subclasses -------------------------- *** + + // Normalizer2 implementation for the old UNORM_NONE. 
+ public static final class NoopNormalizer2 extends Normalizer2 { + @Override + public StringBuilder normalize(CharSequence src, StringBuilder dest) { + if(dest!=src) { + dest.setLength(0); + return dest.append(src); + } else { + throw new IllegalArgumentException(); + } + } + @Override + public Appendable normalize(CharSequence src, Appendable dest) { + if(dest!=src) { + try { + return dest.append(src); + } catch(IOException e) { + throw new RuntimeException(e); // Avoid declaring "throws IOException". + } + } else { + throw new IllegalArgumentException(); + } + } + @Override + public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) { + if(first!=second) { + return first.append(second); + } else { + throw new IllegalArgumentException(); + } + } + @Override + public StringBuilder append(StringBuilder first, CharSequence second) { + if(first!=second) { + return first.append(second); + } else { + throw new IllegalArgumentException(); + } + } + @Override + public boolean isNormalized(CharSequence s) { return true; } + @Override + public Normalizer.QuickCheckResult quickCheck(CharSequence s) { return Normalizer.YES; } + @Override + public int spanQuickCheckYes(CharSequence s) { return s.length(); } + @Override + public boolean hasBoundaryBefore(int c) { return true; } + @Override + public boolean hasBoundaryAfter(int c) { return true; } + @Override + public boolean isInert(int c) { return true; } + } + + // Intermediate class: + // Has Normalizer2Impl and does boilerplate argument checking and setup. 
+ public static abstract class Normalizer2WithImpl extends Normalizer2 { + public Normalizer2WithImpl(Normalizer2Impl ni) { + impl=ni; + } + + // normalize + @Override + public StringBuilder normalize(CharSequence src, StringBuilder dest) { + if(dest==src) { + throw new IllegalArgumentException(); + } + dest.setLength(0); + normalize(src, new Normalizer2Impl.ReorderingBuffer(impl, dest, src.length())); + return dest; + } + @Override + public Appendable normalize(CharSequence src, Appendable dest) { + if(dest==src) { + throw new IllegalArgumentException(); + } + Normalizer2Impl.ReorderingBuffer buffer= + new Normalizer2Impl.ReorderingBuffer(impl, dest, src.length()); + normalize(src, buffer); + buffer.flush(); + return dest; + } + protected abstract void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer); + + // normalize and append + @Override + public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) { + return normalizeSecondAndAppend(first, second, true); + } + @Override + public StringBuilder append(StringBuilder first, CharSequence second) { + return normalizeSecondAndAppend(first, second, false); + } + public StringBuilder normalizeSecondAndAppend( + StringBuilder first, CharSequence second, boolean doNormalize) { + if(first==second) { + throw new IllegalArgumentException(); + } + normalizeAndAppend( + second, doNormalize, + new Normalizer2Impl.ReorderingBuffer(impl, first, first.length()+second.length())); + return first; + } + protected abstract void normalizeAndAppend( + CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer); + + // quick checks + @Override + public boolean isNormalized(CharSequence s) { + return s.length()==spanQuickCheckYes(s); + } + @Override + public Normalizer.QuickCheckResult quickCheck(CharSequence s) { + return isNormalized(s) ? 
Normalizer.YES : Normalizer.NO; + } + + public int getQuickCheck(int c) { + return 1; + } + + public final Normalizer2Impl impl; + } + + public static final class DecomposeNormalizer2 extends Normalizer2WithImpl { + public DecomposeNormalizer2(Normalizer2Impl ni) { + super(ni); + } + + @Override + protected void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer) { + impl.decompose(src, 0, src.length(), buffer); + } + @Override + protected void normalizeAndAppend( + CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) { + impl.decomposeAndAppend(src, doNormalize, buffer); + } + @Override + public int spanQuickCheckYes(CharSequence s) { + return impl.decompose(s, 0, s.length(), null); + } + @Override + public int getQuickCheck(int c) { + return impl.isDecompYes(impl.getNorm16(c)) ? 1 : 0; + } + @Override + public boolean hasBoundaryBefore(int c) { return impl.hasDecompBoundary(c, true); } + @Override + public boolean hasBoundaryAfter(int c) { return impl.hasDecompBoundary(c, false); } + @Override + public boolean isInert(int c) { return impl.isDecompInert(c); } + } + + public static final class ComposeNormalizer2 extends Normalizer2WithImpl { + public ComposeNormalizer2(Normalizer2Impl ni, boolean fcc) { + super(ni); + onlyContiguous=fcc; + } + + @Override + protected void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer) { + impl.compose(src, 0, src.length(), onlyContiguous, true, buffer); + } + @Override + protected void normalizeAndAppend( + CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) { + impl.composeAndAppend(src, doNormalize, onlyContiguous, buffer); + } + + @Override + public boolean isNormalized(CharSequence s) { + // 5: small destCapacity for substring normalization + return impl.compose(s, 0, s.length(), + onlyContiguous, false, + new Normalizer2Impl.ReorderingBuffer(impl, new StringBuilder(), 5)); + } + @Override + public Normalizer.QuickCheckResult 
quickCheck(CharSequence s) { + int spanLengthAndMaybe=impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, false); + if((spanLengthAndMaybe&1)!=0) { + return Normalizer.MAYBE; + } else if((spanLengthAndMaybe>>>1)==s.length()) { + return Normalizer.YES; + } else { + return Normalizer.NO; + } + } + @Override + public int spanQuickCheckYes(CharSequence s) { + return impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, true)>>>1; + } + @Override + public int getQuickCheck(int c) { + return impl.getCompQuickCheck(impl.getNorm16(c)); + } + @Override + public boolean hasBoundaryBefore(int c) { return impl.hasCompBoundaryBefore(c); } + @Override + public boolean hasBoundaryAfter(int c) { + return impl.hasCompBoundaryAfter(c, onlyContiguous, false); + } + @Override + public boolean isInert(int c) { + return impl.hasCompBoundaryAfter(c, onlyContiguous, true); + } + + private final boolean onlyContiguous; + } + + public static final class FCDNormalizer2 extends Normalizer2WithImpl { + public FCDNormalizer2(Normalizer2Impl ni) { + super(ni); + } + + @Override + protected void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer) { + impl.makeFCD(src, 0, src.length(), buffer); + } + @Override + protected void normalizeAndAppend( + CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) { + impl.makeFCDAndAppend(src, doNormalize, buffer); + } + @Override + public int spanQuickCheckYes(CharSequence s) { + return impl.makeFCD(s, 0, s.length(), null); + } + @Override + public int getQuickCheck(int c) { + return impl.isDecompYes(impl.getNorm16(c)) ? 
1 : 0; + } + @Override + public boolean hasBoundaryBefore(int c) { return impl.hasFCDBoundaryBefore(c); } + @Override + public boolean hasBoundaryAfter(int c) { return impl.hasFCDBoundaryAfter(c); } + @Override + public boolean isInert(int c) { return impl.isFCDInert(c); } + } + + // instance cache ---------------------------------------------------------- *** + + private Norm2AllModes(Normalizer2Impl ni) { + impl=ni; + comp=new ComposeNormalizer2(ni, false); + decomp=new DecomposeNormalizer2(ni); + fcd=new FCDNormalizer2(ni); + fcc=new ComposeNormalizer2(ni, true); + } + + public final Normalizer2Impl impl; + public final ComposeNormalizer2 comp; + public final DecomposeNormalizer2 decomp; + public final FCDNormalizer2 fcd; + public final ComposeNormalizer2 fcc; + + private static Norm2AllModes getInstanceFromSingleton(Norm2AllModesSingleton singleton) { + if(singleton.exception!=null) { + throw singleton.exception; + } + return singleton.allModes; + } + public static Norm2AllModes getNFCInstance() { + return getInstanceFromSingleton(NFCSingleton.INSTANCE); + } + public static Norm2AllModes getNFKCInstance() { + return getInstanceFromSingleton(NFKCSingleton.INSTANCE); + } + public static Norm2AllModes getNFKC_CFInstance() { + return getInstanceFromSingleton(NFKC_CFSingleton.INSTANCE); + } + // For use in properties APIs. 
+ public static Normalizer2WithImpl getN2WithImpl(int index) { + switch(index) { + case 0: return getNFCInstance().decomp; // NFD + case 1: return getNFKCInstance().decomp; // NFKD + case 2: return getNFCInstance().comp; // NFC + case 3: return getNFKCInstance().comp; // NFKC + default: return null; + } + } + public static Norm2AllModes getInstance(InputStream data, String name) { + if(data==null) { + Norm2AllModesSingleton singleton; + if(name.equals("nfc")) { + singleton=NFCSingleton.INSTANCE; + } else if(name.equals("nfkc")) { + singleton=NFKCSingleton.INSTANCE; + } else if(name.equals("nfkc_cf")) { + singleton=NFKC_CFSingleton.INSTANCE; + } else { + singleton=null; + } + if(singleton!=null) { + if(singleton.exception!=null) { + throw singleton.exception; + } + return singleton.allModes; + } + } + return cache.getInstance(name, data); + } + private static CacheBase cache = + new SoftCache() { + protected Norm2AllModes createInstance(String key, InputStream data) { + if(data==null) { + throw new MissingResourceException( + "No Normalizer2 data name \""+key+"\" cached, and InputStream is null", + "Normalizer2", + key); + } + Normalizer2Impl impl=new Normalizer2Impl().load(data); + return new Norm2AllModes(impl); + } + }; + + public static final NoopNormalizer2 NOOP_NORMALIZER2=new NoopNormalizer2(); + /** + * Gets the FCD normalizer, with the FCD data initialized. 
+ * @return FCD normalizer + */ + public static Normalizer2 getFCDNormalizer2() { + Norm2AllModes allModes=getNFCInstance(); + allModes.impl.getFCDTrie(); + return allModes.fcd; + } + + private static final class Norm2AllModesSingleton { + private Norm2AllModesSingleton(String name) { + try { + Normalizer2Impl impl=new Normalizer2Impl().load( + ICUResourceBundle.ICU_BUNDLE+"/"+name+".nrm"); + allModes=new Norm2AllModes(impl); + } catch(RuntimeException e) { + exception=e; + } + } + + private Norm2AllModes allModes; + private RuntimeException exception; + } + private static final class NFCSingleton { + private static final Norm2AllModesSingleton INSTANCE=new Norm2AllModesSingleton("nfc"); + } + private static final class NFKCSingleton { + private static final Norm2AllModesSingleton INSTANCE=new Norm2AllModesSingleton("nfkc"); + } + private static final class NFKC_CFSingleton { + private static final Norm2AllModesSingleton INSTANCE=new Norm2AllModesSingleton("nfkc_cf"); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java new file mode 100644 index 00000000000..be7b6e7c359 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java @@ -0,0 +1,2005 @@ +/* +******************************************************************************* +* Copyright (C) 2009-2010, International Business Machines +* Corporation and others. All Rights Reserved. 
+******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.VersionInfo; + +public final class Normalizer2Impl { + public static final class Hangul { + /* Korean Hangul and Jamo constants */ + public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */ + public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */ + public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */ + + public static final int HANGUL_BASE=0xac00; + + public static final int JAMO_L_COUNT=19; + public static final int JAMO_V_COUNT=21; + public static final int JAMO_T_COUNT=28; + + public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT; + public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT; + + public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT; + + public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT; + public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT; + + public static boolean isHangul(int c) { + return HANGUL_BASE<=c && c + * If dest is a StringBuilder, then the buffer writes directly to it. + * Otherwise, the buffer maintains a StringBuilder for intermediate text segments + * until no further changes are necessary and whole segments are appended. + * append() methods that take combining-class values always write to the StringBuilder. + * Other append() methods flush and append to the Appendable. 
+ */ + public static final class ReorderingBuffer implements Appendable { + public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) { + impl=ni; + app=dest; + if(app instanceof StringBuilder) { + appIsStringBuilder=true; + str=(StringBuilder)dest; + // In Java, the constructor subsumes public void init(int destCapacity) { + str.ensureCapacity(destCapacity); + reorderStart=0; + if(str.length()==0) { + lastCC=0; + } else { + setIterator(); + lastCC=previousCC(); + // Set reorderStart after the last code point with cc<=1 if there is one. + if(lastCC>1) { + while(previousCC()>1) {} + } + reorderStart=codePointLimit; + } + } else { + appIsStringBuilder=false; + str=new StringBuilder(); + reorderStart=0; + lastCC=0; + } + } + + public boolean isEmpty() { return str.length()==0; } + public int length() { return str.length(); } + public int getLastCC() { return lastCC; } + + public StringBuilder getStringBuilder() { return str; } + + public boolean equals(CharSequence s, int start, int limit) { + return UTF16Plus.equal(str, 0, str.length(), s, start, limit); + } + + // For Hangul composition, replacing the Leading consonant Jamo with the syllable. + public void setLastChar(char c) { + str.setCharAt(str.length()-1, c); + } + + public void append(int c, int cc) { + if(lastCC<=cc || cc==0) { + str.appendCodePoint(c); + lastCC=cc; + if(cc<=1) { + reorderStart=str.length(); + } + } else { + insert(c, cc); + } + } + // s must be in NFD, otherwise change the implementation. + public void append(CharSequence s, int start, int limit, + int leadCC, int trailCC) { + if(start==limit) { + return; + } + if(lastCC<=leadCC || leadCC==0) { + if(trailCC<=1) { + reorderStart=str.length()+(limit-start); + } else if(leadCC<=1) { + reorderStart=str.length()+1; // Ok if not a code point boundary. 
+ } + str.append(s, start, limit); + lastCC=trailCC; + } else { + int c=Character.codePointAt(s, start); + start+=Character.charCount(c); + insert(c, leadCC); // insert first code point + while(startcc;) {} + // insert c at codePointLimit, after the character with prevCC<=cc + if(c<=0xffff) { + str.insert(codePointLimit, (char)c); + if(cc<=1) { + reorderStart=codePointLimit+1; + } + } else { + str.insert(codePointLimit, Character.toChars(c)); + if(cc<=1) { + reorderStart=codePointLimit+2; + } + } + } + + private final Normalizer2Impl impl; + private final Appendable app; + private final StringBuilder str; + private final boolean appIsStringBuilder; + private int reorderStart; + private int lastCC; + + // private backward iterator + private void setIterator() { codePointStart=str.length(); } + private void skipPrevious() { // Requires 0=codePointStart) { + return 0; + } + int c=str.codePointBefore(codePointStart); + codePointStart-=Character.charCount(c); + if(c(nextOffset-offset)) { + throw new IOException("Normalizer2 data: not enough bytes for normTrie"); + } + ds.skipBytes((nextOffset-offset)-trieLength); // skip padding after trie bytes + + // Read the composition and mapping data. 
+ offset=nextOffset; + nextOffset=inIndexes[IX_RESERVED2_OFFSET]; + int numChars=(nextOffset-offset)/2; + char[] chars; + if(numChars!=0) { + chars=new char[numChars]; + for(int i=0; i trieIterator=normTrie.iterator(); + Trie2.Range range; + while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { + /* add the start code point to the USet */ + set.add(range.startCodePoint); + } + + /* add Hangul LV syllables and LV+1 because of skippables */ + for(int c=Hangul.HANGUL_BASE; c trieIterator=canonIterData.iterator(segmentStarterMapper); + Trie2.Range range; + while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { + /* add the start code point to the USet */ + set.add(range.startCodePoint); + } + } + private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() { + public int map(int in) { + return in&CANON_NOT_SEGMENT_STARTER; + } + }; + + // low-level properties ------------------------------------------------ *** + + public Trie2_16 getNormTrie() { return normTrie; } + public synchronized Trie2_16 getFCDTrie() { + if(fcdTrie!=null) { + return fcdTrie; + } + Trie2Writable newFCDTrie=new Trie2Writable(0, 0); + Iterator trieIterator=normTrie.iterator(); + Trie2.Range range; + while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { + // Set the FCD value for a range of same-norm16 characters. + if(range.value!=0) { + setFCD16FromNorm16(range.startCodePoint, range.endCodePoint, range.value, newFCDTrie); + } + } + for(char lead=0xd800; lead<0xdc00; ++lead) { + // Collect (OR together) the FCD values for a range of supplementary characters, + // for their lead surrogate code unit. 
+ int oredValue=newFCDTrie.get(lead); + trieIterator=normTrie.iteratorForLeadSurrogate(lead); + while(trieIterator.hasNext()) { + oredValue|=trieIterator.next().value; + } + if(oredValue!=0) { + // Set a "bad" value for makeFCD() to break the quick check loop + // and look up the value for the supplementary code point. + // If there is any lccc, then set the worst-case lccc of 1. + // The ORed-together value's tccc is already the worst case. + if(oredValue>0xff) { + oredValue=0x100|(oredValue&0xff); + } + newFCDTrie.setForLeadSurrogateCodeUnit(lead, oredValue); + } + } + return fcdTrie=newFCDTrie.toTrie2_16(); + } + + public synchronized Normalizer2Impl ensureCanonIterData() { + if(canonIterData==null) { + Trie2Writable newData=new Trie2Writable(0, 0); + canonStartSets=new ArrayList(); + Iterator trieIterator=normTrie.iterator(); + Trie2.Range range; + while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { + final int norm16=range.value; + if(norm16==0 || (minYesNo<=norm16 && norm16=minMaybeYes) { + // not a segment starter if it occurs in a decomposition or has cc!=0 + newValue|=CANON_NOT_SEGMENT_STARTER; + if(norm16=minNoNo) { + while((norm16_2+=Character.charCount(c2))=MIN_NORMAL_MAYBE_YES) { + return norm16&0xff; + } + if(norm16=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0; + } + + public int getFCD16(int c) { return fcdTrie.get(c); } + public int getFCD16FromSingleLead(char c) { return fcdTrie.getFromU16SingleLead(c); } + + void setFCD16FromNorm16(int start, int end, int norm16, Trie2Writable newFCDTrie) { + // Only loops for 1:1 algorithmic mappings. 
+ for(;;) { + if(norm16>=MIN_NORMAL_MAYBE_YES) { + norm16&=0xff; + norm16|=norm16<<8; + } else if(norm16<=minYesNo || minMaybeYes<=norm16) { + // no decomposition or Hangul syllable, all zeros + break; + } else if(limitNoNo<=norm16) { + int delta=norm16-(minMaybeYes-MAX_DELTA-1); + if(start==end) { + start+=delta; + norm16=getNorm16(start); + } else { + // the same delta leads from different original characters to different mappings + do { + int c=start+delta; + setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie); + } while(++start<=end); + break; + } + } else { + // c decomposes, get everything from the variable-length extra data + int firstUnit=extraData.charAt(norm16); + if((firstUnit&MAPPING_LENGTH_MASK)==0) { + // A character that is deleted (maps to an empty string) must + // get the worst-case lccc and tccc values because arbitrary + // characters on both sides will become adjacent. + norm16=0x1ff; + } else { + if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + norm16=extraData.charAt(norm16+1)&0xff00; // lccc + } else { + norm16=0; + } + norm16|=firstUnit>>8; // tccc + } + } + newFCDTrie.setRange(start, end, norm16, true); + break; + } + } + + /** + * Get the decomposition for one code point. 
+ * @param c code point + * @return c's decomposition, if it has one; returns null if it does not have a decomposition + */ + public String getDecomposition(int c) { + int decomp=-1; + int norm16; + for(;;) { + if(c=0; + } + public boolean getCanonStartSet(int c, UnicodeSet set) { + int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER; + if(canonValue==0) { + return false; + } + set.clear(); + int value=canonValue&CANON_VALUE_MASK; + if((canonValue&CANON_HAS_SET)!=0) { + set.addAll(canonStartSets.get(value)); + } else if(value!=0) { + set.add(value); + } + if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { + int norm16=getNorm16(c); + if(norm16==JAMO_L) { + int syllable=Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT; + set.add(syllable, syllable+Hangul.JAMO_VT_COUNT-1); + } else { + addComposites(getCompositionsList(norm16), set); + } + } + return true; + } + + public static final int MIN_CCC_LCCC_CP=0x300; + + public static final int MIN_YES_YES_WITH_CC=0xff01; + public static final int JAMO_VT=0xff00; + public static final int MIN_NORMAL_MAYBE_YES=0xfe00; + public static final int JAMO_L=1; + public static final int MAX_DELTA=0x40; + + // Byte offsets from the start of the data, after the generic header. + public static final int IX_NORM_TRIE_OFFSET=0; + public static final int IX_EXTRA_DATA_OFFSET=1; + public static final int IX_RESERVED2_OFFSET=2; + public static final int IX_TOTAL_SIZE=7; + + // Code point thresholds for quick check codes. + public static final int IX_MIN_DECOMP_NO_CP=8; + public static final int IX_MIN_COMP_NO_MAYBE_CP=9; + + // Norm16 value thresholds for quick check combinations and types of extra data. 
+ public static final int IX_MIN_YES_NO=10; + public static final int IX_MIN_NO_NO=11; + public static final int IX_LIMIT_NO_NO=12; + public static final int IX_MIN_MAYBE_YES=13; + + public static final int IX_COUNT=16; + + public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80; + public static final int MAPPING_PLUS_COMPOSITION_LIST=0x40; + public static final int MAPPING_NO_COMP_BOUNDARY_AFTER=0x20; + public static final int MAPPING_LENGTH_MASK=0x1f; + + public static final int COMP_1_LAST_TUPLE=0x8000; + public static final int COMP_1_TRIPLE=1; + public static final int COMP_1_TRAIL_LIMIT=0x3400; + public static final int COMP_1_TRAIL_MASK=0x7ffe; + public static final int COMP_1_TRAIL_SHIFT=9; // 10-1 for the "triple" bit + public static final int COMP_2_TRAIL_SHIFT=6; + public static final int COMP_2_TRAIL_MASK=0xffc0; + + // higher-level functionality ------------------------------------------ *** + + // Dual functionality: + // buffer!=NULL: normalize + // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes + public int decompose(CharSequence s, int src, int limit, + ReorderingBuffer buffer) { + int minNoCP=minDecompNoCP; + + int prevSrc; + int c=0; + int norm16=0; + + // only for quick check + int prevBoundary=src; + int prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src; src!=limit;) { + if( (c=s.charAt(src))=limit) { + break; + } + c=Character.codePointAt(s, src); + cc=getCC(getNorm16(c)); + }; + buffer.append(s, 0, src, firstCC, prevCC); + buffer.append(s, src, limit); + } + // Very similar to composeQuickCheck(): Make the same changes in both places if relevant. 
+ // doCompose: normalize + // !doCompose: isNormalized (buffer must be empty and initialized) + public boolean compose(CharSequence s, int src, int limit, + boolean onlyContiguous, + boolean doCompose, + ReorderingBuffer buffer) { + int minNoMaybeCP=minCompNoMaybeCP; + + /* + * prevBoundary points to the last character before the current one + * that has a composition boundary before it with ccc==0 and quick check "yes". + * Keeping track of prevBoundary saves us looking for a composition boundary + * when we find a "no" or "maybe". + * + * When we back out from prevSrc back to prevBoundary, + * then we also remove those same characters (which had been simply copied + * or canonically-order-inserted) from the ReorderingBuffer. + * Therefore, at all times, the [prevBoundary..prevSrc[ source units + * must correspond 1:1 to destination units at the end of the destination buffer. + */ + int prevBoundary=src; + int prevSrc; + int c=0; + int norm16=0; + + // only for isNormalized + int prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src; src!=limit;) { + if( (c=s.charAt(src))=minNoNo. + * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) + * or has ccc!=0. + * Check for Jamo V/T, then for regular characters. + * c is not a Hangul syllable or Jamo L because those have "yes" properties. + */ + if(isJamoVT(norm16) && prevBoundary!=prevSrc) { + char prev=s.charAt(prevSrc-1); + boolean needToDecompose=false; + if(c=MIN_YES_YES_WITH_CC) { + int cc=norm16&0xff; // cc!=0 + if( onlyContiguous && // FCC + (doCompose ? buffer.getLastCC() : prevCC)==0 && + prevBoundarycc + ) { + // Fails FCD test, need to decompose and contiguously recompose. 
+ if(!doCompose) { + return false; + } + } else if(doCompose) { + buffer.append(c, cc); + continue; + } else if(prevCC<=cc) { + prevCC=cc; + continue; + } else { + return false; + } + } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) { + return false; + } + + /* + * Find appropriate boundaries around this character, + * decompose the source text from between the boundaries, + * and recompose it. + * + * We may need to remove the last few characters from the ReorderingBuffer + * to account for source text that was copied or appended + * but needs to take part in the recomposition. + */ + + /* + * Find the last composition boundary in [prevBoundary..src[. + * It is either the decomposition of the current character (at prevSrc), + * or prevBoundary. + */ + if(hasCompBoundaryBefore(c, norm16)) { + prevBoundary=prevSrc; + } else if(doCompose) { + buffer.removeSuffix(prevSrc-prevBoundary); + } + + // Find the next composition boundary in [src..limit[ - + // modifies src to point to the next starter. + src=findNextCompBoundary(s, src, limit); + + // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it. + int recomposeStartIndex=buffer.length(); + decomposeShort(s, prevBoundary, src, buffer); + recompose(buffer, recomposeStartIndex, onlyContiguous); + if(!doCompose) { + if(!buffer.equals(s, prevBoundary, src)) { + return false; + } + buffer.remove(); + prevCC=0; + } + + // Move to the next starter. We never need to look back before this point again. + prevBoundary=src; + } + return true; + } + /** + * Very similar to compose(): Make the same changes in both places if relevant. 
+ * doSpan: spanQuickCheckYes (ignore bit 0 of the return value) + * !doSpan: quickCheck + * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and + * bit 0: set if "maybe"; otherwise, if the span length<s.length() + * then the quick check result is "no" + */ + public int composeQuickCheck(CharSequence s, int src, int limit, + boolean onlyContiguous, boolean doSpan) { + int qcResult=0; + int minNoMaybeCP=minCompNoMaybeCP; + + /* + * prevBoundary points to the last character before the current one + * that has a composition boundary before it with ccc==0 and quick check "yes". + */ + int prevBoundary=src; + int prevSrc; + int c=0; + int norm16=0; + int prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src;;) { + if(src==limit) { + return (src<<1)|qcResult; // "yes" or "maybe" + } + if( (c=s.charAt(src))=minNoNo. + * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) + * or has ccc!=0. + */ + if(isMaybeOrNonZeroCC(norm16)) { + int cc=getCCFromYesOrMaybe(norm16); + if( onlyContiguous && // FCC + cc!=0 && + prevCC==0 && + prevBoundarycc + ) { + // Fails FCD test. + } else if(prevCC<=cc || cc==0) { + prevCC=cc; + if(norm16appendZeroCC() because we track + // the lead and trail combining classes here, rather than leaving it to + // the ReorderingBuffer. + // The exception is the call to decomposeShort() which uses the buffer + // in the normal way. + + // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. + // Similar to the prevBoundary in the compose() implementation. + int prevBoundary=src; + int prevSrc; + int c=0; + int prevFCD16=0; + int fcd16=0; + + for(;;) { + // count code units with lccc==0 + for(prevSrc=src; src!=limit;) { + if((c=s.charAt(src))1) { + --prevBoundary; + } + } else { + int p=src-1; + if( Character.isLowSurrogate(s.charAt(p)) && prevSrc

    1) { + prevBoundary=p; + } + } + if(buffer!=null) { + // The last lccc==0 character is excluded from the + // flush-and-append call in case it needs to be modified. + buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary); + buffer.append(s, prevBoundary, src); + } + // The start of the current character (c). + prevSrc=src; + } else if(src==limit) { + break; + } + + src+=Character.charCount(c); + // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. + // Check for proper order, and decompose locally if necessary. + if((prevFCD16&0xff)<=(fcd16>>8)) { + // proper order: prev tccc <= current lccc + if((fcd16&0xff)<=1) { + prevBoundary=src; + } + if(buffer!=null) { + buffer.appendZeroCC(c); + } + prevFCD16=fcd16; + continue; + } else if(buffer==null) { + return prevBoundary; // quick check "no" + } else { + /* + * Back out the part of the source that we copied or appended + * already but is now going to be decomposed. + * prevSrc is set to after what was copied/appended. + */ + buffer.removeSuffix(prevSrc-prevBoundary); + /* + * Find the part of the source that needs to be decomposed, + * up to the next safe boundary. + */ + src=findNextFCDBoundary(s, src, limit); + /* + * The source text does not fulfill the conditions for FCD. + * Decompose and reorder a limited piece of the text. 
+ */ + decomposeShort(s, prevBoundary, src, buffer); + prevBoundary=src; + prevFCD16=0; + } + } + return src; + } + public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) { + int src=0, limit=s.length(); + if(!buffer.isEmpty()) { + int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit); + if(0!=firstBoundaryInSrc) { + int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(), + buffer.length()); + StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+ + firstBoundaryInSrc+16); + middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length()); + buffer.removeSuffix(buffer.length()-lastBoundaryInDest); + middle.append(s, 0, firstBoundaryInSrc); + makeFCD(middle, 0, middle.length(), buffer); + src=firstBoundaryInSrc; + } + } + if(doMakeFCD) { + makeFCD(s, src, limit, buffer); + } else { + buffer.append(s, src, limit); + } + } + + // Note: hasDecompBoundary() could be implemented as aliases to + // hasFCDBoundaryBefore() and hasFCDBoundaryAfter() + // at the cost of building the FCD trie for a decomposition normalizer. 
+ public boolean hasDecompBoundary(int c, boolean before) { + for(;;) { + if(cMIN_NORMAL_MAYBE_YES) { + return false; // ccc!=0 + } else if(isDecompNoAlgorithmic(norm16)) { + c=mapAlgorithmic(c, norm16); + } else { + // c decomposes, get everything from the variable-length extra data + int firstUnit=extraData.charAt(norm16++); + if((firstUnit&MAPPING_LENGTH_MASK)==0) { + return false; + } + if(!before) { + // decomp after-boundary: same as hasFCDBoundaryAfter(), + // fcd16<=1 || trailCC==0 + if(firstUnit>0x1ff) { + return false; // trailCC>1 + } + if(firstUnit<=0xff) { + return true; // trailCC==0 + } + // if(trailCC==1) test leadCC==0, same as checking for before-boundary + } + // true if leadCC==0 (hasFCDBoundaryBefore()) + return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16)&0xff00)==0; + } + } + } + public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); } + + public boolean hasCompBoundaryBefore(int c) { + return c= (testInert ? minNoNo : minMaybeYes)) { + return false; + } else if(isDecompNoAlgorithmic(norm16)) { + c=mapAlgorithmic(c, norm16); + } else { + // c decomposes, get everything from the variable-length extra data. + // If testInert, then c must be a yesNo character which has lccc=0, + // otherwise it could be a noNo. 
+ int firstUnit=extraData.charAt(norm16); + // true if + // c is not deleted, and + // it and its decomposition do not combine forward, and it has a starter, and + // if FCC then trailCC<=1 + return + (firstUnit&MAPPING_LENGTH_MASK)!=0 && + (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 && + (!onlyContiguous || firstUnit<=0x1ff); + } + } + } + + public boolean hasFCDBoundaryBefore(int c) { return c=minMaybeYes; } + private static boolean isInert(int norm16) { return norm16==0; } + // static UBool isJamoL(uint16_t norm16) const { return norm16==1; } + private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; } + private boolean isHangul(int norm16) { return norm16==minYesNo; } + private boolean isCompYesAndZeroCC(int norm16) { return norm16=MIN_YES_YES_WITH_CC || norm16=limitNoNo; } + + // For use with isCompYes(). + // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. + // static uint8_t getCCFromYes(uint16_t norm16) { + // return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0; + // } + private int getCCFromNoNo(int norm16) { + if((extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + return extraData.charAt(norm16+1)&0xff; + } else { + return 0; + } + } + // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() + int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) { + int c; + if(cpStart==(cpLimit-1)) { + c=s.charAt(cpStart); + } else { + c=Character.codePointAt(s, cpStart); + } + int prevNorm16=getNorm16(c); + if(prevNorm16<=minYesNo) { + return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0 + } else { + return extraData.charAt(prevNorm16)>>8; // tccc from yesNo + } + } + + // Requires algorithmic-NoNo. 
+ private int mapAlgorithmic(int c, int norm16) { + return c+norm16-(minMaybeYes-MAX_DELTA-1); + } + + // Requires minYesNo>7)&1); // +1 if MAPPING_HAS_CCC_LCCC_WORD + } + /** + * @param c code point must have compositions + * @return index into maybeYesCompositions + */ + private int getCompositionsList(int norm16) { + return isDecompYes(norm16) ? + getCompositionsListForDecompYes(norm16) : + getCompositionsListForComposite(norm16); + } + + // Decompose a short piece of text which is likely to contain characters that + // fail the quick check loop and/or where the quick check loop's overhead + // is unlikely to be amortized. + // Called by the compose() and makeFCD() implementations. + // Public in Java for collation implementation code. + public void decomposeShort(CharSequence s, int src, int limit, + ReorderingBuffer buffer) { + while(src>8; + if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + leadCC=extraData.charAt(norm16++)>>8; + } else { + leadCC=0; + } + buffer.append(extraData, norm16, norm16+length, leadCC, trailCC); + } + return; + } + } + + /* + * Finds the recomposition result for + * a forward-combining "lead" character, + * specified with a pointer to its compositions list, + * and a backward-combining "trail" character. + * + * If the lead and trail characters combine, then this function returns + * the following "compositeAndFwd" value: + * Bits 21..1 composite character + * Bit 0 set if the composite is a forward-combining starter + * otherwise it returns -1. + * + * The compositions list has (trail, compositeAndFwd) pair entries, + * encoded as either pairs or triples of 16-bit units. + * The last entry has the high bit of its first unit set. + * + * The list is sorted by ascending trail characters (there are no duplicates). + * A linear search is used. + * + * See normalizer2impl.h for a more detailed description + * of the compositions list format. 
+ */ + private static int combine(String compositions, int list, int trail) { + int key1, firstUnit; + if(trail(firstUnit=compositions.charAt(list))) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } + if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if((firstUnit&COMP_1_TRIPLE)!=0) { + return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2); + } else { + return compositions.charAt(list+1); + } + } + } else { + // trail character is 3400..10FFFF + // result entry has 3 units + key1=COMP_1_TRAIL_LIMIT+((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE; + int key2=(trail<(firstUnit=compositions.charAt(list))) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if(key2>(secondUnit=compositions.charAt(list+1))) { + if((firstUnit&COMP_1_LAST_TUPLE)!=0) { + break; + } else { + list+=3; + } + } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { + return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2); + } else { + break; + } + } else { + break; + } + } + } + return -1; + } + /** + * @param c Character which has compositions + * @param set recursively receives the composites from c's compositions + */ + private void addComposites(int list, UnicodeSet set) { + int firstUnit, compositeAndFwd; + do { + firstUnit=maybeYesCompositions.charAt(list); + if((firstUnit&COMP_1_TRIPLE)==0) { + compositeAndFwd=maybeYesCompositions.charAt(list+1); + list+=2; + } else { + compositeAndFwd=(((int)maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)| + maybeYesCompositions.charAt(list+2); + list+=3; + } + int composite=compositeAndFwd>>1; + if((compositeAndFwd&1)!=0) { + addComposites(getCompositionsListForComposite(getNorm16(composite)), set); + } + set.add(composite); + } while((firstUnit&COMP_1_LAST_TUPLE)==0); + } + /* + * Recomposes the buffer text starting at recomposeStartIndex + * (which is in NFD - decomposed and canonically ordered), + * and truncates the buffer contents. 
+ * + * Note that recomposition never lengthens the text: + * Any character consists of either one or two code units; + * a composition may contain at most one more code unit than the original starter, + * while the combining mark that is removed has at least one code unit. + */ + private void recompose(ReorderingBuffer buffer, int recomposeStartIndex, + boolean onlyContiguous) { + StringBuilder sb=buffer.getStringBuilder(); + int p=recomposeStartIndex; + if(p==sb.length()) { + return; + } + + int starter, pRemove; + int compositionsList; + int c, compositeAndFwd; + int norm16; + int cc, prevCC; + boolean starterIsSupplementary; + + // Some of the following variables are not used until we have a forward-combining starter + // and are only initialized now to avoid compiler warnings. + compositionsList=-1; // used as indicator for whether we have a forward-combining starter + starter=-1; + starterIsSupplementary=false; + prevCC=0; + + for(;;) { + c=sb.codePointAt(p); + p+=Character.charCount(c); + norm16=getNorm16(c); + cc=getCCFromYesOrMaybe(norm16); + if( // this character combines backward and + isMaybe(norm16) && + // we have seen a starter that combines forward and + compositionsList>=0 && + // the backward-combining character is not blocked + (prevCC=0) { + // The starter and the combining mark (c) do combine. + int composite=compositeAndFwd>>1; + + // Remove the combining mark. + pRemove=p-Character.charCount(c); // pRemove & p: start & limit of the combining mark + sb.delete(pRemove, p); + p=pRemove; + // Replace the starter with the composite. + if(starterIsSupplementary) { + if(composite>0xffff) { + // both are supplementary + sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); + sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite)); + } else { + // Supplementary starter, BMP composite: store the composite itself, not the combining mark c that was just removed. + sb.setCharAt(starter, (char)composite); + sb.deleteCharAt(starter+1); + // The composite is shorter than the starter, + // move the intermediate characters forward one. 
+ starterIsSupplementary=false; + --p; + } + } else if(composite>0xffff) { + // The composite is longer than the starter, + // move the intermediate characters back one. + starterIsSupplementary=true; + sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); + sb.insert(starter+1, UTF16.getTrailSurrogate(composite)); + ++p; + } else { + // both are on the BMP + sb.setCharAt(starter, (char)composite); + } + + // Keep prevCC because we removed the combining mark. + + if(p==sb.length()) { + break; + } + // Is the composite a starter that combines forward? + if((compositeAndFwd&1)!=0) { + compositionsList= + getCompositionsListForComposite(getNorm16(composite)); + } else { + compositionsList=-1; + } + + // We combined; continue with looking for compositions. + continue; + } + } + + // no combination this time + prevCC=cc; + if(p==sb.length()) { + break; + } + + // If c did not combine, then check if it is a starter. + if(cc==0) { + // Found a new starter. + if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) { + // It may combine with something, prepare for it. + if(c<=0xffff) { + starterIsSupplementary=false; + starter=p-1; + } else { + starterIsSupplementary=true; + starter=p-2; + } + } + } else if(onlyContiguous) { + // FCC: no discontiguous compositions; any intervening character blocks. + compositionsList=-1; + } + } + buffer.flush(); + } + + /** + * Does c have a composition boundary before it? + * True if its decomposition begins with a character that has + * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()). + * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes + * (isCompYesAndZeroCC()) so we need not decompose. 
+ */ + private boolean hasCompBoundaryBefore(int c, int norm16) { + for(;;) { + if(isCompYesAndZeroCC(norm16)) { + return true; + } else if(isMaybeOrNonZeroCC(norm16)) { + return false; + } else if(isDecompNoAlgorithmic(norm16)) { + c=mapAlgorithmic(c, norm16); + norm16=getNorm16(c); + } else { + // c decomposes, get everything from the variable-length extra data + int firstUnit=extraData.charAt(norm16++); + if((firstUnit&MAPPING_LENGTH_MASK)==0) { + return false; + } + if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 && (extraData.charAt(norm16++)&0xff00)!=0) { + return false; // non-zero leadCC + } + return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16))); + } + } + } + private int findPreviousCompBoundary(CharSequence s, int p) { + while(p>0) { + int c=Character.codePointBefore(s, p); + p-=Character.charCount(c); + if(hasCompBoundaryBefore(c)) { + break; + } + // We could also test hasCompBoundaryAfter() and return iter.codePointLimit, + // but that's probably not worth the extra cost. 
+ } + return p; + } + private int findNextCompBoundary(CharSequence s, int p, int limit) { + while(p0) { + int c=Character.codePointBefore(s, p); + p-=Character.charCount(c); + if(fcdTrie.get(c)<=0xff) { + break; + } + } + return p; + } + private int findNextFCDBoundary(CharSequence s, int p, int limit) { + while(p canonStartSets; + + // bits in canonIterData + private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000; + private static final int CANON_HAS_COMPOSITIONS = 0x40000000; + private static final int CANON_HAS_SET = 0x200000; + private static final int CANON_VALUE_MASK = 0x1fffff; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java b/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java new file mode 100644 index 00000000000..c2d3b3820ed --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java @@ -0,0 +1,1153 @@ + /* + ******************************************************************************* + * Copyright (C) 2005-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.Arrays; +import java.util.Date; +import java.util.MissingResourceException; + +import com.ibm.icu.util.AnnualTimeZoneRule; +import com.ibm.icu.util.BasicTimeZone; +import com.ibm.icu.util.Calendar; +import com.ibm.icu.util.DateTimeRule; +import com.ibm.icu.util.GregorianCalendar; +import com.ibm.icu.util.InitialTimeZoneRule; +import com.ibm.icu.util.SimpleTimeZone; +import com.ibm.icu.util.TimeArrayTimeZoneRule; +import com.ibm.icu.util.TimeZone; +import com.ibm.icu.util.TimeZoneRule; +import com.ibm.icu.util.TimeZoneTransition; +import com.ibm.icu.util.UResourceBundle; + +/** + * A time zone based on the Olson tz database. Olson time zones change + * behavior over time. 
The raw offset, rules, presence or absence of + * daylight savings time, and even the daylight savings amount can all + * vary. + * + * This class uses a resource bundle named "zoneinfo64". Zoneinfo is a + * table containing different kinds of resources. In several places, + * zones are referred to using integers. A zone's integer is a number + * from 0..n-1, where n is the number of zones, with the zones sorted + * in lexicographic order. + * + * 1. Zones. These have keys corresponding to the Olson IDs, e.g., + * "Asia/Shanghai". Each resource describes the behavior of the given + * zone. Zones come in two different formats. + * + * a. Zone (table). A zone is a table resource that contains several + * types of resources, listed below: + * + * - typeOffsets:intvector (Required) + * + * Sets of UTC raw/dst offset pairs in seconds. The entry at + * 2n represents the raw offset and 2n+1 represents the dst offset + * paired with the raw offset at 2n. The very first pair always + * represents the initial zone offset (before the first transition). + * + * - trans:intvector (Optional) + * + * List of transition times represented by 32bit seconds from the + * epoch (1970-01-01T00:00Z) in ascending order. + * + * - transPre32/transPost32:intvector (Optional) + * + * List of transition times before/after the range covered by 32bit + * seconds. Each time is represented by a pair of 32bit integers + * (high 32 bits first). + * + * - typeMap:bin (Optional) + * + * Array of bytes representing the mapping between each transition + * time (transPre32/trans/transPost32) and its corresponding offset + * data (typeOffsets). + * + * - finalRule:string (Optional) + * + * If a recurrent transition rule is applicable to a zone forever + * after the final transition time, finalRule represents the rule + * in Rules data. + * + * - finalRaw:int (Optional) + * + * When finalRule is available, finalRaw is required and specifies + * the raw (base) offset of the rule. 
+ * + * - finalYear:int (Optional) + * + * When finalRule is available, finalYear is required and specifies + * the start year of the rule. + * + * - links:intvector (Optional) + * + * When this zone data is shared with other zones, links specifies + * all zones including the zone itself. Each zone is referenced by + * integer index. + * + * b. Link (int, length 1). A link zone is an int resource. The + * integer is the zone number of the target zone. The key of this + * resource is an alternate name for the target zone. This data + * corresponds to Link data in the tz database. + * + * + * 2. Rules. These have keys corresponding to the Olson rule IDs, + * with an underscore prepended, e.g., "_EU". Each resource describes + * the behavior of the given rule using an intvector, containing the + * onset list, the cessation list, and the DST savings. The onset and + * cessation lists consist of the month, dowim, dow, time, and time + * mode. The end result is that the 11 integers describing the rule + * can be passed directly into the SimpleTimeZone 13-argument + * constructor (the other two arguments will be the raw offset, taken + * from the zone's finalRaw, and the ID string, which is not + * used), with the times, the DST savings and finalRaw multiplied by + * 1000 to scale from seconds to milliseconds. + * + * 3. Regions. An array that specifies the mapping between zones and + * regions. Each item is either a 2-letter ISO country code or "001" + * (UN M.49 - World). This data is generated from "zone.tab" + * in the tz database. 
+ */ +public class OlsonTimeZone extends BasicTimeZone { + + // Generated by serialver from JDK 1.4.1_01 + static final long serialVersionUID = -6281977362477515376L; + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#getOffset(int, int, int, int, int, int) + */ + public int getOffset(int era, int year, int month, int day, int dayOfWeek, int milliseconds) { + if (month < Calendar.JANUARY || month > Calendar.DECEMBER) { + throw new IllegalArgumentException("Month is not in the legal range: " +month); + } else { + return getOffset(era, year, month, day, dayOfWeek, milliseconds, Grego.monthLength(year, month)); + } + } + + /** + * TimeZone API. + */ + public int getOffset(int era, int year, int month,int dom, int dow, int millis, int monthLength){ + + if ((era != GregorianCalendar.AD && era != GregorianCalendar.BC) + || month < Calendar.JANUARY + || month > Calendar.DECEMBER + || dom < 1 + || dom > monthLength + || dow < Calendar.SUNDAY + || dow > Calendar.SATURDAY + || millis < 0 + || millis >= Grego.MILLIS_PER_DAY + || monthLength < 28 + || monthLength > 31) { + throw new IllegalArgumentException(); + } + + if (era == GregorianCalendar.BC) { + year = -year; + } + + if (finalZone != null && year >= finalStartYear) { + return finalZone.getOffset(era, year, month, dom, dow, millis); + } + + // Compute local epoch millis from input fields + long time = Grego.fieldsToDay(year, month, dom) * Grego.MILLIS_PER_DAY + millis; + + int[] offsets = new int[2]; + getHistoricalOffset(time, true, LOCAL_DST, LOCAL_STD, offsets); + return offsets[0] + offsets[1]; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#setRawOffset(int) + */ + public void setRawOffset(int offsetMillis) { + if (getRawOffset() == offsetMillis) { + return; + } + long current = System.currentTimeMillis(); + + if (current < finalStartMillis) { + SimpleTimeZone stz = new SimpleTimeZone(offsetMillis, getID()); + + boolean bDst = useDaylightTime(); + if (bDst) { + TimeZoneRule[] currentRules = 
getSimpleTimeZoneRulesNear(current); + if (currentRules.length != 3) { + // DST was observed at the beginning of this year, so useDaylightTime + // returned true. getSimpleTimeZoneRulesNear requires at least one + // future transition for making a pair of rules. This implementation + // rolls back the time before the latest offset transition. + TimeZoneTransition tzt = getPreviousTransition(current, false); + if (tzt != null) { + currentRules = getSimpleTimeZoneRulesNear(tzt.getTime() - 1); + } + } + if (currentRules.length == 3 + && (currentRules[1] instanceof AnnualTimeZoneRule) + && (currentRules[2] instanceof AnnualTimeZoneRule)) { + // A pair of AnnualTimeZoneRule + AnnualTimeZoneRule r1 = (AnnualTimeZoneRule)currentRules[1]; + AnnualTimeZoneRule r2 = (AnnualTimeZoneRule)currentRules[2]; + DateTimeRule start, end; + int offset1 = r1.getRawOffset() + r1.getDSTSavings(); + int offset2 = r2.getRawOffset() + r2.getDSTSavings(); + int sav; + if (offset1 > offset2) { + start = r1.getRule(); + end = r2.getRule(); + sav = offset1 - offset2; + } else { + start = r2.getRule(); + end = r1.getRule(); + sav = offset2 - offset1; + } + // getSimpleTimeZoneRulesNear always return rules using DOW / WALL_TIME + stz.setStartRule(start.getRuleMonth(), start.getRuleWeekInMonth(), start.getRuleDayOfWeek(), + start.getRuleMillisInDay()); + stz.setEndRule(end.getRuleMonth(), end.getRuleWeekInMonth(), end.getRuleDayOfWeek(), + end.getRuleMillisInDay()); + // set DST saving amount and start year + stz.setDSTSavings(sav); + } else { + // This could only happen if last rule is DST + // and the rule used forever. For example, Asia/Dhaka + // in tzdata2009i stays in DST forever. 
+ + // Hack - set DST starting at midnight on Jan 1st, + // ending 23:59:59.999 on Dec 31st + stz.setStartRule(0, 1, 0); + stz.setEndRule(11, 31, Grego.MILLIS_PER_DAY - 1); + } + } + + int[] fields = Grego.timeToFields(current, null); + + finalStartYear = fields[0]; + finalStartMillis = Grego.fieldsToDay(fields[0], 0, 1); + + if (bDst) { + // we probably do not need to set start year of final rule + // to finalzone itself, but we always do this for now. + stz.setStartYear(finalStartYear); + } + + finalZone = stz; + + } else { + finalZone.setRawOffset(offsetMillis); + } + + transitionRulesInitialized = false; + } + + public Object clone() { + OlsonTimeZone other = (OlsonTimeZone) super.clone(); + if(finalZone != null){ + finalZone.setID(getID()); + other.finalZone = (SimpleTimeZone)finalZone.clone(); + } + + // Following data are read-only and never changed. + // Therefore, shallow copies should be sufficient. + + /* + if (transitionTimes64 != null) { + other.transitionTimes64 = transitionTimes64.clone(); + } + if (typeMapData != null) { + other.typeMapData = typeMapData.clone(); + } + other.typeOffsets = typeOffsets.clone(); + */ + + return other; + } + + /** + * TimeZone API. + */ + public void getOffset(long date, boolean local, int[] offsets) { + if (finalZone != null && date >= finalStartMillis) { + finalZone.getOffset(date, local, offsets); + } else { + getHistoricalOffset(date, local, + LOCAL_FORMER, LOCAL_LATTER, offsets); + } + } + + /** + * {@inheritDoc} + * @internal + * @deprecated This API is ICU internal only. 
+ */ + public void getOffsetFromLocal(long date, + int nonExistingTimeOpt, int duplicatedTimeOpt, int[] offsets) { + if (finalZone != null && date >= finalStartMillis) { + finalZone.getOffsetFromLocal(date, nonExistingTimeOpt, duplicatedTimeOpt, offsets); + } else { + getHistoricalOffset(date, true, nonExistingTimeOpt, duplicatedTimeOpt, offsets); + } + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#getRawOffset() + */ + public int getRawOffset() { + int[] ret = new int[2]; + getOffset(System.currentTimeMillis(), false, ret); + return ret[0]; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#useDaylightTime() + */ + public boolean useDaylightTime() { + // If DST was observed in 1942 (for example) but has never been + // observed from 1943 to the present, most clients will expect + // this method to return FALSE. This method determines whether + // DST is in use in the current year (at any point in the year) + // and returns TRUE if so. + long current = System.currentTimeMillis(); + + if (finalZone != null && current >= finalStartMillis) { + return (finalZone != null && finalZone.useDaylightTime()); + } + + int[] fields = Grego.timeToFields(current, null); + + // Find start of this year, and start of next year + long start = Grego.fieldsToDay(fields[0], 0, 1) * SECONDS_PER_DAY; + long limit = Grego.fieldsToDay(fields[0] + 1, 0, 1) * SECONDS_PER_DAY; + + // Return TRUE if DST is observed at any time during the current + // year. + for (int i = 0; i < transitionCount; ++i) { + if (transitionTimes64[i] >= limit) { + break; + } + if ((transitionTimes64[i] >= start && dstOffsetAt(i) != 0) + || (transitionTimes64[i] > start && i > 0 && dstOffsetAt(i - 1) != 0)) { + return true; + } + } + return false; + } + + /** + * TimeZone API + * Returns the amount of time to be added to local standard time + * to get local wall clock time. 
+ */ + public int getDSTSavings() { + if (finalZone != null){ + return finalZone.getDSTSavings(); + } + return super.getDSTSavings(); + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#inDaylightTime(java.util.Date) + */ + public boolean inDaylightTime(Date date) { + int[] temp = new int[2]; + getOffset(date.getTime(), false, temp); + return temp[1] != 0; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.TimeZone#hasSameRules(com.ibm.icu.util.TimeZone) + */ + public boolean hasSameRules(TimeZone other) { + // The super class implementation only check raw offset and + // use of daylight saving time. + if (!super.hasSameRules(other)) { + return false; + } + + if (!(other instanceof OlsonTimeZone)) { + // We cannot reasonably compare rules in different types + return false; + } + + // Check final zone + OlsonTimeZone o = (OlsonTimeZone)other; + if (finalZone == null) { + if (o.finalZone != null) { + return false; + } + } else { + if (o.finalZone == null + || finalStartYear != o.finalStartYear + || !(finalZone.hasSameRules(o.finalZone))) { + return false; + } + } + // Check transitions + // Note: The code below actually fails to compare two equivalent rules in + // different representation properly. + if (transitionCount != o.transitionCount || + !Arrays.equals(transitionTimes64, o.transitionTimes64) || + typeCount != o.typeCount || + !Arrays.equals(typeMapData, o.typeMapData) || + !Arrays.equals(typeOffsets, o.typeOffsets)){ + return false; + } + return true; + } + + /** + * Construct a GMT+0 zone with no transitions. This is done when a + * constructor fails so the resultant object is well-behaved. 
+ */ + private void constructEmpty(){ + transitionCount = 0; + transitionTimes64 = null; + typeMapData = null; + + typeCount = 1; + typeOffsets = new int[]{0,0}; + finalZone = null; + finalStartYear = Integer.MAX_VALUE; + finalStartMillis = Double.MAX_VALUE; + + transitionRulesInitialized = false; + } + + /** + * Construct from a resource bundle + * @param top the top-level zoneinfo resource bundle. This is used + * to lookup the rule that `res' may refer to, if there is one. + * @param res the resource bundle of the zone to be constructed + */ + public OlsonTimeZone(UResourceBundle top, UResourceBundle res){ + construct(top, res); + } + + private void construct(UResourceBundle top, UResourceBundle res){ + + if ((top == null || res == null)) { + throw new IllegalArgumentException(); + } + if(DEBUG) System.out.println("OlsonTimeZone(" + res.getKey() +")"); + + UResourceBundle r; + int[] transPre32, trans32, transPost32; + transPre32 = trans32 = transPost32 = null; + + transitionCount = 0; + + // Pre-32bit second transitions + try { + r = res.get("transPre32"); + transPre32 = r.getIntVector(); + if (transPre32.length % 2 != 0) { + // elements in the pre-32bit must be an even number + throw new IllegalArgumentException("Invalid Format"); + } + transitionCount += transPre32.length / 2; + } catch (MissingResourceException e) { + // Pre-32bit transition data is optional + } + + // 32bit second transitions + try { + r = res.get("trans"); + trans32 = r.getIntVector(); + transitionCount += trans32.length; + } catch (MissingResourceException e) { + // 32bit transition data is optional + } + + // Post-32bit second transitions + try { + r = res.get("transPost32"); + transPost32 = r.getIntVector(); + if (transPost32.length % 2 != 0) { + // elements in the post-32bit must be an even number + throw new IllegalArgumentException("Invalid Format"); + } + transitionCount += transPost32.length / 2; + } catch (MissingResourceException e) { + // Post-32bit transition data is optional + 
} + + transitionTimes64 = new long[transitionCount]; + int idx = 0; + if (transPre32 != null) { + for (int i = 0; i < transPre32.length / 2; i++, idx++) { + transitionTimes64[idx] = + (((long)transPre32[i * 2]) & 0x00000000FFFFFFFFL) << 32 + | (((long)transPre32[i * 2 + 1]) & 0x00000000FFFFFFFFL); + } + } + if (trans32 != null) { + for (int i = 0; i < trans32.length; i++, idx++) { + transitionTimes64[idx] = (long)trans32[i]; + } + } + if (transPost32 != null) { + for (int i = 0; i < transPost32.length / 2; i++, idx++) { + transitionTimes64[idx] = + (((long)transPost32[i * 2]) & 0x00000000FFFFFFFFL) << 32 + | (((long)transPost32[i * 2 + 1]) & 0x00000000FFFFFFFFL); + } + } + + // Type offsets list must be of even size, with size >= 2 + r = res.get("typeOffsets"); + typeOffsets = r.getIntVector(); + if ((typeOffsets.length < 2 || typeOffsets.length > 0x7FFE || typeOffsets.length % 2 != 0)) { + throw new IllegalArgumentException("Invalid Format"); + } + typeCount = typeOffsets.length / 2; + + // Type map data must be of the same size as the transition count + typeMapData = null; + if (transitionCount > 0) { + r = res.get("typeMap"); + typeMapData = r.getBinary(null); + if (typeMapData.length != transitionCount) { + throw new IllegalArgumentException("Invalid Format"); + } + } + + // Process final rule and data, if any + finalZone = null; + finalStartYear = Integer.MAX_VALUE; + finalStartMillis = Double.MAX_VALUE; + + String ruleID = null; + try { + ruleID = res.getString("finalRule"); + + r = res.get("finalRaw"); + int ruleRaw = r.getInt() * Grego.MILLIS_PER_SECOND; + r = loadRule(top, ruleID); + int[] ruleData = r.getIntVector(); + + if (ruleData == null || ruleData.length != 11) { + throw new IllegalArgumentException("Invalid Format"); + } + finalZone = new SimpleTimeZone(ruleRaw, "", + ruleData[0], ruleData[1], ruleData[2], + ruleData[3] * Grego.MILLIS_PER_SECOND, + ruleData[4], + ruleData[5], ruleData[6], ruleData[7], + ruleData[8] * Grego.MILLIS_PER_SECOND, + 
ruleData[9], + ruleData[10] * Grego.MILLIS_PER_SECOND); + + r = res.get("finalYear"); + finalStartYear = r.getInt(); + + // Note: Setting finalStartYear to the finalZone is problematic. When a date is around + // year boundary, SimpleTimeZone may return false result when DST is observed at the + // beginning of year. We could apply safe margin (day or two), but when one of recurrent + // rules falls around year boundary, it could return false result. Without setting the + // start year, finalZone works fine around the year boundary of the start year. + + // finalZone.setStartYear(finalStartYear); + + // Compute the millis for Jan 1, 0:00 GMT of the finalYear + + // Note: finalStartMillis is used for detecting either if + // historic transition data or finalZone to be used. In an + // extreme edge case - for example, two transitions fall into + // small windows of time around the year boundary, this may + // result incorrect offset computation. But I think it will + // never happen practically. 
Yoshito - Feb 20, 2010 + finalStartMillis = Grego.fieldsToDay(finalStartYear, 0, 1) * Grego.MILLIS_PER_DAY; + } catch (MissingResourceException e) { + if (ruleID != null) { + // ruleID is found, but missing other data required for + // creating finalZone + throw new IllegalArgumentException("Invalid Format"); + } + } + } + + // This constructor is used for testing purpose only + public OlsonTimeZone(String id){ + UResourceBundle top = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, + ZONEINFORES, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle res = ZoneMeta.openOlsonResource(top, id); + construct(top, res); + if (finalZone != null){ + finalZone.setID(id); + } + super.setID(id); + } + + public void setID(String id){ + if (finalZone != null){ + finalZone.setID(id); + } + super.setID(id); + transitionRulesInitialized = false; + } + + private void getHistoricalOffset(long date, boolean local, + int NonExistingTimeOpt, int DuplicatedTimeOpt, int[] offsets) { + if (transitionCount != 0) { + long sec = Grego.floorDivide(date, Grego.MILLIS_PER_SECOND); + if (!local && sec < transitionTimes64[0]) { + // Before the first transition time + offsets[0] = initialRawOffset() * Grego.MILLIS_PER_SECOND; + offsets[1] = initialDstOffset() * Grego.MILLIS_PER_SECOND; + } else { + // Linear search from the end is the fastest approach, since + // most lookups will happen at/near the end. 
+ int transIdx; + for (transIdx = transitionCount - 1; transIdx >= 0; transIdx--) { + long transition = transitionTimes64[transIdx]; + if (local) { + int offsetBefore = zoneOffsetAt(transIdx - 1); + boolean dstBefore = dstOffsetAt(transIdx - 1) != 0; + + int offsetAfter = zoneOffsetAt(transIdx); + boolean dstAfter = dstOffsetAt(transIdx) != 0; + + boolean dstToStd = dstBefore && !dstAfter; + boolean stdToDst = !dstBefore && dstAfter; + + if (offsetAfter - offsetBefore >= 0) { + // Positive transition, which makes a non-existing local time range + if (((NonExistingTimeOpt & STD_DST_MASK) == LOCAL_STD && dstToStd) + || ((NonExistingTimeOpt & STD_DST_MASK) == LOCAL_DST && stdToDst)) { + transition += offsetBefore; + } else if (((NonExistingTimeOpt & STD_DST_MASK) == LOCAL_STD && stdToDst) + || ((NonExistingTimeOpt & STD_DST_MASK) == LOCAL_DST && dstToStd)) { + transition += offsetAfter; + } else if ((NonExistingTimeOpt & FORMER_LATTER_MASK) == LOCAL_LATTER) { + transition += offsetBefore; + } else { + // Interprets the time with rule before the transition, + // default for non-existing time range + transition += offsetAfter; + } + } else { + // Negative transition, which makes a duplicated local time range + if (((DuplicatedTimeOpt & STD_DST_MASK) == LOCAL_STD && dstToStd) + || ((DuplicatedTimeOpt & STD_DST_MASK) == LOCAL_DST && stdToDst)) { + transition += offsetAfter; + } else if (((DuplicatedTimeOpt & STD_DST_MASK) == LOCAL_STD && stdToDst) + || ((DuplicatedTimeOpt & STD_DST_MASK) == LOCAL_DST && dstToStd)) { + transition += offsetBefore; + } else if ((DuplicatedTimeOpt & FORMER_LATTER_MASK) == LOCAL_FORMER) { + transition += offsetBefore; + } else { + // Interprets the time with rule after the transition, + // default for duplicated local time range + transition += offsetAfter; + } + } + } + if (sec >= transition) { + break; + } + } + // transIdx could be -1 when local=true + offsets[0] = rawOffsetAt(transIdx) * Grego.MILLIS_PER_SECOND; + offsets[1] = 
dstOffsetAt(transIdx) * Grego.MILLIS_PER_SECOND; + } + } else { + // No transitions, single pair of offsets only + offsets[0] = initialRawOffset() * Grego.MILLIS_PER_SECOND; + offsets[1] = initialDstOffset() * Grego.MILLIS_PER_SECOND; + } + } + + private int getInt(byte val){ + return val & 0xFF; + } + + /* + * Following 3 methods return an offset at the given transition time index. + * When the index is negative, return the initial offset. + */ + private int zoneOffsetAt(int transIdx) { + int typeIdx = transIdx >= 0 ? getInt(typeMapData[transIdx]) * 2 : 0; + return typeOffsets[typeIdx] + typeOffsets[typeIdx + 1]; + } + + private int rawOffsetAt(int transIdx) { + int typeIdx = transIdx >= 0 ? getInt(typeMapData[transIdx]) * 2 : 0; + return typeOffsets[typeIdx]; + } + + private int dstOffsetAt(int transIdx) { + int typeIdx = transIdx >= 0 ? getInt(typeMapData[transIdx]) * 2 : 0; + return typeOffsets[typeIdx + 1]; + } + + private int initialRawOffset() { + return typeOffsets[0]; + } + + private int initialDstOffset() { + return typeOffsets[1]; + } + + // temp + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(super.toString()); + buf.append('['); + buf.append("transitionCount=" + transitionCount); + buf.append(",typeCount=" + typeCount); + buf.append(",transitionTimes="); + if (transitionTimes64 != null) { + buf.append('['); + for (int i = 0; i < transitionTimes64.length; ++i) { + if (i > 0) { + buf.append(','); + } + buf.append(Long.toString(transitionTimes64[i])); + } + buf.append(']'); + } else { + buf.append("null"); + } + buf.append(",typeOffsets="); + if (typeOffsets != null) { + buf.append('['); + for (int i = 0; i < typeOffsets.length; ++i) { + if (i > 0) { + buf.append(','); + } + buf.append(Integer.toString(typeOffsets[i])); + } + buf.append(']'); + } else { + buf.append("null"); + } + buf.append(",finalStartYear=" + finalStartYear); + buf.append(",finalStartMillis=" + finalStartMillis); + buf.append(",finalZone=" + 
finalZone); + buf.append(']'); + + return buf.toString(); + } + + /** + * Number of transitions, 0..~370 + */ + private int transitionCount; + + /** + * Number of types, 1..255 + */ + private int typeCount; + + /** + * Time of each transition in seconds from 1970 epoch. + */ + private long[] transitionTimes64; + + /** + * Offset from GMT in seconds for each type. + * Length is equal to typeCount + */ + private int[] typeOffsets; // alias into res; do not delete + + /** + * Type description data, consisting of transitionCount uint8_t + * type indices (from 0..typeCount-1). + * Length is equal to transitionCount + */ + private byte[] typeMapData; // alias into res; do not delete + + /** + * For year >= finalStartYear, the finalZone will be used. + */ + private int finalStartYear = Integer.MAX_VALUE; + + /** + * For date >= finalStartMillis, the finalZone will be used. + */ + private double finalStartMillis = Double.MAX_VALUE; + + /** + * A SimpleTimeZone that governs the behavior for years >= finalYear. + * If and only if finalYear == INT32_MAX then finalZone == 0. 
+ */ + private SimpleTimeZone finalZone = null; // owned, may be NULL + + private static final String ZONEINFORES = "zoneinfo64"; + + private static final boolean DEBUG = ICUDebug.enabled("olson"); + private static final int SECONDS_PER_DAY = 24*60*60; + + private static UResourceBundle loadRule(UResourceBundle top, String ruleid) { + UResourceBundle r = top.get("Rules"); + r = r.get(ruleid); + return r; + } + + public boolean equals(Object obj){ + if (!super.equals(obj)) return false; // super does class check + + OlsonTimeZone z = (OlsonTimeZone) obj; + + return (Utility.arrayEquals(typeMapData, z.typeMapData) || + // If the pointers are not equal, the zones may still + // be equal if their rules and transitions are equal + (finalStartYear == z.finalStartYear && + // Don't compare finalMillis; if finalYear is ==, so is finalMillis + ((finalZone == null && z.finalZone == null) || + (finalZone != null && z.finalZone != null && + finalZone.equals(z.finalZone)) && + transitionCount == z.transitionCount && + typeCount == z.typeCount && + Utility.arrayEquals(transitionTimes64, z.transitionTimes64) && + Utility.arrayEquals(typeOffsets, z.typeOffsets) && + Utility.arrayEquals(typeMapData, z.typeMapData) + ))); + + } + + public int hashCode(){ + int ret = (int) (finalStartYear ^ (finalStartYear>>>4) + + transitionCount ^ (transitionCount>>>6) + + typeCount ^ (typeCount>>>8) + + Double.doubleToLongBits(finalStartMillis)+ + (finalZone == null ? 
0 : finalZone.hashCode()) + + super.hashCode()); + for(int i=0; i>>8); + } + for(int i=0; i>>8); + } + for(int i=0; i= firstFinalTZTransition.getTime()) { + if (finalZone.useDaylightTime()) { + //return finalZone.getNextTransition(base, inclusive); + return finalZoneWithStartYear.getNextTransition(base, inclusive); + } else { + // No more transitions + return null; + } + } + } + if (historicRules != null) { + // Find a historical transition + int ttidx = transitionCount - 1; + for (; ttidx >= firstTZTransitionIdx; ttidx--) { + long t = transitionTimes64[ttidx] * Grego.MILLIS_PER_SECOND; + if (base > t || (!inclusive && base == t)) { + break; + } + } + if (ttidx == transitionCount - 1) { + return firstFinalTZTransition; + } else if (ttidx < firstTZTransitionIdx) { + return firstTZTransition; + } else { + // Create a TimeZoneTransition + TimeZoneRule to = historicRules[getInt(typeMapData[ttidx + 1])]; + TimeZoneRule from = historicRules[getInt(typeMapData[ttidx])]; + long startTime = transitionTimes64[ttidx+1] * Grego.MILLIS_PER_SECOND; + + // The transitions loaded from zoneinfo.res may contain non-transition data + if (from.getName().equals(to.getName()) && from.getRawOffset() == to.getRawOffset() + && from.getDSTSavings() == to.getDSTSavings()) { + return getNextTransition(startTime, false); + } + + return new TimeZoneTransition(startTime, from, to); + } + } + return null; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.BasicTimeZone#getPreviousTransition(long, boolean) + */ + public TimeZoneTransition getPreviousTransition(long base, boolean inclusive) { + initTransitionRules(); + + if (finalZone != null) { + if (inclusive && base == firstFinalTZTransition.getTime()) { + return firstFinalTZTransition; + } else if (base > firstFinalTZTransition.getTime()) { + if (finalZone.useDaylightTime()) { + //return finalZone.getPreviousTransition(base, inclusive); + return finalZoneWithStartYear.getPreviousTransition(base, inclusive); + } else { + return 
firstFinalTZTransition; + } + } + } + + if (historicRules != null) { + // Find a historical transition + int ttidx = transitionCount - 1; + for (; ttidx >= firstTZTransitionIdx; ttidx--) { + long t = transitionTimes64[ttidx] * Grego.MILLIS_PER_SECOND; + if (base > t || (inclusive && base == t)) { + break; + } + } + if (ttidx < firstTZTransitionIdx) { + // No more transitions + return null; + } else if (ttidx == firstTZTransitionIdx) { + return firstTZTransition; + } else { + // Create a TimeZoneTransition + TimeZoneRule to = historicRules[getInt(typeMapData[ttidx])]; + TimeZoneRule from = historicRules[getInt(typeMapData[ttidx-1])]; + long startTime = transitionTimes64[ttidx] * Grego.MILLIS_PER_SECOND; + + // The transitions loaded from zoneinfo.res may contain non-transition data + if (from.getName().equals(to.getName()) && from.getRawOffset() == to.getRawOffset() + && from.getDSTSavings() == to.getDSTSavings()) { + return getPreviousTransition(startTime, false); + } + + return new TimeZoneTransition(startTime, from, to); + } + } + return null; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.BasicTimeZone#getTimeZoneRules() + */ + public TimeZoneRule[] getTimeZoneRules() { + initTransitionRules(); + int size = 1; + if (historicRules != null) { + // historicRules may contain null entries when original zoneinfo data + // includes non transition data. 
+ for (int i = 0; i < historicRules.length; i++) { + if (historicRules[i] != null) { + size++; + } + } + } + if (finalZone != null) { + if (finalZone.useDaylightTime()) { + size += 2; + } else { + size++; + } + } + + TimeZoneRule[] rules = new TimeZoneRule[size]; + int idx = 0; + rules[idx++] = initialRule; + + if (historicRules != null) { + for (int i = 0; i < historicRules.length; i++) { + if (historicRules[i] != null) { + rules[idx++] = historicRules[i]; + } + } + } + + if (finalZone != null) { + if (finalZone.useDaylightTime()) { + TimeZoneRule[] stzr = finalZoneWithStartYear.getTimeZoneRules(); + // Adding only transition rules + rules[idx++] = stzr[1]; + rules[idx++] = stzr[2]; + } else { + // Create a TimeArrayTimeZoneRule at finalMillis + rules[idx++] = new TimeArrayTimeZoneRule(getID() + "(STD)", finalZone.getRawOffset(), 0, + new long[] {(long)finalStartMillis}, DateTimeRule.UTC_TIME); + } + } + return rules; + } + + private transient InitialTimeZoneRule initialRule; + private transient TimeZoneTransition firstTZTransition; + private transient int firstTZTransitionIdx; + private transient TimeZoneTransition firstFinalTZTransition; + private transient TimeArrayTimeZoneRule[] historicRules; + private transient SimpleTimeZone finalZoneWithStartYear; // hack + + private transient boolean transitionRulesInitialized; + + private synchronized void initTransitionRules() { + if (transitionRulesInitialized) { + return; + } + + initialRule = null; + firstTZTransition = null; + firstFinalTZTransition = null; + historicRules = null; + firstTZTransitionIdx = 0; + finalZoneWithStartYear = null; + + String stdName = getID() + "(STD)"; + String dstName = getID() + "(DST)"; + + int raw, dst; + + // Create initial rule + raw = initialRawOffset() * Grego.MILLIS_PER_SECOND; + dst = initialDstOffset() * Grego.MILLIS_PER_SECOND; + initialRule = new InitialTimeZoneRule((dst == 0 ? 
stdName : dstName), raw, dst); + + if (transitionCount > 0) { + int transitionIdx, typeIdx; + + // We probably no longer need to check the first "real" transition + // here, because the new tzcode remove such transitions already. + // For now, keeping this code for just in case. Feb 19, 2010 Yoshito + for (transitionIdx = 0; transitionIdx < transitionCount; transitionIdx++) { + if (getInt(typeMapData[transitionIdx]) != 0) { // type 0 is the initial type + break; + } + firstTZTransitionIdx++; + } + if (transitionIdx == transitionCount) { + // Actually no transitions... + } else { + // Build historic rule array + long[] times = new long[transitionCount]; + for (typeIdx = 0; typeIdx < typeCount; typeIdx++) { + // Gather all start times for each pair of offsets + int nTimes = 0; + for (transitionIdx = firstTZTransitionIdx; transitionIdx < transitionCount; transitionIdx++) { + if (typeIdx == getInt(typeMapData[transitionIdx])) { + long tt = transitionTimes64[transitionIdx] * Grego.MILLIS_PER_SECOND; + if (tt < finalStartMillis) { + // Exclude transitions after finalMillis + times[nTimes++] = tt; + } + } + } + if (nTimes > 0) { + long[] startTimes = new long[nTimes]; + System.arraycopy(times, 0, startTimes, 0, nTimes); + // Create a TimeArrayTimeZoneRule + raw = typeOffsets[typeIdx*2]*Grego.MILLIS_PER_SECOND; + dst = typeOffsets[typeIdx*2 + 1]*Grego.MILLIS_PER_SECOND; + if (historicRules == null) { + historicRules = new TimeArrayTimeZoneRule[typeCount]; + } + historicRules[typeIdx] = new TimeArrayTimeZoneRule((dst == 0 ? 
stdName : dstName), + raw, dst, startTimes, DateTimeRule.UTC_TIME); + } + } + + // Create initial transition + typeIdx = getInt(typeMapData[firstTZTransitionIdx]); + firstTZTransition = new TimeZoneTransition(transitionTimes64[firstTZTransitionIdx] * Grego.MILLIS_PER_SECOND, + initialRule, historicRules[typeIdx]); + + } + } + + if (finalZone != null) { + // Get the first occurrence of final rule starts + long startTime = (long)finalStartMillis; + TimeZoneRule firstFinalRule; + if (finalZone.useDaylightTime()) { + /* + * Note: When an OlsonTimeZone is constructed, we should set the final year + * as the start year of finalZone. However, the boundary condition used for + * getting offset from finalZone has some problems. So setting the start year + * in the finalZone will cause a problem. For now, we do not set the valid + * start year when the construction time and create a clone and set the + * start year when extracting rules. + */ + finalZoneWithStartYear = (SimpleTimeZone)finalZone.clone(); + finalZoneWithStartYear.setStartYear(finalStartYear); + + TimeZoneTransition tzt = finalZoneWithStartYear.getNextTransition(startTime, false); + firstFinalRule = tzt.getTo(); + startTime = tzt.getTime(); + } else { + finalZoneWithStartYear = finalZone; + firstFinalRule = new TimeArrayTimeZoneRule(finalZone.getID(), + finalZone.getRawOffset(), 0, new long[] {startTime}, DateTimeRule.UTC_TIME); + } + TimeZoneRule prevRule = null; + if (transitionCount > 0) { + prevRule = historicRules[getInt(typeMapData[transitionCount - 1])]; + } + if (prevRule == null) { + // No historic transitions, but only finalZone available + prevRule = initialRule; + } + firstFinalTZTransition = new TimeZoneTransition(startTime, prevRule, firstFinalRule); + } + + transitionRulesInitialized = true; + } + + // Note: This class does not support back level serialization compatibility + // very well. ICU 4.4 introduced the 64bit transition data. 
It is probably + // possible to implement this class to make old version of ICU to deserialize + // object stream serialized by ICU 4.4+. However, such implementation will + // introduce unnecessary complexity other than serialization support. + // I decided to provide minimum level of backward compatibility, which + // only support ICU 4.4+ to create an instance of OlsonTimeZone by reloading + // the zone rules from bundles. ICU 4.2 or older version of ICU cannot + // deserialize object stream created by ICU 4.4+. Yoshito -Feb 22, 2010 + + private static final int currentSerialVersion = 1; + private int serialVersionOnStream = currentSerialVersion; + + private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { + stream.defaultReadObject(); + + if (serialVersionOnStream < 1) { + // No version - 4.2 or older + // Just reloading the rule from bundle + boolean initialized = false; + String tzid = getID(); + if (tzid != null) { + try { + UResourceBundle top = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, + ZONEINFORES, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle res = ZoneMeta.openOlsonResource(top, tzid); + construct(top, res); + if (finalZone != null){ + finalZone.setID(tzid); + } + initialized = true; + } catch (Exception e) { + // throw away + } + } + if (!initialized) { + // final resort + constructEmpty(); + } + } + + // need to rebuild transition rules when requested + transitionRulesInitialized = false; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/PVecToTrieCompactHandler.java b/main/classes/core/src/com/ibm/icu/impl/PVecToTrieCompactHandler.java new file mode 100644 index 00000000000..3fb604900e6 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/PVecToTrieCompactHandler.java @@ -0,0 +1,40 @@ +/* + ****************************************************************************** + * Copyright (C) 1996-2009, International Business Machines Corporation and * + * others. 
All Rights Reserved. * + ****************************************************************************** + */ + +/* + * @author Shaopeng Jia + */ + +package com.ibm.icu.impl; + +import com.ibm.icu.impl.PropsVectors.CompactHandler; + +public class PVecToTrieCompactHandler implements CompactHandler { + public IntTrieBuilder builder; + public int initialValue; + + public void setRowIndexForErrorValue(int rowIndex) { + } + + public void setRowIndexForInitialValue(int rowIndex) { + initialValue = rowIndex; + } + + public void setRowIndexForRange(int start, int end, int rowIndex) { + builder.setRange(start, end + 1, rowIndex, true); + } + + public void startRealValues(int rowIndex) { + if (rowIndex > 0xffff) { + // too many rows for a 16-bit trie + throw new IndexOutOfBoundsException(); + } else { + builder = new IntTrieBuilder(null, 100000, initialValue, + initialValue, false); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/PatternTokenizer.java b/main/classes/core/src/com/ibm/icu/impl/PatternTokenizer.java new file mode 100644 index 00000000000..3f576b56ebd --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/PatternTokenizer.java @@ -0,0 +1,390 @@ +/* + ******************************************************************************* + * Copyright (C) 2006-2009, Google, International Business Machines Corporation * + * and others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * A simple parsing class for patterns and rules. Handles '...' quotations, \\uxxxx and \\Uxxxxxxxx, and symple syntax. + * The '' (two quotes) is treated as a single quote, inside or outside a quote + *

+ * <ul>
+ * <li>Any ignorable characters are ignored in parsing.</li>
+ * <li>Any syntax characters are broken into separate tokens</li>
+ * <li>Quote characters can be specified: '...', "...", and \x</li>
+ * <li>Other characters are treated as literals</li>
+ * </ul>
    + */ +public class PatternTokenizer { + // settings used in the interpretation of the pattern + private UnicodeSet ignorableCharacters = new UnicodeSet(); + private UnicodeSet syntaxCharacters = new UnicodeSet(); + private UnicodeSet extraQuotingCharacters = new UnicodeSet(); + private UnicodeSet escapeCharacters = new UnicodeSet(); + private boolean usingSlash = false; + private boolean usingQuote = false; + + // transient data, set when needed. Null it out for any changes in the above fields. + private transient UnicodeSet needingQuoteCharacters = null; + + // data about the current pattern being parsed. start gets moved as we go along. + private int start; + private int limit; + private String pattern; + + public UnicodeSet getIgnorableCharacters() { + return (UnicodeSet) ignorableCharacters.clone(); + } + /** + * Sets the characters to be ignored in parsing, eg new UnicodeSet("[:pattern_whitespace:]"); + * @param ignorableCharacters Characters to be ignored. + * @return A PatternTokenizer object in which characters are specified as ignored characters. + */ + public PatternTokenizer setIgnorableCharacters(UnicodeSet ignorableCharacters) { + this.ignorableCharacters = (UnicodeSet) ignorableCharacters.clone(); + needingQuoteCharacters = null; + return this; + } + public UnicodeSet getSyntaxCharacters() { + return (UnicodeSet) syntaxCharacters.clone(); + } + public UnicodeSet getExtraQuotingCharacters() { + return (UnicodeSet) extraQuotingCharacters.clone(); + } + /** + * Sets the characters to be interpreted as syntax characters in parsing, eg new UnicodeSet("[:pattern_syntax:]") + * @param syntaxCharacters Characters to be set as syntax characters. + * @return A PatternTokenizer object in which characters are specified as syntax characters. 
+ */ + public PatternTokenizer setSyntaxCharacters(UnicodeSet syntaxCharacters) { + this.syntaxCharacters = (UnicodeSet) syntaxCharacters.clone(); + needingQuoteCharacters = null; + return this; + } + /** + * Sets the extra characters to be quoted in literals + * @param syntaxCharacters Characters to be set as extra quoting characters. + * @return A PatternTokenizer object in which characters are specified as extra quoting characters. + */ + public PatternTokenizer setExtraQuotingCharacters(UnicodeSet syntaxCharacters) { + this.extraQuotingCharacters = (UnicodeSet) syntaxCharacters.clone(); + needingQuoteCharacters = null; + return this; + } + + public UnicodeSet getEscapeCharacters() { + return (UnicodeSet) escapeCharacters.clone(); + } + /** + * Set characters to be escaped in literals, in quoteLiteral and normalize, eg new UnicodeSet("[^\\u0020-\\u007E]"); + * @param escapeCharacters Characters to be set as escape characters. + * @return A PatternTokenizer object in which characters are specified as escape characters. 
+ */ + public PatternTokenizer setEscapeCharacters(UnicodeSet escapeCharacters) { + this.escapeCharacters = (UnicodeSet) escapeCharacters.clone(); + return this; + } + public boolean isUsingQuote() { + return usingQuote; + } + public PatternTokenizer setUsingQuote(boolean usingQuote) { + this.usingQuote = usingQuote; + needingQuoteCharacters = null; + return this; + } + public boolean isUsingSlash() { + return usingSlash; + } + public PatternTokenizer setUsingSlash(boolean usingSlash) { + this.usingSlash = usingSlash; + needingQuoteCharacters = null; + return this; + } + // public UnicodeSet getQuoteCharacters() { +// return (UnicodeSet) quoteCharacters.clone(); +// } +// public PatternTokenizer setQuoteCharacters(UnicodeSet quoteCharacters) { +// this.quoteCharacters = (UnicodeSet) quoteCharacters.clone(); +// needingQuoteCharacters = null; +// return this; +// } + public int getLimit() { + return limit; + } + public PatternTokenizer setLimit(int limit) { + this.limit = limit; + return this; + } + public int getStart() { + return start; + } + public PatternTokenizer setStart(int start) { + this.start = start; + return this; + } + + public PatternTokenizer setPattern(CharSequence pattern) { + return setPattern(pattern.toString()); + } + + public PatternTokenizer setPattern(String pattern) { + if (pattern == null) { + throw new IllegalArgumentException("Inconsistent arguments"); + } + this.start = 0; + this.limit = pattern.length(); + this.pattern = pattern; + return this; + } + + public static final char SINGLE_QUOTE = '\''; + public static final char BACK_SLASH = '\\'; + private static int NO_QUOTE = -1, IN_QUOTE = -2; + + public String quoteLiteral(CharSequence string) { + return quoteLiteral(string.toString()); + } + + /** + * Quote a literal string, using the available settings. Thus syntax characters, quote characters, and ignorable characters will be put into quotes. + * @param string String passed to quote a literal string. 
+ * @return A string using the available settings will place syntax, quote, or ignorable characters into quotes. + */ + public String quoteLiteral(String string) { + if (needingQuoteCharacters == null) { + needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters).addAll(extraQuotingCharacters); // .addAll(quoteCharacters) + if (usingSlash) needingQuoteCharacters.add(BACK_SLASH); + if (usingQuote) needingQuoteCharacters.add(SINGLE_QUOTE); + } + StringBuffer result = new StringBuffer(); + int quotedChar = NO_QUOTE; + int cp; + for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(string, i); + if (escapeCharacters.contains(cp)) { + // we may have to fix up previous characters + if (quotedChar == IN_QUOTE) { + result.append(SINGLE_QUOTE); + quotedChar = NO_QUOTE; + } + appendEscaped(result, cp); + continue; + } + + if (needingQuoteCharacters.contains(cp)) { + // if we have already started a quote + if (quotedChar == IN_QUOTE) { + UTF16.append(result, cp); + if (usingQuote && cp == SINGLE_QUOTE) { // double it + result.append(SINGLE_QUOTE); + } + continue; + } + // otherwise not already in quote + if (usingSlash) { + result.append(BACK_SLASH); + UTF16.append(result, cp); + continue; + } + if (usingQuote) { + if (cp == SINGLE_QUOTE) { // double it and continue + result.append(SINGLE_QUOTE); + result.append(SINGLE_QUOTE); + continue; + } + result.append(SINGLE_QUOTE); + UTF16.append(result, cp); + quotedChar = IN_QUOTE; + continue; + } + // we have no choice but to use \\u or \\U + appendEscaped(result, cp); + continue; + } + // otherwise cp doesn't need quoting + // we may have to fix up previous characters + if (quotedChar == IN_QUOTE) { + result.append(SINGLE_QUOTE); + quotedChar = NO_QUOTE; + } + UTF16.append(result, cp); + } + // all done. 
+ // we may have to fix up previous characters + if (quotedChar == IN_QUOTE) { + result.append(SINGLE_QUOTE); + } + return result.toString(); + } + + private void appendEscaped(StringBuffer result, int cp) { + if (cp <= 0xFFFF) { + result.append("\\u").append(Utility.hex(cp,4)); + } else { + result.append("\\U").append(Utility.hex(cp,8)); + } + } + + public String normalize() { + int oldStart = start; + StringBuffer result = new StringBuffer(); + StringBuffer buffer = new StringBuffer(); + while (true) { + buffer.setLength(0); + int status = next(buffer); + if (status == DONE) { + start = oldStart; + return result.toString(); + } + if (status != SYNTAX) { + result.append(quoteLiteral(buffer)); + } else { + result.append(buffer); + } + } + } + + public static final int DONE = 0, SYNTAX = 1, LITERAL = 2, BROKEN_QUOTE = 3, BROKEN_ESCAPE = 4, UNKNOWN = 5; + + private static final int AFTER_QUOTE = -1, NONE = 0, START_QUOTE = 1, NORMAL_QUOTE = 2, SLASH_START = 3, HEX = 4; + + public int next(StringBuffer buffer) { + if (start >= limit) return DONE; + int status = UNKNOWN; + int lastQuote = UNKNOWN; + int quoteStatus = NONE; + int hexCount = 0; + int hexValue = 0; + int cp; + main: + for (int i = start; i < limit; i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(pattern, i); + // if we are in a quote, then handle it. 
+ switch (quoteStatus) { + case SLASH_START: + switch (cp) { + case 'u': + quoteStatus = HEX; + hexCount = 4; + hexValue = 0; + continue main; + case 'U': + quoteStatus = HEX; + hexCount = 8; + hexValue = 0; + continue main; + default: + if (usingSlash) { + UTF16.append(buffer, cp); + quoteStatus = NONE; + continue main; + } else { + buffer.append(BACK_SLASH); + quoteStatus = NONE; + } + } + break; // fall through to NONE + case HEX: + hexValue <<= 4; + hexValue += cp; + switch (cp) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + hexValue -= '0'; break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + hexValue -= 'a' - 10; break; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + hexValue -= 'A' - 10; break; + default: + start = i; + return BROKEN_ESCAPE; + } + --hexCount; + if (hexCount == 0) { + quoteStatus = NONE; + UTF16.append(buffer, hexValue); + } + continue main; + case AFTER_QUOTE: + // see if we get another quote character + // if we just ended a quote BUT the following character is the lastQuote character, then we have a situation like '...''...', so we restart the quote + if (cp == lastQuote) { + UTF16.append(buffer, cp); + quoteStatus = NORMAL_QUOTE; + continue main; + } + quoteStatus = NONE; + break; // fall through to NONE + case START_QUOTE: + // if we are at the very start of a quote, and we hit another quote mark then we emit a literal quote character and end the quote + if (cp == lastQuote) { + UTF16.append(buffer, cp); + quoteStatus = NONE; // get out of quote, with no trace remaining + continue; + } + // otherwise get into quote + UTF16.append(buffer, cp); + quoteStatus = NORMAL_QUOTE; + continue main; + case NORMAL_QUOTE: + if (cp == lastQuote) { + quoteStatus = AFTER_QUOTE; // get out of quote + continue main; + } + UTF16.append(buffer, cp); + continue main; + } + + if (ignorableCharacters.contains(cp)) { + continue; + } + // do syntax characters + if 
(syntaxCharacters.contains(cp)) { + if (status == UNKNOWN) { + UTF16.append(buffer, cp); + start = i + UTF16.getCharCount(cp); + return SYNTAX; + } else { // LITERAL, so back up and break + start = i; + return status; + } + } + // otherwise it is a literal; keep on going + status = LITERAL; + if (cp == BACK_SLASH) { + quoteStatus = SLASH_START; + continue; + } else if (usingQuote && cp == SINGLE_QUOTE) { + lastQuote = cp; + quoteStatus = START_QUOTE; + continue; + } + // normal literals + UTF16.append(buffer, cp); + } + // handle final cleanup + start = limit; + switch (quoteStatus) { + case HEX: + status = BROKEN_ESCAPE; + break; + case SLASH_START: + if (usingSlash) { + status = BROKEN_ESCAPE; + } else { + buffer.append(BACK_SLASH); + } + break; + case START_QUOTE: case NORMAL_QUOTE: + status = BROKEN_QUOTE; + break; + } + return status; + } + + +} +//eof diff --git a/main/classes/core/src/com/ibm/icu/impl/PluralRulesLoader.java b/main/classes/core/src/com/ibm/icu/impl/PluralRulesLoader.java new file mode 100644 index 00000000000..51b1e5d3f47 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/PluralRulesLoader.java @@ -0,0 +1,209 @@ +/* + ******************************************************************************* + * Copyright (C) 2008-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.text.ParseException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.MissingResourceException; +import java.util.Set; +import java.util.TreeMap; + +import com.ibm.icu.text.PluralRules; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/** + * Loader for plural rules data. 
+ */ +public class PluralRulesLoader { + private final Map rulesIdToRules; + private Map localeIdToRulesId; // lazy init, use + // getLocaleIdToRulesIdMap to + // access + private Map rulesIdToEquivalentULocale; // lazy init, use + // getRulesIdToEquivalentULocaleMap + // to access + + /** + * Access through singleton. + */ + private PluralRulesLoader() { + rulesIdToRules = new HashMap(); + } + + /** + * Returns the locales for which we have plurals data. Utility for testing. + */ + public ULocale[] getAvailableULocales() { + Set keys = getLocaleIdToRulesIdMap().keySet(); + ULocale[] locales = new ULocale[keys.size()]; + int n = 0; + for (Iterator iter = keys.iterator(); iter.hasNext();) { + locales[n++] = ULocale.createCanonical(iter.next()); + } + return locales; + } + + /** + * Returns the functionally equivalent locale. + */ + public ULocale getFunctionalEquivalent(ULocale locale, boolean[] isAvailable) { + if (isAvailable != null && isAvailable.length > 0) { + String localeId = ULocale.canonicalize(locale.getBaseName()); + Map idMap = getLocaleIdToRulesIdMap(); + isAvailable[0] = idMap.containsKey(localeId); + } + + String rulesId = getRulesIdForLocale(locale); + if (rulesId == null || rulesId.trim().length() == 0) { + return ULocale.ROOT; // ultimate fallback + } + + ULocale result = getRulesIdToEquivalentULocaleMap().get( + rulesId); + if (result == null) { + return ULocale.ROOT; // ultimate fallback + } + + return result; + } + + /** + * Returns the lazily-constructed map. + */ + private Map getLocaleIdToRulesIdMap() { + checkBuildRulesIdMaps(); + return localeIdToRulesId; + } + + /** + * Returns the lazily-constructed map. + */ + private Map getRulesIdToEquivalentULocaleMap() { + checkBuildRulesIdMaps(); + return rulesIdToEquivalentULocale; + } + + /** + * Lazily constructs the localeIdToRulesId and rulesIdToEquivalentULocale + * maps if necessary. These exactly reflect the contents of the locales + * resource in plurals.res. 
+ */ + private void checkBuildRulesIdMaps() { + if (localeIdToRulesId == null) { + try { + UResourceBundle pluralb = getPluralBundle(); + UResourceBundle localeb = pluralb.get("locales"); + localeIdToRulesId = new TreeMap(); // sort for + // convenience + // of + // getAvailableULocales + rulesIdToEquivalentULocale = new HashMap(); // not + // visible + for (int i = 0; i < localeb.getSize(); ++i) { + UResourceBundle b = localeb.get(i); + String id = b.getKey(); + String value = b.getString().intern(); + localeIdToRulesId.put(id, value); + + if (!rulesIdToEquivalentULocale.containsKey(value)) { + rulesIdToEquivalentULocale.put(value, new ULocale(id)); + } + } + } catch (MissingResourceException e) { + localeIdToRulesId = Collections.emptyMap(); // dummy so we don't + // try again, can + // read + rulesIdToEquivalentULocale = Collections.emptyMap(); + } + } + } + + /** + * Gets the rulesId from the locale,with locale fallback. If there is no + * rulesId, return null. The rulesId might be the empty string if the rule + * is the default rule. + */ + public String getRulesIdForLocale(ULocale locale) { + Map idMap = getLocaleIdToRulesIdMap(); + String localeId = ULocale.canonicalize(locale.getBaseName()); + String rulesId = null; + while (null == (rulesId = idMap.get(localeId))) { + int ix = localeId.lastIndexOf("_"); + if (ix == -1) { + break; + } + localeId = localeId.substring(0, ix); + } + return rulesId; + } + + /** + * Gets the rule from the rulesId. If there is no rule for this rulesId, + * return null. 
+ */ + public PluralRules getRulesForRulesId(String rulesId) { + PluralRules rules = rulesIdToRules.get(rulesId); + if (rules == null) { + try { + UResourceBundle pluralb = getPluralBundle(); + UResourceBundle rulesb = pluralb.get("rules"); + UResourceBundle setb = rulesb.get(rulesId); + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < setb.getSize(); ++i) { + UResourceBundle b = setb.get(i); + if (i > 0) { + sb.append("; "); + } + sb.append(b.getKey()); + sb.append(": "); + sb.append(b.getString()); + } + rules = PluralRules.parseDescription(sb.toString()); + } catch (ParseException e) { + } catch (MissingResourceException e) { + } + rulesIdToRules.put(rulesId, rules); // put even if null + } + return rules; + } + + /** + * Return the plurals resource. Note MissingResourceException is unchecked, + * listed here for clarity. Callers should handle this exception. + */ + public UResourceBundle getPluralBundle() throws MissingResourceException { + return ICUResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, "plurals", + ICUResourceBundle.ICU_DATA_CLASS_LOADER, true); + } + + /** + * Returns the plural rules for the the locale. If we don't have data, + * com.ibm.icu.text.PluralRules.DEFAULT is returned. + */ + public PluralRules forLocale(ULocale locale) { + String rulesId = getRulesIdForLocale(locale); + if (rulesId == null || rulesId.trim().length() == 0) { + return PluralRules.DEFAULT; + } + PluralRules rules = getRulesForRulesId(rulesId); + if (rules == null) { + rules = PluralRules.DEFAULT; + } + return rules; + } + + /** + * The only instance of the loader. 
+ */ + public static final PluralRulesLoader loader = new PluralRulesLoader(); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java b/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java new file mode 100644 index 00000000000..63784850a46 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java @@ -0,0 +1,559 @@ +/* + ****************************************************************************** + * Copyright (C) 1996-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +/** + * Store bits (Unicode character properties) in bit set vectors. + * + * This is a port of the C++ class UPropsVectors from ICU4C + * + * @author Shaopeng Jia + * @internal + */ + +package com.ibm.icu.impl; + +import java.util.Arrays; +import java.util.Comparator; + +/** + * Unicode Properties Vectors associated with code point ranges. + * + * Rows of primitive integers in a contiguous array store the range limits and + * the properties vectors. + * + * In each row, row[0] contains the start code point and row[1] contains the + * limit code point, which is the start of the next range. + * + * Initially, there is only one range [0..0x110000] with values 0. + * + * It would be possible to store only one range boundary per row, but + * self-contained rows allow to later sort them by contents. + */ +public class PropsVectors { + private int v[]; + private int columns; // number of columns, plus two for start + // and limit values + private int maxRows; + private int rows; + private int prevRow; // search optimization: remember last row seen + private boolean isCompacted; + + // internal function to compare elements in v and target. 
Return true iff + // elements in v starting from index1 to index1 + length - 1 + // are exactly the same as elements in target + // starting from index2 to index2 + length - 1 + private boolean areElementsSame(int index1, int[] target, int index2, + int length) { + for (int i = 0; i < length; ++i) { + if (v[index1 + i] != target[index2 + i]) { + return false; + } + } + return true; + } + + // internal function which given rangeStart, returns + // index where v[index]<=rangeStart= v[index]) { + if (rangeStart < v[index + 1]) { + // same row as last seen + return index; + } else { + index += columns; + if (rangeStart < v[index + 1]) { + ++prevRow; + return index; + } else { + index += columns; + if (rangeStart < v[index + 1]) { + prevRow += 2; + return index; + } else if ((rangeStart - v[index + 1]) < 10) { + // we are close, continue looping + prevRow += 2; + do { + ++prevRow; + index += columns; + } while (rangeStart >= v[index + 1]); + return index; + } + } + } + } else if (rangeStart < v[1]) { + // the very first row + prevRow = 0; + return 0; + } + + // do a binary search for the start of the range + int start = 0; + int mid = 0; + int limit = rows; + while (start < limit - 1) { + mid = (start + limit) / 2; + index = columns * mid; + if (rangeStart < v[index]) { + limit = mid; + } else if (rangeStart < v[index + 1]) { + prevRow = mid; + return index; + } else { + start = mid; + } + } + + // must be found because all ranges together always cover + // all of Unicode + prevRow = start; + index = start * columns; + return index; + } + + /* + * Special pseudo code points for storing the initialValue and the + * errorValue which are used to initialize a Trie or similar. 
+ */ + public final static int FIRST_SPECIAL_CP = 0x110000; + public final static int INITIAL_VALUE_CP = 0x110000; + public final static int ERROR_VALUE_CP = 0x110001; + public final static int MAX_CP = 0x110001; + + public final static int INITIAL_ROWS = 1 << 12; + public final static int MEDIUM_ROWS = 1 << 16; + public final static int MAX_ROWS = MAX_CP + 1; + + /* + * Constructor. + * @param numOfColumns Number of value integers (32-bit int) per row. + */ + public PropsVectors(int numOfColumns) { + if (numOfColumns < 1) { + throw new IllegalArgumentException("numOfColumns need to be no " + + "less than 1; but it is " + numOfColumns); + } + columns = numOfColumns + 2; // count range start and limit columns + v = new int[INITIAL_ROWS * columns]; + maxRows = INITIAL_ROWS; + rows = 2 + (MAX_CP - FIRST_SPECIAL_CP); + prevRow = 0; + isCompacted = false; + v[0] = 0; + v[1] = 0x110000; + int index = columns; + for (int cp = FIRST_SPECIAL_CP; cp <= MAX_CP; ++cp) { + v[index] = cp; + v[index + 1] = cp + 1; + index += columns; + } + } + + /* + * In rows for code points [start..end], select the column, reset the mask + * bits and set the value bits (ANDed with the mask). 
+ * + * @throws IllegalArgumentException + * + * @throws IllegalStateException + * + * @throws IndexOutOfBoundsException + */ + public void setValue(int start, int end, int column, int value, int mask) { + if (start < 0 || start > end || end > MAX_CP || column < 0 + || column >= (columns - 2)) { + throw new IllegalArgumentException(); + } + if (isCompacted) { + throw new IllegalStateException("Shouldn't be called after" + + "compact()!"); + } + + int firstRow, lastRow; + int limit = end + 1; + boolean splitFirstRow, splitLastRow; + // skip range start and limit columns + column += 2; + value &= mask; + + // find the rows whose ranges overlap with the input range + // find the first and last row, always successful + firstRow = findRow(start); + lastRow = findRow(end); + + /* + * Rows need to be split if they partially overlap with the input range + * (only possible for the first and last rows) and if their value + * differs from the input value. + */ + splitFirstRow = (start != v[firstRow] && value != (v[firstRow + column] & mask)); + splitLastRow = (limit != v[lastRow + 1] && value != (v[lastRow + column] & mask)); + + // split first/last rows if necessary + if (splitFirstRow || splitLastRow) { + int rowsToExpand = 0; + if (splitFirstRow) { + ++rowsToExpand; + } + if (splitLastRow) { + ++rowsToExpand; + } + int newMaxRows = 0; + if ((rows + rowsToExpand) > maxRows) { + if (maxRows < MEDIUM_ROWS) { + newMaxRows = MEDIUM_ROWS; + } else if (maxRows < MAX_ROWS) { + newMaxRows = MAX_ROWS; + } else { + throw new IndexOutOfBoundsException( + "MAX_ROWS exceeded! 
Increase it to a higher value" + + "in the implementation"); + } + int[] temp = new int[newMaxRows * columns]; + System.arraycopy(v, 0, temp, 0, rows * columns); + v = temp; + maxRows = newMaxRows; + } + + // count the number of row cells to move after the last row, + // and move them + int count = (rows * columns) - (lastRow + columns); + if (count > 0) { + System.arraycopy(v, lastRow + columns, v, lastRow + + (1 + rowsToExpand) * columns, count); + } + rows += rowsToExpand; + + // split the first row, and move the firstRow pointer + // to the second part + if (splitFirstRow) { + // copy all affected rows up one and move the lastRow pointer + count = lastRow - firstRow + columns; + System.arraycopy(v, firstRow, v, firstRow + columns, count); + lastRow += columns; + + // split the range and move the firstRow pointer + v[firstRow + 1] = v[firstRow + columns] = start; + firstRow += columns; + } + + // split the last row + if (splitLastRow) { + // copy the last row data + System.arraycopy(v, lastRow, v, lastRow + columns, columns); + + // split the range and move the firstRow pointer + v[lastRow + 1] = v[lastRow + columns] = limit; + } + } + + // set the "row last seen" to the last row for the range + prevRow = lastRow / columns; + + // set the input value in all remaining rows + firstRow += column; + lastRow += column; + mask = ~mask; + for (;;) { + v[firstRow] = (v[firstRow] & mask) | value; + if (firstRow == lastRow) { + break; + } + firstRow += columns; + } + } + + /* + * Always returns 0 if called after compact(). + */ + public int getValue(int c, int column) { + if (isCompacted || c < 0 || c > MAX_CP || column < 0 + || column >= (columns - 2)) { + return 0; + } + int index = findRow(c); + return v[index + 2 + column]; + } + + /* + * Returns an array which contains value elements + * in row rowIndex. 
+ * + * @throws IllegalStateException + * @throws IllegalArgumentException + */ + public int[] getRow(int rowIndex) { + if (isCompacted) { + throw new IllegalStateException( + "Illegal Invocation of the method after compact()"); + } + if (rowIndex < 0 || rowIndex > rows) { + throw new IllegalArgumentException("rowIndex out of bound!"); + } + int[] rowToReturn = new int[columns - 2]; + System.arraycopy(v, rowIndex * columns + 2, rowToReturn, 0, + columns - 2); + return rowToReturn; + } + + /* + * Returns an int which is the start codepoint + * in row rowIndex. + * + * @throws IllegalStateException + * + * @throws IllegalArgumentException + */ + public int getRowStart(int rowIndex) { + if (isCompacted) { + throw new IllegalStateException( + "Illegal Invocation of the method after compact()"); + } + if (rowIndex < 0 || rowIndex > rows) { + throw new IllegalArgumentException("rowIndex out of bound!"); + } + return v[rowIndex * columns]; + } + + /* + * Returns an int which is the limit codepoint + * minus 1 in row rowIndex. + * + * @throws IllegalStateException + * + * @throws IllegalArgumentException + */ + public int getRowEnd(int rowIndex) { + if (isCompacted) { + throw new IllegalStateException( + "Illegal Invocation of the method after compact()"); + } + if (rowIndex < 0 || rowIndex > rows) { + throw new IllegalArgumentException("rowIndex out of bound!"); + } + return v[rowIndex * columns + 1] - 1; + } + + /* + * Compact the vectors: + * - modify the memory + * - keep only unique vectors + * - store them contiguously from the beginning of the memory + * - for each (non-unique) row, call the respective function in + * CompactHandler + * + * The handler's rowIndex is the index of the row in the compacted + * memory block. Therefore, it starts at 0 increases in increments of the + * columns value. + * + * In a first phase, only special values are delivered (each exactly once). 
+ * Then CompactHandler::startRealValues() is called + * where rowIndex is the length of the compacted array. + * Then, in the second phase, the CompactHandler::setRowIndexForRange() is + * called for each row of real values. + */ + public void compact(CompactHandler compactor) { + if (isCompacted) { + return; + } + + // Set the flag now: Sorting and compacting destroys the builder + // data structure. + isCompacted = true; + int valueColumns = columns - 2; // not counting start & limit + + // sort the properties vectors to find unique vector values + Integer[] indexArray = new Integer[rows]; + for (int i = 0; i < rows; ++i) { + indexArray[i] = new Integer(columns * i); + } + + Arrays.sort(indexArray, new Comparator() { + public int compare(Integer o1, Integer o2) { + int indexOfRow1 = o1.intValue(); + int indexOfRow2 = o2.intValue(); + int count = columns; // includes start/limit columns + + // start comparing after start/limit + // but wrap around to them + int index = 2; + do { + if (v[indexOfRow1 + index] != v[indexOfRow2 + index]) { + return v[indexOfRow1 + index] < v[indexOfRow2 + index] ? -1 + : 1; + } + if (++index == columns) { + index = 0; + } + } while (--count > 0); + + return 0; + } + }); + + /* + * Find and set the special values. This has to do almost the same work + * as the compaction below, to find the indexes where the special-value + * rows will move. 
+ */ + int count = -valueColumns; + for (int i = 0; i < rows; ++i) { + int start = v[indexArray[i].intValue()]; + + // count a new values vector if it is different + // from the current one + if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, v, + indexArray[i-1].intValue() + 2, valueColumns)) { + count += valueColumns; + } + + if (start == INITIAL_VALUE_CP) { + compactor.setRowIndexForInitialValue(count); + } else if (start == ERROR_VALUE_CP) { + compactor.setRowIndexForErrorValue(count); + } + } + + // count is at the beginning of the last vector, + // add valueColumns to include that last vector + count += valueColumns; + + // Call the handler once more to signal the start of + // delivering real values. + compactor.startRealValues(count); + + /* + * Move vector contents up to a contiguous array with only unique + * vector values, and call the handler function for each vector. + * + * This destroys the Properties Vector structure and replaces it + * with an array of just vector values. + */ + int[] temp = new int[count]; + count = -valueColumns; + for (int i = 0; i < rows; ++i) { + int start = v[indexArray[i].intValue()]; + int limit = v[indexArray[i].intValue() + 1]; + + // count a new values vector if it is different + // from the current one + if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, + temp, count, valueColumns)) { + count += valueColumns; + System.arraycopy(v, indexArray[i].intValue() + 2, temp, count, + valueColumns); + } + + if (start < FIRST_SPECIAL_CP) { + compactor.setRowIndexForRange(start, limit - 1, count); + } + } + v = temp; + + // count is at the beginning of the last vector, + // add one to include that last vector + rows = count / valueColumns + 1; + } + + /* + * Get the vectors array after calling compact(). 
+ * + * @throws IllegalStateException + */ + public int[] getCompactedArray() { + if (!isCompacted) { + throw new IllegalStateException( + "Illegal Invocation of the method before compact()"); + } + return v; + } + + /* + * Get the number of rows for the compacted array. + * + * @throws IllegalStateException + */ + public int getCompactedRows() { + if (!isCompacted) { + throw new IllegalStateException( + "Illegal Invocation of the method before compact()"); + } + return rows; + } + + /* + * Get the number of columns for the compacted array. + * + * @throws IllegalStateException + */ + public int getCompactedColumns() { + if (!isCompacted) { + throw new IllegalStateException( + "Illegal Invocation of the method before compact()"); + } + return columns - 2; + } + + /* + * Call compact(), create a IntTrie with indexes into the compacted + * vectors array. + */ + public IntTrie compactToTrieWithRowIndexes() { + PVecToTrieCompactHandler compactor = new PVecToTrieCompactHandler(); + compact(compactor); + return compactor.builder.serialize(new DefaultGetFoldedValue( + compactor.builder), new DefaultGetFoldingOffset()); + } + + // inner class implementation of Trie.DataManipulate + private static class DefaultGetFoldingOffset implements Trie.DataManipulate { + public int getFoldingOffset(int value) { + return value; + } + } + + // inner class implementation of TrieBuilder.DataManipulate + private static class DefaultGetFoldedValue implements + TrieBuilder.DataManipulate { + private IntTrieBuilder builder; + + public DefaultGetFoldedValue(IntTrieBuilder inBuilder) { + builder = inBuilder; + } + + public int getFoldedValue(int start, int offset) { + int initialValue = builder.m_initialValue_; + int limit = start + 0x400; + while (start < limit) { + boolean[] inBlockZero = new boolean[1]; + int value = builder.getValue(start, inBlockZero); + if (inBlockZero[0]) { + start += TrieBuilder.DATA_BLOCK_LENGTH; + } else if (value != initialValue) { + return offset; + } else { + 
++start; + } + } + return 0; + } + } + + public static interface CompactHandler { + public void setRowIndexForRange(int start, int end, int rowIndex); + public void setRowIndexForInitialValue(int rowIndex); + public void setRowIndexForErrorValue(int rowIndex); + public void startRealValues(int rowIndex); + } +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/Punycode.java b/main/classes/core/src/com/ibm/icu/impl/Punycode.java new file mode 100644 index 00000000000..2b9f7265a9c --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Punycode.java @@ -0,0 +1,476 @@ +/* + ******************************************************************************* + * Copyright (C) 2003-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.StringPrepParseException; +import com.ibm.icu.text.UTF16; + +/** + * Ported code from ICU punycode.c + * @author ram + */ + +/* Package Private class */ +public final class Punycode { + + /* Punycode parameters for Bootstring */ + private static final int BASE = 36; + private static final int TMIN = 1; + private static final int TMAX = 26; + private static final int SKEW = 38; + private static final int DAMP = 700; + private static final int INITIAL_BIAS = 72; + private static final int INITIAL_N = 0x80; + + /* "Basic" Unicode/ASCII code points */ + private static final int HYPHEN = 0x2d; + private static final int DELIMITER = HYPHEN; + + private static final int ZERO = 0x30; + //private static final int NINE = 0x39; + + private static final int SMALL_A = 0x61; + private static final int SMALL_Z = 0x7a; + + private static final int CAPITAL_A = 0x41; + private static final int CAPITAL_Z = 0x5a; + private static final int MAX_CP_COUNT = 200; + //private static final int UINT_MAGIC = 0x80000000; + 
//private static final long ULONG_MAGIC = 0x8000000000000000L; + + private static int adaptBias(int delta, int length, boolean firstTime){ + if(firstTime){ + delta /=DAMP; + }else{ + delta /= 2; + } + delta += delta/length; + + int count=0; + for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { + delta/=(BASE-TMIN); + } + + return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); + } + + /** + * basicToDigit[] contains the numeric value of a basic code + * point (for use in representing integers) in the range 0 to + * BASE-1, or -1 if b is does not represent a value. + */ + static final int[] basicToDigit= new int[]{ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, + + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + + ///CLOVER:OFF + private static char asciiCaseMap(char b, boolean uppercase) { + if(uppercase) { + if(SMALL_A<=b && b<=SMALL_Z) { + b-=(SMALL_A-CAPITAL_A); + } + } else { + if(CAPITAL_A<=b && b<=CAPITAL_Z) { + b+=(SMALL_A-CAPITAL_A); + } + } + return b; + } + ///CLOVER:ON + /** + * digitToBasic() returns the basic code point 
whose value + * (when used for representing integers) is d, which must be in the + * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is + * nonzero, in which case the uppercase form is used. + */ + private static char digitToBasic(int digit, boolean uppercase) { + /* 0..25 map to ASCII a..z or A..Z */ + /* 26..35 map to ASCII 0..9 */ + if(digit<26) { + if(uppercase) { + return (char)(CAPITAL_A+digit); + } else { + return (char)(SMALL_A+digit); + } + } else { + return (char)((ZERO-26)+digit); + } + } + /** + * Converts Unicode to Punycode. + * The input string must not contain single, unpaired surrogates. + * The output will be represented as an array of ASCII code points. + * + * @param src The source of the String Buffer passed. + * @param caseFlags The boolean array of case flags. + * @return An array of ASCII code points. + */ + public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws StringPrepParseException{ + + int[] cpBuffer = new int[MAX_CP_COUNT]; + int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; + char c, c2; + int srcLength = src.length(); + int destCapacity = MAX_CP_COUNT; + char[] dest = new char[destCapacity]; + StringBuffer result = new StringBuffer(); + /* + * Handle the basic code points and + * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): + */ + srcCPCount=destLength=0; + + for(j=0; j0) { + if(destLength state to , but guard against overflow: + */ + if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { + throw new IllegalStateException("Internal program error"); + } + delta+=(m-n)*(handledCPCount+1); + n=m; + + /* Encode a sequence of same code points n */ + for(j=0; jTMAX) { + t=TMAX; + } + */ + + t=k-bias; + if(t=(bias+TMAX)) { + t=TMAX; + } + + if(q= CAPITAL_Z); + } + ///CLOVER:ON + private static boolean isSurrogate(int ch){ + return (((ch)&0xfffff800)==0xd800); + } + /** + * Converts Punycode to Unicode. 
+ * The Unicode string will be at most as long as the Punycode string. + * + * @param src The source of the string buffer being passed. + * @param caseFlags The array of boolean case flags. + * @return StringBuffer string. + */ + public static StringBuffer decode(StringBuffer src, boolean[] caseFlags) + throws StringPrepParseException{ + int srcLength = src.length(); + StringBuffer result = new StringBuffer(); + int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t, + destCPCount, firstSupplementaryIndex, cpLength; + char b; + int destCapacity = MAX_CP_COUNT; + char[] dest = new char[destCapacity]; + + /* + * Handle the basic code points: + * Let basicLength be the number of input code points + * before the last delimiter, or 0 if there is none, + * then copy the first basicLength code points to the output. + * + * The two following loops iterate backward. + */ + for(j=srcLength; j>0;) { + if(src.charAt(--j)==DELIMITER) { + break; + } + } + destLength=basicLength=destCPCount=j; + + while(j>0) { + b=src.charAt(--j); + if(!isBasic(b)) { + throw new StringPrepParseException("Illegal char found", StringPrepParseException.INVALID_CHAR_FOUND); + } + + if(j0 ? 
basicLength+1 : 0; in=srcLength) { + throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); + } + + digit=basicToDigit[src.charAt(in++) & 0xFF]; + if(digit<0) { + throw new StringPrepParseException("Invalid char found", StringPrepParseException.INVALID_CHAR_FOUND); + } + if(digit>(0x7fffffff-i)/w) { + /* integer overflow */ + throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); + } + + i+=digit*w; + t=k-bias; + if(t=(bias+TMAX)) { + t=TMAX; + } + if(digit0x7fffffff/(BASE-t)) { + /* integer overflow */ + throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); + } + w*=BASE-t; + } + + /* + * Modification from sample code: + * Increments destCPCount here, + * where needed instead of in for() loop tail. + */ + ++destCPCount; + bias=adaptBias(i-oldi, destCPCount, (oldi==0)); + + /* + * i was supposed to wrap around from (incremented) destCPCount to 0, + * incrementing n each time, so we'll fix that now: + */ + if(i/destCPCount>(0x7fffffff-n)) { + /* integer overflow */ + throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); + } + + n+=i/destCPCount; + i%=destCPCount; + /* not needed for Punycode: */ + /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ + + if(n>0x10ffff || isSurrogate(n)) { + /* Unicode code point overflow */ + throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND); + } + + /* Insert n at position i of the output: */ + cpLength=UTF16.getCharCount(n); + if((destLength+cpLength)1) { + firstSupplementaryIndex=codeUnitIndex; + } else { + ++firstSupplementaryIndex; + } + } else { + codeUnitIndex=firstSupplementaryIndex; + codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex); + } + + /* use the UChar index codeUnitIndex instead of the code point index i */ + 
if(codeUnitIndex 0 && (offset = toAppendTo.toString().indexOf(dayString)) >= 0 ) { + // fieldPosition.getField() was found in dayString, offset start & end based on final position of dayString + fieldPosition.setBeginIndex( fieldPosition.getBeginIndex() + offset ); + fieldPosition.setEndIndex( fieldPosition.getEndIndex() + offset ); + } else if (timePos.getEndIndex() > 0 && (offset = toAppendTo.toString().indexOf(timeString)) >= 0) { + // fieldPosition.getField() was found in timeString, offset start & end based on final position of timeString + fieldPosition.setBeginIndex( timePos.getBeginIndex() + offset ); + fieldPosition.setEndIndex( timePos.getEndIndex() + offset ); + } + } + return toAppendTo; + } + + /* (non-Javadoc) + * @see com.ibm.icu.text.DateFormat#parse(java.lang.String, com.ibm.icu.util.Calendar, java.text.ParsePosition) + */ + public void parse(String text, Calendar cal, ParsePosition pos) { + throw new UnsupportedOperationException("Relative Date parse is not implemented yet"); + } + + private DateFormat fDateFormat; // the held date format + private DateFormat fTimeFormat; // the held time format + private MessageFormat fCombinedFormat; // the {0} {1} format. + + int fDateStyle; + int fTimeStyle; + ULocale fLocale; + + private transient URelativeString fDates[] = null; // array of strings + + + /** + * Get the string at a specific offset. + * @param day day offset ( -1, 0, 1, etc.. ) + * @return the string, or NULL if none at that location. 
+ */ + private String getStringForDay(int day) { + if(fDates == null) { + loadDates(); + } + for(int i=0;i datesSet = new TreeSet(new Comparator() { + public int compare(URelativeString r1, URelativeString r2) { + + if(r1.offset == r2.offset) { + return 0; + } else if(r1.offset < r2.offset) { + return -1; + } else { + return 1; + } + } + }) ; + + for(UResourceBundleIterator i = rb.getIterator();i.hasNext();) { + UResourceBundle line = i.next(); + + String k = line.getKey(); + String v = line.getString(); + URelativeString rs = new URelativeString(k,v); + datesSet.add(rs); + } + fDates = datesSet.toArray(new URelativeString[0]); + } + + /** + * @return the number of days in "until-now" + */ + private static int dayDifference(Calendar until) { + Calendar nowCal = (Calendar)until.clone(); + Date nowDate = new Date(System.currentTimeMillis()); + nowCal.clear(); + nowCal.setTime(nowDate); + int dayDiff = until.get(Calendar.JULIAN_DAY) - nowCal.get(Calendar.JULIAN_DAY); + return dayDiff; + } + + /** + * initializes fCalendar from parameters. Returns fCalendar as a convenience. + * @param zone Zone to be adopted, or NULL for TimeZone::createDefault(). 
+ * @param locale Locale of the calendar + * @param status Error code + * @return the newly constructed fCalendar + */ + private Calendar initializeCalendar(TimeZone zone, ULocale locale) { + if (calendar == null) { + if(zone == null) { + calendar = Calendar.getInstance(locale); + } else { + calendar = Calendar.getInstance(zone, locale); + } + } + return calendar; + } + + private MessageFormat initializeCombinedFormat(Calendar cal, ULocale locale) { + String pattern = "{1} {0}"; + try { + CalendarData calData = new CalendarData(locale, cal.getType()); + String[] patterns = calData.getDateTimePatterns(); + if (patterns != null && patterns.length >= 9) { + int glueIndex = 8; + if (patterns.length >= 13) + { + switch (fDateStyle) + { + case DateFormat.RELATIVE_FULL: + case DateFormat.FULL: + glueIndex += (DateFormat.FULL + 1); + break; + case DateFormat.RELATIVE_LONG: + case DateFormat.LONG: + glueIndex += (DateFormat.LONG +1); + break; + case DateFormat.RELATIVE_MEDIUM: + case DateFormat.MEDIUM: + glueIndex += (DateFormat.MEDIUM +1); + break; + case DateFormat.RELATIVE_SHORT: + case DateFormat.SHORT: + glueIndex += (DateFormat.SHORT + 1); + break; + default: + break; + } + } + pattern = patterns[glueIndex]; + } + } catch (MissingResourceException e) { + // use default + } + fCombinedFormat = new MessageFormat(pattern, locale); + return fCombinedFormat; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java b/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java new file mode 100644 index 00000000000..8a62218b8c2 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java @@ -0,0 +1,203 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import com.ibm.icu.text.Replaceable; +import com.ibm.icu.text.ReplaceableString; +import com.ibm.icu.text.UCharacterIterator; +import com.ibm.icu.text.UTF16; + +/** + * DLF docs must define behavior when Replaceable is mutated underneath + * the iterator. + * + * This and ICUCharacterIterator share some code, maybe they should share + * an implementation, or the common state and implementation should be + * moved up into UCharacterIterator. + * + * What are first, last, and getBeginIndex doing here?!?!?! + */ +public class ReplaceableUCharacterIterator extends UCharacterIterator { + + // public constructor ------------------------------------------------------ + + /** + * Public constructor + * @param replaceable text which the iterator will be based on + */ + public ReplaceableUCharacterIterator(Replaceable replaceable){ + if(replaceable==null){ + throw new IllegalArgumentException(); + } + this.replaceable = replaceable; + this.currentIndex = 0; + } + + /** + * Public constructor + * @param str text which the iterator will be based on + */ + public ReplaceableUCharacterIterator(String str){ + if(str==null){ + throw new IllegalArgumentException(); + } + this.replaceable = new ReplaceableString(str); + this.currentIndex = 0; + } + + /** + * Public constructor + * @param buf buffer of text on which the iterator will be based + */ + public ReplaceableUCharacterIterator(StringBuffer buf){ + if(buf==null){ + throw new IllegalArgumentException(); + } + this.replaceable = new ReplaceableString(buf); + this.currentIndex = 0; + } + + // public methods ---------------------------------------------------------- + + /** + * Creates a copy of this iterator, does not clone the underlying + * Replaceableobject + * @return copy of this iterator + */ + public Object clone(){ + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + return null; 
// never invoked + } + } + + /** + * Returns the current UTF16 character. + * @return current UTF16 character + */ + public int current(){ + if (currentIndex < replaceable.length()) { + return replaceable.charAt(currentIndex); + } + return DONE; + } + + /** + * Returns the current codepoint + * @return current codepoint + */ + public int currentCodePoint(){ + // cannot use charAt due to it different + // behaviour when index is pointing at a + // trail surrogate, check for surrogates + + int ch = current(); + if(UTF16.isLeadSurrogate((char)ch)){ + // advance the index to get the next code point + next(); + // due to post increment semantics current() after next() + // actually returns the next char which is what we want + int ch2 = current(); + // current should never change the current index so back off + previous(); + + if(UTF16.isTrailSurrogate((char)ch2)){ + // we found a surrogate pair + return UCharacterProperty.getRawSupplementary( + (char)ch,(char)ch2 + ); + } + } + return ch; + } + + /** + * Returns the length of the text + * @return length of the text + */ + public int getLength(){ + return replaceable.length(); + } + + /** + * Gets the current currentIndex in text. + * @return current currentIndex in text. + */ + public int getIndex(){ + return currentIndex; + } + + /** + * Returns next UTF16 character and increments the iterator's currentIndex by 1. + * If the resulting currentIndex is greater or equal to the text length, the + * currentIndex is reset to the text length and a value of DONECODEPOINT is + * returned. + * @return next UTF16 character in text or DONE if the new currentIndex is off the + * end of the text range. + */ + public int next(){ + if (currentIndex < replaceable.length()) { + return replaceable.charAt(currentIndex++); + } + return DONE; + } + + + /** + * Returns previous UTF16 character and decrements the iterator's currentIndex by + * 1. 
+ * If the resulting currentIndex is less than 0, the currentIndex is reset to 0 and a + * value of DONECODEPOINT is returned. + * @return next UTF16 character in text or DONE if the new currentIndex is off the + * start of the text range. + */ + public int previous(){ + if (currentIndex > 0) { + return replaceable.charAt(--currentIndex); + } + return DONE; + } + + /** + *

    Sets the currentIndex to the specified currentIndex in the text. + * This assumes the text is stored as 16-bit code units.

    + * @param currentIndex the currentIndex within the text. + * @exception IndexOutOfBoundsException is thrown if an invalid currentIndex is + * supplied, i.e. currentIndex is negative or greater than the text length. + */ + public void setIndex(int currentIndex) throws IndexOutOfBoundsException{ + if (currentIndex < 0 || currentIndex > replaceable.length()) { + throw new IndexOutOfBoundsException(); + } + this.currentIndex = currentIndex; + } + + public int getText(char[] fillIn, int offset){ + int length = replaceable.length(); + if(offset < 0 || offset + length > fillIn.length){ + throw new IndexOutOfBoundsException(Integer.toString(length)); + } + replaceable.getChars(0,length,fillIn,offset); + return length; + } + + // private data members ---------------------------------------------------- + + /** + * Replacable object + */ + private Replaceable replaceable; + /** + * Current currentIndex + */ + private int currentIndex; + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java b/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java new file mode 100644 index 00000000000..66fcb8a3c66 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java @@ -0,0 +1,231 @@ +/* +****************************************************************************** +* Copyright (C) 2004-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.io.InputStream; +import java.util.Enumeration; +import java.util.MissingResourceException; +import java.util.PropertyResourceBundle; +import java.util.ResourceBundle; +import java.util.Vector; + +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/** + * just a wrapper for Java ListResourceBundles and + * @author ram + * + */ +public class ResourceBundleWrapper extends UResourceBundle { + private ResourceBundle bundle = null; + private String localeID = null; + private String baseName = null; + private Vector keys = null; +// private int loadingStatus = -1; + + private ResourceBundleWrapper(ResourceBundle bundle){ + this.bundle=bundle; + } + + protected void setLoadingStatus(int newStatus){ +// loadingStatus = newStatus; + } + + protected Object handleGetObject(String aKey){ + ResourceBundleWrapper current = this; + Object obj = null; + while(current!=null){ + try{ + obj = current.bundle.getObject(aKey); + break; + }catch(MissingResourceException ex){ + current = (ResourceBundleWrapper)current.getParent(); + } + } + if (obj == null){ + throw new MissingResourceException("Can't find resource for bundle " + +baseName + +", key "+aKey, + this.getClass().getName(), + aKey); + } + return obj; + } + + public Enumeration getKeys(){ + return keys.elements(); + } + + private void initKeysVector(){ + ResourceBundleWrapper current = this; + keys = new Vector(); + while(current!=null){ + Enumeration e = current.bundle.getKeys(); + while(e.hasMoreElements()){ + String elem = e.nextElement(); + if(!keys.contains(elem)){ + keys.add(elem); + } + } + current = (ResourceBundleWrapper)current.getParent(); + } + } + protected String getLocaleID(){ + return localeID; + } + + protected String getBaseName(){ + return bundle.getClass().getName().replace('.','/'); + } + + public ULocale getULocale(){ + return new ULocale(localeID); + } 
+ + public UResourceBundle getParent(){ + return (UResourceBundle)parent; + } + + // Flag for enabling/disabling debugging code + private static final boolean DEBUG = ICUDebug.enabled("resourceBundleWrapper"); + + // This method is for super class's instantiateBundle method + public static UResourceBundle getBundleInstance(String baseName, String localeID, + ClassLoader root, boolean disableFallback){ + UResourceBundle b = instantiateBundle(baseName, localeID, root, disableFallback); + if(b==null){ + String separator ="_"; + if(baseName.indexOf('/')>=0){ + separator = "/"; + } + throw new MissingResourceException("Could not find the bundle "+ baseName+separator+ localeID,"",""); + } + return b; + } + // recursively build bundle and override the super-class method + protected static synchronized UResourceBundle instantiateBundle(String baseName, String localeID, + ClassLoader root, boolean disableFallback) { + if (root == null) { + root = Utility.getFallbackClassLoader(); + } + final ClassLoader cl = root; + String name = baseName; + ULocale defaultLocale = ULocale.getDefault(); + if (localeID.length() != 0) { + name = name + "_" + localeID; + } + + ResourceBundleWrapper b = (ResourceBundleWrapper)loadFromCache(cl, name, defaultLocale); + if(b==null){ + ResourceBundleWrapper parent = null; + int i = localeID.lastIndexOf('_'); + + boolean loadFromProperties = false; + if (i != -1) { + String locName = localeID.substring(0, i); + parent = (ResourceBundleWrapper)loadFromCache(cl, baseName+"_"+locName,defaultLocale); + if(parent == null){ + parent = (ResourceBundleWrapper)instantiateBundle(baseName, locName , cl, disableFallback); + } + }else if(localeID.length()>0){ + parent = (ResourceBundleWrapper)loadFromCache(cl, baseName,defaultLocale); + if(parent==null){ + parent = (ResourceBundleWrapper)instantiateBundle(baseName, "", cl, disableFallback); + } + } + try { + Class cls = cl.loadClass(name).asSubclass(ResourceBundle.class); + ResourceBundle bx = cls.newInstance(); 
+ b = new ResourceBundleWrapper(bx); + if (parent != null) { + b.setParent(parent); + } + b.baseName=baseName; + b.localeID = localeID; + + } catch (ClassNotFoundException e) { + loadFromProperties = true; + } catch (NoClassDefFoundError e) { + loadFromProperties = true; + } catch (Exception e) { + if (DEBUG) + System.out.println("failure"); + if (DEBUG) + System.out.println(e); + } + + if (loadFromProperties) { + try { + final String resName = name.replace('.', '/') + ".properties"; + InputStream stream = java.security.AccessController.doPrivileged( + new java.security.PrivilegedAction() { + public InputStream run() { + if (cl != null) { + return cl.getResourceAsStream(resName); + } else { + return ClassLoader.getSystemResourceAsStream(resName); + } + } + } + ); + if (stream != null) { + // make sure it is buffered + stream = new java.io.BufferedInputStream(stream); + try { + b = new ResourceBundleWrapper(new PropertyResourceBundle(stream)); + if (parent != null) { + b.setParent(parent); + } + b.baseName=baseName; + b.localeID=localeID; + } catch (Exception ex) { + // throw away exception + } finally { + try { + stream.close(); + } catch (Exception ex) { + // throw away exception + } + } + } + + // if a bogus locale is passed then the parent should be + // the default locale not the root locale! 
+ if (b==null) { + String defaultName = defaultLocale.toString(); + if (localeID.length()>0 && localeID.indexOf('_')< 0 && defaultName.indexOf(localeID) == -1) { + b = (ResourceBundleWrapper)loadFromCache(cl,baseName+"_"+defaultName, defaultLocale); + if(b==null){ + b = (ResourceBundleWrapper)instantiateBundle(baseName , defaultName, cl, disableFallback); + } + } + } + // if still could not find the bundle then return the parent + if(b==null){ + b=parent; + } + } catch (Exception e) { + if (DEBUG) + System.out.println("failure"); + if (DEBUG) + System.out.println(e); + } + } + b = (ResourceBundleWrapper)addToCache(cl, name, defaultLocale, b); + } + + if(b!=null){ + b.initKeysVector(); + }else{ + if(DEBUG)System.out.println("Returning null for "+baseName+"_"+localeID); + } + + return b; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Row.java b/main/classes/core/src/com/ibm/icu/impl/Row.java new file mode 100644 index 00000000000..e6826c81678 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Row.java @@ -0,0 +1,184 @@ +/* + ********************************************************************** + * Copyright (c) 2002-2009, Google, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ********************************************************************** + * Author: Mark Davis + ********************************************************************** + */ +package com.ibm.icu.impl; + +import com.ibm.icu.util.Freezable; + + +/** + * A fixed-length tuple whose elements are held in an Object array. + * Elements are read with get0()..get4() and written with set0()..set4(); + * once freeze() has been called every setter throws + * UnsupportedOperationException. + */ +@SuppressWarnings("unchecked") +public class Row implements java.lang.Comparable, Cloneable, + Freezable>{ + protected Object[] items; + protected boolean frozen; + + /** + * Convenience Methods + */ + public static R2 of(C0 p0, C1 p1) { + return new R2(p0,p1); + } + public static R3 of(C0 p0, C1 p1, C2 p2) { + return new R3(p0,p1,p2); + } + public static R4 of(C0 p0, C1 p1, C2 p2, C3 p3) { + return new R4(p0,p1,p2,p3); + } + public static R5 of(C0 p0, C1 p1, C2 p2, C3 p3, C4 p4) { + return new R5(p0,p1,p2,p3,p4); + } + + public static class R2 extends Row { + public R2(C0 a, C1 b) { + items = new Object[] {a, b}; + } + } + public static class R3 extends Row { + public R3(C0 a, C1 b, C2 c) { + items = new Object[] {a, b, c}; + } + } + public static class R4 extends Row { + public R4(C0 a, C1 b, C2 c, C3 d) { + items = new Object[] {a, b, c, d}; + } + } + public static class R5 extends Row { + public R5(C0 a, C1 b, C2 c, C3 d, C4 e) { + items = new Object[] {a, b, c, d, e}; + } + } + + public Row set0(C0 item) { + return set(0, item); + } + public C0 get0() { + return (C0) items[0]; + } + public Row set1(C1 item) { + return set(1, item); + } + public C1 get1() { + return (C1) items[1]; + } + public Row set2(C2 item) { + return set(2, item); + } + public C2 get2() { + return (C2) items[2]; + } + public Row set3(C3 item) { + return set(3, item); + } + public C3 get3() { + return (C3) items[3]; + } + public Row set4(C4 item) { + return set(4, item); + } + public C4 get4() { + return (C4) items[4]; + } + + protected Row set(int i, Object item) { + if (frozen) { + throw new UnsupportedOperationException("Attempt to modify frozen object"); + } + items[i] = item; + return this; + } + + public int hashCode() { + int sum =
items.length; + for (Object item : items) { + sum = sum*37 + Utility.checkHash(item); + } + return sum; + } + + public boolean equals(Object other) { + try { + Row that = (Row)other; + if (items.length != that.items.length) { + return false; + } + int i = 0; + for (Object item : items) { + if (!Utility.objectEquals(item, that.items[i++])) { + return false; + } + } + return true; + } catch (Exception e) { + return false; + } + } + + public int compareTo(Object other) { + int result; + Row that = (Row)other; + result = items.length - that.items.length; + if (result != 0) { + return result; + } + int i = 0; + for (Object item : items) { + result = Utility.checkCompare(((Comparable)item), ((Comparable)that.items[i++])); + if (result != 0) { + return result; + } + } + return 0; + } + + public String toString() { + StringBuilder result = new StringBuilder("["); + boolean first = true; + for (Object item : items) { + if (first) { + first = false; + } else { + result.append(", "); + } + result.append(item); + } + return result.append("]").toString(); + } + + public boolean isFrozen() { + return frozen; + } + + public Row freeze() { + frozen = true; + return this; + } + + /** + * Returns this same object when it is frozen; otherwise returns a copy + * that owns its own items array (the elements themselves are not copied). + * The receiver is left unmodified. + * @return this object if frozen, else a copy, or null if cloning is not supported + */ + public Object clone() { + if (frozen) return this; + try { + Row result = (Row) super.clone(); + /* give the copy its own array; never reassign the receiver's field */ + result.items = items.clone(); + return result; + } catch (CloneNotSupportedException e) { + return null; + } + } + + /** + * Returns an unfrozen copy that owns its own items array. The receiver, + * including its frozen flag and its items array, is left unmodified. + * @return a modifiable copy, or null if cloning is not supported + */ + public Row cloneAsThawed() { + try { + Row result = (Row) super.clone(); + /* the receiver may be frozen and shared, so only the copy is written to */ + result.items = items.clone(); + result.frozen = false; + return result; + } catch (CloneNotSupportedException e) { + return null; + } + } +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java b/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java new file mode 100644 index 00000000000..2ded714b31e --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java @@ -0,0 +1,346 @@ +/* +********************************************************************** +* Copyright (c)
2003-2010, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: September 23 2003 +* Since: ICU 2.8 +********************************************************************** +*/ +package com.ibm.icu.impl; + +import java.text.ParsePosition; + +import com.ibm.icu.text.SymbolTable; +import com.ibm.icu.text.UTF16; + +/** + * An iterator that returns 32-bit code points. This class is deliberately + * not related to any of the JDK or ICU4J character iterator classes + * in order to minimize complexity. + * @author Alan Liu + * @since ICU 2.8 + */ +public class RuleCharacterIterator { + + // TODO: Ideas for later. (Do not implement if not needed, lest the + // code coverage numbers go down due to unused methods.) + // 1. Add a copy constructor, equals() method, clone() method. + // 2. Rather than return DONE, throw an exception if the end + // is reached -- this is an alternate usage model, probably not useful. + // 3. Return isEscaped from next(). If this happens, + // don't keep an isEscaped member variable. + + /** + * Text being iterated. + */ + private String text; + + /** + * Position of iterator. + */ + private ParsePosition pos; + + /** + * Symbol table used to parse and dereference variables. May be null. + */ + private SymbolTable sym; + + /** + * Current variable expansion, or null if none. + */ + private char[] buf; + + /** + * Position within buf[]. Meaningless if buf == null. + */ + private int bufPos; + + /** + * Flag indicating whether the last character was parsed from an escape. + */ + private boolean isEscaped; + + /** + * Value returned when there are no more characters to iterate. + */ + public static final int DONE = -1; + + /** + * Bitmask option to enable parsing of variable names. If (options & + * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to + * its value. 
Variables are parsed using the SymbolTable API. + */ + public static final int PARSE_VARIABLES = 1; + + /** + * Bitmask option to enable parsing of escape sequences. If (options & + * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded + * to its value. Escapes are parsed using Utility.unescapeAt(). + */ + public static final int PARSE_ESCAPES = 2; + + /** + * Bitmask option to enable skipping of whitespace. If (options & + * SKIP_WHITESPACE) != 0, then whitespace characters will be silently + * skipped, as if they were not present in the input. Whitespace + * characters are defined by UCharacterProperty.isRuleWhiteSpace(). + */ + public static final int SKIP_WHITESPACE = 4; + + /** + * Constructs an iterator over the given text, starting at the given + * position. + * @param text the text to be iterated + * @param sym the symbol table, or null if there is none. If sym is null, + * then variables will not be deferenced, even if the PARSE_VARIABLES + * option is set. + * @param pos upon input, the index of the next character to return. If a + * variable has been dereferenced, then pos will not increment as + * characters of the variable value are iterated. + */ + public RuleCharacterIterator(String text, SymbolTable sym, + ParsePosition pos) { + if (text == null || pos.getIndex() > text.length()) { + throw new IllegalArgumentException(); + } + this.text = text; + this.sym = sym; + this.pos = pos; + buf = null; + } + + /** + * Returns true if this iterator has no more characters to return. + */ + public boolean atEnd() { + return buf == null && pos.getIndex() == text.length(); + } + + /** + * Returns the next character using the given options, or DONE if there + * are no more characters, and advance the position to the next + * character. + * @param options one or more of the following options, bitwise-OR-ed + * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. 
+ * @return the current 32-bit code point, or DONE + */ + public int next(int options) { + int c = DONE; + isEscaped = false; + + for (;;) { + c = _current(); + _advance(UTF16.getCharCount(c)); + + if (c == SymbolTable.SYMBOL_REF && buf == null && + (options & PARSE_VARIABLES) != 0 && sym != null) { + String name = sym.parseReference(text, pos, text.length()); + // If name == null there was an isolated SYMBOL_REF; + // return it. Caller must be prepared for this. + if (name == null) { + break; + } + bufPos = 0; + buf = sym.lookup(name); + if (buf == null) { + throw new IllegalArgumentException( + "Undefined variable: " + name); + } + // Handle empty variable value + if (buf.length == 0) { + buf = null; + } + continue; + } + + if ((options & SKIP_WHITESPACE) != 0 && + UCharacterProperty.isRuleWhiteSpace(c)) { + continue; + } + + if (c == '\\' && (options & PARSE_ESCAPES) != 0) { + int offset[] = new int[] { 0 }; + c = Utility.unescapeAt(lookahead(), offset); + jumpahead(offset[0]); + isEscaped = true; + if (c < 0) { + throw new IllegalArgumentException("Invalid escape"); + } + } + + break; + } + + return c; + } + + /** + * Returns true if the last character returned by next() was + * escaped. This will only be the case if the option passed in to + * next() included PARSE_ESCAPED and the next character was an + * escape sequence. + */ + public boolean isEscaped() { + return isEscaped; + } + + /** + * Returns true if this iterator is currently within a variable expansion. + */ + public boolean inVariable() { + return buf != null; + } + + /** + * Returns an object which, when later passed to setPos(), will + * restore this iterator's position. Usage idiom: + * + * RuleCharacterIterator iterator = ...; + * Object pos = iterator.getPos(null); // allocate position object + * for (;;) { + * pos = iterator.getPos(pos); // reuse position object + * int c = iterator.next(...); + * ... 
+ * } + * iterator.setPos(pos); + * + * @param p a position object previously returned by getPos(), + * or null. If not null, it will be updated and returned. If + * null, a new position object will be allocated and returned. + * @return a position object which may be passed to setPos(), + * either `p,' or if `p' == null, a newly-allocated object + */ + public Object getPos(Object p) { + if (p == null) { + return new Object[] {buf, new int[] {pos.getIndex(), bufPos}}; + } + Object[] a = (Object[]) p; + a[0] = buf; + int[] v = (int[]) a[1]; + v[0] = pos.getIndex(); + v[1] = bufPos; + return p; + } + + /** + * Restores this iterator to the position it had when getPos() + * returned the given object. + * @param p a position object previously returned by getPos() + */ + public void setPos(Object p) { + Object[] a = (Object[]) p; + buf = (char[]) a[0]; + int[] v = (int[]) a[1]; + pos.setIndex(v[0]); + bufPos = v[1]; + } + + /** + * Skips ahead past any ignored characters, as indicated by the given + * options. This is useful in conjunction with the lookahead() method. + * + * Currently, this only has an effect for SKIP_WHITESPACE. + * @param options one or more of the following options, bitwise-OR-ed + * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. + */ + public void skipIgnored(int options) { + if ((options & SKIP_WHITESPACE) != 0) { + for (;;) { + int a = _current(); + if (!UCharacterProperty.isRuleWhiteSpace(a)) break; + _advance(UTF16.getCharCount(a)); + } + } + } + + /** + * Returns a string containing the remainder of the characters to be + * returned by this iterator, without any option processing. If the + * iterator is currently within a variable expansion, this will only + * extend to the end of the variable expansion. This method is provided + * so that iterators may interoperate with string-based APIs. 
The typical + * sequence of calls is to call skipIgnored(), then call lookahead(), then + * parse the string returned by lookahead(), then call jumpahead() to + * resynchronize the iterator. + * @return a string containing the characters to be returned by future + * calls to next() + */ + public String lookahead() { + if (buf != null) { + return new String(buf, bufPos, buf.length - bufPos); + } else { + return text.substring(pos.getIndex()); + } + } + + /** + * Advances the position by the given number of 16-bit code units. + * This is useful in conjunction with the lookahead() method. + * @param count the number of 16-bit code units to jump over + */ + public void jumpahead(int count) { + if (count < 0) { + throw new IllegalArgumentException(); + } + if (buf != null) { + bufPos += count; + if (bufPos > buf.length) { + throw new IllegalArgumentException(); + } + if (bufPos == buf.length) { + buf = null; + } + } else { + int i = pos.getIndex() + count; + pos.setIndex(i); + if (i > text.length()) { + throw new IllegalArgumentException(); + } + } + } + + /** + * Returns a string representation of this object, consisting of the + * characters being iterated, with a '|' marking the current position. + * Position within an expanded variable is not indicated. + * @return a string representation of this object + */ + public String toString() { + int b = pos.getIndex(); + return text.substring(0, b) + '|' + text.substring(b); + } + + /** + * Returns the current 32-bit code point without parsing escapes, parsing + * variables, or skipping whitespace. + * @return the current 32-bit code point + */ + private int _current() { + if (buf != null) { + return UTF16.charAt(buf, 0, buf.length, bufPos); + } else { + int i = pos.getIndex(); + return (i < text.length()) ? UTF16.charAt(text, i) : DONE; + } + } + + /** + * Advances the position by the given amount. 
+ * @param count the number of 16-bit code units to advance past + */ + private void _advance(int count) { + if (buf != null) { + bufPos += count; + if (bufPos == buf.length) { + buf = null; + } + } else { + pos.setIndex(pos.getIndex() + count); + if (pos.getIndex() > text.length()) { + pos.setIndex(text.length()); + } + } + } +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java b/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java new file mode 100644 index 00000000000..8511393936b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java @@ -0,0 +1,73 @@ +/* + **************************************************************************** + * Copyright (c) 2007-2009 International Business Machines Corporation and * + * others. All rights reserved. * + **************************************************************************** + */ + +package com.ibm.icu.impl; + +import java.lang.ref.Reference; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class SimpleCache implements ICUCache { + private static final int DEFAULT_CAPACITY = 16; + + private Reference> cacheRef = null; + private int type = ICUCache.SOFT; + private int capacity = DEFAULT_CAPACITY; + + public SimpleCache() { + } + + public SimpleCache(int cacheType) { + this(cacheType, DEFAULT_CAPACITY); + } + + public SimpleCache(int cacheType, int initialCapacity) { + if (cacheType == ICUCache.WEAK) { + type = cacheType; + } + if (initialCapacity > 0) { + capacity = initialCapacity; + } + } + + public V get(Object key) { + Reference> ref = cacheRef; + if (ref != null) { + Map map = ref.get(); + if (map != null) { + return map.get(key); + } + } + return null; + } + + public void put(K key, V value) { + Reference> ref = cacheRef; + Map map = null; + if (ref != null) { + map = ref.get(); + } + if (map == null) { + map = 
Collections.synchronizedMap(new HashMap(capacity)); + if (type == ICUCache.WEAK) { + ref = new WeakReference>(map); + } else { + ref = new SoftReference>(map); + } + cacheRef = ref; + } + map.put(key, value); + } + + public void clear() { + cacheRef = null; + } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/SoftCache.java b/main/classes/core/src/com/ibm/icu/impl/SoftCache.java new file mode 100644 index 00000000000..dac71c6c1e0 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/SoftCache.java @@ -0,0 +1,99 @@ +/* +******************************************************************************* +* Copyright (C) 2010, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.lang.ref.SoftReference; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Generic, thread-safe cache implementation, storing SoftReferences to cached instances. + * To use, instantiate a subclass which implements the createInstance() method, + * and call get() with the key and the data. The get() call will use the data + * only if it needs to call createInstance(), otherwise the data is ignored. + * + * By using SoftReferences to instances, the Java runtime can release instances + * once they are not used any more at all. If such an instance is then requested again, + * the get() method will call createInstance() again and also create a new SoftReference. + * The cache holds on to its map of keys to SoftReferenced instances forever. 
+ * + * @param Cache lookup key type + * @param Cache instance value type + * @param Data type for creating a new instance value + * + * @author Markus Scherer, Mark Davis + */ +public abstract class SoftCache extends CacheBase { + @Override + public final V getInstance(K key, D data) { + // We synchronize twice, once on the map and once on valueRef, + // because we prefer the fine-granularity locking of the ConcurrentHashMap + // over coarser locking on the whole cache instance. + // We use a SettableSoftReference (a second level of indirection) because + // ConcurrentHashMap.putIfAbsent() never replaces the key's value, and if it were + // a simple SoftReference we would not be able to reset its value after it has been cleared. + // (And ConcurrentHashMap.put() always replaces the value, which we don't want either.) + SettableSoftReference valueRef = map.get(key); + V value; + if(valueRef != null) { + synchronized(valueRef) { + value = valueRef.ref.get(); + if(value != null) { + return value; + } else { + // The instance has been evicted, its SoftReference cleared. + // Create and set a new instance. + valueRef.ref = new SoftReference(value = createInstance(key, data)); + return value; + } + } + } else /* valueRef == null */ { + // We had never cached an instance for this key. + value = createInstance(key, data); + valueRef = map.putIfAbsent(key, new SettableSoftReference(value)); + if(valueRef == null) { + // Normal "put": Our new value is now cached. + return value; + } else { + // Race condition: Another thread beat us to putting a SettableSoftReference + // into the map. Return its value, but just in case the garbage collector + // was aggressive, we also offer our new instance for caching. + return valueRef.setIfAbsent(value); + } + } + } + /** + * Value type for cache items: Has a SoftReference which can be set + * to a new value when the SoftReference has been cleared. + * The SoftCache class sometimes accesses the ref field directly. 
+ * + * @param Cache instance value type + */ + private static final class SettableSoftReference { + private SettableSoftReference(V value) { + ref = new SoftReference(value); + } + /** + * If the SoftReference has been cleared, then this replaces it with a new SoftReference + * for the new value and returns the new value; otherwise returns the current + * SoftReference's value. + * @param value Replacement value, for when the current reference has been cleared + * @return The value that is held by the SoftReference, old or new + */ + private synchronized V setIfAbsent(V value) { + V oldValue = ref.get(); + if(oldValue == null) { + ref = new SoftReference(value); + return value; + } else { + return oldValue; + } + } + private SoftReference ref; // never null + } + private ConcurrentHashMap> map = + new ConcurrentHashMap>(); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/SortedSetRelation.java b/main/classes/core/src/com/ibm/icu/impl/SortedSetRelation.java new file mode 100644 index 00000000000..7051fa68a91 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/SortedSetRelation.java @@ -0,0 +1,180 @@ +/* +********************************************************************** +* Copyright (c) 2002-2010, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: M. Davis +* Created: December 2002 (moved from UnicodeSet) +* Since: ICU 2.4 +********************************************************************** +*/ +package com.ibm.icu.impl; + +import java.util.Iterator; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * Computationally efficient determination of the relationship between + * two SortedSets. + */ +public class SortedSetRelation { + + /** + * The relationship between two sets A and B can be determined by looking at: + * A - B + * A & B (intersection) + * B - A + * These are represented by a set of bits. 
+ * Bit 2 is true if A - B is not empty + * Bit 1 is true if A & B is not empty + * BIT 0 is true if B - A is not empty + */ + public static final int + A_NOT_B = 4, + A_AND_B = 2, + B_NOT_A = 1; + + /** + * There are 8 combinations of the relationship bits. These correspond to + * the filters (combinations of allowed bits) in hasRelation. They also + * correspond to the modification functions, listed in comments. + */ + public static final int + ANY = A_NOT_B | A_AND_B | B_NOT_A, // union, addAll + CONTAINS = A_NOT_B | A_AND_B, // A (unnecessary) + DISJOINT = A_NOT_B | B_NOT_A, // A xor B, missing Java function + ISCONTAINED = A_AND_B | B_NOT_A, // B (unnecessary) + NO_B = A_NOT_B, // A setDiff B, removeAll + EQUALS = A_AND_B, // A intersect B, retainAll + NO_A = B_NOT_A, // B setDiff A, removeAll + NONE = 0, // null (unnecessary) + + ADDALL = ANY, // union, addAll + A = CONTAINS, // A (unnecessary) + COMPLEMENTALL = DISJOINT, // A xor B, missing Java function + B = ISCONTAINED, // B (unnecessary) + REMOVEALL = NO_B, // A setDiff B, removeAll + RETAINALL = EQUALS, // A intersect B, retainAll + B_REMOVEALL = NO_A; // B setDiff A, removeAll + + + /** + * Utility that could be on SortedSet. Faster implementation than + * what is in Java for doing contains, equals, etc. + * @param a first set + * @param allow filter, using ANY, CONTAINS, etc. + * @param b second set + * @return whether the filter relationship is true or not. 
+ */ + public static > boolean hasRelation(SortedSet a, int allow, SortedSet b) { + if (allow < NONE || allow > ANY) { + throw new IllegalArgumentException("Relation " + allow + " out of range"); + } + + // extract filter conditions + // these are the ALLOWED conditions Set + + boolean anb = (allow & A_NOT_B) != 0; + boolean ab = (allow & A_AND_B) != 0; + boolean bna = (allow & B_NOT_A) != 0; + + // quick check on sizes + switch(allow) { + case CONTAINS: if (a.size() < b.size()) return false; break; + case ISCONTAINED: if (a.size() > b.size()) return false; break; + case EQUALS: if (a.size() != b.size()) return false; break; + } + + // check for null sets + if (a.size() == 0) { + if (b.size() == 0) return true; + return bna; + } else if (b.size() == 0) { + return anb; + } + + // pick up first strings, and start comparing + Iterator ait = a.iterator(); + Iterator bit = b.iterator(); + + T aa = ait.next(); + T bb = bit.next(); + + while (true) { + int comp = aa.compareTo(bb); + if (comp == 0) { + if (!ab) return false; + if (!ait.hasNext()) { + if (!bit.hasNext()) return true; + return bna; + } else if (!bit.hasNext()) { + return anb; + } + aa = ait.next(); + bb = bit.next(); + } else if (comp < 0) { + if (!anb) return false; + if (!ait.hasNext()) { + return bna; + } + aa = ait.next(); + } else { + if (!bna) return false; + if (!bit.hasNext()) { + return anb; + } + bb = bit.next(); + } + } + } + + /** + * Utility that could be on SortedSet. Allows faster implementation than + * what is in Java for doing addAll, removeAll, retainAll, (complementAll). + * @param a first set + * @param relation the relation filter, using ANY, CONTAINS, etc. 
+ * @param b second set + * @return the new set + */ + public static > SortedSet doOperation(SortedSet a, int relation, SortedSet b) { + // TODO: optimize this as above + TreeSet temp; + switch (relation) { + case ADDALL: + a.addAll(b); + return a; + case A: + return a; // no action + case B: + a.clear(); + a.addAll(b); + return a; + case REMOVEALL: + a.removeAll(b); + return a; + case RETAINALL: + a.retainAll(b); + return a; + // the following is the only case not really supported by Java + // although all could be optimized + case COMPLEMENTALL: + temp = new TreeSet(b); + temp.removeAll(a); + a.removeAll(b); + a.addAll(temp); + return a; + case B_REMOVEALL: + temp = new TreeSet(b); + temp.removeAll(a); + a.clear(); + a.addAll(temp); + return a; + case NONE: + a.clear(); + return a; + default: + throw new IllegalArgumentException("Relation " + relation + " out of range"); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java b/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java new file mode 100644 index 00000000000..46342788286 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java @@ -0,0 +1,100 @@ +/* + ****************************************************************************** + * Copyright (C) 2003-2008, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ****************************************************************************** + * + * Created on May 2, 2003 + * + * To change the template for this generated file go to + * Window>Preferences>Java>Code Generation>Code and Comments + */ +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + + + +/** + * @author ram + * + * To change the template for this generated type comment go to + * Window>Preferences>Java>Code Generation>Code and Comments + */ +public final class StringPrepDataReader implements ICUBinary.Authenticate { + private final static boolean debug = ICUDebug.enabled("NormalizerDataReader"); + + /** + *

    Public constructor; reads and authenticates the data file header.

    + * @param inputStream ICU uprop.dat file input stream + * @exception IOException throw if data file fails authentication + */ + public StringPrepDataReader(InputStream inputStream) + throws IOException{ + if(debug) System.out.println("Bytes in inputStream " + inputStream.available()); + + unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this); + + if(debug) System.out.println("Bytes left in inputStream " +inputStream.available()); + + dataInputStream = new DataInputStream(inputStream); + + if(debug) System.out.println("Bytes left in dataInputStream " +dataInputStream.available()); + } + + public void read(byte[] idnaBytes, + char[] mappingTable) + throws IOException{ + + //Read the bytes that make up the idnaTrie + dataInputStream.readFully(idnaBytes); + + //Read the extra data + for(int i=0;iStringobject + * @return copy of this iterator + */ + ///CLOVER:OFF + public Object clone() + { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + return null; // never invoked + } + } + ///CLOVER:ON + /** + * Returns the current UTF16 character. + * @return current UTF16 character + */ + public int current() + { + if (m_currentIndex_ < m_text_.length()) { + return m_text_.charAt(m_currentIndex_); + } + return DONE; + } + + + /** + * Returns the length of the text + * @return length of the text + */ + public int getLength() + { + return m_text_.length(); + } + + /** + * Gets the current currentIndex in text. + * @return current currentIndex in text. + */ + public int getIndex() + { + return m_currentIndex_; + } + + /** + * Returns next UTF16 character and increments the iterator's currentIndex + * by 1. + * If the resulting currentIndex is greater or equal to the text length, + * the currentIndex is reset to the text length and a value of DONE is + * returned. + * @return next UTF16 character in text or DONE if the new currentIndex is + * off the end of the text range. 
+ */ + public int next() + { + if (m_currentIndex_ < m_text_.length()) + { + return m_text_.charAt(m_currentIndex_ ++); + } + return DONE; + } + + + /** + * Returns previous UTF16 character and decrements the iterator's + * currentIndex by 1. + * If the resulting currentIndex is less than 0, the currentIndex is reset + * to 0 and a value of DONE is returned. + * @return next UTF16 character in text or DONE if the new currentIndex is + * off the start of the text range. + */ + public int previous() + { + if (m_currentIndex_ > 0) { + return m_text_.charAt(-- m_currentIndex_); + } + return DONE; + } + + /** + *

    Sets the currentIndex to the specified currentIndex in the text. + * No character is returned; call current() to read the code unit at the new currentIndex. + * This assumes the text is stored as 16-bit code units.

    + * @param currentIndex the currentIndex within the text. + * @exception IndexOutOfBoundsException is thrown if an invalid currentIndex + * is supplied. i.e. currentIndex is out of bounds. + */ + public void setIndex(int currentIndex) throws IndexOutOfBoundsException + { + if (currentIndex < 0 || currentIndex > m_text_.length()) { + throw new IndexOutOfBoundsException(); + } + m_currentIndex_ = currentIndex; + } + + /** + * Fills the buffer with the underlying text storage of the iterator + * If the buffer capacity is not enough a exception is thrown. The capacity + * of the fill in buffer should at least be equal to length of text in the + * iterator obtained by calling getLength()Usage: + * + * + *
    +     *         UCharacterIterator iter = UCharacterIterator.getInstance(text);
    +     *         char[] buf = new char[iter.getLength()];
    +     *         iter.getText(buf);
    +     *         
    +     *         OR
    +     *         char[] buf= new char[1];
    +     *         int len = 0;
    +     *         for(;;){
    +     *             try{
    +     *                 len = iter.getText(buf);
    +     *                 break;
    +     *             }catch(IndexOutOfBoundsException e){
    +     *                 buf = new char[iter.getLength()];
    +     *             }
    +     *         }
    +     * 
    + *
    + * + * @param fillIn an array of chars to fill with the underlying UTF-16 code + * units. + * @param offset the position within the array to start putting the data. + * @return the number of code units added to fillIn, as a convenience + * @exception IndexOutOfBoundsException exception if there is not enough + * room after offset in the array, or if offset < 0. + */ + ///CLOVER:OFF + public int getText(char[] fillIn, int offset) + { + int length = m_text_.length(); + if (offset < 0 || offset + length > fillIn.length) { + throw new IndexOutOfBoundsException(Integer.toString(length)); + } + m_text_.getChars(0, length, fillIn, offset); + return length; + } + ///CLOVER:ON + /** + * Convenience method for returning the underlying text storage as as + * string + * @return the underlying text storage in the iterator as a string + */ + public String getText() + { + return m_text_; + } + + /** + * Reset this iterator to point to a new string. This method is used by + * other classes that want to avoid allocating new + * ReplaceableCharacterIterator objects every time their setText method + * is called. + * @param text The String to be iterated over + */ + public void setText(String text) + { + if (text == null) { + throw new NullPointerException(); + } + m_text_ = text; + m_currentIndex_ = 0; + } + + // private data members ---------------------------------------------------- + + /** + * Text string object + */ + private String m_text_; + /** + * Current currentIndex + */ + private int m_currentIndex_; + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java b/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java new file mode 100644 index 00000000000..9a172a227b4 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java @@ -0,0 +1,275 @@ +/* + * ******************************************************************************** + * Copyright (C) 2007-2009, International Business Machines Corporation and others. 
+ * All Rights Reserved. + * ******************************************************************************** + */ +package com.ibm.icu.impl; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; + +/** + * TextTrieMap is a trie implementation for supporting + * fast prefix match for the key. + */ +public class TextTrieMap { + /** + * Constructs a TextTrieMap object. + * + * @param ignoreCase true to use case insensitive match + */ + public TextTrieMap(boolean ignoreCase) { + this.ignoreCase = ignoreCase; + } + + /** + * Adds the text key and its associated object in this object. + * + * @param text The text. + * @param o The object associated with the text. + */ + public synchronized void put(String text, V o) { + CharacterNode node = root; + for (int i = 0; i < text.length(); i++) { + int ch = UTF16.charAt(text, i); + node = node.addChildNode(ch); + if (UTF16.getCharCount(ch) == 2) { + i++; + } + } + node.addObject(o); + } + + /** + * Gets an iterator of the objects associated with the + * longest prefix matching string key. + * + * @param text The text to be matched with prefixes. + * @return An iterator of the objects associated with + * the longest prefix matching matching key, or null + * if no matching entry is found. + */ + public Iterator get(String text) { + return get(text, 0); + } + + /** + * Gets an iterator of the objects associated with the + * longest prefix matching string key starting at the + * specified position. + * + * @param text The text to be matched with prefixes. + * @param start The start index of of the text + * @return An iterator of the objects associated with the + * longest prefix matching matching key, or null if no + * matching entry is found. 
+ */ + public Iterator get(String text, int start) { + LongestMatchHandler handler = new LongestMatchHandler(); + find(text, start, handler); + return handler.getMatches(); + } + + public void find(String text, ResultHandler handler) { + find(text, 0, handler); + } + + public void find(String text, int start, ResultHandler handler) { + find(root, text, start, start, handler); + } + + /* + * Find an iterator of the objects associated with the + * longest prefix matching string key under the specified node. + * + * @param node The character node in this trie. + * @param text The text to be matched with prefixes. + * @param start The start index within the text. + * @param index The current index within the text. + * @param handler The result handler, ResultHandler#handlePrefixMatch + * is called when any prefix match is found. + */ + private synchronized void find(CharacterNode node, String text, + int start, int index, ResultHandler handler) { + Iterator itr = node.iterator(); + if (itr != null) { + if (!handler.handlePrefixMatch(index - start, itr)) { + return; + } + } + if (index < text.length()) { + List childNodes = node.getChildNodes(); + if (childNodes == null) { + return; + } + int ch = UTF16.charAt(text, index); + int chLen = UTF16.getCharCount(ch); + for (int i = 0; i < childNodes.size(); i++) { + CharacterNode child = childNodes.get(i); + if (compare(ch, child.getCharacter())) { + find(child, text, start, index + chLen, handler); + break; + } + } + } + } + + /** + * A private method used for comparing two characters. + * + * @param ch1 The first character. + * @param ch2 The second character. + * @return true if the first character matches the second. 
+ */ + private boolean compare(int ch1, int ch2) { + if (ch1 == ch2) { + return true; + } + else if (ignoreCase) { + if (UCharacter.toLowerCase(ch1) == UCharacter.toLowerCase(ch2)) { + return true; + } + else if (UCharacter.toUpperCase(ch1) == UCharacter.toUpperCase(ch2)) { + return true; + } + } + return false; + } + + // The root node of this trie + private CharacterNode root = new CharacterNode(0); + + // Character matching option + boolean ignoreCase; + + /** + * Inner class representing a character node in the trie. + */ + private class CharacterNode { + int character; + List children; + List objlist; + + /** + * Constructs a node for the character. + * + * @param ch The character associated with this node. + */ + public CharacterNode(int ch) { + character = ch; + } + + /** + * Gets the character associated with this node. + * + * @return The character + */ + public int getCharacter() { + return character; + } + + /** + * Adds the object to the node. + * + * @param obj The object set in the leaf node. + */ + public void addObject(V obj) { + if (objlist == null) { + objlist = new LinkedList(); + } + objlist.add(obj); + } + + /** + * Gets an iterator of the objects associated with + * the leaf node. + * + * @return The iterator or null if no objects are + * associated with this node. + */ + public Iterator iterator() { + if (objlist == null) { + return null; + } + return objlist.iterator(); + } + + /** + * Adds a child node for the character under this character + * node in the trie. When the matching child node already + * exists, the reference of the existing child node is + * returned. + * + * @param ch The character associated with a child node. + * @return The child node. 
+ */ + public CharacterNode addChildNode(int ch) { + if (children == null) { + children = new ArrayList(); + CharacterNode newNode = new CharacterNode(ch); + children.add(newNode); + return newNode; + } + CharacterNode node = null; + for (int i = 0; i < children.size(); i++) { + CharacterNode cur = children.get(i); + if (compare(ch, cur.getCharacter())) { + node = cur; + break; + } + } + if (node == null) { + node = new CharacterNode(ch); + children.add(node); + } + return node; + } + + /** + * Gets the list of child nodes under this node. + * + * @return The list of child nodes. + */ + public List getChildNodes() { + return children; + } + } + + /** + * Callback handler for processing prefix matches used by + * find method. + */ + public interface ResultHandler { + /** + * Handles a prefix key match + * + * @param matchLength Matched key's length + * @param values An iterator of the objects associated with the matched key + * @return Return true to continue the search in the trie, false to quit. + */ + public boolean handlePrefixMatch(int matchLength, Iterator values); + } + + private static class LongestMatchHandler implements ResultHandler { + private Iterator matches = null; + private int length = 0; + + public boolean handlePrefixMatch(int matchLength, Iterator values) { + if (matchLength > length) { + length = matchLength; + matches = values; + } + return true; + } + + public Iterator getMatches() { + return matches; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java b/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java new file mode 100644 index 00000000000..a7e0fdb8609 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java @@ -0,0 +1,147 @@ +/* + ********************************************************************** + * Copyright (c) 2003-2010, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ********************************************************************** + * Author: Alan Liu + * Created: October 2 2003 + * Since: ICU 2.8 + ********************************************************************** + */ + +package com.ibm.icu.impl; +import java.util.Date; + +import com.ibm.icu.util.TimeZone; + +/** + * TimeZoneAdapter wraps a com.ibm.icu.util.TimeZone + * subclass and inherits from java.util.TimeZone. + * Without this class, we would need to 'port' java.util.Date to + * com.ibm.icu.util as well, so that Date could interoperate properly + * with the com.ibm.icu.util TimeZone and Calendar classes. With this + * class, we can use java.util.Date together with com.ibm.icu.util + * classes. + * + * @see com.ibm.icu.util.TimeZone#setDefault + * @author Alan Liu + * @since ICU 2.8 + */ +public class TimeZoneAdapter extends java.util.TimeZone { + + // Generated by serialver from JDK 1.4.1_01 + static final long serialVersionUID = -2040072218820018557L; + + /** + * The contained com.ibm.icu.util.TimeZone object. Must not be null. + * We delegate all methods to this object. + */ + private TimeZone zone; + + /** + * Given a java.util.TimeZone, wrap it in the appropriate adapter + * subclass of com.ibm.icu.util.TimeZone and return the adapter. + */ + public static java.util.TimeZone wrap(com.ibm.icu.util.TimeZone tz) { + return new TimeZoneAdapter(tz); + } + + /** + * Return the java.util.TimeZone wrapped by this object. + */ + public com.ibm.icu.util.TimeZone unwrap() { + return zone; + } + + /** + * Constructs an adapter for a com.ibm.icu.util.TimeZone object. + */ + public TimeZoneAdapter(TimeZone zone) { + this.zone = zone; + super.setID(zone.getID()); + } + + /** + * TimeZone API; calls through to wrapped time zone. + */ + public void setID(String ID) { + super.setID(ID); + zone.setID(ID); + } + + /** + * TimeZone API; calls through to wrapped time zone. 
+ */ + public boolean hasSameRules(java.util.TimeZone other) { + return other instanceof TimeZoneAdapter && + zone.hasSameRules(((TimeZoneAdapter)other).zone); + } + + /** + * TimeZone API; calls through to wrapped time zone. + */ + public int getOffset(int era, int year, int month, int day, int dayOfWeek, + int millis) { + return zone.getOffset(era, year, month, day, dayOfWeek, millis); + } + + /** + * TimeZone API; calls through to wrapped time zone. + */ + public int getRawOffset() { + return zone.getRawOffset(); + } + + /** + * TimeZone API; calls through to wrapped time zone. + */ + public void setRawOffset(int offsetMillis) { + zone.setRawOffset(offsetMillis); + } + + /** + * TimeZone API; calls through to wrapped time zone. + */ + public boolean useDaylightTime() { + return zone.useDaylightTime(); + } + + /** + * TimeZone API; calls through to wrapped time zone. + */ + public boolean inDaylightTime(Date date) { + return zone.inDaylightTime(date); + } + + /** + * Boilerplate API; calls through to wrapped object. + */ + public Object clone() { + return new TimeZoneAdapter((TimeZone)zone.clone()); + } + + /** + * Boilerplate API; calls through to wrapped object. + */ + public synchronized int hashCode() { + return zone.hashCode(); + } + + /** + * Boilerplate API; calls through to wrapped object. + */ + public boolean equals(Object obj) { + if (obj instanceof TimeZoneAdapter) { + obj = ((TimeZoneAdapter) obj).zone; + } + return zone.equals(obj); + } + + /** + * Returns a string representation of this object. + * @return a string representation of this object. 
+ */ + public String toString() { + return "TimeZoneAdapter: " + zone.toString(); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Trie.java b/main/classes/core/src/com/ibm/icu/impl/Trie.java new file mode 100644 index 00000000000..65475eb657a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Trie.java @@ -0,0 +1,460 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; + +/** + *

    A trie is a kind of compressed, serializable table of values + * associated with Unicode code points (0..0x10ffff).

    + *

    This class defines the basic structure of a trie and provides methods + * to retrieve the offsets to the actual data.

    + *

    Data will be in the form of an array of basic types, char or int.

    + *

    The actual data format will have to be specified by the user in the + * inner static interface com.ibm.icu.impl.Trie.DataManipulate.

    + *

    This trie implementation is optimized for getting offsets while walking + * forward through a UTF-16 string. + * Therefore, the simplest and fastest access macros are the + * fromLead() and fromOffsetTrail() methods. + * The fromBMP() methods are a little more complicated; they get offsets even + * for lead surrogate codepoints, while the fromLead() methods get special + * "folded" offsets for lead surrogate code units if there is relevant data + * associated with them. + * From such a folded offset, an offset needs to be extracted to supply + * to the fromOffsetTrail() methods. + * To handle such supplementary codepoints, some offset information is kept + * in the data.

    + *

    Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve + * that offset from the folded value for the lead surrogate unit.

    + *

    For examples of use, see com.ibm.icu.impl.CharTrie or + * com.ibm.icu.impl.IntTrie.

    + * @author synwee + * @see com.ibm.icu.impl.CharTrie + * @see com.ibm.icu.impl.IntTrie + * @since release 2.1, Jan 01 2002 + */ +public abstract class Trie +{ + // public class declaration ---------------------------------------- + + /** + * Character data in com.ibm.impl.Trie have different user-specified format + * for different purposes. + * This interface specifies methods to be implemented in order for + * com.ibm.impl.Trie, to surrogate offset information encapsulated within + * the data. + */ + public static interface DataManipulate + { + /** + * Called by com.ibm.icu.impl.Trie to extract from a lead surrogate's + * data + * the index array offset of the indexes for that lead surrogate. + * @param value data value for a surrogate from the trie, including the + * folding offset + * @return data offset or 0 if there is no data for the lead surrogate + */ + public int getFoldingOffset(int value); + } + + // default implementation + private static class DefaultGetFoldingOffset implements DataManipulate { + public int getFoldingOffset(int value) { + return value; + } + } + + // public methods -------------------------------------------------- + + /** + * Determines if this trie has a linear latin 1 array + * @return true if this trie has a linear latin 1 array, false otherwise + */ + public final boolean isLatin1Linear() + { + return m_isLatin1Linear_; + } + + /** + * Checks if the argument Trie has the same data as this Trie. + * Attributes are checked but not the index data. 
+ * @param other Trie to check + * @return true if the argument Trie has the same data as this Trie, false + * otherwise + */ + ///CLOVER:OFF + public boolean equals(Object other) + { + if (other == this) { + return true; + } + if (!(other instanceof Trie)) { + return false; + } + Trie othertrie = (Trie)other; + return m_isLatin1Linear_ == othertrie.m_isLatin1Linear_ + && m_options_ == othertrie.m_options_ + && m_dataLength_ == othertrie.m_dataLength_ + && Arrays.equals(m_index_, othertrie.m_index_); + } + ///CLOVER:ON + + /** + * Gets the serialized data file size of the Trie. This is used during + * trie data reading for size checking purposes. + * @return size size of serialized trie data file in terms of the number + * of bytes + */ + public int getSerializedDataSize() + { + // includes signature, option, dataoffset and datalength output + int result = (4 << 2); + result += (m_dataOffset_ << 1); + if (isCharTrie()) { + result += (m_dataLength_ << 1); + } + else if (isIntTrie()) { + result += (m_dataLength_ << 2); + } + return result; + } + + // protected constructor ------------------------------------------- + + /** + * Trie constructor for CharTrie use. + * @param inputStream ICU data file input stream which contains the + * trie + * @param dataManipulate object containing the information to parse the + * trie data + * @throws IOException thrown when input stream does not have the + * right header. + */ + protected Trie(InputStream inputStream, + DataManipulate dataManipulate) throws IOException + { + DataInputStream input = new DataInputStream(inputStream); + // Magic number to authenticate the data. 
+ int signature = input.readInt(); + m_options_ = input.readInt(); + + if (!checkHeader(signature)) { + throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file"); + } + + if(dataManipulate != null) { + m_dataManipulate_ = dataManipulate; + } else { + m_dataManipulate_ = new DefaultGetFoldingOffset(); + } + m_isLatin1Linear_ = (m_options_ & + HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; + m_dataOffset_ = input.readInt(); + m_dataLength_ = input.readInt(); + unserialize(inputStream); + } + + /** + * Trie constructor + * @param index array to be used for index + * @param options used by the trie + * @param dataManipulate object containing the information to parse the + * trie data + */ + protected Trie(char index[], int options, DataManipulate dataManipulate) + { + m_options_ = options; + if(dataManipulate != null) { + m_dataManipulate_ = dataManipulate; + } else { + m_dataManipulate_ = new DefaultGetFoldingOffset(); + } + m_isLatin1Linear_ = (m_options_ & + HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; + m_index_ = index; + m_dataOffset_ = m_index_.length; + } + + + // protected data members ------------------------------------------ + + /** + * Lead surrogate code points' index displacement in the index array. + * 0x10000-0xd800=0x2800 + * 0x2800 >> INDEX_STAGE_1_SHIFT_ + */ + protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5; + /** + * Shift size for shifting right the input index. 1..9 + */ + protected static final int INDEX_STAGE_1_SHIFT_ = 5; + /** + * Shift size for shifting left the index array values. + * Increases possible data size with 16-bit index values at the cost + * of compactability. + * This requires blocks of stage 2 data to be aligned by + * DATA_GRANULARITY. + * 0..INDEX_STAGE_1_SHIFT + */ + protected static final int INDEX_STAGE_2_SHIFT_ = 2; + /** + * Number of data values in a stage 2 (data array) block. 
+ */ + protected static final int DATA_BLOCK_LENGTH=1<>INDEX_STAGE_1_SHIFT_ + */ + protected static final int SURROGATE_BLOCK_COUNT=(1<>INDEX_STAGE_1_SHIFT_; + /** + * Surrogate mask to use when shifting offset to retrieve supplementary + * values + */ + protected static final int SURROGATE_MASK_ = 0x3FF; + /** + * Index or UTF16 characters + */ + protected char m_index_[]; + /** + * Internal TrieValue which handles the parsing of the data value. + * This class is to be implemented by the user + */ + protected DataManipulate m_dataManipulate_; + /** + * Start index of the data portion of the trie. CharTrie combines + * index and data into a char array, so this is used to indicate the + * initial offset to the data portion. + * Note this index always points to the initial value. + */ + protected int m_dataOffset_; + /** + * Length of the data array + */ + protected int m_dataLength_; + + // protected methods ----------------------------------------------- + + /** + * Gets the offset to the data which the surrogate pair points to. + * @param lead lead surrogate + * @param trail trailing surrogate + * @return offset to data + */ + protected abstract int getSurrogateOffset(char lead, char trail); + + /** + * Gets the value at the argument index + * @param index value at index will be retrieved + * @return 32 bit value + */ + protected abstract int getValue(int index); + + /** + * Gets the default initial value + * @return 32 bit value + */ + protected abstract int getInitialValue(); + + /** + * Gets the offset to the data which the index ch after variable offset + * points to. + * Note for locating a non-supplementary character data offset, calling + *

    + * getRawOffset(0, ch); + *

    + * will do. Otherwise if it is a supplementary character formed by + * surrogates lead and trail. Then we would have to call getRawOffset() + * with getFoldingIndexOffset(). See getSurrogateOffset(). + * @param offset index offset which ch is to start from + * @param ch index to be used after offset + * @return offset to the data + */ + protected final int getRawOffset(int offset, char ch) + { + return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)] + << INDEX_STAGE_2_SHIFT_) + + (ch & INDEX_STAGE_3_MASK_); + } + + /** + * Gets the offset to data which the BMP character points to + * Treats a lead surrogate as a normal code point. + * @param ch BMP character + * @return offset to data + */ + protected final int getBMPOffset(char ch) + { + return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE + && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) + ? getRawOffset(LEAD_INDEX_OFFSET_, ch) + : getRawOffset(0, ch); + // using a getRawOffset(ch) makes no diff + } + + /** + * Gets the offset to the data which this lead surrogate character points + * to. + * Data at the returned offset may contain folding offset information for + * the next trailing surrogate character. + * @param ch lead surrogate character + * @return offset to data + */ + protected final int getLeadOffset(char ch) + { + return getRawOffset(0, ch); + } + + /** + * Internal trie getter from a code point. + * Could be faster(?) 
but longer with + * if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); } + * Gets the offset to data which the codepoint points to + * @param ch codepoint + * @return offset to data + */ + protected final int getCodePointOffset(int ch) + { + // if ((ch >> 16) == 0) slower + if (ch < 0) { + return -1; + } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { + // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works + return getRawOffset(0, (char)ch); + } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { + // BMP codepoint + return getBMPOffset((char)ch); + } else if (ch <= UCharacter.MAX_VALUE) { + // look at the construction of supplementary characters + // trail forms the ends of it. + return getSurrogateOffset(UTF16.getLeadSurrogate(ch), + (char)(ch & SURROGATE_MASK_)); + } else { + // return -1 if there is an error, in this case we return + return -1; + } + } + + /** + *

    Parses the input stream and creates the trie index with it.

    + *

    This is overwritten by the child classes. + * @param inputStream input stream containing the trie information + * @exception IOException thrown when data reading fails. + */ + protected void unserialize(InputStream inputStream) throws IOException + { + //indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_ + m_index_ = new char[m_dataOffset_]; + DataInputStream input = new DataInputStream(inputStream); + for (int i = 0; i < m_dataOffset_; i ++) { + m_index_[i] = input.readChar(); + } + } + + /** + * Determines if this is a 32 bit trie + * @return true if options specifies this is a 32 bit trie + */ + protected final boolean isIntTrie() + { + return (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_) != 0; + } + + /** + * Determines if this is a 16 bit trie + * @return true if this is a 16 bit trie + */ + protected final boolean isCharTrie() + { + return (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_) == 0; + } + + // private data members -------------------------------------------- + + // struct UTrieHeader { + // int32_t signature; + // int32_t options (a bit field) + // int32_t indexLength + // int32_t dataLength + + /** + * Size of Trie header in bytes + */ + protected static final int HEADER_LENGTH_ = 4 * 4; + /** + * Latin 1 option mask + */ + protected static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200; + /** + * Constant number to authenticate the byte block + */ + protected static final int HEADER_SIGNATURE_ = 0x54726965; + /** + * Header option formatting + */ + private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF; + protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4; + protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100; + + /** + * Flag indicator for Latin quick access data block + */ + private boolean m_isLatin1Linear_; + + /** + *

    Trie options field.

    + *

    options bit field:
    + * 9 1 = Latin-1 data is stored linearly at data + DATA_BLOCK_LENGTH
    + * 8 0 = 16-bit data, 1=32-bit data
    + * 7..4 INDEX_STAGE_2_SHIFT // 0..INDEX_STAGE_1_SHIFT
    + * 3..0 INDEX_STAGE_1_SHIFT // 1..9
    + */ + private int m_options_; + + // private methods --------------------------------------------------- + + /** + * Authenticates raw data header. + * Checking the header information, signature and options. + * @param signature This contains the options and type of a Trie + * @return true if the header is authenticated valid + */ + private final boolean checkHeader(int signature) + { + // check the signature + // Trie in big-endian US-ASCII (0x54726965). + // Magic number to authenticate the data. + if (signature != HEADER_SIGNATURE_) { + return false; + } + + if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) != + INDEX_STAGE_1_SHIFT_ || + ((m_options_ >> HEADER_OPTIONS_INDEX_SHIFT_) & + HEADER_OPTIONS_SHIFT_MASK_) + != INDEX_STAGE_2_SHIFT_) { + return false; + } + return true; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/main/classes/core/src/com/ibm/icu/impl/Trie2.java new file mode 100644 index 00000000000..858553335e7 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Trie2.java @@ -0,0 +1,1051 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.NoSuchElementException; + + +/** + * This is the interface and common implementation of a Unicode Trie2. + * It is a kind of compressed table that maps from Unicode code points (0..0x10ffff) + * to 16- or 32-bit integer values. It works best when there are ranges of + * characters with the same value, which is generally the case with Unicode + * character properties. + * + * This is the second common version of a Unicode trie (hence the name Trie2). 
+ * + */ +public abstract class Trie2 implements Iterable { + + + /** + * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * The serialized format is identical between ICU4C and ICU4J, so this function + * will work with serialized Trie2s from either. + * + * The actual type of the returned Trie2 will be either Trie2_16 or Trie2_32, depending + * on the width of the data. + * + * To obtain the width of the Trie2, check the actual class type of the returned Trie2. + * Or use the createFromSerialized() function of Trie2_16 or Trie2_32, which will + * return only Tries of their specific type/size. + * + * The serialized Trie2 on the stream may be in either little or big endian byte order. + * This allows using serialized Tries from ICU4C without needing to consider the + * byte order of the system that created them. + * + * @param is an input stream to the serialized form of a UTrie2. + * @return An unserialized Trie2, ready for use. + * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. + * @throws IOException if a read error occurs on the InputStream. + * + */ + public static Trie2 createFromSerialized(InputStream is) throws IOException { + // From ICU4C utrie2_impl.h + // * Trie2 data structure in serialized form: + // * + // * UTrie2Header header; + // * uint16_t index[header.index2Length]; + // * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] + // * @internal + // */ + // typedef struct UTrie2Header { + // /** "Tri2" in big-endian US-ASCII (0x54726932) */ + // uint32_t signature; + + // /** + // * options bit field: + // * 15.. 4 reserved (0) + // * 3.. 0 UTrie2ValueBits valueBits + // */ + // uint16_t options; + // + // /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ + // uint16_t indexLength; + // + // /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ + // uint16_t shiftedDataLength; + // + // /** Null index and data blocks, not shifted. 
*/ + // uint16_t index2NullOffset, dataNullOffset; + // + // /** + // * First code point of the single-value range ending with U+10ffff, + // * rounded up and then shifted right by UTRIE2_SHIFT_1. + // */ + // uint16_t shiftedHighStart; + // } UTrie2Header; + + DataInputStream dis = new DataInputStream(is); + boolean needByteSwap = false; + + UTrie2Header header = new UTrie2Header(); + + /* check the signature */ + header.signature = dis.readInt(); + switch (header.signature) { + case 0x54726932: + needByteSwap = false; + break; + case 0x32697254: + needByteSwap = true; + header.signature = Integer.reverseBytes(header.signature); + break; + default: + throw new IllegalArgumentException("Stream does not contain a serialized UTrie2"); + } + + header.options = swapShort(needByteSwap, dis.readUnsignedShort()); + header.indexLength = swapShort(needByteSwap, dis.readUnsignedShort()); + header.shiftedDataLength = swapShort(needByteSwap, dis.readUnsignedShort()); + header.index2NullOffset = swapShort(needByteSwap, dis.readUnsignedShort()); + header.dataNullOffset = swapShort(needByteSwap, dis.readUnsignedShort()); + header.shiftedHighStart = swapShort(needByteSwap, dis.readUnsignedShort()); + + // Trie2 data width - 0: 16 bits + // 1: 32 bits + if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) > 1) { + throw new IllegalArgumentException("UTrie2 serialized format error."); + } + ValueWidth width; + Trie2 This; + if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) == 0) { + width = ValueWidth.BITS_16; + This = new Trie2_16(); + } else { + width = ValueWidth.BITS_32; + This = new Trie2_32(); + } + This.header = header; + + /* get the length values and offsets */ + This.indexLength = header.indexLength; + This.dataLength = header.shiftedDataLength << UTRIE2_INDEX_SHIFT; + This.index2NullOffset = header.index2NullOffset; + This.dataNullOffset = header.dataNullOffset; + This.highStart = header.shiftedHighStart << UTRIE2_SHIFT_1; + This.highValueIndex = This.dataLength - 
UTRIE2_DATA_GRANULARITY; + if (width == ValueWidth.BITS_16) { + This.highValueIndex += This.indexLength; + } + + // Allocate the Trie2 index array. If the data width is 16 bits, the array also + // includes the space for the data. + + int indexArraySize = This.indexLength; + if (width == ValueWidth.BITS_16) { + indexArraySize += This.dataLength; + } + This.index = new char[indexArraySize]; + + /* Read in the index */ + int i; + for (i=0; i otherIter = OtherTrie.iterator(); + for (Trie2.Range rangeFromThis: this) { + if (otherIter.hasNext() == false) { + return false; + } + rangeFromOther = otherIter.next(); + if (!rangeFromThis.equals(rangeFromOther)) { + return false; + } + } + if (otherIter.hasNext()) { + return false; + } + + if (errorValue != OtherTrie.errorValue || + initialValue != OtherTrie.initialValue) { + return false; + } + + return true; + } + + + public int hashCode() { + if (fHash == 0) { + int hash = initHash(); + for (Range r: this) { + hash = hashInt(hash, r.hashCode()); + } + if (hash == 0) { + hash = 1; + } + fHash = hash; + } + return fHash; + } + + /** + * When iterating over the contents of a Trie2, Elements of this type are produced. + * The iterator will return one item for each contiguous range of codepoints having the same value. + * + * When iterating, the same Trie2EnumRange object will be reused and returned for each range. + * If you need to retain complete iteration results, clone each returned Trie2EnumRange, + * or save the range in some other way, before advancing to the next iteration step. + */ + public static class Range { + public int startCodePoint; + public int endCodePoint; // Inclusive. 
+ public int value; + public boolean leadSurrogate; + + public boolean equals(Object other) { + if (other == null || !(other.getClass().equals(getClass()))) { + return false; + } + Range tother = (Range)other; + return this.startCodePoint == tother.startCodePoint && + this.endCodePoint == tother.endCodePoint && + this.value == tother.value && + this.leadSurrogate == tother.leadSurrogate; + } + + + public int hashCode() { + int h = initHash(); + h = hashUChar32(h, startCodePoint); + h = hashUChar32(h, endCodePoint); + h = hashInt(h, value); + h = hashByte(h, leadSurrogate? 1: 0); + return h; + } + } + + + /** + * Create an iterator over the value ranges in this Trie2. + * Values from the Trie2 are not remapped or filtered, but are returned as they + * are stored in the Trie2. + * + * @return an Iterator + */ + public Iterator iterator() { + return iterator(defaultValueMapper); + } + + private static ValueMapper defaultValueMapper = new ValueMapper() { + public int map(int in) { + return in; + } + }; + + /** + * Create an iterator over the value ranges from this Trie2. + * Values from the Trie2 are passed through a caller-supplied remapping function, + * and it is the remapped values that determine the ranges that + * will be produced by the iterator. + * + * + * @param mapper provides a function to remap values obtained from the Trie2. + * @return an Iterator + */ + public Iterator iterator(ValueMapper mapper) { + return new Trie2Iterator(mapper); + } + + + /** + * Create an iterator over the Trie2 values for the 1024=0x400 code points + * corresponding to a given lead surrogate. + * For example, for the lead surrogate U+D87E it will enumerate the values + * for [U+2F800..U+2FC00[. + * Used by data builder code that sets special lead surrogate code unit values + * for optimized UTF-16 string processing. + * + * Do not modify the Trie2 during the iteration. + * + * Except for the limited code point range, this functions just like Trie2.iterator(). 
+ * + */ + public Iterator iteratorForLeadSurrogate(char lead, ValueMapper mapper) { + return new Trie2Iterator(lead, mapper); + } + + /** + * Create an iterator over the Trie2 values for the 1024=0x400 code points + * corresponding to a given lead surrogate. + * For example, for the lead surrogate U+D87E it will enumerate the values + * for [U+2F800..U+2FC00[. + * Used by data builder code that sets special lead surrogate code unit values + * for optimized UTF-16 string processing. + * + * Do not modify the Trie2 during the iteration. + * + * Except for the limited code point range, this functions just like Trie2.iterator(). + * + */ + public Iterator iteratorForLeadSurrogate(char lead) { + return new Trie2Iterator(lead, defaultValueMapper); + } + + /** + * When iterating over the contents of a Trie2, an instance of TrieValueMapper may + * be used to remap the values from the Trie2. The remapped values will be used + * both in determining the ranges of codepoints and as the value to be returned + * for each range. + * + * Example of use, with an anonymous subclass of TrieValueMapper: + * + * + * ValueMapper m = new ValueMapper() { + * int map(int in) {return in & 0x1f;}; + * } + * for (Iterator iter = trie.iterator(m); i.hasNext(); ) { + * Trie2EnumRange r = i.next(); + * ... // Do something with the range r. + * } + * + */ + public interface ValueMapper { + public int map(int originalVal); + } + + + /** + * Serialize a trie2 Header and Index onto an OutputStream. This is + * common code used for both the Trie2_16 and Trie2_32 serialize functions. + * @param dos the stream to which the serialized Trie2 data will be written. + * @return the number of bytes written. + */ + protected int serializeHeader(DataOutputStream dos) throws IOException { + // Write the header. It is already set and ready to use, having been + // created when the Trie2 was unserialized or when it was frozen. 
+ int bytesWritten = 0; + + dos.writeInt(header.signature); + dos.writeShort(header.options); + dos.writeShort(header.indexLength); + dos.writeShort(header.shiftedDataLength); + dos.writeShort(header.index2NullOffset); + dos.writeShort(header.dataNullOffset); + dos.writeShort(header.shiftedHighStart); + bytesWritten += 16; + + // Write the index + int i; + for (i=0; i< header.indexLength; i++) { + dos.writeChar(index[i]); + } + bytesWritten += header.indexLength; + return bytesWritten; + } + + + /** + * Struct-like class for holding the results returned by a UTrie2 CharSequence iterator. + * The iteration walks over a CharSequence, and for each Unicode code point therein + * returns the character and its associated Trie2 value. + */ + public static class CharSequenceValues { + /** string index of the current code point. */ + public int index; + /** The code point at index. */ + public int codePoint; + /** The Trie2 value for the current code point */ + public int value; + } + + + /** + * Create an iterator that will produce the values from the Trie2 for + * the sequence of code points in an input text. + * + * @param text A text string to be iterated over. + * @param index The starting iteration position within the input text. + * @return the CharSequenceIterator + */ + public CharSequenceIterator charSequenceIterator(CharSequence text, int index) { + return new CharSequenceIterator(text, index); + } + + // TODO: Survey usage of the equivalent of CharSequenceIterator in ICU4C + // and if there is none, remove it from here. + // Don't waste time testing and maintaining unused code. + + /** + * An iterator that operates over an input CharSequence, and for each Unicode code point + * in the input returns the associated value from the Trie2. + * + * The iterator can move forwards or backwards, and can be reset to an arbitrary index. + * + * Note that Trie2_16 and Trie2_32 subclass Trie2.CharSequenceIterator. This is done + * only for performance reasons. 
It does require that any changes made here be propagated + * into the corresponding code in the subclasses. + */ + public class CharSequenceIterator implements Iterator { + /** + * Internal constructor. + */ + CharSequenceIterator(CharSequence t, int index) { + text = t; + textLength = text.length(); + set(index); + } + + private CharSequence text; + private int textLength; + private int index; + private Trie2.CharSequenceValues fResults = new Trie2.CharSequenceValues(); + + + public void set(int i) { + if (i < 0 || i > textLength) { + throw new IndexOutOfBoundsException(); + } + index = i; + } + + + public final boolean hasNext() { + return index0; + } + + + public Trie2.CharSequenceValues next() { + int c = Character.codePointAt(text, index); + int val = get(c); + + fResults.index = index; + fResults.codePoint = c; + fResults.value = val; + index++; + if (c >= 0x10000) { + index++; + } + return fResults; + } + + + public Trie2.CharSequenceValues previous() { + int c = Character.codePointBefore(text, index); + int val = get(c); + index--; + if (c >= 0x10000) { + index--; + } + fResults.index = index; + fResults.codePoint = c; + fResults.value = val; + return fResults; + } + + /** + * Iterator.remove() is not supported by Trie2.CharSequenceIterator. + * @throws UnsupportedOperationException Always thrown because this operation is not supported + * @see java.util.Iterator#remove() + */ + public void remove() { + throw new UnsupportedOperationException("Trie2.CharSequenceIterator does not support remove()."); + } + } + + + //-------------------------------------------------------------------------------- + // + // Below this point are internal implementation items. No further public API. + // + //-------------------------------------------------------------------------------- + + + /** + * Selectors for the width of a UTrie2 data value. 
+ */ + enum ValueWidth { + BITS_16, + BITS_32 + } + + /** + * Trie2 data structure in serialized form: + * + * UTrie2Header header; + * uint16_t index[header.index2Length]; + * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] + * + * For Java, this is read from the stream into an instance of UTrie2Header. + * (The C version just places a struct over the raw serialized data.) + * + * @internal + */ + static class UTrie2Header { + /** "Tri2" in big-endian US-ASCII (0x54726932) */ + int signature; + + /** + * options bit field (uint16_t): + * 15.. 4 reserved (0) + * 3.. 0 UTrie2ValueBits valueBits + */ + int options; + + /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH (uint16_t) */ + int indexLength; + + /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT (uint16_t) */ + int shiftedDataLength; + + /** Null index and data blocks, not shifted. (uint16_t) */ + int index2NullOffset, dataNullOffset; + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UTRIE2_SHIFT_1. (uint16_t) + */ + int shiftedHighStart; + } + + // + // Data members of UTrie2. + // + UTrie2Header header; + char index[]; // Index array. Includes data for 16 bit Tries. + int data16; // Offset to data portion of the index array, if 16 bit data. + // zero if 32 bit data. + int data32[]; // NULL if 16b data is used via index + + int indexLength; + int dataLength; + int index2NullOffset; // 0xffff if there is no dedicated index-2 null block + int initialValue; + + /** Value returned for out-of-range code points and illegal UTF-8. */ + int errorValue; + + /* Start of the last range which ends at U+10ffff, and its value. */ + int highStart; + int highValueIndex; + + int dataNullOffset; + + int fHash; // Zero if not yet computed. + // Shared by Trie2Writable, Trie2_16, Trie2_32. + // Thread safety: if two racing threads compute + // the same hash on a frozen Trie2, no damage is done. 
+ + + /** + * Trie2 constants, defining shift widths, index array lengths, etc. + * + * These are needed for the runtime macros but users can treat these as + * implementation details and skip to the actual public API further below. + */ + + static final int UTRIE2_OPTIONS_VALUE_BITS_MASK=0x000f; + + + /** Shift size for getting the index-1 table offset. */ + static final int UTRIE2_SHIFT_1=6+5; + + /** Shift size for getting the index-2 table offset. */ + static final int UTRIE2_SHIFT_2=5; + + /** + * Difference between the two shift sizes, + * for getting an index-1 offset from an index-2 offset. 6=11-5 + */ + static final int UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2; + + /** + * Number of index-1 entries for the BMP. 32=0x20 + * This part of the index-1 table is omitted from the serialized form. + */ + static final int UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1; + + /** Number of code points per index-1 table entry. 2048=0x800 */ + static final int UTRIE2_CP_PER_INDEX_1_ENTRY=1<>UTRIE2_SHIFT_2. + */ + static final int UTRIE2_INDEX_2_OFFSET=0; + + /** + * The part of the index-2 table for U+D800..U+DBFF stores values for + * lead surrogate code _units_ not code _points_. + * Values for lead surrogate code _points_ are indexed with this portion of the table. + * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.) + */ + static final int UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2; + static final int UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2; + + /** Count the lengths of both BMP pieces. 2080=0x820 */ + static final int UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH; + + /** + * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. + * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2. 
+ */ + static final int UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH; + static final int UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6; /* U+0800 is the first code point after 2-byte UTF-8 */ + + /** + * The index-1 table, only used for supplementary code points, at offset 2112=0x840. + * Variable length, for code points up to highStart, where the last single-value range starts. + * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) + * + * The part of the index-2 table for supplementary code points starts + * after this index-1 table. + * + * Both the index-1 table and the following part of the index-2 table + * are omitted completely if there is only BMP data. + */ + static final int UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH; + static final int UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1; + + /* + * Fixed layout of the first part of the data array. ----------------------- + * Starts with 4 blocks (128=0x80 entries) for ASCII. + */ + + /** + * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. + * Used with linear access for single bytes 0..0xbf for simple error handling. + * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. + */ + static final int UTRIE2_BAD_UTF8_DATA_OFFSET=0x80; + + /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */ + static final int UTRIE2_DATA_START_OFFSET=0xc0; + + /* Building a Trie2 ---------------------------------------------------------- */ + + /* + * These definitions are mostly needed by utrie2_builder.c, but also by + * utrie2_get32() and utrie2_enum(). + */ + + /* + * At build time, leave a gap in the index-2 table, + * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table + * and the supplementary index-1 table. + * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. 
+ */ + static final int UNEWTRIE2_INDEX_GAP_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH; + static final int UNEWTRIE2_INDEX_GAP_LENGTH = + ((UTRIE2_UTF8_2B_INDEX_2_LENGTH + UTRIE2_MAX_INDEX_1_LENGTH) + UTRIE2_INDEX_2_MASK) & + ~UTRIE2_INDEX_2_MASK; + + /** + * Maximum length of the build-time index-2 array. + * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, + * plus the part of the index-2 table for lead surrogate code points, + * plus the build-time index gap, + * plus the null index-2 block. + */ + static final int UNEWTRIE2_MAX_INDEX_2_LENGTH= + (0x110000>>UTRIE2_SHIFT_2)+ + UTRIE2_LSCP_INDEX_2_LENGTH+ + UNEWTRIE2_INDEX_GAP_LENGTH+ + UTRIE2_INDEX_2_BLOCK_LENGTH; + + static final int UNEWTRIE2_INDEX_1_LENGTH = 0x110000>>UTRIE2_SHIFT_1; + + /** + * Maximum length of the build-time data array. + * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block, + * plus values for the 0x400 surrogate code units. + */ + static final int UNEWTRIE2_MAX_DATA_LENGTH = (0x110000+0x40+0x40+0x400); + + + + /** + * Implementation class for an iterator over a Trie2. + * + * Iteration over a Trie2 first returns all of the ranges that are indexed by code points, + * then returns the special alternate values for the lead surrogates + * + * @internal + */ + class Trie2Iterator implements Iterator { + // The normal constructor that configures the iterator to cover the complete + // contents of the Trie2 + Trie2Iterator(ValueMapper vm) { + mapper = vm; + nextStart = 0; + limitCP = 0x110000; + doLeadSurrogates = true; + } + + // An alternate constructor that configures the iterator to cover only the + // code points corresponding to a particular Lead Surrogate value. 
+ Trie2Iterator(char leadSurrogate, ValueMapper vm) { + if (leadSurrogate < 0xd800 || leadSurrogate > 0xdbff) { + throw new IllegalArgumentException("Bad lead surrogate value."); + } + mapper = vm; + nextStart = (leadSurrogate - 0xd7c0) << 10; + limitCP = nextStart + 0x400; + doLeadSurrogates = false; // Do not iterate over lead the special lead surrogate + // values after completing iteration over code points. + } + + /** + * The main next() function for Trie2 iterators + * + */ + public Range next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + if (nextStart >= limitCP) { + // Switch over from iterating normal code point values to + // doing the alternate lead-surrogate values. + doingCodePoints = false; + nextStart = 0xd800; + } + int endOfRange = 0; + int val = 0; + int mappedVal = 0; + + if (doingCodePoints) { + // Iteration over code point values. + val = get(nextStart); + mappedVal = mapper.map(val); + endOfRange = rangeEnd(nextStart, limitCP, val); + // Loop once for each range in the Trie2 with the same raw (unmapped) value. + // Loop continues so long as the mapped values are the same. + for (;;) { + if (endOfRange >= limitCP-1) { + break; + } + val = get(endOfRange+1); + if (mapper.map(val) != mappedVal) { + break; + } + endOfRange = rangeEnd(endOfRange+1, limitCP, val); + } + } else { + // Iteration over the alternate lead surrogate values. + val = getFromU16SingleLead((char)nextStart); + mappedVal = mapper.map(val); + endOfRange = rangeEndLS((char)nextStart); + // Loop once for each range in the Trie2 with the same raw (unmapped) value. + // Loop continues so long as the mapped values are the same. 
+ for (;;) { + if (endOfRange >= 0xdbff) { + break; + } + val = getFromU16SingleLead((char)(endOfRange+1)); + if (mapper.map(val) != mappedVal) { + break; + } + endOfRange = rangeEndLS((char)(endOfRange+1)); + } + } + returnValue.startCodePoint = nextStart; + returnValue.endCodePoint = endOfRange; + returnValue.value = mappedVal; + returnValue.leadSurrogate = !doingCodePoints; + nextStart = endOfRange+1; + return returnValue; + } + + /** + * + */ + public boolean hasNext() { + return doingCodePoints && (doLeadSurrogates || nextStart < limitCP) || nextStart < 0xdc00; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + + /** + * Find the last lead surrogate in a contiguous range with the + * same Trie2 value as the input character. + * + * Use the alternate Lead Surrogate values from the Trie2, + * not the code-point values. + * + * Note: Trie2_16 and Trie2_32 override this implementation with optimized versions, + * meaning that the implementation here is only being used with + * Trie2Writable. The code here is logically correct with any type + * of Trie2, however. + * + * @param c The character to begin with. + * @return The last contiguous character with the same value. + */ + private int rangeEndLS(char startingLS) { + if (startingLS >= 0xdbff) { + return 0xdbff; + } + + int c; + int val = getFromU16SingleLead(startingLS); + for (c = startingLS+1; c <= 0x0dbff; c++) { + if (getFromU16SingleLead((char)c) != val) { + break; + } + } + return c-1; + } + + // + // Iteration State Variables + // + private ValueMapper mapper; + private Range returnValue = new Range(); + // The starting code point for the next range to be returned. + private int nextStart; + // The upper limit for the last normal range to be returned. Normally 0x110000, but + // may be lower when iterating over the code points for a single lead surrogate. + private int limitCP; + + // True while iterating over the the Trie2 values for code points. 
+ // False while iterating over the alternate values for lead surrogates. + private boolean doingCodePoints = true; + + // True if the iterator should iterate the special values for lead surrogates in + // addition to the normal values for code points. + private boolean doLeadSurrogates = true; + } + + /** + * Find the last character in a contiguous range of characters with the + * same Trie2 value as the input character. + * + * @param c The character to begin with. + * @return The last contiguous character with the same value. + */ + int rangeEnd(int start, int limitp, int val) { + int c; + int limit = Math.min(highStart, limitp); + + for (c = start+1; c < limit; c++) { + if (get(c) != val) { + break; + } + } + if (c >= highStart) { + c = limitp; + } + return c - 1; + } + + + // + // Hashing implementation functions. FNV hash. Respected public domain algorithm. + // + private static int initHash() { + return 0x811c9DC5; // unsigned 2166136261 + } + + private static int hashByte(int h, int b) { + h = h * 16777619; + h = h ^ b; + return h; + } + + private static int hashUChar32(int h, int c) { + h = Trie2.hashByte(h, c & 255); + h = Trie2.hashByte(h, (c>>8) & 255); + h = Trie2.hashByte(h, c>>16); + return h; + } + + private static int hashInt(int h, int i) { + h = Trie2.hashByte(h, i & 255); + h = Trie2.hashByte(h, (i>>8) & 255); + h = Trie2.hashByte(h, (i>>16) & 255); + h = Trie2.hashByte(h, (i>>24) & 255); + return h; + } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java b/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java new file mode 100644 index 00000000000..989c907bd1a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Trie2Writable.java @@ -0,0 +1,1217 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +/** + * @author aheninger + * + * A Trie2Writable is a modifiable, or build-time Trie2. + * Functions for reading data from the Trie are all from class Trie2. + * + */ +public class Trie2Writable extends Trie2 { + + + /** + * Create a new, empty, writable Trie2. 32-bit data values are used. + * + * @param initialValueP the initial value that is set for all code points + * @param errorValueP the value for out-of-range code points and illegal UTF-8 + */ + public Trie2Writable(int initialValueP, int errorValueP) { + // This constructor corresponds to utrie2_open() in ICU4C. + init(initialValueP, errorValueP); + } + + + private void init(int initialValueP, int errorValueP) { + this.initialValue = initialValueP; + this.errorValue = errorValueP; + this.highStart = 0x110000; + + this.data = new int[UNEWTRIE2_INITIAL_DATA_LENGTH]; + this.dataCapacity = UNEWTRIE2_INITIAL_DATA_LENGTH; + this.initialValue = initialValueP; + this.errorValue = errorValueP; + this.highStart = 0x110000; + this.firstFreeBlock = 0; /* no free block in the list */ + this.isCompacted = false; + + /* + * preallocate and reset + * - ASCII + * - the bad-UTF-8-data block + * - the null data block + */ + int i, j; + for(i=0; i<0x80; ++i) { + data[i] = initialValue; + } + for(; i<0xc0; ++i) { + data[i] = errorValue; + } + for(i=UNEWTRIE2_DATA_NULL_OFFSET; i>UTRIE2_SHIFT_2 ASCII data blocks */ + for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + index2[i]=j; + map[i]=1; + } + + /* reference counts for the bad-UTF-8-data block */ + for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + map[i]=0; + } + + /* + * Reference counts for the null data block: all blocks except for the ASCII blocks. + * Plus 1 so that we don't drop this block during compaction. + * Plus as many as needed for lead surrogate code points. 
+ */ + /* i==newTrie->dataNullOffset */ + map[i++] = + (0x110000>>UTRIE2_SHIFT_2) - + (0x80>>UTRIE2_SHIFT_2) + + 1 + + UTRIE2_LSCP_INDEX_2_LENGTH; + j += UTRIE2_DATA_BLOCK_LENGTH; + for(; j>UTRIE2_SHIFT_2; i>UTRIE2_SHIFT_2))+ + (c>>UTRIE2_SHIFT_2); + } else { + i2=index1[c>>UTRIE2_SHIFT_1]+ + ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); + } + block=index2[i2]; + return (block==dataNullOffset); + } + + private int allocIndex2Block() { + int newBlock, newTop; + + newBlock=index2Length; + newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; + if(newTop > index2.length) { + throw new IllegalStateException("Internal error in Trie2 creation."); + /* + * Should never occur. + * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, + * or the code writes more values than should be possible. + */ + } + index2Length=newTop; + System.arraycopy(index2, index2NullOffset, index2, newBlock, UTRIE2_INDEX_2_BLOCK_LENGTH); + return newBlock; + } + + private int getIndex2Block(int c, boolean forLSCP) { + int i1, i2; + + if(c>=0xd800 && c<0xdc00 && forLSCP) { + return UTRIE2_LSCP_INDEX_2_OFFSET; + } + + i1=c>>UTRIE2_SHIFT_1; + i2=index1[i1]; + if(i2==index2NullOffset) { + i2=allocIndex2Block(); + index1[i1]=i2; + } + return i2; + } + + private int allocDataBlock(int copyBlock) { + int newBlock, newTop; + + if(firstFreeBlock!=0) { + /* get the first free block */ + newBlock=firstFreeBlock; + firstFreeBlock=-map[newBlock>>UTRIE2_SHIFT_2]; + } else { + /* get a new block from the high end */ + newBlock=dataLength; + newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; + if(newTop>dataCapacity) { + /* out of memory in the data array */ + int capacity; + int[] newData; + + if(dataCapacity>UTRIE2_SHIFT_2]=0; + return newBlock; + } + + + /* call when the block's reference counter reaches 0 */ + private void releaseDataBlock(int block) { + /* put this block at the front of the free-block chain */ + map[block>>UTRIE2_SHIFT_2]=-firstFreeBlock; + firstFreeBlock=block; + } + + + private boolean isWritableBlock(int 
block) { + return (block!=dataNullOffset && 1==map[block>>UTRIE2_SHIFT_2]); + } + + private void setIndex2Entry(int i2, int block) { + int oldBlock; + ++map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ + oldBlock=index2[i2]; + if(0 == --map[oldBlock>>UTRIE2_SHIFT_2]) { + releaseDataBlock(oldBlock); + } + index2[i2]=block; + } + + + /** + * No error checking for illegal arguments. + * + * @internal + */ + private int getDataBlock(int c, boolean forLSCP) { + int i2, oldBlock, newBlock; + + i2=getIndex2Block(c, forLSCP); + + i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + oldBlock=index2[i2]; + if(isWritableBlock(oldBlock)) { + return oldBlock; + } + + /* allocate a new data block */ + newBlock=allocDataBlock(oldBlock); + setIndex2Entry(i2, newBlock); + return newBlock; + } + /** + * Set a value for a code point. + * + * @param c the code point + * @param value the value + */ + public Trie2Writable set(int c, int value) { + if (c<0 || c>0x10ffff) { + throw new IllegalArgumentException("Invalid code point."); + } + set(c, true, value); + fHash = 0; + return this; + } + + private Trie2Writable set(int c, boolean forLSCP, int value) { + int block; + if (isCompacted) { + uncompact(); + } + block = getDataBlock(c, forLSCP); + data[block + (c&UTRIE2_DATA_MASK)] = value; + return this; + } + + + /* + * Uncompact a compacted Trie2Writable. + * This is needed if the Trie2Writable was compacted in preparation for creating a read-only + * Trie2, and then is subsequently altered. + * + * The structure is a bit awkward - it would be cleaner to leave the original + * Trie2 unaltered - but compacting in place was taken directly from the ICU4C code. + * + * The approach is to create a new (uncompacted) Trie2Writable from this one, then transfer + * the guts from the new to the old. 
+ */ + private void uncompact() { + Trie2Writable tempTrie = new Trie2Writable(this); + + // Members from Trie2Writable + this.index1 = tempTrie.index1; + this.index2 = tempTrie.index2; + this.data = tempTrie.data; + this.index2Length = tempTrie.index2Length; + this.dataCapacity = tempTrie.dataCapacity; + this.isCompacted = tempTrie.isCompacted; + + // Members From Trie2 + this.header = tempTrie.header; + this.index = tempTrie.index; + this.data16 = tempTrie.data16; + this.data32 = tempTrie.data32; + this.indexLength = tempTrie.indexLength; + this.dataLength = tempTrie.dataLength; + this.index2NullOffset = tempTrie.index2NullOffset; + this.initialValue = tempTrie.initialValue; + this.errorValue = tempTrie.errorValue; + this.highStart = tempTrie.highStart; + this.highValueIndex = tempTrie.highValueIndex; + this.dataNullOffset = tempTrie.dataNullOffset; + } + + + private void writeBlock(int block, int value) { + int limit=block+UTRIE2_DATA_BLOCK_LENGTH; + while(block0x10ffff || start<0 || end>0x10ffff || end<0 || start>end) { + throw new IllegalArgumentException("Invalid code point range."); + } + if(!overwrite && value==initialValue) { + return this; /* nothing to do */ + } + fHash = 0; + if(isCompacted) { + this.uncompact(); + } + + limit=end+1; + if((start&UTRIE2_DATA_MASK) != 0) { + int /*UChar32*/ nextStart; + + /* set partial block at [start..following block boundary[ */ + block=getDataBlock(start, true); + + nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK; + if(nextStart<=limit) { + fillBlock(block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, + value, initialValue, overwrite); + start=nextStart; + } else { + fillBlock(block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, + value, initialValue, overwrite); + return this; + } + } + + /* number of positions in the last, partial block */ + rest=limit&UTRIE2_DATA_MASK; + + /* round down limit to a block boundary */ + limit&=~UTRIE2_DATA_MASK; + + /* iterate over all-value blocks */ + 
if(value==initialValue) { + repeatBlock=dataNullOffset; + } else { + repeatBlock=-1; + } + + while(start>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + block=index2[i2]; + if(isWritableBlock(block)) { + /* already allocated */ + if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { + /* + * We overwrite all values, and it's not a + * protected (ASCII-linear or 2-byte UTF-8) block: + * replace with the repeatBlock. + */ + setRepeatBlock=true; + } else { + /* !overwrite, or protected block: just write the values into this block */ + fillBlock(block, + 0, UTRIE2_DATA_BLOCK_LENGTH, + value, initialValue, overwrite); + } + } else if(data[block]!=value && (overwrite || block==dataNullOffset)) { + /* + * Set the repeatBlock instead of the null block or previous repeat block: + * + * If !isWritableBlock() then all entries in the block have the same value + * because it's the null block or a range block (the repeatBlock from a previous + * call to utrie2_setRange32()). + * No other blocks are used multiple times before compacting. + * + * The null block is the only non-writable block with the initialValue because + * of the repeatBlock initialization above. (If value==initialValue, then + * the repeatBlock will be the null data block.) + * + * We set our repeatBlock if the desired value differs from the block's value, + * and if we overwrite any data or if the data is all initial values + * (which is the same as the block being the null block, see above). 
+ */ + setRepeatBlock=true; + } + if(setRepeatBlock) { + if(repeatBlock>=0) { + setIndex2Entry(i2, repeatBlock); + } else { + /* create and set and fill the repeatBlock */ + repeatBlock=getDataBlock(start, true); + writeBlock(repeatBlock, value); + } + } + + start+=UTRIE2_DATA_BLOCK_LENGTH; + } + + if(rest>0) { + /* set partial block at [last block boundary..limit[ */ + block=getDataBlock(start, true); + fillBlock(block, 0, rest, value, initialValue, overwrite); + } + + return this; + } + + /** + * Set the values from a Trie2.Range. + * + * All code points within the range will get the value if + * overwrite is TRUE or if the old value is the initial value. + * + * Ranges with the lead surrogate flag set will set the alternate + * lead-surrogate values in the Trie, rather than the code point values. + * + * This function is intended to work with the ranges produced when iterating + * the contents of a source Trie. + * + * @param range contains the range of code points and the value to be set. + * @param overwrite flag for whether old non-initial values are to be overwritten + */ + public Trie2Writable setRange(Trie2.Range range, boolean overwrite) { + fHash = 0; + if (range.leadSurrogate) { + for (int c=range.startCodePoint; c<=range.endCodePoint; c++) { + if (overwrite || getFromU16SingleLead((char)c) == this.initialValue) { + setForLeadSurrogateCodeUnit((char)c, range.value); + } + } + } else { + setRange(range.startCodePoint, range.endCodePoint, range.value, overwrite); + } + return this; + } + + /** + * Set a value for a UTF-16 code unit. + * Note that a Trie2 stores separate values for + * supplementary code points in the lead surrogate range + * (accessed via the plain set() and get() interfaces) + * and for lead surrogate code units. + * + * The lead surrogate code unit values are set via this function and + * read by the function getFromU16SingleLead(). 
+ * + * For code units outside of the lead surrogate range, this function + * behaves identically to set(). + * + * @param codeUnit A UTF-16 code unit. + * @param value the value to be stored in the Trie2. + */ + public Trie2Writable setForLeadSurrogateCodeUnit(char codeUnit, int value) { + fHash = 0; + set(codeUnit, false, value); + return this; + } + + + /** + * Get the value for a code point as stored in the Trie2. + * + * @param codePoint the code point + * @return the value + */ + @Override + public int get(int codePoint) { + if (codePoint<0 || codePoint>0x10ffff) { + return errorValue; + } else { + return get(codePoint, true); + } + } + + + private int get(int c, boolean fromLSCP) { + int i2, block; + + if(c>=highStart && (!(c>=0xd800 && c<0xdc00) || fromLSCP)) { + return data[dataLength-UTRIE2_DATA_GRANULARITY]; + } + + if((c>=0xd800 && c<0xdc00) && fromLSCP) { + i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ + (c>>UTRIE2_SHIFT_2); + } else { + i2=index1[c>>UTRIE2_SHIFT_1]+ + ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); + } + block=index2[i2]; + return data[block+(c&UTRIE2_DATA_MASK)]; + } + + /** + * Get a trie value for a UTF-16 code unit. + * + * This function returns the same value as get() if the input + * character is outside of the lead surrogate range + * + * There are two values stored in a Trie for inputs in the lead + * surrogate range. This function returns the alternate value, + * while Trie2.get() returns the main value. + * + * @param c the code point or lead surrogate value. 
+ * @return the value + */ + @Override + public int getFromU16SingleLead(char c) { + return get(c, false); + } + + /* compaction --------------------------------------------------------------- */ + + private boolean equal_int(int[] a, int s, int t, int length) { + for (int i=0; i0) { + i2Block=index1[--i1]; + if(i2Block==prevI2Block) { + /* the index-2 block is the same as the previous one, and filled with highValue */ + c-=UTRIE2_CP_PER_INDEX_1_ENTRY; + continue; + } + prevI2Block=i2Block; + if(i2Block==index2NullOffset) { + /* this is the null index-2 block */ + if(highValue!=initialValue) { + return c; + } + c-=UTRIE2_CP_PER_INDEX_1_ENTRY; + } else { + /* enumerate data blocks for one index-2 block */ + for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { + block=index2[i2Block+ --i2]; + if(block==prevBlock) { + /* the block is the same as the previous one, and filled with highValue */ + c-=UTRIE2_DATA_BLOCK_LENGTH; + continue; + } + prevBlock=block; + if(block==dataNullOffset) { + /* this is the null data block */ + if(highValue!=initialValue) { + return c; + } + c-=UTRIE2_DATA_BLOCK_LENGTH; + } else { + for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { + value=data[block+ --j]; + if(value!=highValue) { + return c; + } + --c; + } + } + } + } + } + + /* deliver last range */ + return 0; + } + + /* + * Compact a build-time trie. 
+ * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps adjacent blocks as much as possible (if overlap==TRUE) + * - moves blocks in steps of the data granularity + * - moves and overlaps blocks that overlap with multiple values in the overlap region + * + * It does not + * - try to move and overlap blocks that are not already adjacent + */ + private void compactData() { + int start, newStart, movedStart; + int blockLength, overlap; + int i, mapIndex, blockCount; + + /* do not compact linear-ASCII data */ + newStart=UTRIE2_DATA_START_OFFSET; + for(start=0, i=0; start>UTRIE2_SHIFT_2; + for(start=newStart; start>UTRIE2_SHIFT_2]<=0) { + /* advance start to the next block */ + start+=blockLength; + + /* leave newStart with the previous block! */ + continue; + } + + /* search for an identical block */ + movedStart=findSameDataBlock(newStart, start, blockLength); + if(movedStart >= 0) { + /* found an identical block, set the other block's index value for the current block */ + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + map[mapIndex++]=movedStart; + movedStart+=UTRIE2_DATA_BLOCK_LENGTH; + } + + /* advance start to the next block */ + start+=blockLength; + + /* leave newStart with the previous block! 
*/ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ + for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; + overlap>0 && !equal_int(data, (newStart-overlap), start, overlap); + overlap-=UTRIE2_DATA_GRANULARITY) {} + + if(overlap>0 || newStart>UTRIE2_SHIFT_2; i>0; --i) { + map[mapIndex++]=movedStart; + movedStart+=UTRIE2_DATA_BLOCK_LENGTH; + } + + /* move the non-overlapping indexes to their new positions */ + start+=overlap; + for(i=blockLength-overlap; i>0; --i) { + data[newStart++]=data[start++]; + } + } else /* no overlap && newStart==start */ { + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + map[mapIndex++]=start; + start+=UTRIE2_DATA_BLOCK_LENGTH; + } + newStart=start; + } + } + + /* now adjust the index-2 table */ + for(i=0; i>UTRIE2_SHIFT_2]; + } + dataNullOffset=map[dataNullOffset>>UTRIE2_SHIFT_2]; + + /* ensure dataLength alignment */ + while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { + data[newStart++]=initialValue; + } + + if (UTRIE2_DEBUG) { + /* we saved some space */ + System.out.printf("compacting UTrie2: count of 32-bit data words %d->%d\n", + dataLength, newStart); + } + + dataLength=newStart; + } + + private void compactIndex2() { + int i, start, newStart, movedStart, overlap; + + /* do not compact linear-BMP index-2 blocks */ + newStart=UTRIE2_INDEX_2_BMP_LENGTH; + for(start=0, i=0; start>UTRIE2_SHIFT_1); + + for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start=0 + ) { + /* found an identical block, set the other block's index value for the current block */ + map[start>>UTRIE2_SHIFT_1_2]=movedStart; + + /* advance start to the next block */ + start+=UTRIE2_INDEX_2_BLOCK_LENGTH; + + /* leave newStart with the previous block! 
*/ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + /* look for maximum overlap with the previous, adjacent block */ + for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; + overlap>0 && !equal_int(index2, newStart-overlap, start, overlap); + --overlap) {} + + if(overlap>0 || newStart>UTRIE2_SHIFT_1_2]=newStart-overlap; + + /* move the non-overlapping indexes to their new positions */ + start+=overlap; + for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { + index2[newStart++]=index2[start++]; + } + } else /* no overlap && newStart==start */ { + map[start>>UTRIE2_SHIFT_1_2]=start; + start+=UTRIE2_INDEX_2_BLOCK_LENGTH; + newStart=start; + } + } + + /* now adjust the index-1 table */ + for(i=0; i>UTRIE2_SHIFT_1_2]; + } + index2NullOffset=map[index2NullOffset>>UTRIE2_SHIFT_1_2]; + + /* + * Ensure data table alignment: + * Needs to be granularity-aligned for 16-bit trie + * (so that dataMove will be down-shiftable), + * and 2-aligned for uint32_t data. + */ + while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { + /* Arbitrary value: 0x3fffc not possible for real data. */ + index2[newStart++]=0x0000ffff<%d\n", + index2Length, newStart); + } + + index2Length=newStart; + } + + private void compactTrie() { + int localHighStart; + int suppHighStart; + int highValue; + + /* find highStart and round it up */ + highValue=get(0x10ffff); + localHighStart=findHighStart(highValue); + localHighStart=(localHighStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); + if(localHighStart==0x110000) { + highValue=errorValue; + } + + /* + * Set trie->highStart only after utrie2_get32(trie, highStart). + * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. 
+ */ + this.highStart=localHighStart; + + if (UTRIE2_DEBUG) { + System.out.printf("UTrie2: highStart U+%04x highValue 0x%x initialValue 0x%x\n", + highStart, highValue, initialValue); + } + + if(highStart<0x110000) { + /* Blank out [highStart..10ffff] to release associated data blocks. */ + suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; + setRange(suppHighStart, 0x10ffff, initialValue, true); + } + + compactData(); + if(highStart>0x10000) { + compactIndex2(); + } else { + if (UTRIE2_DEBUG) { + System.out.printf("UTrie2: highStart U+%04x count of 16-bit index-2 words %d->%d\n", + highStart, index2Length, UTRIE2_INDEX_1_OFFSET); + } + } + + /* + * Store the highValue in the data array and round up the dataLength. + * Must be done after compactData() because that assumes that dataLength + * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. + */ + data[dataLength++]=highValue; + while((dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { + data[dataLength++]=initialValue; + } + + isCompacted=true; + } + + + /** + * Produce an optimized, read-only Trie2_16 from this writable Trie. + * The data values outside of the range that will fit in a 16 bit + * unsigned value will be truncated. + */ + public Trie2_16 toTrie2_16() { + Trie2_16 frozenTrie = new Trie2_16(); + freeze(frozenTrie, ValueWidth.BITS_16); + return frozenTrie; + } + + + /** + * Produce an optimized, read-only Trie2_32 from this writable Trie. + * + */ + public Trie2_32 toTrie2_32() { + Trie2_32 frozenTrie = new Trie2_32(); + freeze(frozenTrie, ValueWidth.BITS_32); + return frozenTrie; + } + + + /** + * Maximum length of the runtime index array. + * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. + * (The actual maximum length is lower, + * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) + */ + private static final int UTRIE2_MAX_INDEX_LENGTH = 0xffff; + + /** + * Maximum length of the runtime data array. 
+ * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, + * and by uint16_t UTrie2Header.shiftedDataLength. + */ + private static final int UTRIE2_MAX_DATA_LENGTH = 0xffff<0 if the data is moved to the end of the index array */ + + + /* compact if necessary */ + if(!isCompacted) { + compactTrie(); + } + + if(highStart<=0x10000) { + allIndexesLength=UTRIE2_INDEX_1_OFFSET; + } else { + allIndexesLength=index2Length; + } + if(valueBits==ValueWidth.BITS_16) { + dataMove=allIndexesLength; + } else { + dataMove=0; + } + + /* are indexLength and dataLength within limits? */ + if( /* for unshifted indexLength */ + allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || + /* for unshifted dataNullOffset */ + (dataMove+dataNullOffset)>0xffff || + /* for unshifted 2-byte UTF-8 index-2 values */ + (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || + /* for shiftedDataLength */ + (dataMove+dataLength)>UTRIE2_MAX_DATA_LENGTH) { + throw new UnsupportedOperationException("Trie2 data is too large."); + } + + /* calculate the sizes of, and allocate, the index and data arrays */ + int indexLength = allIndexesLength; + if (valueBits==ValueWidth.BITS_16) { + indexLength += dataLength; + } else { + dest.data32 = new int[dataLength]; + } + dest.index = new char[indexLength]; + + dest.indexLength = allIndexesLength; + dest.dataLength = dataLength; + if(highStart<=0x10000) { + dest.index2NullOffset = 0xffff; + } else { + dest.index2NullOffset = UTRIE2_INDEX_2_OFFSET + index2NullOffset; + } + dest.initialValue = initialValue; + dest.errorValue = errorValue; + dest.highStart = highStart; + dest.highValueIndex = dataMove + dataLength - UTRIE2_DATA_GRANULARITY; + dest.dataNullOffset = (dataMove+dataNullOffset); + + // Create a header and set the its fields. + // (This is only used in the event that we serialize the Trie, but is + // convenient to do here.) 
+ dest.header = new Trie2.UTrie2Header(); + dest.header.signature = 0x54726932; /* "Tri2" */ + dest.header.options = valueBits==ValueWidth.BITS_16 ? 0 : 1; + dest.header.indexLength = dest.indexLength; + dest.header.shiftedDataLength = dest.dataLength>>UTRIE2_INDEX_SHIFT; + dest.header.index2NullOffset = dest.index2NullOffset; + dest.header.dataNullOffset = dest.dataNullOffset; + dest.header.shiftedHighStart = dest.highStart>>UTRIE2_SHIFT_1; + + + + /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ + int destIdx = 0; + for(i=0; i> UTRIE2_INDEX_SHIFT); + } + if (UTRIE2_DEBUG) { + System.out.println("\n\nIndex2 for BMP limit is " + Integer.toHexString(destIdx)); + } + + /* write UTF-8 2-byte index-2 values, not right-shifted */ + for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ + dest.index[destIdx++] = (char)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); + } + for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ + dest.index[destIdx++]=(char)(dataMove+index2[i<<(6-UTRIE2_SHIFT_2)]); + } + if (UTRIE2_DEBUG) { + System.out.println("Index2 for UTF-8 2byte values limit is " + Integer.toHexString(destIdx)); + } + + if(highStart>0x10000) { + int index1Length = (highStart-0x10000)>>UTRIE2_SHIFT_1; + int index2Offset = UTRIE2_INDEX_2_BMP_LENGTH + UTRIE2_UTF8_2B_INDEX_2_LENGTH + index1Length; + + /* write 16-bit index-1 values for supplementary code points */ + //p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; + for(i=0; i>UTRIE2_INDEX_SHIFT); + } + if (UTRIE2_DEBUG) { + System.out.println("Index 2 for supplementals, limit is " + Integer.toHexString(destIdx)); + } + } + + /* write the 16/32-bit data array */ + switch(valueBits) { + case BITS_16: + /* write 16-bit data values */ + assert(destIdx == dataMove); + dest.data16 = destIdx; + for(i=0; i0: reference counter (number of index-2 entries pointing here) + * <0: next free data block in free-block list + * + * While compacting: + * + * Map of adjusted indexes, used in compactData() and 
compactIndex2(). + * Maps from original indexes to new ones. + */ + private int[] map = new int[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; + + + private boolean UTRIE2_DEBUG = false; + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java b/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java new file mode 100644 index 00000000000..cf4666b6307 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java @@ -0,0 +1,255 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + + +/** + * @author aheninger + * + * A read-only Trie2, holding 16 bit data values. + * + * A Trie2 is a highly optimized data structure for mapping from Unicode + * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. + * + * See class Trie2 for descriptions of the API for accessing the contents of a trie. + * + * The fundamental data access methods are declared final in this class, with + * the intent that applications might gain a little extra performance, when compared + * with calling the same methods via the abstract UTrie2 base class. + */ +public final class Trie2_16 extends Trie2 { + + + /** + * Internal constructor, not for general use. + */ + Trie2_16() { + } + + + /** + * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * The serialized format is identical between ICU4C and ICU4J, so this function + * will work with serialized Trie2s from either. + * + * The serialized Trie2 on the stream may be in either little or big endian byte order. 
+ * This allows using serialized Tries from ICU4C without needing to consider the + * byte order of the system that created them. + * + * @param is an input stream to the serialized form of a UTrie2. + * @return An unserialized Trie_16, ready for use. + * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. + * @throws IOException if a read error occurs on the InputStream. + * @throws ClassCastException if the stream contains a serialized Trie2_32 + */ + public static Trie2_16 createFromSerialized(InputStream is) throws IOException { + return (Trie2_16) Trie2.createFromSerialized(is); + } + + /** + * Get the value for a code point as stored in the Trie2. + * + * @param codePoint the code point + * @return the value + */ + @Override + public final int get(int codePoint) { + int value; + int ix; + + if (codePoint >= 0) { + if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 16 bit data is stored in the index array itself. + ix = index[codePoint >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint <= 0xffff) { + // Lead Surrogate Code Point. A Separate index section is stored for + // lead surrogate code units and code points. + // The main index has the code unit data. + // For this function, we need the code point data. + // Note: this expression could be refactored for slightly improved efficiency, but + // surrogate code points will be so rare in practice that it's not worth it. + ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint < highStart) { + // Supplemental code point, use two-level lookup. 
+ ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); + ix = index[ix]; + ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; + ix = index[ix]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint <= 0x10ffff) { + value = index[highValueIndex]; + return value; + } + } + + // Fall through. The code point is outside of the legal range of 0..0x10ffff. + return errorValue; + } + + + /** + * Get a Trie2 value for a UTF-16 code unit. + * + * This function returns the same value as get() if the input + * character is outside of the lead surrogate range. + * + * There are two values stored in a Trie2 for inputs in the lead + * surrogate range. This function returns the alternate value, + * while Trie2.get() returns the main value. + * + * @param codeUnit a 16 bit code unit or lead surrogate value. + * @return the value + */ + @Override + public int getFromU16SingleLead(char codeUnit) { + int value; + int ix; + + // Because the input is a 16 bit char, we can skip the tests for it being in + // the BMP range. It is. + ix = index[codeUnit >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + + + /** + * Serialize a Trie2_16 onto an OutputStream. + * + * A Trie2 can be serialized multiple times. + * The serialized data is compatible with ICU4C UTrie2 serialization. + * Trie2 serialization is unrelated to Java object serialization. + * + * @param os the stream to which the serialized Trie2 data will be written. + * @return the number of bytes written. + * @throws IOException on an error writing to the OutputStream. 
+ */ + public int serialize(OutputStream os) throws IOException { + DataOutputStream dos = new DataOutputStream(os); + int bytesWritten = 0; + + bytesWritten += serializeHeader(dos); + for (int i=0; i= limit) { + break; + } + if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 16 bit data is stored in the index array itself. + index2Block = 0; + block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; + } else if (cp < 0xffff) { + // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 + index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; + block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; + } else if (cp < highStart) { + // Supplemental code point, use two-level lookup. + int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); + index2Block = index[ix]; + block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; + } else { + // Code point above highStart. + if (value == index[highValueIndex]) { + cp = limit; + } + break; + } + + if (index2Block == index2NullOffset) { + if (value != initialValue) { + break; + } + cp += UTRIE2_CP_PER_INDEX_1_ENTRY; + } else if (block == dataNullOffset) { + // The block at dataNullOffset has all values == initialValue. + // Because Trie2 iteration always proceeds in ascending order, we will always + // encounter a null block at its beginning, and can skip over + // a number of code points equal to the length of the block. + if (value != initialValue) { + break; + } + cp += UTRIE2_DATA_BLOCK_LENGTH; + } else { + // Current position refers to an ordinary data block. + // Walk over the data entries, checking the values. 
+ int startIx = block + (cp & UTRIE2_DATA_MASK); + int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; + for (int ix = startIx; ix limit) { + cp = limit; + } + + return cp - 1; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java b/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java new file mode 100644 index 00000000000..0a9b95a17aa --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java @@ -0,0 +1,254 @@ +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * @author aheninger + * + * A read-only Trie2, holding 32 bit data values. + * + * A Trie2 is a highly optimized data structure for mapping from Unicode + * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. + * + * See class Trie2 for descriptions of the API for accessing the contents of a trie. + * + * The fundamental data access methods are declared final in this class, with + * the intent that applications might gain a little extra performance, when compared + * with calling the same methods via the abstract UTrie2 base class. + */ + +public class Trie2_32 extends Trie2 { + + /** + * Internal constructor, not for general use. + */ + Trie2_32() { + } + + + /** + * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * The serialized format is identical between ICU4C and ICU4J, so this function + * will work with serialized Trie2s from either. + * + * The serialized Trie2 on the stream may be in either little or big endian byte order. 
+ * This allows using serialized Tries from ICU4C without needing to consider the + * byte order of the system that created them. + * + * @param is an input stream to the serialized form of a UTrie2. + * @return An unserialized Trie_32, ready for use. + * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. + * @throws IOException if a read error occurs on the InputStream. + * @throws ClassCastException if the stream contains a serialized Trie2_16 + */ + public static Trie2_32 createFromSerialized(InputStream is) throws IOException { + return (Trie2_32) Trie2.createFromSerialized(is); + } + + /** + * Get the value for a code point as stored in the Trie2. + * + * @param codePoint the code point + * @return the value + */ + @Override + public final int get(int codePoint) { + int value; + int ix; + + if (codePoint >= 0) { + if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 32 bit data is stored in the index array itself. + ix = index[codePoint >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = data32[ix]; + return value; + } + if (codePoint <= 0xffff) { + // Lead Surrogate Code Point. A Separate index section is stored for + // lead surrogate code units and code points. + // The main index has the code unit data. + // For this function, we need the code point data. + // Note: this expression could be refactored for slightly improved efficiency, but + // surrogate code points will be so rare in practice that it's not worth it. + ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = data32[ix]; + return value; + } + if (codePoint < highStart) { + // Supplemental code point, use two-level lookup. 
+ ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); + ix = index[ix]; + ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; + ix = index[ix]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = data32[ix]; + return value; + } + if (codePoint <= 0x10ffff) { + value = data32[highValueIndex]; + return value; + } + } + + // Fall through. The code point is outside of the legal range of 0..0x10ffff. + return errorValue; + } + + + /** + * Get a Trie2 value for a UTF-16 code unit. + * + * This function returns the same value as get() if the input + * character is outside of the lead surrogate range + * + * There are two values stored in a Trie2 for inputs in the lead + * surrogate range. This function returns the alternate value, + * while Trie2.get() returns the main value. + * + * @param codeUnit a 16 bit code unit or lead surrogate value. + * @return the value + */ + @Override + public int getFromU16SingleLead(char codeUnit){ + int value; + int ix; + + ix = index[codeUnit >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); + value = data32[ix]; + return value; + + } + + /** + * Serialize a Trie2_32 onto an OutputStream. + * + * A Trie2 can be serialized multiple times. + * The serialized data is compatible with ICU4C UTrie2 serialization. + * Trie2 serialization is unrelated to Java object serialization. + * + * @param os the stream to which the serialized Trie2 data will be written. + * @return the number of bytes written. + * @throw IOException on an error writing to the OutputStream. + */ + public int serialize(OutputStream os) throws IOException { + DataOutputStream dos = new DataOutputStream(os); + int bytesWritten = 0; + + bytesWritten += serializeHeader(dos); + for (int i=0; i= limit) { + break; + } + if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. 
+ // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 16 bit data is stored in the index array itself. + index2Block = 0; + block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT; + } else if (cp < 0xffff) { + // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00 + index2Block = UTRIE2_LSCP_INDEX_2_OFFSET; + block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT; + } else if (cp < highStart) { + // Supplemental code point, use two-level lookup. + int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1); + index2Block = index[ix]; + block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT; + } else { + // Code point above highStart. + if (value == data32[highValueIndex]) { + cp = limit; + } + break; + } + + if (index2Block == index2NullOffset) { + if (value != initialValue) { + break; + } + cp += UTRIE2_CP_PER_INDEX_1_ENTRY; + } else if (block == dataNullOffset) { + // The block at dataNullOffset has all values == initialValue. + // Because Trie2 iteration always proceeds in ascending order, we will always + // encounter a null block at its beginning, and can skip over + // a number of code points equal to the length of the block. + if (value != initialValue) { + break; + } + cp += UTRIE2_DATA_BLOCK_LENGTH; + } else { + // Current position refers to an ordinary data block. + // Walk over the data entries, checking the values. 
+ int startIx = block + (cp & UTRIE2_DATA_MASK); + int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH; + for (int ix = startIx; ix limit) { + cp = limit; + } + + return cp - 1; + } + +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/TrieBuilder.java b/main/classes/core/src/com/ibm/icu/impl/TrieBuilder.java new file mode 100644 index 00000000000..f5f5c07e62e --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/TrieBuilder.java @@ -0,0 +1,261 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.util.Arrays; + +import com.ibm.icu.lang.UCharacter; + +/** + * Builder class to manipulate and generate a trie. + * This is useful for ICU data in primitive types. + * Provides a compact way to store information that is indexed by Unicode + * values, such as character properties, types, keyboard values, etc. This is + * very useful when you have a block of Unicode data that contains significant + * values while the rest of the Unicode data is unused in the application or + * when you have a lot of redundance, such as where all 21,000 Han ideographs + * have the same value. However, lookup is much faster than a hash table. + * A trie of any primitive data type serves two purposes: + *

      + *
    • Fast access of the indexed values. + *
    • Smaller memory footprint. + *
    + * This is a direct port from the ICU4C version + * @author Syn Wee Quek + */ +public class TrieBuilder +{ + // public data member ---------------------------------------------- + + /** + * Number of data values in a stage 2 (data array) block. 2, 4, 8, .., + * 0x200 + */ + public static final int DATA_BLOCK_LENGTH = 1 << Trie.INDEX_STAGE_1_SHIFT_; + + // public class declaration ---------------------------------------- + + /** + * Character data in com.ibm.impl.Trie have different user-specified format + * for different purposes. + * This interface specifies methods to be implemented in order for + * com.ibm.impl.Trie, to surrogate offset information encapsulated within + * the data. + */ + public static interface DataManipulate + { + /** + * Build-time trie callback function, used with serialize(). + * This function calculates a lead surrogate's value including a + * folding offset from the 1024 supplementary code points + * [start..start+1024[ . + * It is U+10000 <= start <= U+10fc00 and (start&0x3ff)==0. + * The folding offset is provided by the caller. + * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT + * with n=0..1023. + * Instead of the offset itself, n can be stored in 10 bits - or fewer + * if it can be assumed that few lead surrogates have associated data. + * The returned value must be + * - not zero if and only if there is relevant data for the + * corresponding 1024 supplementary code points + * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., + * offset))==offset + * @return a folded value, or 0 if there is no relevant data for the + * lead surrogate. 
+ */ + public int getFoldedValue(int start, int offset); + } + + // public methods ---------------------------------------------------- + + /** + * Checks if the character belongs to a zero block in the trie + * @param ch codepoint which data is to be retrieved + * @return true if ch is in the zero block + */ + public boolean isInZeroBlock(int ch) + { + // valid, uncompacted trie and valid c? + if (m_isCompacted_ || ch > UCharacter.MAX_VALUE + || ch < UCharacter.MIN_VALUE) { + return true; + } + + return m_index_[ch >> SHIFT_] == 0; + } + + // package private method ----------------------------------------------- + + // protected data member ----------------------------------------------- + + /** + * Index values at build-time are 32 bits wide for easier processing. + * Bit 31 is set if the data block is used by multiple index values + * (from setRange()). + */ + protected int m_index_[]; + protected int m_indexLength_; + protected int m_dataCapacity_; + protected int m_dataLength_; + protected boolean m_isLatin1Linear_; + protected boolean m_isCompacted_; + /** + * Map of adjusted indexes, used in utrie_compact(). + * Maps from original indexes to new ones. + */ + protected int m_map_[]; + + /** + * Shift size for shifting right the input index. 1..9 + */ + protected static final int SHIFT_ = Trie.INDEX_STAGE_1_SHIFT_; + /** + * Length of the index (stage 1) array before folding. + * Maximum number of Unicode code points (0x110000) shifted right by + * SHIFT. + */ + protected static final int MAX_INDEX_LENGTH_ = (0x110000 >> SHIFT_); + /** + * Length of the BMP portion of the index (stage 1) array. + */ + protected static final int BMP_INDEX_LENGTH_ = 0x10000 >> SHIFT_; + /** + * Number of index (stage 1) entries per lead surrogate. + * Same as number of indexe entries for 1024 trail surrogates, + * ==0x400>>UTRIE_SHIFT + * 10 - SHIFT == Number of bits of a trail surrogate that are used in + * index table lookups. 
+ */ + protected static final int SURROGATE_BLOCK_COUNT_ = 1 << (10 - SHIFT_); + /** + * Mask for getting the lower bits from the input index. + * DATA_BLOCK_LENGTH - 1. + */ + protected static final int MASK_ = Trie.INDEX_STAGE_3_MASK_; + /** + * Shift size for shifting left the index array values. + * Increases possible data size with 16-bit index values at the cost + * of compactability. + * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY. + * 0..UTRIE_SHIFT + */ + protected static final int INDEX_SHIFT_ = Trie.INDEX_STAGE_2_SHIFT_; + /** + * Maximum length of the runtime data (stage 2) array. + * Limited by 16-bit index values that are left-shifted by INDEX_SHIFT_. + */ + protected static final int MAX_DATA_LENGTH_ = (0x10000 << INDEX_SHIFT_); + /** + * Shifting to position the index value in options + */ + protected static final int OPTIONS_INDEX_SHIFT_ = 4; + /** + * If set, then the data (stage 2) array is 32 bits wide. + */ + protected static final int OPTIONS_DATA_IS_32_BIT_ = 0x100; + /** + * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data + * (stage 2) array as a simple, linear array at data + DATA_BLOCK_LENGTH. + */ + protected static final int OPTIONS_LATIN1_IS_LINEAR_ = 0x200; + /** + * The alignment size of a stage 2 data block. Also the granularity for + * compaction. 
+ */ + protected static final int DATA_GRANULARITY_ = 1 << INDEX_SHIFT_; + + // protected constructor ---------------------------------------------- + + protected TrieBuilder() + { + m_index_ = new int[MAX_INDEX_LENGTH_]; + m_map_ = new int[MAX_BUILD_TIME_DATA_LENGTH_ >> SHIFT_]; + m_isLatin1Linear_ = false; + m_isCompacted_ = false; + m_indexLength_ = MAX_INDEX_LENGTH_; + } + + protected TrieBuilder(TrieBuilder table) + { + m_index_ = new int[MAX_INDEX_LENGTH_]; + m_indexLength_ = table.m_indexLength_; + System.arraycopy(table.m_index_, 0, m_index_, 0, m_indexLength_); + m_dataCapacity_ = table.m_dataCapacity_; + m_dataLength_ = table.m_dataLength_; + m_map_ = new int[table.m_map_.length]; + System.arraycopy(table.m_map_, 0, m_map_, 0, m_map_.length); + m_isLatin1Linear_ = table.m_isLatin1Linear_; + m_isCompacted_ = table.m_isCompacted_; + } + + // protected functions ------------------------------------------------ + + /** + * Compare two sections of an array for equality. + */ + protected static final boolean equal_int(int[] array, int start1, int start2, int length) { + while(length>0 && array[start1]==array[start2]) { + ++start1; + ++start2; + --length; + } + return length==0; + } + + /** + * Set a value in the trie index map to indicate which data block + * is referenced and which one is not. + * utrie_compact() will remove data blocks that are not used at all. 
+ * Set + * - 0 if it is used + * - -1 if it is not used + */ + protected void findUnusedBlocks() + { + // fill the entire map with "not used" + Arrays.fill(m_map_, 0xff); + + // mark each block that _is_ used with 0 + for (int i = 0; i < m_indexLength_; ++ i) { + m_map_[Math.abs(m_index_[i]) >> SHIFT_] = 0; + } + + // never move the all-initial-value block 0 + m_map_[0] = 0; + } + + /** + * Finds the same index block as the otherBlock + * @param index array + * @param indexLength size of index + * @param otherBlock + * @return same index block + */ + protected static final int findSameIndexBlock(int index[], int indexLength, + int otherBlock) + { + for (int block = BMP_INDEX_LENGTH_; block < indexLength; + block += SURROGATE_BLOCK_COUNT_) { + if(equal_int(index, block, otherBlock, SURROGATE_BLOCK_COUNT_)) { + return block; + } + } + return indexLength; + } + + // private data member ------------------------------------------------ + + /** + * Maximum length of the build-time data (stage 2) array. + * The maximum length is 0x110000 + DATA_BLOCK_LENGTH + 0x400. + * (Number of Unicode code points + one all-initial-value block + + * possible duplicate entries for 1024 lead surrogates.) + */ + private static final int MAX_BUILD_TIME_DATA_LENGTH_ = + 0x110000 + DATA_BLOCK_LENGTH + 0x400; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java b/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java new file mode 100644 index 00000000000..1630b312be7 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java @@ -0,0 +1,530 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, International Business Machines Corporation and +* others. All Rights Reserved. 
+****************************************************************************** +*/ + +package com.ibm.icu.impl; + +import java.util.NoSuchElementException; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.util.RangeValueIterator; + +/** + *

    Class enabling iteration of the values in a Trie.

    + *

    Result of each iteration contains the interval of codepoints that have + * the same value type and the value type itself.

    + *

    The comparison of each codepoint value is done via extract(), whose + * default implementation returns the value as it is.

    + *

    Method extract() can be overridden to perform manipulations on + * codepoint values in order to perform a specialized comparison.

    + *

    TrieIterator is designed to be a generic iterator for the CharTrie + * and the IntTrie, hence to accommodate both types of data, the return + * result will be in terms of int (32 bit) values.

    + *

    See com.ibm.icu.text.UCharacterTypeIterator for examples of use.

    + *

    Notes for porting utrie_enum from icu4c to icu4j:
    + * Internally, icu4c's utrie_enum performs all iterations in its body. In Java + * terms, the caller would have to pass an object with a callback function + * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, + * uint32_t value) into utrie_enum. utrie_enum will then find ranges of + * codepoints with the same value as determined by + * UTrieEnumValue(const void *context, uint32_t value). For each range, + * utrie_enum calls the callback function to perform a task. In this way, + * icu4c performs the iteration within utrie_enum. + * To follow the JDK model, icu4j is slightly different from icu4c. + * Instead of requiring the caller to implement an object for a callback, + * the caller will have to implement a subclass of TrieIterator, fleshing out + * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, + * the caller will have to code his own iteration and flesh out the task + * (equivalent to UTrieEnumRange) to be performed in the iteration loop. + *

    + *

    There are basically 3 usage scenarios for porting:

    + *

    1) UTrieEnumValue is the only implemented callback then just implement a + * subclass of TrieIterator and override the extract(int) method. The + * extract(int) method is analogous to the UTrieEnumValue callback. + *

    + *

    2) UTrieEnumValue and UTrieEnumRange both are implemented then implement + * a subclass of TrieIterator, override the extract method and iterate, e.g + *

    + *

    utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, + * set);
    + * In Java :
    + *

    + * class TrieIteratorImpl extends TrieIterator{
    + *     public TrieIteratorImpl(Trie data){
    + *         super(data);
    + *     }
    + *     public int extract(int value){
    + *         // port the implementation of _enumPropertyStartsValue here
    + *     }
    + * }
    + * .... 
    + * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
    + * while(fcdIter.next(result)) {
    + *     // port the implementation of _enumPropertyStartsRange
    + * }
    + * 
    + *

    + *

    3) UTrieEnumRange is the only implemented callback then just implement + * the while loop, when utrie_enum is called + *

    + * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
    + * TrieIterator fcdIter  = new TrieIterator(fcdTrieImpl.fcdTrie);
    + * while(fcdIter.next(result)){
    + *     set.add(result.start);
    + * }
    + * 

    + * @author synwee + * @see com.ibm.icu.impl.Trie + * @since release 2.1, Jan 17 2002 + */ +public class TrieIterator implements RangeValueIterator + +{ + // public constructor --------------------------------------------- + + /** + * TrieEnumeration constructor + * @param trie to be used + * @exception IllegalArgumentException throw when argument is null. + */ + public TrieIterator(Trie trie) + { + if (trie == null) { + throw new IllegalArgumentException( + "Argument trie cannot be null"); + } + m_trie_ = trie; + // synwee: check that extract belongs to the child class + m_initialValue_ = extract(m_trie_.getInitialValue()); + reset(); + } + + // public methods ------------------------------------------------- + + /** + *

    Returns true if we are not at the end of the iteration, false + * otherwise.

    + *

    The next set of codepoints with the same value type will be + * calculated during this call and returned in the argument element.

    + * @param element return result + * @return true if we are not at the end of the iteration, false otherwise. + * @exception NoSuchElementException - if no more elements exist. + * @see com.ibm.icu.util.RangeValueIterator.Element + */ + public final boolean next(Element element) + { + if (m_nextCodepoint_ > UCharacter.MAX_VALUE) { + return false; + } + if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE && + calculateNextBMPElement(element)) { + return true; + } + calculateNextSupplementaryElement(element); + return true; + } + + /** + * Resets the iterator to the beginning of the iteration + */ + public final void reset() + { + m_currentCodepoint_ = 0; + m_nextCodepoint_ = 0; + m_nextIndex_ = 0; + m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_; + if (m_nextBlock_ == m_trie_.m_dataOffset_) { + m_nextValue_ = m_initialValue_; + } + else { + m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_)); + } + m_nextBlockIndex_ = 0; + m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_; + } + + // protected methods ---------------------------------------------- + + /** + * Called by next() to extracts a 32 bit value from a trie value + * used for comparison. + * This method is to be overwritten if special manipulation is to be done + * to retrieve a relevant comparison. + * The default function is to return the value as it is. + * @param value a value from the trie + * @return extracted value + */ + protected int extract(int value) + { + return value; + } + + // private methods ------------------------------------------------ + + /** + * Set the result values + * @param element return result object + * @param start codepoint of range + * @param limit (end + 1) codepoint of range + * @param value common value of range + */ + private final void setResult(Element element, int start, int limit, + int value) + { + element.start = start; + element.limit = limit; + element.value = value; + } + + /** + * Finding the next element. 
+ * This method is called just before returning the result of + * next(). + * We always store the next element before it is requested. + * In the case that we have to continue calculations into the + * supplementary planes, a false will be returned. + * @param element return result object + * @return true if the next range is found, false if we have to proceed to + * the supplementary range. + */ + private final boolean calculateNextBMPElement(Element element) + { + int currentValue = m_nextValue_; + m_currentCodepoint_ = m_nextCodepoint_; + m_nextCodepoint_ ++; + m_nextBlockIndex_ ++; + if (!checkBlockDetail(currentValue)) { + setResult(element, m_currentCodepoint_, m_nextCodepoint_, + currentValue); + return true; + } + // synwee check that next block index == 0 here + // enumerate BMP - the main loop enumerates data blocks + while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) { + // because of the way the character is split to form the index + // the lead surrogate and trail surrogate can not be in the + // mid of a block + if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) { + // skip lead surrogate code units, + // go to lead surrogate codepoints + m_nextIndex_ = BMP_INDEX_LENGTH_; + } + else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) { + // go back to regular BMP code points + m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_; + } else { + m_nextIndex_ ++; + } + + m_nextBlockIndex_ = 0; + if (!checkBlock(currentValue)) { + setResult(element, m_currentCodepoint_, m_nextCodepoint_, + currentValue); + return true; + } + } + m_nextCodepoint_ --; // step one back since this value has not been + m_nextBlockIndex_ --; // retrieved yet. + return false; + } + + /** + * Finds the next supplementary element. + * For each entry in the trie, the value to be delivered is passed through + * extract(). + * We always store the next element before it is requested. + * Called after calculateNextBMP() completes its round of BMP characters. 
+ * There is a slight difference in the usage of m_currentCodepoint_ + * here as compared to calculateNextBMP(). Though both represents the + * lower bound of the next element, in calculateNextBMP() it gets set + * at the start of any loop, where-else, in calculateNextSupplementary() + * since m_currentCodepoint_ already contains the lower bound of the + * next element (passed down from calculateNextBMP()), we keep it till + * the end before resetting it to the new value. + * Note, if there are no more iterations, it will never get to here. + * Blocked out by next(). + * @param element return result object + */ + private final void calculateNextSupplementaryElement(Element element) + { + int currentValue = m_nextValue_; + m_nextCodepoint_ ++; + m_nextBlockIndex_ ++; + + if (UTF16.getTrailSurrogate(m_nextCodepoint_) + != UTF16.TRAIL_SURROGATE_MIN_VALUE) { + // this piece is only called when we are in the middle of a lead + // surrogate block + if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) { + setResult(element, m_currentCodepoint_, m_nextCodepoint_, + currentValue); + m_currentCodepoint_ = m_nextCodepoint_; + return; + } + // we have cleared one block + m_nextIndex_ ++; + m_nextTrailIndexOffset_ ++; + if (!checkTrailBlock(currentValue)) { + setResult(element, m_currentCodepoint_, m_nextCodepoint_, + currentValue); + m_currentCodepoint_ = m_nextCodepoint_; + return; + } + } + int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); + // enumerate supplementary code points + while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) { + // lead surrogate access + final int leadBlock = + m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << + Trie.INDEX_STAGE_2_SHIFT_; + if (leadBlock == m_trie_.m_dataOffset_) { + // no entries for a whole block of lead surrogates + if (currentValue != m_initialValue_) { + m_nextValue_ = m_initialValue_; + m_nextBlock_ = leadBlock; // == m_trie_.m_dataOffset_ + m_nextBlockIndex_ = 0; + setResult(element, 
m_currentCodepoint_, m_nextCodepoint_, + currentValue); + m_currentCodepoint_ = m_nextCodepoint_; + return; + } + + nextLead += DATA_BLOCK_LENGTH_; + // number of total affected supplementary codepoints in one + // block + // this is not a simple addition of + // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider + // that we might have moved some of the codepoints + m_nextCodepoint_ = UCharacterProperty.getRawSupplementary( + (char)nextLead, + (char)UTF16.TRAIL_SURROGATE_MIN_VALUE); + continue; + } + if (m_trie_.m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + // enumerate trail surrogates for this lead surrogate + m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( + m_trie_.getValue(leadBlock + + (nextLead & Trie.INDEX_STAGE_3_MASK_))); + if (m_nextIndex_ <= 0) { + // no data for this lead surrogate + if (currentValue != m_initialValue_) { + m_nextValue_ = m_initialValue_; + m_nextBlock_ = m_trie_.m_dataOffset_; + m_nextBlockIndex_ = 0; + setResult(element, m_currentCodepoint_, m_nextCodepoint_, + currentValue); + m_currentCodepoint_ = m_nextCodepoint_; + return; + } + m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_; + } else { + m_nextTrailIndexOffset_ = 0; + if (!checkTrailBlock(currentValue)) { + setResult(element, m_currentCodepoint_, m_nextCodepoint_, + currentValue); + m_currentCodepoint_ = m_nextCodepoint_; + return; + } + } + nextLead ++; + } + + // deliver last range + setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, + currentValue); + } + + /** + * Internal block value calculations + * Performs calculations on a data block to find codepoints in m_nextBlock_ + * after the index m_nextBlockIndex_ that has the same value. + * Note m_*_ variables at this point is the next codepoint whose value + * has not been calculated. + * But when returned with false, it will be the last codepoint whose + * value has been calculated. 
+ * @param currentValue the value which other codepoints are tested against + * @return true if the whole block has the same value as currentValue or if + * the whole block has been calculated, false otherwise. + */ + private final boolean checkBlockDetail(int currentValue) + { + while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) { + m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ + + m_nextBlockIndex_)); + if (m_nextValue_ != currentValue) { + return false; + } + ++ m_nextBlockIndex_; + ++ m_nextCodepoint_; + } + return true; + } + + /** + * Internal block value calculations + * Performs calculations on a data block to find codepoints in m_nextBlock_ + * that has the same value. + * Will call checkBlockDetail() if highlevel check fails. + * Note m_*_ variables at this point is the next codepoint whose value + * has not been calculated. + * @param currentBlock the initial block containing all currentValue + * @param currentValue the value which other codepoints are tested against + * @return true if the whole block has the same value as currentValue or if + * the whole block has been calculated, false otherwise. 
+ */ + private final boolean checkBlock(int currentValue) + { + int currentBlock = m_nextBlock_; + m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << + Trie.INDEX_STAGE_2_SHIFT_; + if (m_nextBlock_ == currentBlock && + (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) { + // the block is the same as the previous one, filled with + // currentValue + m_nextCodepoint_ += DATA_BLOCK_LENGTH_; + } + else if (m_nextBlock_ == m_trie_.m_dataOffset_) { + // this is the all-initial-value block + if (currentValue != m_initialValue_) { + m_nextValue_ = m_initialValue_; + m_nextBlockIndex_ = 0; + return false; + } + m_nextCodepoint_ += DATA_BLOCK_LENGTH_; + } + else { + if (!checkBlockDetail(currentValue)) { + return false; + } + } + return true; + } + + /** + * Internal block value calculations + * Performs calculations on multiple data blocks for a set of trail + * surrogates to find codepoints in m_nextBlock_ that has the same value. + * Will call checkBlock() for internal block checks. + * Note m_*_ variables at this point is the next codepoint whose value + * has not been calculated. + * @param currentValue the value which other codepoints are tested against + * @return true if the whole block has the same value as currentValue or if + * the whole block has been calculated, false otherwise. + */ + private final boolean checkTrailBlock(int currentValue) + { + // enumerate code points for this lead surrogate + while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) + { + // if we ever reach here, we are at the start of a new block + m_nextBlockIndex_ = 0; + // copy of most of the body of the BMP loop + if (!checkBlock(currentValue)) { + return false; + } + m_nextTrailIndexOffset_ ++; + m_nextIndex_ ++; + } + return true; + } + + /** + * Checks if we are beginning at the start of a initial block. + * If we are then the rest of the codepoints in this initial block + * has the same values. 
+ * We increment m_nextCodepoint_ and relevant data members if so. + * This is used only in for the supplementary codepoints because + * the offset to the trail indexes could be 0. + * @return true if we are at the start of a initial block. + */ + private final boolean checkNullNextTrailIndex() + { + if (m_nextIndex_ <= 0) { + m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1; + int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); + int leadBlock = + m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << + Trie.INDEX_STAGE_2_SHIFT_; + if (m_trie_.m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( + m_trie_.getValue(leadBlock + + (nextLead & Trie.INDEX_STAGE_3_MASK_))); + m_nextIndex_ --; + m_nextBlockIndex_ = DATA_BLOCK_LENGTH_; + return true; + } + return false; + } + + // private data members -------------------------------------------- + + /** + * Size of the stage 1 BMP indexes + */ + private static final int BMP_INDEX_LENGTH_ = + 0x10000 >> Trie.INDEX_STAGE_1_SHIFT_; + /** + * Lead surrogate minimum value + */ + private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800; + /** + * Trail surrogate minimum value + */ + private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00; + /* + * Trail surrogate maximum value + */ + //private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF; + /** + * Number of trail surrogate + */ + private static final int TRAIL_SURROGATE_COUNT_ = 0x400; + /** + * Number of stage 1 indexes for supplementary calculations that maps to + * each lead surrogate character. + * See second pass into getRawOffset for the trail surrogate character. + * 10 for significant number of bits for trail surrogates, 5 for what we + * discard during shifting. 
+ */ + private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ = + 1 << (10 - Trie.INDEX_STAGE_1_SHIFT_); + /** + * Number of data values in a stage 2 (data array) block. + */ + private static final int DATA_BLOCK_LENGTH_ = + 1 << Trie.INDEX_STAGE_1_SHIFT_; +// /** +// * Number of codepoints in a stage 2 block +// */ +// private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ = +// DATA_BLOCK_LENGTH_ << 10; + /** + * Trie instance + */ + private Trie m_trie_; + /** + * Initial value for trie values + */ + private int m_initialValue_; + /** + * Next element results and data. + */ + private int m_currentCodepoint_; + private int m_nextCodepoint_; + private int m_nextValue_; + private int m_nextIndex_; + private int m_nextBlock_; + private int m_nextBlockIndex_; + private int m_nextTrailIndexOffset_; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java b/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java new file mode 100644 index 00000000000..8314b5bb157 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java @@ -0,0 +1,353 @@ +/* +******************************************************************************* +* +* Copyright (C) 2004-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: UBiDiProps.java +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005jan16 +* created by: Markus W. Scherer +* +* Low-level Unicode bidi/shaping properties access. +* Java port of ubidi_props.h/.c. +*/ + +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UProperty; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.RangeValueIterator; + +public final class UBiDiProps { + // constructors etc. 
--------------------------------------------------- *** + + // port of ubidi_openProps() + private UBiDiProps() throws IOException{ + InputStream is=ICUData.getStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME); + BufferedInputStream b=new BufferedInputStream(is, 4096 /* data buffer size */); + readData(b); + b.close(); + is.close(); + } + + private UBiDiProps(boolean makeDummy) { // ignore makeDummy, only creates a unique signature + indexes=new int[IX_TOP]; + indexes[0]=IX_TOP; + trie=new CharTrie(0, 0, null); // dummy trie, always returns 0 + } + + + private void readData(InputStream is) throws IOException { + DataInputStream inputStream=new DataInputStream(is); + + // read the header + ICUBinary.readHeader(inputStream, FMT, new IsAcceptable()); + + // read indexes[] + int i, count; + count=inputStream.readInt(); + if(count0) { + mirrors=new int[count]; + for(i=0; i>MAX_JG_SHIFT; + case UProperty.JOINING_TYPE: + return (max&JT_MASK)>>JT_SHIFT; + default: + return -1; /* undefined */ + } + } + + public final int getClass(int c) { + return getClassFromProps(trie.getCodePointValue(c)); + } + + public final boolean isMirrored(int c) { + return getFlagFromProps(trie.getCodePointValue(c), IS_MIRRORED_SHIFT); + } + + public final int getMirror(int c) { + int props; + int delta; + + props=trie.getCodePointValue(c); + delta=((short)props)>>MIRROR_DELTA_SHIFT; + if(delta!=ESC_MIRROR_DELTA) { + return c+delta; + } else { + /* look for mirror code point in the mirrors[] table */ + int m; + int i, length; + int c2; + + length=indexes[IX_MIRROR_LENGTH]; + + /* linear search */ + for(i=0; i>JT_SHIFT; + } + + public final int getJoiningGroup(int c) { + int start, limit; + + start=indexes[IX_JG_START]; + limit=indexes[IX_JG_LIMIT]; + if(start<=c && c>shift)&1)!=0; + } + + private static final int ESC_MIRROR_DELTA=-4; + //private static final int MIN_MIRROR_DELTA=-3; + //private static final int MAX_MIRROR_DELTA=3; + + // definitions for 32-bit mirror table entry 
--------------------------- *** + + /* the source Unicode code point takes 21 bits (20..0) */ + private static final int MIRROR_INDEX_SHIFT=21; + //private static final int MAX_MIRROR_INDEX=0x7ff; + + private static final int getMirrorCodePoint(int m) { + return m&0x1fffff; + } + private static final int getMirrorIndex(int m) { + return m>>>MIRROR_INDEX_SHIFT; + } + + + /* + * public singleton instance + */ + public static final UBiDiProps INSTANCE; + + private static volatile UBiDiProps FULL_INSTANCE; + private static volatile UBiDiProps DUMMY_INSTANCE; + + // This static initializer block must be placed after + // other static member initialization + static { + UBiDiProps bp; + try { + bp = new UBiDiProps(); + FULL_INSTANCE = bp; + } catch (IOException e) { + // creating dummy + bp = new UBiDiProps(true); + DUMMY_INSTANCE = bp; + } + INSTANCE = bp; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java b/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java new file mode 100644 index 00000000000..f154dc724b3 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java @@ -0,0 +1,1490 @@ +/* +******************************************************************************* +* +* Copyright (C) 2004-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: UCaseProps.java +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005jan29 +* created by: Markus W. Scherer +* +* Low-level Unicode character/string case mapping code. +* Java port of ucase.h/.c. 
+*/ + +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UProperty; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.RangeValueIterator; +import com.ibm.icu.util.ULocale; + +public final class UCaseProps { + + // constructors etc. --------------------------------------------------- *** + + // port of ucase_openProps() + private UCaseProps() throws IOException { + InputStream is=ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME); + BufferedInputStream b=new BufferedInputStream(is, 4096 /* data buffer size */); + readData(b); + b.close(); + is.close(); + } + + private UCaseProps(boolean makeDummy) { // ignore makeDummy, only creates a unique signature + indexes=new int[IX_TOP]; + indexes[0]=IX_TOP; + trie=new CharTrie(0, 0, null); // dummy trie, always returns 0 + } + + private final void readData(InputStream is) throws IOException { + DataInputStream inputStream=new DataInputStream(is); + + // read the header + ICUBinary.readHeader(inputStream, FMT, new IsAcceptable()); + + // read indexes[] + int i, count; + count=inputStream.readInt(); + if(count0) { + exceptions=new char[count]; + for(i=0; i0) { + unfold=new char[count]; + for(i=0; i>EXC_SHIFT; + } + + private static final boolean propsHasException(int props) { + return (props&EXCEPTION)!=0; + } + + /* number of bits in an 8-bit integer value */ + private static final byte flagsOffset[/*256*/]={ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 
6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; + + private static final boolean hasSlot(int flags, int index) { + return (flags&(1<=UPPER) { + c+=getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props); + int excWord=exceptions[excOffset++]; + if(hasSlot(excWord, EXC_LOWER)) { + c=getSlotValue(excWord, EXC_LOWER, excOffset); + } + } + return c; + } + + public final int toupper(int c) { + int props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)==LOWER) { + c+=getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props); + int excWord=exceptions[excOffset++]; + if(hasSlot(excWord, EXC_UPPER)) { + c=getSlotValue(excWord, EXC_UPPER, excOffset); + } + } + return c; + } + + public final int totitle(int c) { + int props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)==LOWER) { + c+=getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props); + int excWord=exceptions[excOffset++]; + int index; + if(hasSlot(excWord, EXC_TITLE)) { + index=EXC_TITLE; + } else if(hasSlot(excWord, EXC_UPPER)) { + index=EXC_UPPER; + } else { + return c; + } + c=getSlotValue(excWord, index, excOffset); + } + return c; + } + + /** + * Adds all simple case mappings and the full case folding for c to sa, + * and also adds special case closure mappings. + * c itself is not added. 
+ * For example, the mappings + * - for s include long s + * - for sharp s include ss + * - for k include the Kelvin sign + */ + public final void addCaseClosure(int c, UnicodeSet set) { + /* + * Hardcode the case closure of i and its relatives and ignore the + * data file data for these characters. + * The Turkic dotless i and dotted I with their case mapping conditions + * and case folding option make the related characters behave specially. + * This code matches their closure behavior to their case folding behavior. + */ + + switch(c) { + case 0x49: + /* regular i and I are in one equivalence class */ + set.add(0x69); + return; + case 0x69: + set.add(0x49); + return; + case 0x130: + /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ + set.add(iDot); + return; + case 0x131: + /* dotless i is in a class by itself */ + return; + default: + /* otherwise use the data file data */ + break; + } + + int props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)!=NONE) { + /* add the one simple case mapping, no matter what type it is */ + int delta=getDelta(props); + if(delta!=0) { + set.add(c+delta); + } + } + } else { + /* + * c has exceptions, so there may be multiple simple and/or + * full case mappings. Add them all. 
+ */ + int excOffset0, excOffset=getExceptionsOffset(props); + int closureOffset; + int excWord=exceptions[excOffset++]; + int index, closureLength, fullLength, length; + + excOffset0=excOffset; + + /* add all simple case mappings */ + for(index=EXC_LOWER; index<=EXC_TITLE; ++index) { + if(hasSlot(excWord, index)) { + excOffset=excOffset0; + c=getSlotValue(excWord, index, excOffset); + set.add(c); + } + } + + /* get the closure string pointer & length */ + if(hasSlot(excWord, EXC_CLOSURE)) { + excOffset=excOffset0; + long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset); + closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */ + closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */ + } else { + closureLength=0; + closureOffset=0; + } + + /* add the full case folding */ + if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { + excOffset=excOffset0; + long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); + fullLength=(int)value; + + /* start of full case mapping strings */ + excOffset=(int)(value>>32)+1; + + fullLength&=0xffff; /* bits 16 and higher are reserved */ + + /* skip the lowercase result string */ + excOffset+=fullLength&FULL_LOWER; + fullLength>>=4; + + /* add the full case folding string */ + length=fullLength&0xf; + if(length!=0) { + set.add(new String(exceptions, excOffset, length)); + excOffset+=length; + } + + /* skip the uppercase and titlecase strings */ + fullLength>>=4; + excOffset+=fullLength&0xf; + fullLength>>=4; + excOffset+=fullLength; + + closureOffset=excOffset; /* behind full case mappings */ + } + + /* add each code point in the closure string */ + for(index=0; index0 and max>0 and s.length()<=max + */ + private final int strcmpMax(String s, int unfoldOffset, int max) { + int i1, length, c1, c2; + + length=s.length(); + max-=length; /* we require length<=max, so no need to decrement max in the loop */ + i1=0; + do { + c1=s.charAt(i1++); + 
c2=unfold[unfoldOffset++]; + if(c2==0) { + return 1; /* reached the end of t but not of s */ + } + c1-=c2; + if(c1!=0) { + return c1; /* return difference result */ + } + } while(--length>0); + /* ends with length==0 */ + + if(max==0 || unfold[unfoldOffset]==0) { + return 0; /* equal to length of both strings */ + } else { + return -max; /* return lengh difference */ + } + } + + /** + * Maps the string to single code points and adds the associated case closure + * mappings. + * The string is mapped to code points if it is their full case folding string. + * In other words, this performs a reverse full case folding and then + * adds the case closure items of the resulting code points. + * If the string is found and its closure applied, then + * the string itself is added as well as part of its code points' closure. + * + * @return true if the string was found + */ + public final boolean addStringCaseClosure(String s, UnicodeSet set) { + int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth; + + if(unfold==null || s==null) { + return false; /* no reverse case folding data, or no string */ + } + length=s.length(); + if(length<=1) { + /* the string is too short to find any match */ + /* + * more precise would be: + * if(!u_strHasMoreChar32Than(s, length, 1)) + * but this does not make much practical difference because + * a single supplementary code point would just not be found + */ + return false; + } + + unfoldRows=unfold[UNFOLD_ROWS]; + unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH]; + unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH]; + //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth; + + if(length>unfoldStringWidth) { + /* the string is too long to find any match */ + return false; + } + + /* do a binary search for the string */ + start=0; + limit=unfoldRows; + while(start0 */ { + start=i+1; + } + } + + return false; /* string not found */ + } + + /** @return NONE, LOWER, UPPER, TITLE */ + public final int getType(int c) { + 
return getTypeFromProps(trie.getCodePointValue(c)); + } + + /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ + public final int getTypeOrIgnorable(int c) { + int props=trie.getCodePointValue(c); + int type=getTypeFromProps(props); + if(propsHasException(props)) { + if((exceptions[getExceptionsOffset(props)]&EXC_CASE_IGNORABLE)!=0) { + type|=4; + } + } else if(type==NONE && (props&CASE_IGNORABLE)!=0) { + type|=4; + } + return type; + } + + /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */ + public final int getDotType(int c) { + int props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + return props&DOT_MASK; + } else { + return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK; + } + } + + public final boolean isSoftDotted(int c) { + return getDotType(c)==SOFT_DOTTED; + } + + public final boolean isCaseSensitive(int c) { + return (trie.getCodePointValue(c)&SENSITIVE)!=0; + } + + // string casing ------------------------------------------------------- *** + + /* + * These internal functions form the core of string case mappings. + * They map single code points to result code points or strings and take + * all necessary conditions (context, locale ID, options) into account. + * + * They do not iterate over the source or write to the destination + * so that the same functions are useful for non-standard string storage, + * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. + * For the same reason, the "surrounding text" context is passed in as a + * ContextIterator which does not make any assumptions about + * the underlying storage. + * + * This section contains helper functions that check for conditions + * in the input text surrounding the current code point + * according to SpecialCasing.txt. 
+ * + * Each helper function gets the index + * - after the current code point if it looks at following text + * - before the current code point if it looks at preceding text + * + * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows: + * + * Final_Sigma + * C is preceded by a sequence consisting of + * a cased letter and a case-ignorable sequence, + * and C is not followed by a sequence consisting of + * an ignorable sequence and then a cased letter. + * + * More_Above + * C is followed by one or more characters of combining class 230 (ABOVE) + * in the combining character sequence. + * + * After_Soft_Dotted + * The last preceding character with combining class of zero before C + * was Soft_Dotted, + * and there is no intervening combining character class 230 (ABOVE). + * + * Before_Dot + * C is followed by combining dot above (U+0307). + * Any sequence of characters with a combining class that is neither 0 nor 230 + * may intervene between the current character and the combining dot above. + * + * The erratum from 2002-10-31 adds the condition + * + * After_I + * The last preceding base character was an uppercase I, and there is no + * intervening combining character class 230 (ABOVE). + * + * (See Jitterbug 2344 and the comments on After_I below.) + * + * Helper definitions in Unicode 3.2 UAX 21: + * + * D1. A character C is defined to be cased + * if it meets any of the following criteria: + * + * - The general category of C is Titlecase Letter (Lt) + * - In [CoreProps], C has one of the properties Uppercase, or Lowercase + * - Given D = NFD(C), then it is not the case that: + * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) + * (This third criterium does not add any characters to the list + * for Unicode 3.2. Ignored.) + * + * D2. 
A character C is defined to be case-ignorable + * if it meets either of the following criteria: + * + * - The general category of C is + * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or + * Letter Modifier (Lm), or Symbol Modifier (Sk) + * - C is one of the following characters + * U+0027 APOSTROPHE + * U+00AD SOFT HYPHEN (SHY) + * U+2019 RIGHT SINGLE QUOTATION MARK + * (the preferred character for apostrophe) + * + * D3. A case-ignorable sequence is a sequence of + * zero or more case-ignorable characters. + */ + + /** + * Iterator for string case mappings, which need to look at the + * context (surrounding text) of a given character for conditional mappings. + * + * The iterator only needs to go backward or forward away from the + * character in question. It does not use any indexes on this interface. + * It does not support random access or an arbitrary change of + * iteration direction. + * + * The code point being case-mapped itself is never returned by + * this iterator. + */ + public interface ContextIterator { + /** + * Reset the iterator for forward or backward iteration. + * @param dir >0: Begin iterating forward from the first code point + * after the one that is being case-mapped. + * <0: Begin iterating backward from the first code point + * before the one that is being case-mapped. + */ + public void reset(int dir); + /** + * Iterate and return the next code point, moving in the direction + * determined by the reset() call. + * @return Next code point, or <0 when the iteration is done. + */ + public int next(); + } + + /** + * For string case mappings, a single character (a code point) is mapped + * either to itself (in which case in-place mapping functions do nothing), + * or to another single code point, or to a string. 
+ * Aside from the string contents, these are indicated with a single int + * value as follows: + * + * Mapping to self: Negative values (~self instead of -self to support U+0000) + * + * Mapping to another code point: Positive values >MAX_STRING_LENGTH + * + * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is + * returned. Note that the string result may indeed have zero length. + */ + public static final int MAX_STRING_LENGTH=0x1f; + + private static final int LOC_UNKNOWN=0; + private static final int LOC_ROOT=1; + private static final int LOC_TURKISH=2; + private static final int LOC_LITHUANIAN=3; + + /* + * Checks and caches the type of locale ID as it is relevant for case mapping. + * If the locCache is not null, then it must be initialized with locCache[0]=0 . + */ + private static final int getCaseLocale(ULocale locale, int[] locCache) { + int result; + + if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) { + return result; + } + + result=LOC_ROOT; + + String language=locale.getLanguage(); + if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) { + result=LOC_TURKISH; + } else if(language.equals("lt") || language.equals("lit")) { + result=LOC_LITHUANIAN; + } + + if(locCache!=null) { + locCache[0]=result; + } + return result; + } + + /* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */ + private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) { + int c; + + if(iter==null) { + return false; + } + + for(iter.reset(dir); (c=iter.next())>=0;) { + int type=getTypeOrIgnorable(c); + if((type&4)!=0) { + /* case-ignorable, continue with the loop */ + } else if(type!=NONE) { + return true; /* followed by cased letter */ + } else { + return false; /* uncased and not case-ignorable */ + } + } + + return false; /* not followed by cased letter */ + } + + /* Is preceded by Soft_Dotted character with no intervening cc=230 ? 
*/ + private final boolean isPrecededBySoftDotted(ContextIterator iter) { + int c; + int dotType; + + if(iter==null) { + return false; + } + + for(iter.reset(-1); (c=iter.next())>=0;) { + dotType=getDotType(c); + if(dotType==SOFT_DOTTED) { + return true; /* preceded by TYPE_i */ + } else if(dotType!=OTHER_ACCENT) { + return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ + } + } + + return false; /* not preceded by TYPE_i */ + } + + /* + * See Jitterbug 2344: + * The condition After_I for Turkic-lowercasing of U+0307 combining dot above + * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because + * we made those releases compatible with Unicode 3.2 which had not fixed + * a related bug in SpecialCasing.txt. + * + * From the Jitterbug 2344 text: + * ... this bug is listed as a Unicode erratum + * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html + * + * There are two errors in SpecialCasing.txt. + * 1. Missing semicolons on two lines. ... [irrelevant for ICU] + * 2. An incorrect context definition. Correct as follows: + * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE + * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE + * --- + * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE + * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE + * where the context After_I is defined as: + * The last preceding base character was an uppercase I, and there is no + * intervening combining character class 230 (ABOVE). + * + * + * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: + * + * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. + * # This matches the behavior of the canonically equivalent I-dot_above + * + * See also the description in this place in older versions of uchar.c (revision 1.100). + * + * Markus W. 
Scherer 2003-feb-15 + */ + + /* Is preceded by base character 'I' with no intervening cc=230 ? */ + private final boolean isPrecededBy_I(ContextIterator iter) { + int c; + int dotType; + + if(iter==null) { + return false; + } + + for(iter.reset(-1); (c=iter.next())>=0;) { + if(c==0x49) { + return true; /* preceded by I */ + } + dotType=getDotType(c); + if(dotType!=OTHER_ACCENT) { + return false; /* preceded by different base character (not I), or intervening cc==230 */ + } + } + + return false; /* not preceded by I */ + } + + /* Is followed by one or more cc==230 ? */ + private final boolean isFollowedByMoreAbove(ContextIterator iter) { + int c; + int dotType; + + if(iter==null) { + return false; + } + + for(iter.reset(1); (c=iter.next())>=0;) { + dotType=getDotType(c); + if(dotType==ABOVE) { + return true; /* at least one cc==230 following */ + } else if(dotType!=OTHER_ACCENT) { + return false; /* next base character, no more cc==230 following */ + } + } + + return false; /* no more cc==230 following */ + } + + /* Is followed by a dot above (without cc==230 in between) ? */ + private final boolean isFollowedByDotAbove(ContextIterator iter) { + int c; + int dotType; + + if(iter==null) { + return false; + } + + for(iter.reset(1); (c=iter.next())>=0; ) { + if(c==0x307) { + return true; + } + dotType=getDotType(c); + if(dotType!=OTHER_ACCENT) { + return false; /* next base character or cc==230 in between */ + } + } + + return false; /* no dot above following */ + } + + private static final String + iDot= "i\u0307", + jDot= "j\u0307", + iOgonekDot= "\u012f\u0307", + iDotGrave= "i\u0307\u0300", + iDotAcute= "i\u0307\u0301", + iDotTilde= "i\u0307\u0303"; + + /** + * Get the full lowercase mapping for c. + * + * @param c Character to be mapped. + * @param iter Character iterator, used for context-sensitive mappings. + * See ContextIterator for details. + * If iter==null then a context-independent result is returned. 
+ * @param out If the mapping result is a string, then it is appended to out. + * @param locale Locale ID for locale-dependent mappings. + * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing + * the locale ID for subsequent calls. + * Can be null. + * @return Output code point or string length, see MAX_STRING_LENGTH. + * + * @see ContextIterator + * @see #MAX_STRING_LENGTH + * @internal + */ + public final int toFullLower(int c, ContextIterator iter, + StringBuffer out, + ULocale locale, int[] locCache) { + int result, props; + + result=c; + props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)>=UPPER) { + result=c+getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props), excOffset2; + int excWord=exceptions[excOffset++]; + int full; + + excOffset2=excOffset; + + if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) { + /* use hardcoded conditions and mappings */ + int loc=getCaseLocale(locale, locCache); + + /* + * Test for conditional mappings first + * (otherwise the unconditional default mappings are always taken), + * then test for characters that have unconditional mappings in SpecialCasing.txt, + * then get the UnicodeData.txt mappings. + */ + if( loc==LOC_LITHUANIAN && + /* base characters, find accents above */ + (((c==0x49 || c==0x4a || c==0x12e) && + isFollowedByMoreAbove(iter)) || + /* precomposed with accent above, no need to find one */ + (c==0xcc || c==0xcd || c==0x128)) + ) { + /* + # Lithuanian + + # Lithuanian retains the dot in a lowercase i when followed by accents. + + # Introduce an explicit dot above when lowercasing capital I's and J's + # whenever there are more accents above. 
+ # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) + + 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I + 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J + 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK + 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE + 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE + 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE + */ + switch(c) { + case 0x49: /* LATIN CAPITAL LETTER I */ + out.append(iDot); + return 2; + case 0x4a: /* LATIN CAPITAL LETTER J */ + out.append(jDot); + return 2; + case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */ + out.append(iOgonekDot); + return 2; + case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */ + out.append(iDotGrave); + return 3; + case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */ + out.append(iDotAcute); + return 3; + case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */ + out.append(iDotTilde); + return 3; + default: + return 0; /* will not occur */ + } + /* # Turkish and Azeri */ + } else if(loc==LOC_TURKISH && c==0x130) { + /* + # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri + # The following rules handle those cases. + + 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE + */ + return 0x69; + } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) { + /* + # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 
+ # This matches the behavior of the canonically equivalent I-dot_above + + 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE + 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE + */ + return 0; /* remove the dot (continue without output) */ + } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) { + /* + # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. + + 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I + 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I + */ + return 0x131; + } else if(c==0x130) { + /* + # Preserve canonical equivalence for I with dot. Turkic is handled below. + + 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE + */ + out.append(iDot); + return 2; + } else if( c==0x3a3 && + !isFollowedByCasedLetter(iter, 1) && + isFollowedByCasedLetter(iter, -1) /* -1=preceded */ + ) { + /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ + /* + # Special case for final form of sigma + + 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA + */ + return 0x3c2; /* greek small final sigma */ + } else { + /* no known conditional special case mapping, use a normal mapping */ + } + } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { + long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); + full=(int)value&FULL_LOWER; + if(full!=0) { + /* start of full case mapping strings */ + excOffset=(int)(value>>32)+1; + + /* set the output pointer to the lowercase mapping */ + out.append(exceptions, excOffset, full); + + /* return the string length */ + return full; + } + } + + if(hasSlot(excWord, EXC_LOWER)) { + result=getSlotValue(excWord, EXC_LOWER, excOffset2); + } + } + + return (result==c) ? 
~result : result; + } + + /* internal */ + private final int toUpperOrTitle(int c, ContextIterator iter, + StringBuffer out, + ULocale locale, int[] locCache, + boolean upperNotTitle) { + int result; + int props; + + result=c; + props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)==LOWER) { + result=c+getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props), excOffset2; + int excWord=exceptions[excOffset++]; + int full, index; + + excOffset2=excOffset; + + if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) { + /* use hardcoded conditions and mappings */ + int loc=getCaseLocale(locale, locCache); + + if(loc==LOC_TURKISH && c==0x69) { + /* + # Turkish and Azeri + + # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri + # The following rules handle those cases. + + # When uppercasing, i turns into a dotted capital I + + 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I + 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I + */ + return 0x130; + } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) { + /* + # Lithuanian + + # Lithuanian retains the dot in a lowercase i when followed by accents. 
+ + # Remove DOT ABOVE after "i" with upper or titlecase + + 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE + */ + return 0; /* remove the dot (continue without output) */ + } else { + /* no known conditional special case mapping, use a normal mapping */ + } + } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { + long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); + full=(int)value&0xffff; + + /* start of full case mapping strings */ + excOffset=(int)(value>>32)+1; + + /* skip the lowercase and case-folding result strings */ + excOffset+=full&FULL_LOWER; + full>>=4; + excOffset+=full&0xf; + full>>=4; + + if(upperNotTitle) { + full&=0xf; + } else { + /* skip the uppercase result string */ + excOffset+=full&0xf; + full=(full>>4)&0xf; + } + + if(full!=0) { + /* set the output pointer to the result string */ + out.append(exceptions, excOffset, full); + + /* return the string length */ + return full; + } + } + + if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) { + index=EXC_TITLE; + } else if(hasSlot(excWord, EXC_UPPER)) { + /* here, titlecase is same as uppercase */ + index=EXC_UPPER; + } else { + return ~c; + } + result=getSlotValue(excWord, index, excOffset2); + } + + return (result==c) ? ~result : result; + } + + public final int toFullUpper(int c, ContextIterator iter, + StringBuffer out, + ULocale locale, int[] locCache) { + return toUpperOrTitle(c, iter, out, locale, locCache, true); + } + + public final int toFullTitle(int c, ContextIterator iter, + StringBuffer out, + ULocale locale, int[] locCache) { + return toUpperOrTitle(c, iter, out, locale, locCache, false); + } + + /* case folding ------------------------------------------------------------- */ + + /* + * Case folding is similar to lowercasing. + * The result may be a simple mapping, i.e., a single code point, or + * a full mapping, i.e., a string. 
+ * If the case folding for a code point is the same as its simple (1:1) lowercase mapping, + * then only the lowercase mapping is stored. + * + * Some special cases are hardcoded because their conditions cannot be + * parsed and processed from CaseFolding.txt. + * + * Unicode 3.2 CaseFolding.txt specifies for its status field: + + # C: common case folding, common mappings shared by both simple and full mappings. + # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. + # S: simple case folding, mappings to single characters where different from F. + # T: special case for uppercase I and dotted uppercase I + # - For non-Turkic languages, this mapping is normally not used. + # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. + # + # Usage: + # A. To do a simple case folding, use the mappings with status C + S. + # B. To do a full case folding, use the mappings with status C + F. + # + # The mappings with status T can be used or omitted depending on the desired case-folding + # behavior. (The default option is to exclude them.) + + * Unicode 3.2 has 'T' mappings as follows: + + 0049; T; 0131; # LATIN CAPITAL LETTER I + 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + * while the default mappings for these code points are: + + 0049; C; 0069; # LATIN CAPITAL LETTER I + 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + * U+0130 has no simple case folding (simple-case-folds to itself). + */ + + /** + * Bit mask for getting just the options from a string compare options word + * that are relevant for case folding (of a single string or code point). 
+ * @internal + */ + private static final int FOLD_CASE_OPTIONS_MASK = 0xff; + + /* return the simple case folding mapping for c */ + public final int fold(int c, int options) { + int props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)>=UPPER) { + c+=getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props); + int excWord=exceptions[excOffset++]; + int index; + if((excWord&EXC_CONDITIONAL_FOLD)!=0) { + /* special case folding mappings, hardcoded */ + if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) { + /* default mappings */ + if(c==0x49) { + /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ + return 0x69; + } else if(c==0x130) { + /* no simple case folding for U+0130 */ + return c; + } + } else { + /* Turkic mappings */ + if(c==0x49) { + /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ + return 0x131; + } else if(c==0x130) { + /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + return 0x69; + } + } + } + if(hasSlot(excWord, EXC_FOLD)) { + index=EXC_FOLD; + } else if(hasSlot(excWord, EXC_LOWER)) { + index=EXC_LOWER; + } else { + return c; + } + c=getSlotValue(excWord, index, excOffset); + } + return c; + } + + /* + * Issue for canonical caseless match (UAX #21): + * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve + * canonical equivalence, unlike default-option casefolding. + * For example, I-grave and I + grave fold to strings that are not canonically + * equivalent. + * For more details, see the comment in unorm_compare() in unorm.cpp + * and the intermediate prototype changes for Jitterbug 2021. + * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) + * + * This did not get fixed because it appears that it is not possible to fix + * it for uppercase and lowercase characters (I-grave vs. i-grave) + * together in a way that they still fold to common result strings. 
+ */ + + public final int toFullFolding(int c, StringBuffer out, int options) { + int result; + int props; + + result=c; + props=trie.getCodePointValue(c); + if(!propsHasException(props)) { + if(getTypeFromProps(props)>=UPPER) { + result=c+getDelta(props); + } + } else { + int excOffset=getExceptionsOffset(props), excOffset2; + int excWord=exceptions[excOffset++]; + int full, index; + + excOffset2=excOffset; + + if((excWord&EXC_CONDITIONAL_FOLD)!=0) { + /* use hardcoded conditions and mappings */ + if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) { + /* default mappings */ + if(c==0x49) { + /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ + return 0x69; + } else if(c==0x130) { + /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + out.append(iDot); + return 2; + } + } else { + /* Turkic mappings */ + if(c==0x49) { + /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ + return 0x131; + } else if(c==0x130) { + /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + return 0x69; + } + } + } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { + long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); + full=(int)value&0xffff; + + /* start of full case mapping strings */ + excOffset=(int)(value>>32)+1; + + /* skip the lowercase result string */ + excOffset+=full&FULL_LOWER; + full=(full>>4)&0xf; + + if(full!=0) { + /* set the output pointer to the result string */ + out.append(exceptions, excOffset, full); + + /* return the string length */ + return full; + } + } + + if(hasSlot(excWord, EXC_FOLD)) { + index=EXC_FOLD; + } else if(hasSlot(excWord, EXC_LOWER)) { + index=EXC_LOWER; + } else { + return ~c; + } + result=getSlotValue(excWord, index, excOffset2); + } + + return (result==c) ? 
~result : result; + } + + /* case mapping properties API ---------------------------------------------- */ + + private static final int[] rootLocCache = { LOC_ROOT }; + /* + * We need a StringBuffer for multi-code point output from the + * full case mapping functions. However, we do not actually use that output, + * we just check whether the input character was mapped to anything else. + * We use a shared StringBuffer to avoid allocating a new one in each call. + * We remove its contents each time so that it does not grow large over time. + * + * @internal + */ + public static final StringBuffer dummyStringBuffer = new StringBuffer(); + + public final boolean hasBinaryProperty(int c, int which) { + switch(which) { + case UProperty.LOWERCASE: + return LOWER==getType(c); + case UProperty.UPPERCASE: + return UPPER==getType(c); + case UProperty.SOFT_DOTTED: + return isSoftDotted(c); + case UProperty.CASE_SENSITIVE: + return isCaseSensitive(c); + case UProperty.CASED: + return NONE!=getType(c); + case UProperty.CASE_IGNORABLE: + return (getTypeOrIgnorable(c)>>2)!=0; + /* + * Note: The following Changes_When_Xyz are defined as testing whether + * the NFD form of the input changes when Xyz-case-mapped. + * However, this simpler implementation of these properties, + * ignoring NFD, passes the tests. + * The implementation needs to be changed if the tests start failing. + * When that happens, optimizations should be used to work with the + * per-single-code point ucase_toFullXyz() functions unless + * the NFD form has more than one code point, + * and the property starts set needs to be the union of the + * start sets for normalization and case mappings. 
+ */ + case UProperty.CHANGES_WHEN_LOWERCASED: + dummyStringBuffer.setLength(0); + return toFullLower(c, null, dummyStringBuffer, ULocale.ROOT, rootLocCache)>=0; + case UProperty.CHANGES_WHEN_UPPERCASED: + dummyStringBuffer.setLength(0); + return toFullUpper(c, null, dummyStringBuffer, ULocale.ROOT, rootLocCache)>=0; + case UProperty.CHANGES_WHEN_TITLECASED: + dummyStringBuffer.setLength(0); + return toFullTitle(c, null, dummyStringBuffer, ULocale.ROOT, rootLocCache)>=0; + /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */ + case UProperty.CHANGES_WHEN_CASEMAPPED: + dummyStringBuffer.setLength(0); + return + toFullLower(c, null, dummyStringBuffer, ULocale.ROOT, rootLocCache)>=0 || + toFullUpper(c, null, dummyStringBuffer, ULocale.ROOT, rootLocCache)>=0 || + toFullTitle(c, null, dummyStringBuffer, ULocale.ROOT, rootLocCache)>=0; + default: + return false; + } + } + + // data members -------------------------------------------------------- *** + private int indexes[]; + private char exceptions[]; + private char unfold[]; + + private CharTrie trie; + + // data format constants ----------------------------------------------- *** + private static final String DATA_NAME="ucase"; + private static final String DATA_TYPE="icu"; + private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE; + + /* format "cAsE" */ + private static final byte FMT[]={ 0x63, 0x41, 0x53, 0x45 }; + + /* indexes into indexes[] */ + private static final int IX_INDEX_TOP=0; + //private static final int IX_LENGTH=1; + //private static final int IX_TRIE_SIZE=2; + private static final int IX_EXC_LENGTH=3; + private static final int IX_UNFOLD_LENGTH=4; + + //private static final int IX_MAX_FULL_LENGTH=15; + private static final int IX_TOP=16; + + // definitions for 16-bit case properties word ------------------------- *** + + /* 2-bit constants for types of cased characters */ + public static final int TYPE_MASK=3; + public static final int NONE=0; + public static final 
int LOWER=1; + public static final int UPPER=2; + public static final int TITLE=3; + + private static final int getTypeFromProps(int props) { + return props&TYPE_MASK; + } + + private static final int SENSITIVE= 4; + private static final int EXCEPTION= 8; + + private static final int DOT_MASK= 0x30; + //private static final int NO_DOT= 0; /* normal characters with cc=0 */ + private static final int SOFT_DOTTED= 0x10; /* soft-dotted characters with cc=0 */ + private static final int ABOVE= 0x20; /* "above" accents with cc=230 */ + private static final int OTHER_ACCENT= 0x30; /* other accent character (0>DELTA_SHIFT; + } + + /* case-ignorable uses one of the delta bits, see gencase/store.c */ + private static final int CASE_IGNORABLE=0x40; + + /* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */ + private static final int EXC_SHIFT= 4; + //private static final int EXC_MASK= 0xfff0; + //private static final int MAX_EXCEPTIONS=0x1000; + + /* definitions for 16-bit main exceptions word ------------------------------ */ + + /* first 8 bits indicate values in optional slots */ + private static final int EXC_LOWER=0; + private static final int EXC_FOLD=1; + private static final int EXC_UPPER=2; + private static final int EXC_TITLE=3; + //private static final int EXC_4=4; /* reserved */ + //private static final int EXC_5=5; /* reserved */ + private static final int EXC_CLOSURE=6; + private static final int EXC_FULL_MAPPINGS=7; + //private static final int EXC_ALL_SLOTS=8; /* one past the last slot */ + + /* each slot is 2 uint16_t instead of 1 */ + private static final int EXC_DOUBLE_SLOTS= 0x100; + + /* reserved: exception bits 10..9 */ + + private static final int EXC_CASE_IGNORABLE= 0x800; + + /* EXC_DOT_MASK=DOT_MASK< text.length || start > limit) { + throw new IllegalArgumentException("start: " + start + " or limit: " + + limit + " out of range [0, " + + text.length + ")"); + } + this.text = text; + this.start = start; + this.limit = 
limit; + + this.pos = start; + } + + public int current() { + return pos < limit ? text[pos] : DONE; + } + + public int getLength() { + return limit - start; + } + + public int getIndex() { + return pos - start; + } + + public int next() { + return pos < limit ? text[pos++] : DONE; + } + + public int previous() { + return pos > start ? text[--pos] : DONE; + } + + public void setIndex(int index) { + if (index < 0 || index > limit - start) { + throw new IndexOutOfBoundsException("index: " + index + + " out of range [0, " + + (limit - start) + ")"); + } + pos = start + index; + } + + public int getText(char[] fillIn, int offset) { + int len = limit - start; + System.arraycopy(text, start, fillIn, offset, len); + return len; + } + + /** + * Creates a copy of this iterator, does not clone the underlying + * Replaceableobject + * @return copy of this iterator + */ + public Object clone(){ + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + return null; // never invoked + } + } +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java new file mode 100644 index 00000000000..914edda842e --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java @@ -0,0 +1,141 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl; + +import java.text.CharacterIterator; + +import com.ibm.icu.text.UCharacterIterator; + +/** + * This class is a wrapper around UCharacterIterator and implements the + * CharacterIterator protocol + * @author ram + */ +public class UCharacterIteratorWrapper implements CharacterIterator{ + + public UCharacterIteratorWrapper(UCharacterIterator iter){ + this.iterator = iter; + } + + private UCharacterIterator iterator; + + + /** + * Sets the position to getBeginIndex() and returns the character at that + * position. + * @return the first character in the text, or DONE if the text is empty + * @see #getBeginIndex() + */ + public char first(){ + //UCharacterIterator always iterates from 0 to length + iterator.setToStart(); + return (char)iterator.current(); + } + + /** + * Sets the position to getEndIndex()-1 (getEndIndex() if the text is empty) + * and returns the character at that position. + * @return the last character in the text, or DONE if the text is empty + * @see #getEndIndex() + */ + public char last(){ + iterator.setToLimit(); + return (char)iterator.previous(); + } + + /** + * Gets the character at the current position (as returned by getIndex()). + * @return the character at the current position or DONE if the current + * position is off the end of the text. + * @see #getIndex() + */ + public char current(){ + return (char) iterator.current(); + } + + /** + * Increments the iterator's index by one and returns the character + * at the new index. If the resulting index is greater or equal + * to getEndIndex(), the current index is reset to getEndIndex() and + * a value of DONE is returned. + * @return the character at the new position or DONE if the new + * position is off the end of the text range. 
+ */ + public char next(){ + //pre-increment + iterator.next(); + return (char) iterator.current(); + } + + /** + * Decrements the iterator's index by one and returns the character + * at the new index. If the current index is getBeginIndex(), the index + * remains at getBeginIndex() and a value of DONE is returned. + * @return the character at the new position or DONE if the current + * position is equal to getBeginIndex(). + */ + public char previous(){ + //pre-decrement + return (char) iterator.previous(); + } + + /** + * Sets the position to the specified position in the text and returns that + * character. + * @param position the position within the text. Valid values range from + * getBeginIndex() to getEndIndex(). An IllegalArgumentException is thrown + * if an invalid value is supplied. + * @return the character at the specified position or DONE if the specified position is equal to getEndIndex() + */ + public char setIndex(int position){ + iterator.setIndex(position); + return (char) iterator.current(); + } + + /** + * Returns the start index of the text. + * @return the index at which the text begins. + */ + public int getBeginIndex(){ + //UCharacterIterator always starts from 0 + return 0; + } + + /** + * Returns the end index of the text. This index is the index of the first + * character following the end of the text. + * @return the index after the last character in the text + */ + public int getEndIndex(){ + return iterator.getLength(); + } + + /** + * Returns the current index. + * @return the current index. 
+ */ + public int getIndex(){ + return iterator.getIndex(); + } + + /** + * Create a copy of this iterator + * @return A copy of this + */ + public Object clone(){ + try { + UCharacterIteratorWrapper result = (UCharacterIteratorWrapper) super.clone(); + result.iterator = (UCharacterIterator)this.iterator.clone(); + return result; + } catch (CloneNotSupportedException e) { + return null; // only invoked if bad underlying character iterator + } + } + +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java new file mode 100644 index 00000000000..340cad2fec8 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java @@ -0,0 +1,1674 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.MissingResourceException; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UCharacterCategory; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** +* Internal class to manage character names. +* Since data for names are stored +* in an array of char, by default indexes used in this class is refering to +* a 2 byte count, unless otherwise stated. Cases where the index is refering +* to a byte count, the index is halved and depending on whether the index is +* even or odd, the MSB or LSB of the result char at the halved index is +* returned. For indexes to an array of int, the index is multiplied by 2, +* result char at the multiplied index and its following char is returned as an +* int. 
+* UCharacter acts as a public facade for this class +* Note : 0 - 0x1F are control characters without names in Unicode 3.0 +* @author Syn Wee Quek +* @since nov0700 +*/ + +public final class UCharacterName +{ + // public data members ---------------------------------------------- + + /* + * public singleton instance + */ + public static final UCharacterName INSTANCE; + + static { + try { + INSTANCE = new UCharacterName(); + } catch (IOException e) { + ///CLOVER:OFF + throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","",""); + ///CLOVER:ON + } + } + + /** + * Number of lines per group + * 1 << GROUP_SHIFT_ + */ + public static final int LINES_PER_GROUP_ = 1 << 5; + /** + * Maximum number of groups + */ + public int m_groupcount_ = 0; + + // public methods --------------------------------------------------- + + /** + * Retrieve the name of a Unicode code point. + * Depending on choice, the character name written into the + * buffer is the "modern" name or the name that was defined in Unicode + * version 1.0. + * The name contains only "invariant" characters + * like A-Z, 0-9, space, and '-'. + * + * @param ch the code point for which to get the name. + * @param choice Selector for which name to get. 
+ * @return if code point is above 0x1fff, null is returned + */ + public String getName(int ch, int choice) + { + if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE || + choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) { + return null; + } + + String result = null; + + result = getAlgName(ch, choice); + + // getting normal character name + if (result == null || result.length() == 0) { + if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + result = getExtendedName(ch); + } else { + result = getGroupName(ch, choice); + } + } + + return result; + } + + /** + * Find a character by its name and return its code point value + * @param choice selector to indicate if argument name is a Unicode 1.0 + * or the most current version + * @param name the name to search for + * @return code point + */ + public int getCharFromName(int choice, String name) + { + // checks for illegal arguments + if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT || + name == null || name.length() == 0) { + return -1; + } + + // try extended names first + int result = getExtendedChar(name.toLowerCase(), choice); + if (result >= -1) { + return result; + } + + String upperCaseName = name.toUpperCase(); + // try algorithmic names first, if fails then try group names + // int result = getAlgorithmChar(choice, uppercasename); + + if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || + choice == UCharacterNameChoice.EXTENDED_CHAR_NAME + ) { + int count = 0; + if (m_algorithm_ != null) { + count = m_algorithm_.length; + } + for (count --; count >= 0; count --) { + result = m_algorithm_[count].getChar(upperCaseName); + if (result >= 0) { + return result; + } + } + } + + if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + result = getGroupChar(upperCaseName, + UCharacterNameChoice.UNICODE_CHAR_NAME); + if (result == -1) { + result = getGroupChar(upperCaseName, + UCharacterNameChoice.UNICODE_10_CHAR_NAME); + } + if (result == -1) { + result = getGroupChar(upperCaseName, + 
UCharacterNameChoice.CHAR_NAME_ALIAS); + } + } + else { + result = getGroupChar(upperCaseName, choice); + } + return result; + } + + // these are all UCharacterNameIterator use methods ------------------- + + /** + * Reads a block of compressed lengths of 32 strings and expands them into + * offsets and lengths for each string. Lengths are stored with a + * variable-width encoding in consecutive nibbles: + * If a nibble<0xc, then it is the length itself (0 = empty string). + * If a nibble>=0xc, then it forms a length value with the following + * nibble. + * The offsets and lengths arrays must be at least 33 (one more) long + * because there is no check here at the end if the last nibble is still + * used. + * @param index of group string object in array + * @param offsets array to store the value of the string offsets + * @param lengths array to store the value of the string length + * @return next index of the data string immediately after the lengths + * in terms of byte address + */ + public int getGroupLengths(int index, char offsets[], char lengths[]) + { + char length = 0xffff; + byte b = 0, + n = 0; + int shift; + index = index * m_groupsize_; // byte count offsets of group strings + int stringoffset = UCharacterUtility.toInt( + m_groupinfo_[index + OFFSET_HIGH_OFFSET_], + m_groupinfo_[index + OFFSET_LOW_OFFSET_]); + + offsets[0] = 0; + + // all 32 lengths must be read to get the offset of the first group + // string + for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) { + b = m_groupstring_[stringoffset]; + shift = 4; + + while (shift >= 0) { + // getting nibble + n = (byte)((b >> shift) & 0x0F); + if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) { + length = (char)((n - 12) << 4); + } + else { + if (length != 0xffff) { + lengths[i] = (char)((length | n) + 12); + } + else { + lengths[i] = (char)n; + } + + if (i < LINES_PER_GROUP_) { + offsets[i + 1] = (char)(offsets[i] + lengths[i]); + } + + length = 0xffff; + i ++; + } + + shift -= 4; + } + } + return 
stringoffset; + } + + /** + * Gets the name of the argument group index. + * UnicodeData.txt uses ';' as a field separator, so no field can contain + * ';' as part of its contents. In unames.icu, it is marked as + * token[';'] == -1 only if the semicolon is used in the data file - which + * is iff we have Unicode 1.0 names or ISO comments or aliases. + * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases + * although we know that it will never be part of a name. + * Equivalent to ICU4C's expandName. + * @param index of the group name string in byte count + * @param length of the group name string + * @param choice of Unicode 1.0 name or the most current name + * @return name of the group + */ + public String getGroupName(int index, int length, int choice) + { + if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && + choice != UCharacterNameChoice.EXTENDED_CHAR_NAME + ) { + if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) { + /* + * skip the modern name if it is not requested _and_ + * if the semicolon byte value is a character, not a token number + */ + int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 
2 : choice; + do { + int oldindex = index; + index += UCharacterUtility.skipByteSubString(m_groupstring_, + index, length, (byte)';'); + length -= (index - oldindex); + } while(--fieldIndex>0); + } + else { + // the semicolon byte is a token number, therefore only modern + // names are stored in unames.dat and there is no such + // requested alternate name here + length = 0; + } + } + + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); + byte b; + char token; + for (int i = 0; i < length;) { + b = m_groupstring_[index + i]; + i ++; + + if (b >= m_tokentable_.length) { + if (b == ';') { + break; + } + m_utilStringBuffer_.append(b); // implicit letter + } + else { + token = m_tokentable_[b & 0x00ff]; + if (token == 0xFFFE) { + // this is a lead byte for a double-byte token + token = m_tokentable_[b << 8 | + (m_groupstring_[index + i] & 0x00ff)]; + i ++; + } + if (token == 0xFFFF) { + if (b == ';') { + // skip the semicolon if we are seeking extended + // names and there was no 2.0 name but there + // is a 1.0 name. + if (m_utilStringBuffer_.length() == 0 && choice == + UCharacterNameChoice.EXTENDED_CHAR_NAME) { + continue; + } + break; + } + // explicit letter + m_utilStringBuffer_.append((char)(b & 0x00ff)); + } + else { // write token word + UCharacterUtility.getNullTermByteSubString( + m_utilStringBuffer_, m_tokenstring_, token); + } + } + } + + if (m_utilStringBuffer_.length() > 0) { + return m_utilStringBuffer_.toString(); + } + } + return null; + } + + /** + * Retrieves the extended name + */ + public String getExtendedName(int ch) + { + String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); + if (result == null) { + if (getType(ch) == UCharacterCategory.CONTROL) { + result = getName(ch, + UCharacterNameChoice.UNICODE_10_CHAR_NAME); + } + if (result == null) { + result = getExtendedOr10Name(ch); + } + } + return result; + } + + /** + * Gets the group index for the codepoint, or the group before it. 
+ * @param codepoint The codepoint index. + * @return group index containing codepoint or the group before it. + */ + public int getGroup(int codepoint) + { + int endGroup = m_groupcount_; + int msb = getCodepointMSB(codepoint); + int result = 0; + // binary search for the group of names that contains the one for + // code + // find the group that contains codepoint, or the highest before it + while (result < endGroup - 1) { + int gindex = (result + endGroup) >> 1; + if (msb < getGroupMSB(gindex)) { + endGroup = gindex; + } + else { + result = gindex; + } + } + return result; + } + + /** + * Gets the extended and 1.0 name when the most current unicode names + * fail + * @param ch codepoint + * @return name of codepoint extended or 1.0 + */ + public String getExtendedOr10Name(int ch) + { + String result = null; + if (getType(ch) == UCharacterCategory.CONTROL) { + result = getName(ch, + UCharacterNameChoice.UNICODE_10_CHAR_NAME); + } + if (result == null) { + int type = getType(ch); + // Return unknown if the table of names above is not up to + // date. 
+ if (type >= TYPE_NAMES_.length) { + result = UNKNOWN_TYPE_NAME_; + } + else { + result = TYPE_NAMES_[type]; + } + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); + m_utilStringBuffer_.append('<'); + m_utilStringBuffer_.append(result); + m_utilStringBuffer_.append('-'); + String chStr = Integer.toHexString(ch).toUpperCase(); + int zeros = 4 - chStr.length(); + while (zeros > 0) { + m_utilStringBuffer_.append('0'); + zeros --; + } + m_utilStringBuffer_.append(chStr); + m_utilStringBuffer_.append('>'); + result = m_utilStringBuffer_.toString(); + } + } + return result; + } + + /** + * Gets the MSB from the group index + * @param gindex group index + * @return the MSB of the group if gindex is valid, -1 otherwise + */ + public int getGroupMSB(int gindex) + { + if (gindex >= m_groupcount_) { + return -1; + } + return m_groupinfo_[gindex * m_groupsize_]; + } + + /** + * Gets the MSB of the codepoint + * @param codepoint The codepoint value. + * @return the MSB of the codepoint + */ + public static int getCodepointMSB(int codepoint) + { + return codepoint >> GROUP_SHIFT_; + } + + /** + * Gets the maximum codepoint + 1 of the group + * @param msb most significant byte of the group + * @return limit codepoint of the group + */ + public static int getGroupLimit(int msb) + { + return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_; + } + + /** + * Gets the minimum codepoint of the group + * @param msb most significant byte of the group + * @return minimum codepoint of the group + */ + public static int getGroupMin(int msb) + { + return msb << GROUP_SHIFT_; + } + + /** + * Gets the offset to a group + * @param codepoint The codepoint value. + * @return offset to a group + */ + public static int getGroupOffset(int codepoint) + { + return codepoint & GROUP_MASK_; + } + + /** + * Gets the minimum codepoint of a group + * @param codepoint The codepoint value. 
+ * @return minimum codepoint in the group which codepoint belongs to + */ + ///CLOVER:OFF + public static int getGroupMinFromCodepoint(int codepoint) + { + return codepoint & ~GROUP_MASK_; + } + ///CLOVER:ON + + /** + * Get the Algorithm range length + * @return Algorithm range length + */ + public int getAlgorithmLength() + { + return m_algorithm_.length; + } + + /** + * Gets the start of the range + * @param index algorithm index + * @return algorithm range start + */ + public int getAlgorithmStart(int index) + { + return m_algorithm_[index].m_rangestart_; + } + + /** + * Gets the end of the range + * @param index algorithm index + * @return algorithm range end + */ + public int getAlgorithmEnd(int index) + { + return m_algorithm_[index].m_rangeend_; + } + + /** + * Gets the Algorithmic name of the codepoint + * @param index algorithmic range index + * @param codepoint The codepoint value. + * @return algorithmic name of codepoint + */ + public String getAlgorithmName(int index, int codepoint) + { + String result = null; + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); + m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_); + result = m_utilStringBuffer_.toString(); + } + return result; + } + + /** + * Gets the group name of the character + * @param ch character to get the group name + * @param choice name choice selector to choose a unicode 1.0 or newer name + */ + public synchronized String getGroupName(int ch, int choice) + { + // gets the msb + int msb = getCodepointMSB(ch); + int group = getGroup(ch); + + // return this if it is an exact match + if (msb == m_groupinfo_[group * m_groupsize_]) { + int index = getGroupLengths(group, m_groupoffsets_, + m_grouplengths_); + int offset = ch & GROUP_MASK_; + return getGroupName(index + m_groupoffsets_[offset], + m_grouplengths_[offset], choice); + } + + return null; + } + + // these are transliterator use methods --------------------------------- + + 
/** + * Gets the maximum length of any codepoint name. + * Equivalent to uprv_getMaxCharNameLength. + * @return the maximum length of any codepoint name + */ + public int getMaxCharNameLength() + { + if (initNameSetsLengths()) { + return m_maxNameLength_; + } + else { + return 0; + } + } + + /** + * Gets the maximum length of any iso comments. + * Equivalent to uprv_getMaxISOCommentLength. + * @return the maximum length of any codepoint name + */ + ///CLOVER:OFF + public int getMaxISOCommentLength() + { + if (initNameSetsLengths()) { + return m_maxISOCommentLength_; + } + else { + return 0; + } + } + ///CLOVER:ON + + /** + * Fills set with characters that are used in Unicode character names. + * Equivalent to uprv_getCharNameCharacters. + * @param set USet to receive characters. Existing contents are deleted. + */ + public void getCharNameCharacters(UnicodeSet set) + { + convert(m_nameSet_, set); + } + + /** + * Fills set with characters that are used in Unicode character names. + * Equivalent to uprv_getISOCommentCharacters. + * @param set USet to receive characters. Existing contents are deleted. 
+ */ + ///CLOVER:OFF + public void getISOCommentCharacters(UnicodeSet set) + { + convert(m_ISOCommentSet_, set); + } + ///CLOVER:ON + + // package private inner class -------------------------------------- + + /** + * Algorithmic name class + */ + static final class AlgorithmName + { + // package private data members ---------------------------------- + + /** + * Constant type value of the different AlgorithmName + */ + static final int TYPE_0_ = 0; + static final int TYPE_1_ = 1; + + // package private constructors ---------------------------------- + + /** + * Constructor + */ + AlgorithmName() + { + } + + // package private methods --------------------------------------- + + /** + * Sets the information for accessing the algorithmic names + * @param rangestart starting code point that lies within this name group + * @param rangeend end code point that lies within this name group + * @param type algorithm type. There's 2 kinds of algorithmic type. First + * which uses code point as part of its name and the other uses + * variant postfix strings + * @param variant algorithmic variant + * @return true if values are valid + */ + boolean setInfo(int rangestart, int rangeend, byte type, byte variant) + { + if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend + && rangeend <= UCharacter.MAX_VALUE && + (type == TYPE_0_ || type == TYPE_1_)) { + m_rangestart_ = rangestart; + m_rangeend_ = rangeend; + m_type_ = type; + m_variant_ = variant; + return true; + } + return false; + } + + /** + * Sets the factor data + * @param factor Array of factor + * @return true if factors are valid + */ + boolean setFactor(char factor[]) + { + if (factor.length == m_variant_) { + m_factor_ = factor; + return true; + } + return false; + } + + /** + * Sets the name prefix + * @param prefix + * @return true if prefix is set + */ + boolean setPrefix(String prefix) + { + if (prefix != null && prefix.length() > 0) { + m_prefix_ = prefix; + return true; + } + return false; + } + + /** 
+ * Sets the variant factorized name data + * @param string variant factorized name data + * @return true if values are set + */ + boolean setFactorString(byte string[]) + { + // factor and variant string can be empty for things like + // hanggul code points + m_factorstring_ = string; + return true; + } + + /** + * Checks if code point lies in Algorithm object at index + * @param ch code point + */ + boolean contains(int ch) + { + return m_rangestart_ <= ch && ch <= m_rangeend_; + } + + /** + * Appends algorithm name of code point into StringBuffer. + * Note this method does not check for validity of code point in Algorithm, + * result is undefined if code point does not belong in Algorithm. + * @param ch code point + * @param str StringBuffer to append to + */ + void appendName(int ch, StringBuffer str) + { + str.append(m_prefix_); + switch (m_type_) + { + case TYPE_0_: + // prefix followed by hex digits indicating variants + str.append(Utility.hex(ch,m_variant_)); + break; + case TYPE_1_: + // prefix followed by factorized-elements + int offset = ch - m_rangestart_; + int indexes[] = m_utilIntBuffer_; + int factor; + + // write elements according to the factors + // the factorized elements are determined by modulo + // arithmetic + synchronized (m_utilIntBuffer_) { + for (int i = m_variant_ - 1; i > 0; i --) + { + factor = m_factor_[i] & 0x00FF; + indexes[i] = offset % factor; + offset /= factor; + } + + // we don't need to calculate the last modulus because + // start <= code <= end guarantees here that + // code <= factors[0] + indexes[0] = offset; + + // joining up the factorized strings + str.append(getFactorString(indexes, m_variant_)); + } + break; + } + } + + /** + * Gets the character for the argument algorithmic name + * @return the algorithmic char or -1 otherwise. 
+ */ + int getChar(String name) + { + int prefixlen = m_prefix_.length(); + if (name.length() < prefixlen || + !m_prefix_.equals(name.substring(0, prefixlen))) { + return -1; + } + + switch (m_type_) + { + case TYPE_0_ : + try + { + int result = Integer.parseInt(name.substring(prefixlen), + 16); + // does it fit into the range? + if (m_rangestart_ <= result && result <= m_rangeend_) { + return result; + } + } + catch (NumberFormatException e) + { + return -1; + } + break; + case TYPE_1_ : + // repetitative suffix name comparison done here + // offset is the character code - start + for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++) + { + int offset = ch - m_rangestart_; + int indexes[] = m_utilIntBuffer_; + int factor; + + // write elements according to the factors + // the factorized elements are determined by modulo + // arithmetic + synchronized (m_utilIntBuffer_) { + for (int i = m_variant_ - 1; i > 0; i --) + { + factor = m_factor_[i] & 0x00FF; + indexes[i] = offset % factor; + offset /= factor; + } + + // we don't need to calculate the last modulus + // because start <= code <= end guarantees here that + // code <= factors[0] + indexes[0] = offset; + + // joining up the factorized strings + if (compareFactorString(indexes, m_variant_, name, + prefixlen)) { + return ch; + } + } + } + } + + return -1; + } + + /** + * Adds all chars in the set of algorithmic names into the set. + * Equivalent to part of calcAlgNameSetsLengths. 
+ * @param set int set to add the chars of the algorithm names into + * @param maxlength maximum length to compare to + * @return the length that is either maxlength of the length of this + * algorithm name if it is longer than maxlength + */ + int add(int set[], int maxlength) + { + // prefix length + int length = UCharacterName.add(set, m_prefix_); + switch (m_type_) { + case TYPE_0_ : { + // name = prefix + (range->variant times) hex-digits + // prefix + length += m_variant_; + /* synwee to check + * addString(set, (const char *)(range + 1)) + + range->variant;*/ + break; + } + case TYPE_1_ : { + // name = prefix factorized-elements + // get the set and maximum factor suffix length for each + // factor + for (int i = m_variant_ - 1; i > 0; i --) + { + int maxfactorlength = 0; + int count = 0; + for (int factor = m_factor_[i]; factor > 0; -- factor) { + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, + m_utilStringBuffer_.length()); + count + = UCharacterUtility.getNullTermByteSubString( + m_utilStringBuffer_, + m_factorstring_, count); + UCharacterName.add(set, m_utilStringBuffer_); + if (m_utilStringBuffer_.length() + > maxfactorlength) + { + maxfactorlength + = m_utilStringBuffer_.length(); + } + } + } + length += maxfactorlength; + } + } + } + if (length > maxlength) { + return length; + } + return maxlength; + } + + // private data members ------------------------------------------ + + /** + * Algorithmic data information + */ + private int m_rangestart_; + private int m_rangeend_; + private byte m_type_; + private byte m_variant_; + private char m_factor_[]; + private String m_prefix_; + private byte m_factorstring_[]; + /** + * Utility StringBuffer + */ + private StringBuffer m_utilStringBuffer_ = new StringBuffer(); + /** + * Utility int buffer + */ + private int m_utilIntBuffer_[] = new int[256]; + + // private methods ----------------------------------------------- + + /** + * Gets the indexth string in each of the argument factor 
block + * @param index array with each index corresponding to each factor block + * @param length length of the array index + * @return the combined string of the array of indexth factor string in + * factor block + */ + private String getFactorString(int index[], int length) + { + int size = m_factor_.length; + if (index == null || length != size) { + return null; + } + + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); + int count = 0; + int factor; + size --; + for (int i = 0; i <= size; i ++) { + factor = m_factor_[i]; + count = UCharacterUtility.skipNullTermByteSubString( + m_factorstring_, count, index[i]); + count = UCharacterUtility.getNullTermByteSubString( + m_utilStringBuffer_, m_factorstring_, + count); + if (i != size) { + count = UCharacterUtility.skipNullTermByteSubString( + m_factorstring_, count, + factor - index[i] - 1); + } + } + return m_utilStringBuffer_.toString(); + } + } + + /** + * Compares the indexth string in each of the argument factor block with + * the argument string + * @param index array with each index corresponding to each factor block + * @param length index array length + * @param str string to compare with + * @param offset of str to start comparison + * @return true if string matches + */ + private boolean compareFactorString(int index[], int length, String str, + int offset) + { + int size = m_factor_.length; + if (index == null || length != size) + return false; + + int count = 0; + int strcount = offset; + int factor; + size --; + for (int i = 0; i <= size; i ++) + { + factor = m_factor_[i]; + count = UCharacterUtility.skipNullTermByteSubString( + m_factorstring_, count, index[i]); + strcount = UCharacterUtility.compareNullTermByteSubString(str, + m_factorstring_, strcount, count); + if (strcount < 0) { + return false; + } + + if (i != size) { + count = UCharacterUtility.skipNullTermByteSubString( + m_factorstring_, count, factor - index[i]); + } + } + if (strcount != 
str.length()) { + return false; + } + return true; + } + } + + // package private data members -------------------------------------- + + /** + * Size of each groups + */ + int m_groupsize_ = 0; + + // package private methods -------------------------------------------- + + /** + * Sets the token data + * @param token array of tokens + * @param tokenstring array of string values of the tokens + * @return false if there is a data error + */ + boolean setToken(char token[], byte tokenstring[]) + { + if (token != null && tokenstring != null && token.length > 0 && + tokenstring.length > 0) { + m_tokentable_ = token; + m_tokenstring_ = tokenstring; + return true; + } + return false; + } + + /** + * Set the algorithm name information array + * @param alg Algorithm information array + * @return true if the group string offset has been set correctly + */ + boolean setAlgorithm(AlgorithmName alg[]) + { + if (alg != null && alg.length != 0) { + m_algorithm_ = alg; + return true; + } + return false; + } + + /** + * Sets the number of group and size of each group in number of char + * @param count number of groups + * @param size size of group in char + * @return true if group size is set correctly + */ + boolean setGroupCountSize(int count, int size) + { + if (count <= 0 || size <= 0) { + return false; + } + m_groupcount_ = count; + m_groupsize_ = size; + return true; + } + + /** + * Sets the group name data + * @param group index information array + * @param groupstring name information array + * @return false if there is a data error + */ + boolean setGroup(char group[], byte groupstring[]) + { + if (group != null && groupstring != null && group.length > 0 && + groupstring.length > 0) { + m_groupinfo_ = group; + m_groupstring_ = groupstring; + return true; + } + return false; + } + + // private data members ---------------------------------------------- + + /** + * Data used in unames.icu + */ + private char m_tokentable_[]; + private byte m_tokenstring_[]; + private char 
m_groupinfo_[]; + private byte m_groupstring_[]; + private AlgorithmName m_algorithm_[]; + + /** + * Group use. Note - access must be synchronized. + */ + private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1]; + private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1]; + + /** + * Default name of the name datafile + */ + private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu"; + /** + * Shift count to retrieve group information + */ + private static final int GROUP_SHIFT_ = 5; + /** + * Mask to retrieve the offset for a particular character within a group + */ + private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1; + /** + * Default buffer size of datafile + */ + private static final int NAME_BUFFER_SIZE_ = 100000; + + /** + * Position of offsethigh in group information array + */ + private static final int OFFSET_HIGH_OFFSET_ = 1; + + /** + * Position of offsetlow in group information array + */ + private static final int OFFSET_LOW_OFFSET_ = 2; + /** + * Double nibble indicator, any nibble > this number has to be combined + * with its following nibble + */ + private static final int SINGLE_NIBBLE_MAX_ = 11; + + /* + * Maximum length of character names (regular & 1.0). + */ + //private static int MAX_NAME_LENGTH_ = 0; + /* + * Maximum length of ISO comments. + */ + //private static int MAX_ISO_COMMENT_LENGTH_ = 0; + + /** + * Set of chars used in character names (regular & 1.0). + * Chars are platform-dependent (can be EBCDIC). + */ + private int m_nameSet_[] = new int[8]; + /** + * Set of chars used in ISO comments. (regular & 1.0). + * Chars are platform-dependent (can be EBCDIC). 
+ */ + private int m_ISOCommentSet_[] = new int[8]; + /** + * Utility StringBuffer + */ + private StringBuffer m_utilStringBuffer_ = new StringBuffer(); + /** + * Utility int buffer + */ + private int m_utilIntBuffer_[] = new int[2]; + /** + * Maximum ISO comment length + */ + private int m_maxISOCommentLength_; + /** + * Maximum name length + */ + private int m_maxNameLength_; + /** + * Type names used for extended names + */ + private static final String TYPE_NAMES_[] = {"unassigned", + "uppercase letter", + "lowercase letter", + "titlecase letter", + "modifier letter", + "other letter", + "non spacing mark", + "enclosing mark", + "combining spacing mark", + "decimal digit number", + "letter number", + "other number", + "space separator", + "line separator", + "paragraph separator", + "control", + "format", + "private use area", + "surrogate", + "dash punctuation", + "start punctuation", + "end punctuation", + "connector punctuation", + "other punctuation", + "math symbol", + "currency symbol", + "modifier symbol", + "other symbol", + "initial punctuation", + "final punctuation", + "noncharacter", + "lead surrogate", + "trail surrogate"}; + /** + * Unknown type name + */ + private static final String UNKNOWN_TYPE_NAME_ = "unknown"; + /** + * Not a character type + */ + private static final int NON_CHARACTER_ + = UCharacterCategory.CHAR_CATEGORY_COUNT; + /** + * Lead surrogate type + */ + private static final int LEAD_SURROGATE_ + = UCharacterCategory.CHAR_CATEGORY_COUNT + 1; + /** + * Trail surrogate type + */ + private static final int TRAIL_SURROGATE_ + = UCharacterCategory.CHAR_CATEGORY_COUNT + 2; + /** + * Extended category count + */ + static final int EXTENDED_CATEGORY_ + = UCharacterCategory.CHAR_CATEGORY_COUNT + 3; + + // private constructor ------------------------------------------------ + + /** + *

    Private constructor that reads the character name data from unames.icu.

    + * @exception IOException thrown when data reading fails + */ + private UCharacterName() throws IOException + { + InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_); + BufferedInputStream b = new BufferedInputStream(is, NAME_BUFFER_SIZE_); + UCharacterNameReader reader = new UCharacterNameReader(b); + reader.read(this); + b.close(); + } + + // private methods --------------------------------------------------- + + /** + * Gets the algorithmic name for the argument character + * @param ch character to determine name for + * @param choice name choice + * @return the algorithmic name or null if not found + */ + private String getAlgName(int ch, int choice) + { + /* Only the normative character name can be algorithmic. */ + if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || + choice == UCharacterNameChoice.EXTENDED_CHAR_NAME + ) { + // index in terms integer index + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); + + for (int index = m_algorithm_.length - 1; index >= 0; index --) + { + if (m_algorithm_[index].contains(ch)) { + m_algorithm_[index].appendName(ch, m_utilStringBuffer_); + return m_utilStringBuffer_.toString(); + } + } + } + } + return null; + } + + /** + * Getting the character with the tokenized argument name + * @param name of the character + * @return character with the tokenized argument name or -1 if character + * is not found + */ + private synchronized int getGroupChar(String name, int choice) + { + for (int i = 0; i < m_groupcount_; i ++) { + // populating the data set of grouptable + + int startgpstrindex = getGroupLengths(i, m_groupoffsets_, + m_grouplengths_); + + // shift out to function + int result = getGroupChar(startgpstrindex, m_grouplengths_, name, + choice); + if (result != -1) { + return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_) + | result; + } + } + return -1; + } + + /** + * Compares and retrieve character if name is found within the argument + * group + * 
@param index index where the set of names reside in the group block + * @param length list of lengths of the strings + * @param name character name to search for + * @param choice of either 1.0 or the most current unicode name + * @return relative character in the group which matches name, otherwise if + * not found, -1 will be returned + */ + private int getGroupChar(int index, char length[], String name, + int choice) + { + byte b = 0; + char token; + int len; + int namelen = name.length(); + int nindex; + int count; + + for (int result = 0; result <= LINES_PER_GROUP_; result ++) { + nindex = 0; + len = length[result]; + + if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME && + choice != UCharacterNameChoice.EXTENDED_CHAR_NAME + ) { + /* + * skip the modern name if it is not requested _and_ + * if the semicolon byte value is a character, not a token number + */ + int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice; + do { + int oldindex = index; + index += UCharacterUtility.skipByteSubString(m_groupstring_, + index, len, (byte)';'); + len -= (index - oldindex); + } while(--fieldIndex>0); + } + + // number of tokens is > the length of the name + // write each letter directly, and write a token word per token + for (count = 0; count < len && nindex != -1 && nindex < namelen; + ) { + b = m_groupstring_[index + count]; + count ++; + + if (b >= m_tokentable_.length) { + if (name.charAt(nindex ++) != (b & 0xFF)) { + nindex = -1; + } + } + else { + token = m_tokentable_[b & 0xFF]; + if (token == 0xFFFE) { + // this is a lead byte for a double-byte token + token = m_tokentable_[b << 8 | + (m_groupstring_[index + count] & 0x00ff)]; + count ++; + } + if (token == 0xFFFF) { + if (name.charAt(nindex ++) != (b & 0xFF)) { + nindex = -1; + } + } + else { + // compare token with name + nindex = UCharacterUtility.compareNullTermByteSubString( + name, m_tokenstring_, nindex, token); + } + } + } + + if (namelen == nindex && + (count == len || 
m_groupstring_[index + count] == ';')) { + return result; + } + + index += len; + } + return -1; + } + + /** + * Gets the character extended type + * @param ch character to be tested + * @return extended type it is associated with + */ + private static int getType(int ch) + { + if (UCharacterUtility.isNonCharacter(ch)) { + // not a character we return a invalid category count + return NON_CHARACTER_; + } + int result = UCharacter.getType(ch); + if (result == UCharacterCategory.SURROGATE) { + if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { + result = LEAD_SURROGATE_; + } + else { + result = TRAIL_SURROGATE_; + } + } + return result; + } + + /** + * Getting the character with extended name of the form <....>. + * @param name of the character to be found + * @param choice name choice + * @return character associated with the name, -1 if such character is not + * found and -2 if we should continue with the search. + */ + private static int getExtendedChar(String name, int choice) + { + if (name.charAt(0) == '<') { + if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + int endIndex = name.length() - 1; + if (name.charAt(endIndex) == '>') { + int startIndex = name.lastIndexOf('-'); + if (startIndex >= 0) { // We've got a category. + startIndex ++; + int result = -1; + try { + result = Integer.parseInt( + name.substring(startIndex, endIndex), + 16); + } + catch (NumberFormatException e) { + return -1; + } + // Now validate the category name. We could use a + // binary search, or a trie, if we really wanted to. + String type = name.substring(1, startIndex - 1); + int length = TYPE_NAMES_.length; + for (int i = 0; i < length; ++ i) { + if (type.compareTo(TYPE_NAMES_[i]) == 0) { + if (getType(result) == i) { + return result; + } + break; + } + } + } + } + } + return -1; + } + return -2; + } + + // sets of name characters, maximum name lengths ----------------------- + + /** + * Adds a codepoint into a set of ints. + * Equivalent to SET_ADD. 
+ * @param set set to add to + * @param ch 16 bit char to add + */ + private static void add(int set[], char ch) + { + set[ch >>> 5] |= 1 << (ch & 0x1f); + } + + /** + * Checks if a codepoint is a part of a set of ints. + * Equivalent to SET_CONTAINS. + * @param set set to check in + * @param ch 16 bit char to check + * @return true if codepoint is part of the set, false otherwise + */ + private static boolean contains(int set[], char ch) + { + return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0; + } + + /** + * Adds all characters of the argument str and gets the length + * Equivalent to calcStringSetLength. + * @param set set to add all chars of str to + * @param str string to add + */ + private static int add(int set[], String str) + { + int result = str.length(); + + for (int i = result - 1; i >= 0; i --) { + add(set, str.charAt(i)); + } + return result; + } + + /** + * Adds all characters of the argument str and gets the length + * Equivalent to calcStringSetLength. + * @param set set to add all chars of str to + * @param str string to add + */ + private static int add(int set[], StringBuffer str) + { + int result = str.length(); + + for (int i = result - 1; i >= 0; i --) { + add(set, str.charAt(i)); + } + return result; + } + + /** + * Adds all algorithmic names into the name set. + * Equivalent to part of calcAlgNameSetsLengths. + * @param maxlength length to compare to + * @return the maximum length of any possible algorithmic name if it is > + * maxlength, otherwise maxlength is returned. + */ + private int addAlgorithmName(int maxlength) + { + int result = 0; + for (int i = m_algorithm_.length - 1; i >= 0; i --) { + result = m_algorithm_[i].add(m_nameSet_, maxlength); + if (result > maxlength) { + maxlength = result; + } + } + return maxlength; + } + + /** + * Adds all extended names into the name set. + * Equivalent to part of calcExtNameSetsLengths. + * @param maxlength length to compare to + * @return the maxlength of any possible extended name. 
+ */ + private int addExtendedName(int maxlength) + { + for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) { + // for each category, count the length of the category name + // plus 9 = + // 2 for <> + // 1 for - + // 6 for most hex digits per code point + int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]); + if (length > maxlength) { + maxlength = length; + } + } + return maxlength; + } + + /** + * Adds names of a group to the argument set. + * Equivalent to calcNameSetLength. + * @param offset of the group name string in byte count + * @param length of the group name string + * @param tokenlength array to store the length of each token + * @param set to add to + * @return the length of the name string and the length of the group + * string parsed + */ + private int[] addGroupName(int offset, int length, byte tokenlength[], + int set[]) + { + int resultnlength = 0; + int resultplength = 0; + while (resultplength < length) { + char b = (char)(m_groupstring_[offset + resultplength] & 0xff); + resultplength ++; + if (b == ';') { + break; + } + + if (b >= m_tokentable_.length) { + add(set, b); // implicit letter + resultnlength ++; + } + else { + char token = m_tokentable_[b & 0x00ff]; + if (token == 0xFFFE) { + // this is a lead byte for a double-byte token + b = (char)(b << 8 | (m_groupstring_[offset + resultplength] + & 0x00ff)); + token = m_tokentable_[b]; + resultplength ++; + } + if (token == 0xFFFF) { + add(set, b); + resultnlength ++; + } + else { + // count token word + // use cached token length + byte tlength = tokenlength[b]; + if (tlength == 0) { + synchronized (m_utilStringBuffer_) { + m_utilStringBuffer_.delete(0, + m_utilStringBuffer_.length()); + UCharacterUtility.getNullTermByteSubString( + m_utilStringBuffer_, m_tokenstring_, + token); + tlength = (byte)add(set, m_utilStringBuffer_); + } + tokenlength[b] = tlength; + } + resultnlength += tlength; + } + } + } + m_utilIntBuffer_[0] = resultnlength; + m_utilIntBuffer_[1] = resultplength; + return 
m_utilIntBuffer_; + } + + /** + * Adds names of all group to the argument set. + * Sets the data member m_max*Length_. + * Method called only once. + * Equivalent to calcGroupNameSetsLength. + * @param maxlength length to compare to + */ + private void addGroupName(int maxlength) + { + int maxisolength = 0; + char offsets[] = new char[LINES_PER_GROUP_ + 2]; + char lengths[] = new char[LINES_PER_GROUP_ + 2]; + byte tokenlengths[] = new byte[m_tokentable_.length]; + + // enumerate all groups + // for (int i = m_groupcount_ - 1; i >= 0; i --) { + for (int i = 0; i < m_groupcount_ ; i ++) { + int offset = getGroupLengths(i, offsets, lengths); + // enumerate all lines in each group + // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0; + // linenumber --) { + for (int linenumber = 0; linenumber < LINES_PER_GROUP_; + linenumber ++) { + int lineoffset = offset + offsets[linenumber]; + int length = lengths[linenumber]; + if (length == 0) { + continue; + } + + // read regular name + int parsed[] = addGroupName(lineoffset, length, tokenlengths, + m_nameSet_); + if (parsed[0] > maxlength) { + // 0 for name length + maxlength = parsed[0]; + } + lineoffset += parsed[1]; + if (parsed[1] >= length) { + // 1 for parsed group string length + continue; + } + length -= parsed[1]; + // read Unicode 1.0 name + parsed = addGroupName(lineoffset, length, tokenlengths, + m_nameSet_); + if (parsed[0] > maxlength) { + // 0 for name length + maxlength = parsed[0]; + } + lineoffset += parsed[1]; + if (parsed[1] >= length) { + // 1 for parsed group string length + continue; + } + length -= parsed[1]; + // read ISO comment + parsed = addGroupName(lineoffset, length, tokenlengths, + m_ISOCommentSet_); + if (parsed[1] > maxisolength) { + maxisolength = length; + } + } + } + + // set gMax... - name length last for threading + m_maxISOCommentLength_ = maxisolength; + m_maxNameLength_ = maxlength; + } + + /** + * Sets up the name sets and the calculation of the maximum lengths. 
+ * Equivalent to calcNameSetsLengths. + */ + private boolean initNameSetsLengths() + { + if (m_maxNameLength_ > 0) { + return true; + } + + String extra = "0123456789ABCDEF<>-"; + // set hex digits, used in various names, and <>-, used in extended + // names + for (int i = extra.length() - 1; i >= 0; i --) { + add(m_nameSet_, extra.charAt(i)); + } + + // set sets and lengths from algorithmic names + m_maxNameLength_ = addAlgorithmName(0); + // set sets and lengths from extended names + m_maxNameLength_ = addExtendedName(m_maxNameLength_); + // set sets and lengths from group names, set global maximum values + addGroupName(m_maxNameLength_); + return true; + } + + /** + * Converts the char set cset into a Unicode set uset. + * Equivalent to charSetToUSet. + * @param set Set of 256 bit flags corresponding to a set of chars. + * @param uset USet to receive characters. Existing contents are deleted. + */ + private void convert(int set[], UnicodeSet uset) + { + uset.clear(); + if (!initNameSetsLengths()) { + return; + } + + // build a char string with all chars that are used in character names + for (char c = 255; c > 0; c --) { + if (contains(set, c)) { + uset.add(c); + } + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterNameChoice.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterNameChoice.java new file mode 100644 index 00000000000..a15b6db80b4 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterNameChoice.java @@ -0,0 +1,31 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.impl; + +/** +* Internal class containing selector constants for the unicode character names. 
+* Constants representing the "modern" name of a Unicode character or the name +* that was defined in Unicode version 1.0, before the Unicode standard +* merged with ISO-10646. +* Arguments for UCharacterName +* @author Syn Wee Quek +* @since oct0600 +*/ + +public interface UCharacterNameChoice +{ + // public variables ============================================= + + static final int UNICODE_CHAR_NAME = 0; + static final int UNICODE_10_CHAR_NAME = 1; + static final int EXTENDED_CHAR_NAME = 2; + /* Corrected name from NameAliases.txt. */ + static final int CHAR_NAME_ALIAS = 3; + static final int CHAR_NAME_CHOICE_COUNT = 4; + static final int ISO_COMMENT_ = CHAR_NAME_CHOICE_COUNT; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java new file mode 100644 index 00000000000..6e706e85406 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java @@ -0,0 +1,210 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** +*

    Internal reader class for the ICU data file unames.icu containing +* Unicode codepoint name data.

    +*

    This class simply reads unames.icu, authenticates that it is a valid +* ICU data file and splits its contents up into blocks of data for use in +* com.ibm.icu.impl.UCharacterName. +*

    +*

    unames.icu, which is in big-endian format, is packaged in the same jar as this +* package.

    +* @author Syn Wee Quek +* @since release 2.1, February 1st 2002 +*/ + +final class UCharacterNameReader implements ICUBinary.Authenticate +{ + // public methods ---------------------------------------------------- + + public boolean isDataVersionAcceptable(byte version[]) + { + return version[0] == DATA_FORMAT_VERSION_[0]; + } + + // protected constructor --------------------------------------------- + + /** + *

    Protected constructor.

    + * @param inputStream ICU uprop.dat file input stream + * @exception IOException throw if data file fails authentication + */ + protected UCharacterNameReader(InputStream inputStream) + throws IOException + { + ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, this); + m_dataInputStream_ = new DataInputStream(inputStream); + } + + // protected methods ------------------------------------------------- + + /** + * Read and break up the stream of data passed in as arguments + * and fills up UCharacterName. + * If unsuccessful false will be returned. + * @param data instance of datablock + * @exception IOException thrown when there's a data error. + */ + protected void read(UCharacterName data) throws IOException + { + // reading index + m_tokenstringindex_ = m_dataInputStream_.readInt(); + m_groupindex_ = m_dataInputStream_.readInt(); + m_groupstringindex_ = m_dataInputStream_.readInt(); + m_algnamesindex_ = m_dataInputStream_.readInt(); + + // reading tokens + int count = m_dataInputStream_.readChar(); + char token[] = new char[count]; + for (char i = 0; i < count; i ++) { + token[i] = m_dataInputStream_.readChar(); + } + int size = m_groupindex_ - m_tokenstringindex_; + byte tokenstr[] = new byte[size]; + m_dataInputStream_.readFully(tokenstr); + data.setToken(token, tokenstr); + + // reading the group information records + count = m_dataInputStream_.readChar(); + data.setGroupCountSize(count, GROUP_INFO_SIZE_); + count *= GROUP_INFO_SIZE_; + char group[] = new char[count]; + for (int i = 0; i < count; i ++) { + group[i] = m_dataInputStream_.readChar(); + } + + size = m_algnamesindex_ - m_groupstringindex_; + byte groupstring[] = new byte[size]; + m_dataInputStream_.readFully(groupstring); + + data.setGroup(group, groupstring); + + count = m_dataInputStream_.readInt(); + UCharacterName.AlgorithmName alg[] = + new UCharacterName.AlgorithmName[count]; + + for (int i = 0; i < count; i ++) + { + UCharacterName.AlgorithmName an = readAlg(); + if (an == null) { + 
throw new IOException("unames.icu read error: Algorithmic names creation error"); + } + alg[i] = an; + } + data.setAlgorithm(alg); + } + + /** + *

    Checking the file for the correct format.

    + * @param dataformatid + * @param dataformatversion + * @return true if the file format version is correct + */ + ///CLOVER:OFF + protected boolean authenticate(byte dataformatid[], + byte dataformatversion[]) + { + return Arrays.equals(DATA_FORMAT_ID_, dataformatid) && + Arrays.equals(DATA_FORMAT_VERSION_, dataformatversion); + } + ///CLOVER:ON + + // private variables ------------------------------------------------- + + /** + * Data input stream for names + */ + private DataInputStream m_dataInputStream_; + /** + * Size of the group information block in number of char + */ + private static final int GROUP_INFO_SIZE_ = 3; + + /** + * Index of the offset information + */ + private int m_tokenstringindex_; + private int m_groupindex_; + private int m_groupstringindex_; + private int m_algnamesindex_; + + /** + * Size of an algorithmic name information group + * start code point size + end code point size + type size + variant size + + * size of data size + */ + private static final int ALG_INFO_SIZE_ = 12; + + /** + * File format version and id that this class understands. 
+ * No guarantees are made if a older version is used + */ + private static final byte DATA_FORMAT_VERSION_[] = + {(byte)0x1, (byte)0x0, (byte)0x0, (byte)0x0}; + private static final byte DATA_FORMAT_ID_[] = {(byte)0x75, (byte)0x6E, + (byte)0x61, (byte)0x6D}; + + // private methods --------------------------------------------------- + + /** + * Reads an individual record of AlgorithmNames + * @return an instance of AlgorithNames if read is successful otherwise null + * @exception IOException thrown when file read error occurs or data is corrupted + */ + private UCharacterName.AlgorithmName readAlg() throws IOException + { + UCharacterName.AlgorithmName result = + new UCharacterName.AlgorithmName(); + int rangestart = m_dataInputStream_.readInt(); + int rangeend = m_dataInputStream_.readInt(); + byte type = m_dataInputStream_.readByte(); + byte variant = m_dataInputStream_.readByte(); + if (!result.setInfo(rangestart, rangeend, type, variant)) { + return null; + } + + int size = m_dataInputStream_.readChar(); + if (type == UCharacterName.AlgorithmName.TYPE_1_) + { + char factor[] = new char[variant]; + for (int j = 0; j < variant; j ++) { + factor[j] = m_dataInputStream_.readChar(); + } + + result.setFactor(factor); + size -= (variant << 1); + } + + StringBuilder prefix = new StringBuilder(); + char c = (char)(m_dataInputStream_.readByte() & 0x00FF); + while (c != 0) + { + prefix.append(c); + c = (char)(m_dataInputStream_.readByte() & 0x00FF); + } + + result.setPrefix(prefix.toString()); + + size -= (ALG_INFO_SIZE_ + prefix.length() + 1); + + if (size > 0) + { + byte string[] = new byte[size]; + m_dataInputStream_.readFully(string); + result.setFactorString(string); + } + return result; + } +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java new file mode 100644 index 00000000000..f9ed9cf18af --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java 
@@ -0,0 +1,1094 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.MissingResourceException; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UCharacterCategory; +import com.ibm.icu.lang.UProperty; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.RangeValueIterator; +import com.ibm.icu.util.VersionInfo; + +/** +*

    Internal class used for Unicode character property database.

    +*

    This class stores binary data read from uprops.icu. +* It does not have the capability to parse the data into higher-level +* information. It only returns bytes of information when required.

    +*

    Due to the form most commonly used for retrieval, an array of char is used +* to store the binary data.

    +*

    UCharacterProperty also contains information on accessing indexes to +* significant points in the binary data.

    +*

    Responsibility for molding the binary data into a more meaningful form lies with +* UCharacter.

    +* @author Syn Wee Quek +* @since release 2.1, february 1st 2002 +*/ + +public final class UCharacterProperty +{ + // public data members ----------------------------------------------- + + /* + * public singleton instance + */ + public static final UCharacterProperty INSTANCE; + + static { + try { + INSTANCE = new UCharacterProperty(); + } + catch (IOException e) { + throw new MissingResourceException(e.getMessage(),"",""); + } + } + + /** + * Trie data + */ + public CharTrie m_trie_; + /** + * Optimization + * CharTrie index array + */ + public char[] m_trieIndex_; + /** + * Optimization + * CharTrie data array + */ + public char[] m_trieData_; + /** + * Optimization + * CharTrie data offset + */ + public int m_trieInitialValue_; + /** + * Unicode version + */ + public VersionInfo m_unicodeVersion_; + /** + * Latin capital letter i with dot above + */ + public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130; + /** + * Latin small letter i with dot above + */ + public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131; + /** + * Latin lowercase i + */ + public static final char LATIN_SMALL_LETTER_I_ = 0x69; + /** + * Character type mask + */ + public static final int TYPE_MASK = 0x1F; + + // uprops.h enum UPropertySource --------------------------------------- *** + + /** No source, not a supported property. 
*/ + public static final int SRC_NONE=0; + /** From uchar.c/uprops.icu main trie */ + public static final int SRC_CHAR=1; + /** From uchar.c/uprops.icu properties vectors trie */ + public static final int SRC_PROPSVEC=2; + /** From unames.c/unames.icu */ + public static final int SRC_NAMES=3; + /** From ucase.c/ucase.icu */ + public static final int SRC_CASE=4; + /** From ubidi_props.c/ubidi.icu */ + public static final int SRC_BIDI=5; + /** From uchar.c/uprops.icu main trie as well as properties vectors trie */ + public static final int SRC_CHAR_AND_PROPSVEC=6; + /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */ + public static final int SRC_CASE_AND_NORM=7; + /** From normalizer2impl.cpp/nfc.nrm */ + public static final int SRC_NFC=8; + /** From normalizer2impl.cpp/nfkc.nrm */ + public static final int SRC_NFKC=9; + /** From normalizer2impl.cpp/nfkc_cf.nrm */ + public static final int SRC_NFKC_CF=10; + /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */ + public static final int SRC_NFC_CANON_ITER=11; + /** One more than the highest UPropertySource (SRC_) constant. */ + public static final int SRC_COUNT=12; + + // public methods ---------------------------------------------------- + + /** + * Java friends implementation + */ + public void setIndexData(CharTrie.FriendAgent friendagent) + { + m_trieIndex_ = friendagent.getPrivateIndex(); + m_trieData_ = friendagent.getPrivateData(); + m_trieInitialValue_ = friendagent.getPrivateInitialValue(); + } + + /** + * Gets the property value at the index. + * This is optimized. + * Note this is a little different from CharTrie the index m_trieData_ + * is never negative. 
+ * @param ch code point whose property value is to be retrieved + * @return property value of code point + */ + public final int getProperty(int ch) + { + if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE + || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE + && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) { + // BMP codepoint 0000..D7FF or DC00..FFFF + // optimized + try { // using try for ch < 0 is faster than using an if statement + return m_trieData_[ + (m_trieIndex_[ch >> Trie.INDEX_STAGE_1_SHIFT_] + << Trie.INDEX_STAGE_2_SHIFT_) + + (ch & Trie.INDEX_STAGE_3_MASK_)]; + } catch (ArrayIndexOutOfBoundsException e) { + return m_trieInitialValue_; + } + } + if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { + // lead surrogate D800..DBFF + return m_trieData_[ + (m_trieIndex_[Trie.LEAD_INDEX_OFFSET_ + + (ch >> Trie.INDEX_STAGE_1_SHIFT_)] + << Trie.INDEX_STAGE_2_SHIFT_) + + (ch & Trie.INDEX_STAGE_3_MASK_)]; + } + if (ch <= UTF16.CODEPOINT_MAX_VALUE) { + // supplementary code point 10000..10FFFF + // look at the construction of supplementary characters + // trail forms the ends of it. + return m_trie_.getSurrogateValue( + UTF16.getLeadSurrogate(ch), + (char)(ch & Trie.SURROGATE_MASK_)); + } + // ch is out of bounds + // return m_dataOffset_ if there is an error, in this case we return + // the default value: m_initialValue_ + // we cannot assume that m_initialValue_ is at offset 0 + // this is for optimization. + return m_trieInitialValue_; + + // this all is an inlined form of return m_trie_.getCodePointValue(ch); + } + + /** + * Gets the unicode additional properties. + * C version getUnicodeProperties. + * @param codepoint codepoint whose additional properties is to be + * retrieved + * @param column The column index. 
+ * @return unicode properties + */ + public int getAdditional(int codepoint, int column) { + if (column == -1) { + return getProperty(codepoint); + } + if (column < 0 || column >= m_additionalColumnsCount_) { + return 0; + } + return m_additionalVectors_[ + m_additionalTrie_.getCodePointValue(codepoint) + column]; + } + + static final int MY_MASK = UCharacterProperty.TYPE_MASK + & ((1<Get the "age" of the code point.

    + *

    The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) or assigned a + * character.

    + *

    This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters.

    + *

    The data is from the UCD file DerivedAge.txt.

    + *

    This API does not check the validity of the codepoint.

    + * @param codepoint The code point. + * @return the Unicode version number + */ + public VersionInfo getAge(int codepoint) + { + int version = getAdditional(codepoint, 0) >> AGE_SHIFT_; + return VersionInfo.getInstance( + (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_, + version & LAST_NIBBLE_MASK_, 0, 0); + } + + private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED); + private static final int GC_CC_MASK = getMask(UCharacter.CONTROL); + private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE); + private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR); + private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR); + private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR); + /** Mask constant for multiple UCharCategory bits (Z Separators). */ + private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK; + + /** + * Checks if c is in + * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] + * with space=\p{Whitespace} and Control=Cc. + * Implements UCHAR_POSIX_GRAPH. + * @internal + */ + private static final boolean isgraphPOSIX(int c) { + /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ + /* comparing ==0 returns FALSE for the categories mentioned */ + return (getMask(UCharacter.getType(c))& + (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK)) + ==0; + } + + private static final class BinaryProperties{ + int column; + int mask; + public BinaryProperties(int column, int mask) { + this.column = column; + this.mask = mask; + } + } + BinaryProperties[] binProps={ + /* + * column and mask values for binary properties from u_getUnicodeProperties(). + * Must be in order of corresponding UProperty, + * and there must be exactly one entry per binary UProperty. 
+ */ + new BinaryProperties( 1, ( 1 << ALPHABETIC_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << ASCII_HEX_DIGIT_PROPERTY_) ), + new BinaryProperties( SRC_BIDI, 0 ), /* UCHAR_BIDI_CONTROL */ + new BinaryProperties( SRC_BIDI, 0 ), /* UCHAR_BIDI_MIRRORED */ + new BinaryProperties( 1, ( 1 << DASH_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << DEPRECATED_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << DIACRITIC_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << EXTENDER_PROPERTY_) ), + new BinaryProperties( SRC_NFC, 0 ), /* UCHAR_FULL_COMPOSITION_EXCLUSION */ + new BinaryProperties( 1, ( 1 << GRAPHEME_BASE_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << GRAPHEME_EXTEND_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << GRAPHEME_LINK_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << HEX_DIGIT_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << HYPHEN_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << ID_CONTINUE_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << ID_START_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << IDEOGRAPHIC_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << IDS_BINARY_OPERATOR_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << IDS_TRINARY_OPERATOR_PROPERTY_) ), + new BinaryProperties( SRC_BIDI, 0 ), /* UCHAR_JOIN_CONTROL */ + new BinaryProperties( 1, ( 1 << LOGICAL_ORDER_EXCEPTION_PROPERTY_) ), + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_LOWERCASE */ + new BinaryProperties( 1, ( 1 << MATH_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << NONCHARACTER_CODE_POINT_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << QUOTATION_MARK_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << RADICAL_PROPERTY_) ), + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_SOFT_DOTTED */ + new BinaryProperties( 1, ( 1 << TERMINAL_PUNCTUATION_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << UNIFIED_IDEOGRAPH_PROPERTY_) ), + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_UPPERCASE */ + new BinaryProperties( 1, ( 1 << 
WHITE_SPACE_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << XID_CONTINUE_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << XID_START_PROPERTY_) ), + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CASE_SENSITIVE */ + new BinaryProperties( 1, ( 1 << S_TERM_PROPERTY_) ), + new BinaryProperties( 1, ( 1 << VARIATION_SELECTOR_PROPERTY_) ), + new BinaryProperties( SRC_NFC, 0 ), /* UCHAR_NFD_INERT */ + new BinaryProperties( SRC_NFKC, 0 ), /* UCHAR_NFKD_INERT */ + new BinaryProperties( SRC_NFC, 0 ), /* UCHAR_NFC_INERT */ + new BinaryProperties( SRC_NFKC, 0 ), /* UCHAR_NFKC_INERT */ + new BinaryProperties( SRC_NFC_CANON_ITER, 0 ), /* UCHAR_SEGMENT_STARTER */ + new BinaryProperties( 1, ( 1 << PATTERN_SYNTAX) ), + new BinaryProperties( 1, ( 1 << PATTERN_WHITE_SPACE) ), + new BinaryProperties( SRC_CHAR_AND_PROPSVEC, 0 ), /* UCHAR_POSIX_ALNUM */ + new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_BLANK */ + new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_GRAPH */ + new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_PRINT */ + new BinaryProperties( SRC_CHAR, 0 ), /* UCHAR_POSIX_XDIGIT */ + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CASED */ + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CASE_IGNORABLE */ + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CHANGES_WHEN_LOWERCASED */ + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CHANGES_WHEN_UPPERCASED */ + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CHANGES_WHEN_TITLECASED */ + new BinaryProperties( SRC_CASE_AND_NORM, 0 ), /* UCHAR_CHANGES_WHEN_CASEFOLDED */ + new BinaryProperties( SRC_CASE, 0 ), /* UCHAR_CHANGES_WHEN_CASEMAPPED */ + new BinaryProperties( SRC_NFKC_CF, 0 ), /* UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED */ + }; + + + /** + *

    Check a binary Unicode property for a code point.

    + *

    Unicode, especially in version 3.2, defines many more properties + * than the original set in UnicodeData.txt.

    + *

    This API is intended to reflect Unicode properties as defined in + * the Unicode Character Database (UCD) and Unicode Technical Reports + * (UTR).

    + *

    For details about the properties see + * http://www.unicode.org/.

    + *

    For names of Unicode properties see the UCD file + * PropertyAliases.txt.

    + *

    This API does not check the validity of the codepoint.

    + *

    Important: If ICU is built with UCD files from Unicode versions + * below 3.2, then properties marked with "new" are not or + * not fully available.

    + * @param c Code point to test. + * @param which selector constant from com.ibm.icu.lang.UProperty, + * identifies which binary property to check. + * @return true or false according to the binary Unicode property value + * for ch. Also false if property is out of bounds or if the + * Unicode version does not have data for the property at all, or + * not for this code point. + * @see com.ibm.icu.lang.UProperty + */ + + public boolean hasBinaryProperty(int c, int which) { + if(which=0x41 && (c<=0x46 || c>=0x61)) || + (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) + ) { + return true; + } + + return UCharacter.getType(c)==UCharacter.DECIMAL_DIGIT_NUMBER; + default: + break; + } + } else if(column==SRC_CHAR_AND_PROPSVEC) { + switch(which) { + case UProperty.POSIX_ALNUM: + return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c); + default: + break; + } + } else if(column==SRC_CASE_AND_NORM) { + String nfd; + switch(which) { + case UProperty.CHANGES_WHEN_CASEFOLDED: + nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c); + if(nfd!=null) { + /* c has a decomposition */ + c=nfd.codePointAt(0); + if(Character.charCount(c)!=nfd.length()) { + /* multiple code points */ + c=-1; + } + } else if(c<0) { + return false; /* protect against bad input */ + } + if(c>=0) { + /* single code point */ + try { + UCaseProps csp=UCaseProps.getSingleton(); + UCaseProps.dummyStringBuffer.setLength(0); + return csp.toFullFolding(c, UCaseProps.dummyStringBuffer, + UCharacter.FOLD_CASE_DEFAULT)>=0; + } catch (IOException e) { + return false; + } + } else { + String folded=UCharacter.foldCase(nfd, true); + return !folded.equals(nfd); + } + default: + break; + } + } + } + } + return false; + } + + public final int getSource(int which) { + if(which + * Note this is for internal use hence no checks for the validity of the + * surrogate characters are done + * @param lead lead surrogate character + * @param trail trailing surrogate character + * @return code point of the 
supplementary character + */ + public static int getRawSupplementary(char lead, char trail) + { + return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_; + } + + /** + *

    + * Unicode property names and property value names are compared + * "loosely". Property[Value]Aliases.txt say: + * + * "With loose matching of property names, the case distinctions, + * whitespace, and '_' are ignored." + * + *

    + *

    + * This function does just that, for ASCII (char *) name strings. + * It is almost identical to ucnv_compareNames() but also ignores + * ASCII White_Space characters (U+0009..U+000d). + *

    + * @param name1 name to compare + * @param name2 name to compare + * @return 0 if names are equal, < 0 if name1 is less than name2 and > 0 + * if name1 is greater than name2. + */ + /* to be implemented in 2.4 + * public static int comparePropertyNames(String name1, String name2) + { + int result = 0; + int i1 = 0; + int i2 = 0; + while (true) { + char ch1 = 0; + char ch2 = 0; + // Ignore delimiters '-', '_', and ASCII White_Space + if (i1 < name1.length()) { + ch1 = name1.charAt(i1 ++); + } + while (ch1 == '-' || ch1 == '_' || ch1 == ' ' || ch1 == '\t' + || ch1 == '\n' // synwee what is || ch1 == '\v' + || ch1 == '\f' || ch1=='\r') { + if (i1 < name1.length()) { + ch1 = name1.charAt(i1 ++); + } + else { + ch1 = 0; + } + } + if (i2 < name2.length()) { + ch2 = name2.charAt(i2 ++); + } + while (ch2 == '-' || ch2 == '_' || ch2 == ' ' || ch2 == '\t' + || ch2 == '\n' // synwee what is || ch1 == '\v' + || ch2 == '\f' || ch2=='\r') { + if (i2 < name2.length()) { + ch2 = name2.charAt(i2 ++); + } + else { + ch2 = 0; + } + } + + // If we reach the ends of both strings then they match + if (ch1 == 0 && ch2 == 0) { + return 0; + } + + // Case-insensitive comparison + if (ch1 != ch2) { + result = Character.toLowerCase(ch1) + - Character.toLowerCase(ch2); + if (result != 0) { + return result; + } + } + } + } + */ + + /** + * Checks if the argument c is to be treated as a white space in ICU + * rules. Usually ICU rule white spaces are ignored unless quoted. + * Equivalent to test for Pattern_White_Space Unicode property. + * Stable set of characters, won't change. + * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ + * @param c codepoint to check + * @return true if c is a ICU white space + */ + public static boolean isRuleWhiteSpace(int c) + { + /* "white space" in the sense of ICU rule parsers + This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES. 
+ See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ + U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029 + Equivalent to test for Pattern_White_Space Unicode property. + */ + return (c >= 0x0009 && c <= 0x2029 && + (c <= 0x000D || c == 0x0020 || c == 0x0085 || + c == 0x200E || c == 0x200F || c >= 0x2028)); + } + + /** + * Get the the maximum values for some enum/int properties. + * @return maximum values for the integer properties. + */ + public int getMaxValues(int column) + { + // return m_maxBlockScriptValue_; + + switch(column) { + case 0: + return m_maxBlockScriptValue_; + case 2: + return m_maxJTGValue_; + default: + return 0; + } + } + + /** + * Gets the type mask + * @param type character type + * @return mask + */ + public static final int getMask(int type) + { + return 1 << type; + } + + // protected variables ----------------------------------------------- + + /** + * Extra property trie + */ + CharTrie m_additionalTrie_; + /** + * Extra property vectors, 1st column for age and second for binary + * properties. + */ + int m_additionalVectors_[]; + /** + * Number of additional columns + */ + int m_additionalColumnsCount_; + /** + * Maximum values for block, bits used as in vector word + * 0 + */ + int m_maxBlockScriptValue_; + /** + * Maximum values for script, bits used as in vector word + * 0 + */ + int m_maxJTGValue_; + // private variables ------------------------------------------------- + + /** + * Default name of the datafile + */ + private static final String DATA_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/uprops.icu"; + + /** + * Default buffer size of datafile + */ + private static final int DATA_BUFFER_SIZE_ = 25000; + + /** + * Shift value for lead surrogate to form a supplementary character. + */ + private static final int LEAD_SURROGATE_SHIFT_ = 10; + /** + * Offset to add to combined surrogate pair to avoid msking. 
+ */ + private static final int SURROGATE_OFFSET_ = + UTF16.SUPPLEMENTARY_MIN_VALUE - + (UTF16.SURROGATE_MIN_VALUE << + LEAD_SURROGATE_SHIFT_) - + UTF16.TRAIL_SURROGATE_MIN_VALUE; + + + // additional properties ---------------------------------------------- + + /** + * Additional properties used in internal trie data + */ + /* + * Properties in vector word 1 + * Each bit encodes one binary property. + * The following constants represent the bit number, use 1<0) { + /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */ + TrieIterator propsVectorsIter = new TrieIterator(m_additionalTrie_); + RangeValueIterator.Element propsVectorsResult = new RangeValueIterator.Element(); + while(propsVectorsIter.next(propsVectorsResult)){ + set.add(propsVectorsResult.start); + } + } + } + +/*---------------------------------------------------------------- + * Inclusions list + *----------------------------------------------------------------*/ + + /* + * Return a set of characters for property enumeration. + * The set implicitly contains 0x110000 as well, which is one more than the highest + * Unicode code point. + * + * This set is used as an ordered list - its code points are ordered, and + * consecutive code points (in Unicode code point order) in the set define a range. + * For each two consecutive characters (start, limit) in the set, + * all of the UCD/normalization and related properties for + * all code points start..limit-1 are all the same, + * except for character names and ISO comments. + * + * All Unicode code points U+0000..U+10ffff are covered by these ranges. + * The ranges define a partition of the Unicode code space. + * ICU uses the inclusions set to enumerate properties for generating + * UnicodeSets containing all code points that have a certain property value. + * + * The Inclusion List is generated from the UCD. 
It is generated + * by enumerating the data tries, and code points for hardcoded properties + * are added as well. + * + * -------------------------------------------------------------------------- + * + * The following are ideas for getting properties-unique code point ranges, + * with possible optimizations beyond the current implementation. + * These optimizations would require more code and be more fragile. + * The current implementation generates one single list (set) for all properties. + * + * To enumerate properties efficiently, one needs to know ranges of + * repetitive values, so that the value of only each start code point + * can be applied to the whole range. + * This information is in principle available in the uprops.icu/unorm.icu data. + * + * There are two obstacles: + * + * 1. Some properties are computed from multiple data structures, + * making it necessary to get repetitive ranges by intersecting + * ranges from multiple tries. + * + * 2. It is not economical to write code for getting repetitive ranges + * that are precise for each of some 50 properties. + * + * Compromise ideas: + * + * - Get ranges per trie, not per individual property. + * Each range contains the same values for a whole group of properties. + * This would generate currently five range sets, two for uprops.icu tries + * and three for unorm.icu tries. + * + * - Combine sets of ranges for multiple tries to get sufficient sets + * for properties, e.g., the uprops.icu main and auxiliary tries + * for all non-normalization properties. + * + * Ideas for representing ranges and combining them: + * + * - A UnicodeSet could hold just the start code points of ranges. + * Multiple sets are easily combined by or-ing them together. + * + * - Alternatively, a UnicodeSet could hold each even-numbered range. + * All ranges could be enumerated by using each start code point + * (for the even-numbered ranges) as well as each limit (end+1) code point + * (for the odd-numbered ranges). 
+ * It should be possible to combine two such sets by xor-ing them, + * but no more than two. + * + * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays, + * but the first one is certainly simpler and applicable for combining more than + * two range sets. + * + * It is possible to combine all range sets for all uprops/unorm tries into one + * set that can be used for all properties. + * As an optimization, there could be less-combined range sets for certain + * groups of properties. + * The relationship of which less-combined range set to use for which property + * depends on the implementation of the properties and must be hardcoded + * - somewhat error-prone and higher maintenance but can be tested easily + * by building property sets "the simple way" in test code. + * + * --- + * + * Do not use a UnicodeSet pattern because that causes infinite recursion; + * UnicodeSet depends on the inclusions set. + * + * --- + * + * getInclusions() is commented out starting 2005-feb-12 because + * UnicodeSet now calls the uxyz_addPropertyStarts() directly, + * and only for the relevant property source. + */ + /* + public UnicodeSet getInclusions() { + UnicodeSet set = new UnicodeSet(); + NormalizerImpl.addPropertyStarts(set); + addPropertyStarts(set); + return set; + } + */ +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterPropertyReader.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterPropertyReader.java new file mode 100644 index 00000000000..b2c6c25e12c --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterPropertyReader.java @@ -0,0 +1,162 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ +package com.ibm.icu.impl; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.ibm.icu.util.VersionInfo; + +/** +*

    Internal reader class for ICU data file uprops.icu containing +* Unicode codepoint data.

    +*

    This class simply reads uprops.icu, authenticates that it is a valid +* ICU data file and splits its contents up into blocks of data for use in +* com.ibm.icu.impl.UCharacterProperty. +*

    +*

    uprops.icu which is in big-endian format is jarred together with this +* package.

    +* +* Unicode character properties file format see +* (ICU4C)/source/tools/genprops/store.c +* +* @author Syn Wee Quek +* @since release 2.1, February 1st 2002 +*/ +final class UCharacterPropertyReader implements ICUBinary.Authenticate +{ + // public methods ---------------------------------------------------- + + public boolean isDataVersionAcceptable(byte version[]) + { + return version[0] == DATA_FORMAT_VERSION_[0] + && version[2] == DATA_FORMAT_VERSION_[2] + && version[3] == DATA_FORMAT_VERSION_[3]; + } + + // protected constructor --------------------------------------------- + + /** + *

    Protected constructor.

    + * @param inputStream ICU uprop.dat file input stream + * @exception IOException throw if data file fails authentication + */ + protected UCharacterPropertyReader(InputStream inputStream) + throws IOException + { + m_unicodeVersion_ = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, + this); + m_dataInputStream_ = new DataInputStream(inputStream); + } + + // protected methods ------------------------------------------------- + + /** + *

    Reads uprops.icu, parses it into blocks of data to be stored in + * UCharacterProperty.

    0) { + // reads the additional property block + ucharppty.m_additionalTrie_ = new CharTrie(m_dataInputStream_, null); + + // additional properties + size = m_reservedOffset_ - m_additionalVectorsOffset_; + ucharppty.m_additionalVectors_ = new int[size]; + for (int i = 0; i < size; i ++) { + ucharppty.m_additionalVectors_[i] = m_dataInputStream_.readInt(); + } + } + + m_dataInputStream_.close(); + ucharppty.m_additionalColumnsCount_ = m_additionalColumnsCount_; + ucharppty.m_unicodeVersion_ = VersionInfo.getInstance( + (int)m_unicodeVersion_[0], (int)m_unicodeVersion_[1], + (int)m_unicodeVersion_[2], (int)m_unicodeVersion_[3]); + } + + // private variables ------------------------------------------------- + + /** + * Index size + */ + private static final int INDEX_SIZE_ = 16; + + /** + * ICU data file input stream + */ + private DataInputStream m_dataInputStream_; + + /** + * Offset information in the indexes. + */ + private int m_propertyOffset_; + private int m_exceptionOffset_; + private int m_caseOffset_; + private int m_additionalOffset_; + private int m_additionalVectorsOffset_; + private int m_additionalColumnsCount_; + private int m_reservedOffset_; + private byte m_unicodeVersion_[]; + + /** + * Data format "UPro". + */ + private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x50, + (byte)0x72, (byte)0x6F}; + /** + * Format version; this code works with all versions with the same major + * version number and the same Trie bit distribution. 
+ */ + private static final byte DATA_FORMAT_VERSION_[] = {(byte)6, (byte)0, + (byte)Trie.INDEX_STAGE_1_SHIFT_, + (byte)Trie.INDEX_STAGE_2_SHIFT_}; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UCharacterUtility.java b/main/classes/core/src/com/ibm/icu/impl/UCharacterUtility.java new file mode 100644 index 00000000000..dc10e06c03b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UCharacterUtility.java @@ -0,0 +1,194 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2004, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +package com.ibm.icu.impl; + +/** +* Internal character utility class for simple data type conversion and String +* parsing functions. Does not have an analog in the JDK. +* @author Syn Wee Quek +* @since sep2900 +*/ + +public final class UCharacterUtility +{ + // public methods ----------------------------------------------------- + + /** + * Determines if codepoint is a non character + * @param ch codepoint + * @return true if codepoint is a non character false otherwise + */ + public static boolean isNonCharacter(int ch) + { + if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) == + NON_CHARACTER_SUFFIX_MIN_3_0_) { + return true; + } + + return ch >= NON_CHARACTER_MIN_3_1_ && ch <= NON_CHARACTER_MAX_3_1_; + } + + // package private methods --------------------------------------------- + + /** + * joining 2 chars to form an int + * @param msc most significant char + * @param lsc least significant char + * @return int form + */ + static int toInt(char msc, char lsc) + { + return ((msc << 16) | lsc); + } + + /** + * Retrieves a null terminated substring from an array of bytes. + * Substring is a set of non-zero bytes starting from argument start to the + * next zero byte. If the first byte is a zero, the next byte will be taken as + * the first byte. 
+ * @param str stringbuffer to store data in, data will be store with each + * byte as a char + * @param array byte array + * @param index to start substring in byte count + * @return the end position of the substring within the character array + */ + static int getNullTermByteSubString(StringBuffer str, byte[] array, + int index) + { + byte b = 1; + + while (b != 0) + { + b = array[index]; + if (b != 0) { + str.append((char)(b & 0x00FF)); + } + index ++; + } + return index; + } + + /** + * Compares a null terminated substring from an array of bytes. + * Substring is a set of non-zero bytes starting from argument start to the + * next zero byte. if the first byte is a zero, the next byte will be taken as + * the first byte. + * @param str string to compare + * @param array byte array + * @param strindex index within str to start comparing + * @param aindex array index to start in byte count + * @return the end position of the substring within str if matches otherwise + * a -1 + */ + static int compareNullTermByteSubString(String str, byte[] array, + int strindex, int aindex) + { + byte b = 1; + int length = str.length(); + + while (b != 0) + { + b = array[aindex]; + aindex ++; + if (b == 0) { + break; + } + // if we have reached the end of the string and yet the array has not + // reached the end of their substring yet, abort + if (strindex == length + || (str.charAt(strindex) != (char)(b & 0xFF))) { + return -1; + } + strindex ++; + } + return strindex; + } + + /** + * Skip null terminated substrings from an array of bytes. + * Substring is a set of non-zero bytes starting from argument start to the + * next zero byte. If the first byte is a zero, the next byte will be taken as + * the first byte. 
+ * @param array byte array + * @param index to start substrings in byte count + * @param skipcount number of null terminated substrings to skip + * @return the end position of the substrings within the character array + */ + static int skipNullTermByteSubString(byte[] array, int index, + int skipcount) + { + byte b; + for (int i = 0; i < skipcount; i ++) + { + b = 1; + while (b != 0) + { + b = array[index]; + index ++; + } + } + return index; + } + + /** + * skip substrings from an array of characters, where each character is a set + * of 2 bytes. substring is a set of non-zero bytes starting from argument + * start to the byte of the argument value. skips up to a max number of + * characters + * @param array byte array to parse + * @param index to start substrings in byte count + * @param length the max number of bytes to skip + * @param skipend value of byte to skip to + * @return the number of bytes skipped + */ + static int skipByteSubString(byte[] array, int index, int length, + byte skipend) + { + int result; + byte b; + + for (result = 0; result < length; result ++) + { + b = array[index + result]; + if (b == skipend) + { + result ++; + break; + } + } + + return result; + } + + // private data member -------------------------------------------------- + + /** + * Minimum suffix value that indicates if a character is non character. 
+ * Unicode 3.0 non characters + */ + private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE; + /** + * New minimum non character in Unicode 3.1 + */ + private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0; + /** + * New non character range in Unicode 3.1 + */ + private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF; + + // private constructor -------------------------------------------------- + + ///CLOVER:OFF + /** + * private constructor to avoid initialisation + */ + private UCharacterUtility() + { + } + ///CLOVER:ON +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java b/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java new file mode 100644 index 00000000000..98b45a0430a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java @@ -0,0 +1,673 @@ +/* + ********************************************************************** + * Copyright (c) 2002-2010, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * Author: Alan Liu + * Created: November 5 2002 + * Since: ICU 2.4 + ********************************************************************** + */ + +package com.ibm.icu.impl; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.MissingResourceException; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UProperty; + +/** + * Wrapper for the pnames.icu binary data file. This data file is + * imported from icu4c. It contains property and property value + * aliases from the UCD files PropertyAliases.txt and + * PropertyValueAliases.txt. The file is built by the icu4c tool + * genpname. It must be built on an ASCII big-endian platform to be + * usable in icu4j. + * + * This class performs two functions. + * + * (1) It can import the flat binary data into a tree of usable + * objects. 
+ * + * (2) It provides an API to access the tree of objects. + * + * Needless to say, this class is tightly coupled to the binary format + * of icu4c's pnames.icu file. + * + * Each time a UPropertyAliases is constructed, the pnames.icu file is + * read, parsed, and a data tree assembled. Clients should create one + * singleton instance and cache it. + * + * @author Alan Liu + * @since ICU 2.4 + */ +public final class UPropertyAliases implements ICUBinary.Authenticate { + + //---------------------------------------------------------------- + // Runtime data. This is an unflattened representation of the + // data in pnames.icu. + + /** + * Map from property enum value to nameGroupPool[] index + */ + private NonContiguousEnumToShort enumToName; + + /** + * Map from property alias to property enum value + */ + private NameToEnum nameToEnum; + + /** + * Map from property enum value to valueMapArray[] index + */ + private NonContiguousEnumToShort enumToValue; + + /** + * Each entry represents a binary or enumerated property + */ + private ValueMap valueMapArray[]; + + /** + * Pool of concatenated integer runs. Each run contains one + * or more entries. The last entry of the run is negative. + * A zero entry indicates "n/a" in the Property*Aliases.txt. + * Each entry is a stringPool[] index. + */ + private short nameGroupPool[]; + + /** + * Pool of strings. + */ + private String stringPool[]; + + //---------------------------------------------------------------- + // Constants + + /** + * Debug flag (not really constant) + */ + private static boolean DEBUG = ICUDebug.enabled("pnames"); + + /** + * File format that this class understands. + * See icu4c/src/common/propname.h. + */ + private static final byte DATA_FORMAT_ID[] = {'p', 'n', 'a', 'm'}; + + /** + * File version that this class understands. + * See icu4c/src/common/propname.h. 
+ */ + private static final byte DATA_FORMAT_VERSION = 1; + + /** + * Name of the datafile + */ + private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/pnames.icu"; + + /** + * Buffer size of datafile. The whole file is < 16k. + */ + private static final int DATA_BUFFER_SIZE = 8192; + + //---------------------------------------------------------------- + // Constructor + + /** + * Constructs a UPropertyAliases object. The binary file + * DATA_FILE_NAME is read from the jar/classpath and unflattened + * into member variables of this object. + */ + private UPropertyAliases() throws IOException { + + // Open the .icu file from the jar/classpath + InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME); + BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE); + // Read and discard Unicode version... + /* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this); + DataInputStream d = new DataInputStream(b); + + // Record the origin position of the file. Keep enough around + // to seek back to the start of the header. 
+ d.mark(256); + + short enumToName_offset = d.readShort(); + short nameToEnum_offset = d.readShort(); + short enumToValue_offset = d.readShort(); + short total_size = d.readShort(); + short valueMap_offset = d.readShort(); + short valueMap_count = d.readShort(); + short nameGroupPool_offset = d.readShort(); + short nameGroupPool_count = d.readShort(); + short stringPool_offset = d.readShort(); + short stringPool_count = d.readShort(); + + if (DEBUG) { + System.out.println( + "enumToName_offset=" + enumToName_offset + "\n" + + "nameToEnum_offset=" + nameToEnum_offset + "\n" + + "enumToValue_offset=" + enumToValue_offset + "\n" + + "total_size=" + total_size + "\n" + + "valueMap_offset=" + valueMap_offset + "\n" + + "valueMap_count=" + valueMap_count + "\n" + + "nameGroupPool_offset=" + nameGroupPool_offset + "\n" + + "nameGroupPool_count=" + nameGroupPool_count + "\n" + + "stringPool_offset=" + stringPool_offset + "\n" + + "stringPool_count=" + stringPool_count); + } + + // Read it all (less than 32k). Seeking around (using + // mark/reset/skipBytes) doesn't work directly on the file, + // but it works fine if we read everything into a byte[] array + // first. + byte raw[] = new byte[total_size]; + d.reset(); + d.readFully(raw); + d.close(); + + Builder builder = new Builder(raw); + + stringPool = builder.readStringPool(stringPool_offset, + stringPool_count); + + nameGroupPool = builder.readNameGroupPool(nameGroupPool_offset, + nameGroupPool_count); + + builder.setupValueMap_map(valueMap_offset, valueMap_count); + + // Some of the following data structures have to be set up + // here, _not_ in Builder. That's because they are instances + // of non-static inner classes, and they contain implicit + // references to this. 
+ + builder.seek(enumToName_offset); + enumToName = new NonContiguousEnumToShort(builder); + builder.nameGroupOffsetToIndex(enumToName.offsetArray); + + builder.seek(nameToEnum_offset); + nameToEnum = new NameToEnum(builder); + + builder.seek(enumToValue_offset); + enumToValue = new NonContiguousEnumToShort(builder); + builder.valueMapOffsetToIndex(enumToValue.offsetArray); + + valueMapArray = new ValueMap[valueMap_count]; + for (int i=0; i= enumLimit) { + throw new IllegalIcuArgumentException("Invalid enum. enumStart = " +enumStart + + " enumLimit = " + enumLimit + + " enumProbe = " + enumProbe ); + } + return offsetArray[enumProbe - enumStart]; + } + + ContiguousEnumToShort(ICUBinaryStream s) throws IOException { + enumStart = s.readInt(); + enumLimit = s.readInt(); + int count = enumLimit - enumStart; + offsetArray = new short[count]; + for (int i=0; i enumProbe) break; + return offsetArray[i]; + } + throw new IllegalIcuArgumentException("Invalid enum"); + } + + NonContiguousEnumToShort(ICUBinaryStream s) throws IOException { + int i; + int count = s.readInt(); + enumArray = new int[count]; + offsetArray = new short[count]; + for (i=0; i 0) continue; + if (c < 0) break; + return enumArray[i]; + } + return UProperty.UNDEFINED; + } + + NameToEnum(Builder b) throws IOException { + int i; + int count = b.readInt(); + enumArray = new int[count]; + nameArray = new short[count]; + for (i=0; i0. The + * comparison is that described as "loose" matching in the + * Property*Aliases.txt files. + */ + public static int compare(String stra, String strb) { + // Note: This implementation is a literal copy of + // uprv_comparePropertyNames. It can probably be improved. 
+ int istra=0, istrb=0, rc; + int cstra=0, cstrb=0; + for (;;) { + /* Ignore delimiters '-', '_', and ASCII White_Space */ + while (istra 0) { + if (nameGroupPool[nameGroupIndex++] < 0) { + throw new IllegalIcuArgumentException("Invalid name choice"); + } + } + short a = nameGroupPool[nameGroupIndex]; + return stringPool[(a < 0) ? -a : a]; + } + + /** + * Return the valueMap[] entry for a given property. + */ + private ValueMap getValueMap(int property) { + int valueMapIndex = enumToValue.getShort(property); + return valueMapArray[valueMapIndex]; + } + + //---------------------------------------------------------------- + // ICUBinary API + + /** + * Return true if the given data version can be used. + */ + public boolean isDataVersionAcceptable(byte version[]) { + return version[0] == DATA_FORMAT_VERSION; + } + + //---------------------------------------------------------------- + // Builder + + /** + * A specialized ICUBinaryStream that can map between offsets and + * index values into various arrays (stringPool, nameGroupPool, + * and valueMap). It also knows how to read various structures. + */ + static class Builder extends ICUBinaryStream { + + // map[i] = offset of object i. We need maps for all of our + // arrays. The arrays are indexed by offset in the raw binary + // file; we need to translate that to index. + + private short stringPool_map[]; + + private short valueMap_map[]; + + private short nameGroup_map[]; + + public Builder(byte raw[]) { + super(raw); + } + + /** + * The valueMap_map[] must be setup in advance. This method + * does that. + */ + public void setupValueMap_map(short offset, short count) { + valueMap_map = new short[count]; + for (int i=0; iindex + * map (nameGroupPool_map[]). + */ + public short[] readNameGroupPool(short offset, short count) + throws IOException { + // Read nameGroupPool[]. This contains offsets from start of + // header. We translate these into indices into stringPool[] + // on the fly. 
The offset 0, which indicates "no entry", we + // translate into index 0, which contains a null String + // pointer. + seek(offset); + short pos = offset; + short nameGroupPool[] = new short[count]; + nameGroup_map = new short[count]; + for (int i=0; i handlers; + + private static final boolean DEBUG = ICUDebug.enabled("URLHandler"); + + static { + Map h = null; + + try { + InputStream is = URLHandler.class.getResourceAsStream(PROPNAME); + if (is == null) { + ClassLoader loader = Utility.getFallbackClassLoader(); + is = loader.getResourceAsStream(PROPNAME); + } + + if (is != null) { + Class[] params = { URL.class }; + BufferedReader br = new BufferedReader(new InputStreamReader(is)); + + for (String line = br.readLine(); line != null; line = br.readLine()) { + line = line.trim(); + + if (line.length() == 0 || line.charAt(0) == '#') { + continue; + } + + int ix = line.indexOf('='); + + if (ix == -1) { + if (DEBUG) System.err.println("bad urlhandler line: '" + line + "'"); + break; + } + + String key = line.substring(0, ix).trim(); + String value = line.substring(ix+1).trim(); + + try { + Class cl = Class.forName(value); + Method m = cl.getDeclaredMethod("get", params); + + if (h == null) { + h = new HashMap(); + } + + h.put(key, m); + } + catch (ClassNotFoundException e) { + if (DEBUG) System.err.println(e); + } + catch(NoSuchMethodException e) { + if (DEBUG) System.err.println(e); + } + catch(SecurityException e) { + if (DEBUG) System.err.println(e); + } + } + } + } catch (Throwable t) { + if (DEBUG) System.err.println(t); + } + + handlers = h; + } + + public static URLHandler get(URL url) { + if (url == null) { + return null; + } + + String protocol = url.getProtocol(); + + if (handlers != null) { + Method m = handlers.get(protocol); + + if (m != null) { + try { + URLHandler handler = (URLHandler)m.invoke(null, new Object[] { url }); + + if (handler != null) { + return handler; + } + } + catch(IllegalAccessException e) { + if (DEBUG) System.err.println(e); + } + 
catch(IllegalArgumentException e) { + if (DEBUG) System.err.println(e); + } + catch(InvocationTargetException e) { + if (DEBUG) System.err.println(e); + } + } + } + + return getDefault(url); + } + + protected static URLHandler getDefault(URL url) { + String protocol = url.getProtocol(); + + if (protocol.equals("file")) { + return new FileURLHandler(url); + } else if (protocol.equals("jar")) { + return new JarURLHandler(url); + } else { + return null; + } + } + + private static class FileURLHandler extends URLHandler { + File file; + + FileURLHandler(URL url) { + try { + file = new File(url.toURI()); + } catch (URISyntaxException use) { + // fall through + } + if (file == null || !file.exists()) { + if (DEBUG) System.err.println("file does not exist - " + url.toString()); + throw new IllegalArgumentException(); + } + } + + public void guide(URLVisitor v, boolean recurse, boolean strip) { + if (file.isDirectory()) { + process(v, recurse, strip, "/", file.listFiles()); + } else { + v.visit(file.getName()); + } + } + + private void process(URLVisitor v, boolean recurse, boolean strip, String path, File[] files) { + for (int i = 0; i < files.length; i++) { + File f = files[i]; + + if (f.isDirectory()) { + if (recurse) { + process(v, recurse, strip, path + f.getName()+ '/', f.listFiles()); + } + } else { + v.visit(strip? 
f.getName() : path + f.getName()); + } + } + } + } + + private static class JarURLHandler extends URLHandler { + JarFile jarFile; + String prefix; + + JarURLHandler(URL url) { + try { + prefix = url.getPath(); + + int ix = prefix.indexOf("!/"); + + if (ix >= 0) { + prefix = prefix.substring(ix + 2); // truncate after "!/" + } + + JarURLConnection conn = (JarURLConnection)url.openConnection(); + + jarFile = conn.getJarFile(); + } + catch (Exception e) { + if (DEBUG) System.err.println("icurb jar error: " + e); + throw new IllegalArgumentException("jar error: " + e.getMessage()); + } + } + + public void guide(URLVisitor v, boolean recurse, boolean strip) { + try { + Enumeration entries = jarFile.entries(); + + while (entries.hasMoreElements()) { + JarEntry entry = entries.nextElement(); + + if (!entry.isDirectory()) { // skip just directory paths + String name = entry.getName(); + + if (name.startsWith(prefix)) { + name = name.substring(prefix.length()); + + int ix = name.lastIndexOf('/'); + + if (ix != -1) { + if (!recurse) { + continue; + } + + if (strip) { + name = name.substring(ix+1); + } + } + + v.visit(name); + } + } + } + } + catch (Exception e) { + if (DEBUG) System.err.println("icurb jar error: " + e); + } + } + } + + public void guide(URLVisitor visitor, boolean recurse) + { + guide(visitor, recurse, true); + } + + public abstract void guide(URLVisitor visitor, boolean recurse, boolean strip); + + public interface URLVisitor { + void visit(String str); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/USerializedSet.java b/main/classes/core/src/com/ibm/icu/impl/USerializedSet.java new file mode 100644 index 00000000000..4e2a874737d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/USerializedSet.java @@ -0,0 +1,185 @@ +/* + ******************************************************************************* + * Copyright (C) 2002-2010, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ******************************************************************************* +*/ + +package com.ibm.icu.impl; +/** + * @version 1.1 + * @author Markus W. Scherer + * Ram: Add documentation, remove unwanted methods, improve coverage. + */ + +/** + * Simple class for handling serialized USet/UnicodeSet structures + * without object creation. See ICU4C icu/source/common/uset.c. + * + * @internal + */ +public final class USerializedSet { + /** + * Fill in the given serialized set object. + * @param src pointer to start of array + * @param srcStart pointer to start of serialized data (length value) + * @return true if the given array is valid, otherwise false + */ + public final boolean getSet(char src[], int srcStart) { + // leave most argument checking up to Java exceptions + array=null; + arrayOffset=bmpLength=length=0; + + length=src[srcStart++]; + + + if((length&0x8000) >0) { + /* there are supplementary values */ + length&=0x7fff; + if(src.length<(srcStart+1+length)) { + length=0; + throw new IndexOutOfBoundsException(); + } + bmpLength=src[srcStart++]; + } else { + /* only BMP values */ + if(src.length<(srcStart+length)) { + length=0; + throw new IndexOutOfBoundsException(); + } + bmpLength=length; + } + array = new char[length]; + System.arraycopy(src,srcStart,array,0,length); + //arrayOffset=srcStart; + return true; + } + + /** + * Set the USerializedSet to contain the given character (and nothing + * else). + */ + public final void setToOne(int c) { + if( 0x10ffff>16); + array[1]=(char)c; + ++c; + array[2]=(char)(c>>16); + array[3]=(char)c; + } else /* c==0x10ffff */ { + bmpLength=0; + length=2; + array[0]=0x10; + array[1]=0xffff; + } + } + + /** + * Returns a range of characters contained in the given serialized + * set. + * @param rangeIndex a non-negative integer in the range 0.. 
+ * getSerializedRangeCount()-1 + * @param range variable to receive the data in the range + * @return true if rangeIndex is valid, otherwise false + */ + public final boolean getRange(int rangeIndex, int[] range) { + if( rangeIndex<0) { + return false; + } + if(array==null){ + array = new char[8]; + } + if(range==null || range.length <2){ + throw new IllegalArgumentException(); + } + rangeIndex*=2; /* address start/limit pairs */ + if(rangeIndex0x10ffff) { + return false; + } + + if(c<=0xffff) { + int i; + /* find c in the BMP part */ + for(i=0; i=array[i]; ++i) {} + return ((i&1) != 0); + } else { + int i; + /* find c in the supplementary part */ + char high=(char)(c>>16), low=(char)c; + for(i=bmpLength; + iarray[i] || (high==array[i] && low>=array[i+1])); + i+=2) {} + + /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ + return (((i+bmpLength)&2)!=0); + } + } + + /** + * Returns the number of disjoint ranges of characters contained in + * the given serialized set. Ignores any strings contained in the + * set. + * @return a non-negative integer counting the character ranges + * contained in set + */ + public final int countRanges() { + return (bmpLength+(length-bmpLength)/2+1)/2; + } + + private char array[] = new char[8]; + private int arrayOffset, bmpLength, length; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java b/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java new file mode 100644 index 00000000000..925ad5a1e4b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java @@ -0,0 +1,387 @@ +/* + ******************************************************************************** + * Copyright (C) 2009-2010, Google, International Business Machines Corporation * + * and others. All Rights Reserved. 
* + ******************************************************************************** + */ +package com.ibm.icu.impl; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.text.ParsePosition; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.regex.Pattern; + +import com.ibm.icu.text.StringTransform; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.Freezable; + +/** + * Contains utilities to supplement the JDK Regex, since it doesn't handle + * Unicode well. + * + * @author markdavis + */ +public class UnicodeRegex implements Cloneable, Freezable, StringTransform { + // Note: we don't currently have any state, but intend to in the future, + // particularly for the regex style supported. + + /** + * Adds full Unicode property support, with the latest version of Unicode, + * to Java Regex, bringing it up to Level 1 (see + * http://www.unicode.org/reports/tr18/). It does this by preprocessing the + * regex pattern string and interpreting the character classes (\p{...}, + * \P{...}, [...]) according to their syntax and meaning in UnicodeSet. With + * this utility, Java regex expressions can be updated to work with the + * latest version of Unicode, and with all Unicode properties. Note that the + * UnicodeSet syntax has not yet, however, been updated to be completely + * consistent with Java regex, so be careful of the differences. + *

    Not thread-safe; create a separate copy for different threads. + *

    In the future, we may extend this to support other regex packages. + * + * @regex A modified Java regex pattern, as in the input to + * Pattern.compile(), except that all "character classes" are + * processed as if they were UnicodeSet patterns. Example: + * "abc[:bc=N:]. See UnicodeSet for the differences in syntax. + * @return A processed Java regex pattern, suitable for input to + * Pattern.compile(). + */ + public String transform(String regex) { + StringBuilder result = new StringBuilder(); + UnicodeSet temp = new UnicodeSet(); + ParsePosition pos = new ParsePosition(0); + int state = 0; // 1 = after \ + + // We add each character unmodified to the output, unless we have a + // UnicodeSet. Note that we don't worry about supplementary characters, + // since none of the syntax uses them. + + for (int i = 0; i < regex.length(); ++i) { + // look for UnicodeSets, allowing for quoting with \ and \Q + char ch = regex.charAt(i); + switch (state) { + case 0: // we only care about \, and '['. + if (ch == '\\') { + if (UnicodeSet.resemblesPattern(regex, i)) { + // should only happen with \p + i = processSet(regex, i, result, temp, pos); + continue; + } + state = 1; + } else if (ch == '[') { + // if we have what looks like a UnicodeSet + if (UnicodeSet.resemblesPattern(regex, i)) { + i = processSet(regex, i, result, temp, pos); + continue; + } + } + break; + + case 1: // we are after a \ + if (ch == 'Q') { + state = 1; + } else { + state = 0; + } + break; + + case 2: // we are in a \Q... + if (ch == '\\') { + state = 3; + } + break; + + case 3: // we are in at \Q...\ + if (ch == 'E') { + state = 0; + } + state = 2; + break; + } + result.append(ch); + } + return result.toString(); + } + + /** + * Convenience static function, using standard parameters. 
+ * @param regex as in process() + * @return processed regex pattern, as in process() + */ + public static String fix(String regex) { + return STANDARD.transform(regex); + } + + /** + * Compile a regex string, after processing by fix(...). + * + * @param regex Raw regex pattern, as in fix(...). + * @return Pattern + */ + public static Pattern compile(String regex) { + return Pattern.compile(STANDARD.transform(regex)); + } + + /** + * Compile a regex string, after processing by fix(...). + * + * @param regex Raw regex pattern, as in fix(...). + * @return Pattern + */ + public static Pattern compile(String regex, int options) { + return Pattern.compile(STANDARD.transform(regex), options); + } + + /** + * Compile a composed string from a set of BNF lines; see the List version for more information. + * + * @param bnfLines Series of BNF lines. + * @return Pattern + */ + public String compileBnf(String bnfLines) { + return compileBnf(Arrays.asList(bnfLines.split("\\r\\n?|\\n"))); + } + + /** + * Compile a composed string from a set of BNF lines, such as for composing a regex + * expression. The lines can be in any order, but there must not be any + * cycles. The result can be used as input for fix(). + *

    + * Example: + *

    +     * uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;
    +     * scheme = reserved+;
    +     * host = // reserved+;
    +     * query = [\\=reserved]+;
    +     * fragment = reserved+;
    +     * reserved = [[:ascii:][:alphabetic:]];
    +     * 
    + *

    + * Caveats: at this point the parsing is simple; for example, # cannot be + * quoted (use \\u0023); you can set it to null to disable. + * The equality sign and a few others can be reset with + * setBnfX(). + * + * @param lines Series of lines that represent a BNF expression. The lines contain + * a series of statements that of the form x=y;. A statement can take + * multiple lines, but there can't be multiple statements on a line. + * A hash quotes to the end of the line. + * @return Pattern + */ + public String compileBnf(List lines) { + Map variables = getVariables(lines); + Set unused = new LinkedHashSet(variables.keySet()); + // brute force replacement; do twice to allow for different order + // later on can optimize + for (int i = 0; i < 2; ++i) { + for (Iterator it = variables.keySet().iterator(); it.hasNext();) { + String variable = it.next(); + String definition = variables.get(variable); + for (Iterator it2 = variables.keySet().iterator(); it2.hasNext();) { + String variable2 = it2.next(); + if (variable.equals(variable2)) continue; + String definition2 = variables.get(variable2); + String altered2 = definition2.replace(variable, definition); + if (!altered2.equals(definition2)) { + unused.remove(variable); + variables.put(variable2, altered2); + if (log != null) { + try { + log.append(variable2 + "=" + altered2 + ";"); + } catch (IOException e) { + throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); + } + } + } + } + } + } + if (unused.size() != 1) { + throw new IllegalArgumentException("Not a single root: " + unused); + } + return variables.get(unused.iterator().next()); + } + + public String getBnfCommentString() { + return bnfCommentString; + } + + public void setBnfCommentString(String bnfCommentString) { + this.bnfCommentString = bnfCommentString; + } + + public String getBnfVariableInfix() { + return bnfVariableInfix; + } + + public void setBnfVariableInfix(String bnfVariableInfix) { + this.bnfVariableInfix = 
bnfVariableInfix; + } + + public String getBnfLineSeparator() { + return bnfLineSeparator; + } + + public void setBnfLineSeparator(String bnfLineSeparator) { + this.bnfLineSeparator = bnfLineSeparator; + } + + /** + * Utility for loading lines from a file. + * @param result The result of the appended lines. + * @param file The file to have an input stream. + * @param encoding if null, then UTF-8 + * @return filled list + * @throws IOException If there were problems opening the file for input stream. + */ + public static List appendLines(List result, String file, String encoding) throws IOException { + return appendLines(result, new FileInputStream(file), encoding); + } + + /** + * Utility for loading lines from a UTF8 file. + * @param result The result of the appended lines. + * @param inputStream The input stream. + * @param encoding if null, then UTF-8 + * @return filled list + * @throws IOException If there were problems opening the input stream for reading. + */ + public static List appendLines(List result, InputStream inputStream, String encoding) + throws UnsupportedEncodingException, IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(inputStream, encoding == null ? "UTF-8" : encoding)); + while (true) { + String line = in.readLine(); + if (line == null) break; + result.add(line); + } + return result; + } + + + + /* (non-Javadoc) + * @see com.ibm.icu.util.Freezable#cloneAsThawed() + */ + public UnicodeRegex cloneAsThawed() { + // TODO Auto-generated method stub + try { + return (UnicodeRegex)clone(); + } catch (CloneNotSupportedException e) { + throw new IllegalArgumentException(); // should never happen + } + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.Freezable#freeze() + */ + public UnicodeRegex freeze() { + // no action needed now. 
+ return this; + } + + /* (non-Javadoc) + * @see com.ibm.icu.util.Freezable#isFrozen() + */ + public boolean isFrozen() { + // at this point, always true + return true; + } + + // ===== PRIVATES ===== + + private int processSet(String regex, int i, StringBuilder result, UnicodeSet temp, ParsePosition pos) { + try { + pos.setIndex(i); + UnicodeSet x = temp.clear().applyPattern(regex, pos, null, 0); + x.complement().complement(); // hack to fix toPattern + result.append(x.toPattern(false)); + i = pos.getIndex() - 1; // allow for the loop increment + return i; + } catch (Exception e) { + throw (IllegalArgumentException) new IllegalArgumentException("Error in " + regex).initCause(e); + } + } + + private static UnicodeRegex STANDARD = new UnicodeRegex(); + private String bnfCommentString = "#"; + private String bnfVariableInfix = "="; + private String bnfLineSeparator = "\n"; + private Appendable log = null; + + private Comparator LongestFirst = new Comparator() { + public int compare(Object obj0, Object obj1) { + String arg0 = obj0.toString(); + String arg1 = obj1.toString(); + int len0 = arg0.length(); + int len1 = arg1.length(); + if (len0 != len1) return len1 - len0; + return arg0.compareTo(arg1); + } + }; + + private Map getVariables(List lines) { + Map variables = new TreeMap(LongestFirst); + String variable = null; + StringBuffer definition = new StringBuffer(); + int count = 0; + for (Iterator it = lines.iterator(); it.hasNext();) { + String line = it.next(); + ++count; + // remove initial bom, comments + if (line.length() == 0) continue; + if (line.charAt(0) == '\uFEFF') line = line.substring(1); + + if (bnfCommentString != null) { + int hashPos = line.indexOf(bnfCommentString); + if (hashPos >= 0) line = line.substring(0, hashPos); + } + String trimline = line.trim(); + if (trimline.length() == 0) continue; + + // String[] lineParts = line.split(";"); + String linePart = line; // lineParts[i]; // .trim().replace("\\s+", " "); + if (linePart.trim().length() == 
0) continue; + boolean terminated = trimline.endsWith(";"); + if (terminated) { + linePart = linePart.substring(0,linePart.lastIndexOf(';')); + } + int equalsPos = linePart.indexOf(bnfVariableInfix); + if (equalsPos >= 0) { + if (variable != null) { + throw new IllegalArgumentException("Missing ';' before " + count + ") " + line); + } + variable = linePart.substring(0,equalsPos).trim(); + if (variables.containsKey(variable)) { + throw new IllegalArgumentException("Duplicate variable definition in " + line); + } + definition.append(linePart.substring(equalsPos+1).trim()); + } else { // no equals, so + if (variable == null) { + throw new IllegalArgumentException("Missing '=' at " + count + ") " + line); + } + definition.append(bnfLineSeparator).append(linePart); + } + // we are terminated if i is not at the end, or the line ends with a ; + if (terminated) { + variables.put(variable, definition.toString()); + variable = null; // signal we have no variable + definition.setLength(0); + } + } + if (variable != null) { + throw new IllegalArgumentException("Missing ';' at end"); + } + return variables; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java b/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java new file mode 100644 index 00000000000..96bfa4880ed --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java @@ -0,0 +1,932 @@ +/* + ****************************************************************************** + * + * Copyright (C) 2009-2010, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** + */ + +package com.ibm.icu.impl; + +import java.util.ArrayList; + +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSet.SpanCondition; + +/* + * Implement span() etc. for a set with strings. + * Avoid recursion because of its exponential complexity. 
+ * Instead, try multiple paths at once and track them with an IndexList. + */ +public class UnicodeSetStringSpan { + + /* + * Which span() variant will be used? The object is either built for one variant and used once, or built for all and + * may be used many times. + */ + public static final int FWD = 0x20; + public static final int BACK = 0x10; + public static final int UTF16 = 8; + public static final int CONTAINED = 2; + public static final int NOT_CONTAINED = 1; + + public static final int ALL = 0x3f; + + public static final int FWD_UTF16_CONTAINED = FWD | UTF16 | CONTAINED; + public static final int FWD_UTF16_NOT_CONTAINED = FWD | UTF16 | NOT_CONTAINED; + public static final int BACK_UTF16_CONTAINED = BACK | UTF16 | CONTAINED; + public static final int BACK_UTF16_NOT_CONTAINED = BACK | UTF16 | NOT_CONTAINED; + + // Special spanLength short values. (since Java has not unsigned byte type) + // All code points in the string are contained in the parent set. + static final short ALL_CP_CONTAINED = 0xff; + // The spanLength is >=0xfe. + static final short LONG_SPAN = ALL_CP_CONTAINED - 1; + + // Set for span(). Same as parent but without strings. + private UnicodeSet spanSet; + + // Set for span(not contained). + // Same as spanSet, plus characters that start or end strings. + private UnicodeSet spanNotSet; + + // The strings of the parent set. + private ArrayList strings; + + // the lengths of span(), spanBack() etc. for each string. + private short[] spanLengths; + + // Maximum lengths of relevant strings. + private int maxLength16; + + // Set up for all variants of span()? + private boolean all; + + // Span helper + private OffsetList offsets; + + // Construct for all variants of span(), or only for any one variant. + // Initialize as little as possible, for single use. 
+ public UnicodeSetStringSpan(final UnicodeSet set, final ArrayList setStrings, int which) { + spanSet = new UnicodeSet(0, 0x10ffff); + strings = setStrings; + all = (which == ALL); + spanSet.retainAll(set); + if (0 != (which & NOT_CONTAINED)) { + // Default to the same sets. + // addToSpanNotSet() will create a separate set if necessary. + spanNotSet = spanSet; + } + offsets = new OffsetList(); + + // Determine if the strings even need to be taken into account at all for span() etc. + // If any string is relevant, then all strings need to be used for + // span(longest match) but only the relevant ones for span(while contained). + // TODO: Possible optimization: Distinguish CONTAINED vs. LONGEST_MATCH + // and do not store UTF-8 strings if !thisRelevant and CONTAINED. + // (Only store irrelevant UTF-8 strings for LONGEST_MATCH where they are relevant after all.) + // Also count the lengths of the UTF-8 versions of the strings for memory allocation. + int stringsLength = strings.size(); + + int i, spanLength; + boolean someRelevant = false; + for (i = 0; i < stringsLength; ++i) { + String string = strings.get(i); + int length16 = string.length(); + spanLength = spanSet.span(string, SpanCondition.CONTAINED); + if (spanLength < length16) { // Relevant string. + someRelevant = true; + } + if ((0 != (which & UTF16)) && length16 > maxLength16) { + maxLength16 = length16; + } + } + if (!someRelevant) { + maxLength16 = 0; + return; + } + + // Freeze after checking for the need to use strings at all because freezing + // a set takes some time and memory which are wasted if there are no relevant strings. + if (all) { + spanSet.freeze(); + } + + int spanBackLengthsOffset; + + // Allocate a block of meta data. + int allocSize; + if (all) { + // 2 sets of span lengths + allocSize = stringsLength * (2); + } else { + allocSize = stringsLength; // One set of span lengths. + } + spanLengths = new short[allocSize]; + + if (all) { + // Store span lengths for all span() variants. 
+ spanBackLengthsOffset = stringsLength; + } else { + // Store span lengths for only one span() variant. + spanBackLengthsOffset = 0; + } + + // Set the meta data and spanNotSet and write the UTF-8 strings. + + for (i = 0; i < stringsLength; ++i) { + String string = strings.get(i); + int length16 = string.length(); + spanLength = spanSet.span(string, SpanCondition.CONTAINED); + if (spanLength < length16) { // Relevant string. + if (0 != (which & UTF16)) { + if (0 != (which & CONTAINED)) { + if (0 != (which & FWD)) { + spanLengths[i] = makeSpanLengthByte(spanLength); + } + if (0 != (which & BACK)) { + spanLength = length16 + - spanSet.spanBack(string, length16, SpanCondition.CONTAINED); + spanLengths[spanBackLengthsOffset + i] = makeSpanLengthByte(spanLength); + } + } else /* not CONTAINED, not all, but NOT_CONTAINED */{ + spanLengths[i] = spanLengths[spanBackLengthsOffset + i] = 0; // Only store a relevant/irrelevant + // flag. + } + } + if (0 != (which & NOT_CONTAINED)) { + // Add string start and end code points to the spanNotSet so that + // a span(while not contained) stops before any string. + int c; + if (0 != (which & FWD)) { + c = string.codePointAt(0); + addToSpanNotSet(c); + } + if (0 != (which & BACK)) { + c = string.codePointBefore(length16); + addToSpanNotSet(c); + } + } + } else { // Irrelevant string. + if (all) { + spanLengths[i] = spanLengths[spanBackLengthsOffset + i] = ALL_CP_CONTAINED; + } else { + // All spanXYZLengths pointers contain the same address. + spanLengths[i] = ALL_CP_CONTAINED; + } + } + } + + // Finish. + if (all) { + spanNotSet.freeze(); + } + } + + /** + * Constructs a copy of an existing UnicodeSetStringSpan. + * Assumes which==ALL for a frozen set. 
+ */ + public UnicodeSetStringSpan(final UnicodeSetStringSpan otherStringSpan, final ArrayList newParentSetStrings) { + spanSet = otherStringSpan.spanSet; + strings = newParentSetStrings; + maxLength16 = otherStringSpan.maxLength16; + all = true; + if (otherStringSpan.spanNotSet == otherStringSpan.spanSet) { + spanNotSet = spanSet; + } else { + spanNotSet = (UnicodeSet) otherStringSpan.spanNotSet.clone(); + } + offsets = new OffsetList(); + + spanLengths = otherStringSpan.spanLengths.clone(); + } + + /* + * Do the strings need to be checked in span() etc.? + * + * @return TRUE if strings need to be checked (call span() here), FALSE if not (use a BMPSet for best performance). + */ + public boolean needsStringSpanUTF16() { + return (maxLength16 != 0); + } + + // For fast UnicodeSet::contains(c). + public boolean contains(int c) { + return spanSet.contains(c); + } + + // Add a starting or ending string character to the spanNotSet + // so that a character span ends before any string. + private void addToSpanNotSet(int c) { + if (spanNotSet == null || spanNotSet == spanSet) { + if (spanSet.contains(c)) { + return; // Nothing to do. + } + spanNotSet = spanSet.cloneAsThawed(); + } + spanNotSet.add(c); + } + + /* + * Note: In span() when spanLength==0 (after a string match, or at the beginning after an empty code point span) and + * in spanNot() and spanNotUTF8(), string matching could use a binary search because all string matches are done + * from the same start index. + * + * For UTF-8, this would require a comparison function that returns UTF-16 order. + * + * This optimization should not be necessary for normal UnicodeSets because most sets have no strings, and most sets + * with strings have very few very short strings. For cases with many strings, it might be better to use a different + * API and implementation with a DFA (state machine). 
+ */ + + /* + * Algorithm for span(SpanCondition.CONTAINED) + * + * Theoretical algorithm: - Iterate through the string, and at each code point boundary: + If the code point there + * is in the set, then remember to continue after it. + If a set string matches at the current position, then + * remember to continue after it. + Either recursively span for each code point or string match, or recursively span + * for all but the shortest one and iteratively continue the span with the shortest local match. + Remember the + * longest recursive span (the farthest end point). + If there is no match at the current position, neither for the + * code point there nor for any set string, then stop and return the longest recursive span length. + * + * Optimized implementation: + * + * (We assume that most sets will have very few very short strings. A span using a string-less set is extremely + * fast.) + * + * Create and cache a spanSet which contains all of the single code points of the original set but none of its + * strings. + * + * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED). - Loop: + Try to match each set + * string at the end of the spanLength. ~ Set strings that start with set-contained code points must be matched with + * a partial overlap because the recursive algorithm would have tried to match them at every position. ~ Set strings + * that entirely consist of set-contained code points are irrelevant for span(SpanCondition.CONTAINED) + * because the recursive algorithm would continue after them anyway and find the longest recursive match from their + * end. ~ Rather than recursing, note each end point of a set string match. + If no set string matched after + * spanSet.span(), then return with where the spanSet.span() ended. + If at least one set string matched after + * spanSet.span(), then pop the shortest string match end point and continue the loop, trying to match all set + * strings from there. 
+ If at least one more set string matched after a previous string match, then test if the + * code point after the previous string match is also contained in the set. Continue the loop with the shortest end + * point of either this code point or a matching set string. + If no more set string matched after a previous string + * match, then try another spanLength=spanSet.span(SpanCondition.CONTAINED). Stop if spanLength==0, + * otherwise continue the loop. + * + * By noting each end point of a set string match, the function visits each string position at most once and + * finishes in linear time. + * + * The recursive algorithm may visit the same string position many times if multiple paths lead to it and finishes + * in exponential time. + */ + + /* + * Algorithm for span(SIMPLE) + * + * Theoretical algorithm: - Iterate through the string, and at each code point boundary: + If the code point there + * is in the set, then remember to continue after it. + If a set string matches at the current position, then + * remember to continue after it. + Continue from the farthest match position and ignore all others. + If there is + * no match at the current position, then stop and return the current position. + * + * Optimized implementation: + * + * (Same assumption and spanSet as above.) + * + * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED). - Loop: + Try to match each set + * string at the end of the spanLength. ~ Set strings that start with set-contained code points must be matched with + * a partial overlap because the standard algorithm would have tried to match them earlier. ~ Set strings that + * entirely consist of set-contained code points must be matched with a full overlap because the longest-match + * algorithm would hide set string matches that end earlier. Such set strings need not be matched earlier inside the + * code point span because the standard algorithm would then have continued after the set string match anyway. 
~
+     * Remember the longest set string match (farthest end point) from the earliest starting point. + If no set string
+     * matched after spanSet.span(), then return with where the spanSet.span() ended. + If at least one set string
+     * matched, then continue the loop after the longest match from the earliest position. + If no more set string
+     * matched after a previous string match, then try another
+     * spanLength=spanSet.span(SpanCondition.CONTAINED). Stop if spanLength==0, otherwise continue the
+     * loop.
+     */
+    /**
+     * Span a string, taking the set's strings into account.
+     * NOT_CONTAINED is delegated to spanNot(); CONTAINED tries every string match end point
+     * via the shared offsets list; any other condition (SIMPLE) follows the longest match only.
+     * The method mutates the instance's offsets list.
+     *
+     * @param s The string to be spanned
+     * @param start The start index that the span begins
+     * @param length The number of chars of s, counted from start, that may be spanned
+     * @param spanCondition The span condition
+     * @return the length of the span, counted from start
+     * @draft ICU 4.4
+     */
+    public synchronized int span(CharSequence s, int start, int length, SpanCondition spanCondition) {
+        if (spanCondition == SpanCondition.NOT_CONTAINED) {
+            return spanNot(s, start, length);
+        }
+        // Fast path: a plain code point span that already covers everything.
+        int spanLength = spanSet.span(s.subSequence(start, start + length), SpanCondition.CONTAINED);
+        if (spanLength == length) {
+            return length;
+        }
+
+        // Consider strings; they may overlap with the span.
+        int initSize = 0;
+        if (spanCondition == SpanCondition.CONTAINED) {
+            // Use offset list to try all possibilities.
+            initSize = maxLength16;
+        }
+        offsets.setMaxLength(initSize);
+        // pos is the absolute index reached so far; rest is how many chars remain after pos.
+        int pos = start + spanLength, rest = length - spanLength;
+        int i, stringsLength = strings.size();
+        for (;;) {
+            if (spanCondition == SpanCondition.CONTAINED) {
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[i];
+                    if (overlap == ALL_CP_CONTAINED) {
+                        continue; // Irrelevant string.
+                    }
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string at pos-overlap..pos.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // While contained: No point matching fully inside the code point span.
+                        overlap = string.offsetByCodePoints(overlap, -1); // Length of the string minus the last code
+                        // point.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int inc = length16 - overlap; // Keep overlap+inc==length16.
+                    // Slide the candidate start from pos-overlap up to pos.
+                    for (;;) {
+                        if (inc > rest) {
+                            break;
+                        }
+                        // Try to match if the increment is not listed already.
+                        if (!offsets.containsOffset(inc) && matches16CPB(s, pos - overlap, length, string, length16)) {
+                            if (inc == rest) {
+                                return length; // Reached the end of the string.
+                            }
+                            offsets.addOffset(inc);
+                        }
+                        if (overlap == 0) {
+                            break;
+                        }
+                        --overlap;
+                        ++inc;
+                    }
+                }
+            } else /* SIMPLE */{
+                int maxInc = 0, maxOverlap = 0;
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[i];
+                    // For longest match, we do need to try to match even an all-contained string
+                    // to find the match from the earliest start.
+
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string at pos-overlap..pos.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // Longest match: Need to match fully inside the code point span
+                        // to find the match from the earliest start.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int inc = length16 - overlap; // Keep overlap+inc==length16.
+                    for (;;) {
+                        // Stop once this string can no longer beat the best match found so far.
+                        if (inc > rest || overlap < maxOverlap) {
+                            break;
+                        }
+                        // Try to match if the string is longer or starts earlier.
+                        if ((overlap > maxOverlap || /* redundant overlap==maxOverlap && */inc > maxInc)
+                                && matches16CPB(s, pos - overlap, length, string, length16)) {
+                            maxInc = inc; // Longest match from earliest start.
+                            maxOverlap = overlap;
+                            break;
+                        }
+                        --overlap;
+                        ++inc;
+                    }
+                }
+
+                if (maxInc != 0 || maxOverlap != 0) {
+                    // Longest-match algorithm, and there was a string match.
+                    // Simply continue after it.
+                    pos += maxInc;
+                    rest -= maxInc;
+                    if (rest == 0) {
+                        return length; // Reached the end of the string.
+                    }
+                    spanLength = 0; // Match strings from after a string match.
+                    continue;
+                }
+            }
+            // Finished trying to match all strings at pos.
+
+            if (spanLength != 0 || pos == 0) {
+                // The position is after an unlimited code point span (spanLength!=0),
+                // not after a string match.
+                // The only position where spanLength==0 after a span is pos==0.
+                // Otherwise, an unlimited code point span is only tried again when no
+                // strings match, and if such a non-initial span fails we stop.
+                if (offsets.isEmpty()) {
+                    return pos - start; // No strings matched after a span.
+                }
+                // Match strings from after the next string match.
+            } else {
+                // The position is after a string match (or a single code point).
+                if (offsets.isEmpty()) {
+                    // No more strings matched after a previous string match.
+                    // Try another code point span from after the last string match.
+                    spanLength = spanSet.span(s.subSequence(pos, pos + rest), SpanCondition.CONTAINED);
+                    if (spanLength == rest || // Reached the end of the string, or
+                            spanLength == 0 // neither strings nor span progressed.
+                    ) {
+                        return pos + spanLength - start;
+                    }
+                    pos += spanLength;
+                    rest -= spanLength;
+                    continue; // spanLength>0: Match strings from after a span.
+                } else {
+                    // Try to match only one code point from after a string match if some
+                    // string matched beyond it, so that we try all possible positions
+                    // and don't overshoot.
+                    spanLength = spanOne(spanSet, s, pos, rest);
+                    if (spanLength > 0) {
+                        if (spanLength == rest) {
+                            return length; // Reached the end of the string.
+                        }
+                        // Match strings after this code point.
+                        // There cannot be any increments below it because UnicodeSet strings
+                        // contain multiple code points.
+                        pos += spanLength;
+                        rest -= spanLength;
+                        offsets.shift(spanLength);
+                        spanLength = 0;
+                        continue; // Match strings from after a single code point.
+                    }
+                    // Match strings from after the next string match.
+                }
+            }
+            // Jump to the nearest recorded string-match end point and retry from there.
+            int minOffset = offsets.popMinimum();
+            pos += minOffset;
+            rest -= minOffset;
+            spanLength = 0; // Match strings from after a string match.
+        }
+    }
+
+    /**
+     * Span a string backwards.
+     *
+     * NOT_CONTAINED is delegated to spanNotBack(); CONTAINED records every string match start
+     * in the shared offsets list; any other condition (SIMPLE) follows the longest match only.
+     * The method mutates the instance's offsets list.
+     *
+     * @param s The string to be spanned
+     * @param length The index in s from which the span runs backwards (chars before it are examined)
+     * @param spanCondition The span condition
+     * @return The string index which starts the span (i.e. inclusive).
+     * @draft ICU 4.4
+     */
+    public synchronized int spanBack(CharSequence s, int length, SpanCondition spanCondition) {
+        if (spanCondition == SpanCondition.NOT_CONTAINED) {
+            return spanNotBack(s, length);
+        }
+        // Fast path: a plain code point span that already reaches the start.
+        int pos = spanSet.spanBack(s, length, SpanCondition.CONTAINED);
+        if (pos == 0) {
+            return 0;
+        }
+        int spanLength = length - pos;
+
+        // Consider strings; they may overlap with the span.
+        int initSize = 0;
+        if (spanCondition == SpanCondition.CONTAINED) {
+            // Use offset list to try all possibilities.
+            initSize = maxLength16;
+        }
+        offsets.setMaxLength(initSize);
+        int i, stringsLength = strings.size();
+        // Backward span lengths are stored after the forward ones only when all variants are kept.
+        int spanBackLengthsOffset = 0;
+        if (all) {
+            spanBackLengthsOffset = stringsLength;
+        }
+        for (;;) {
+            if (spanCondition == SpanCondition.CONTAINED) {
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[spanBackLengthsOffset + i];
+                    if (overlap == ALL_CP_CONTAINED) {
+                        continue; // Irrelevant string.
+                    }
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string at pos-(length16-overlap)..pos-length16.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // While contained: No point matching fully inside the code point span.
+                        int len1 = 0;
+                        len1 = string.offsetByCodePoints(0, 1);
+                        overlap -= len1; // Length of the string minus the first code point.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int dec = length16 - overlap; // Keep dec+overlap==length16.
+                    // Slide the candidate start from pos-dec downwards.
+                    for (;;) {
+                        if (dec > pos) {
+                            break;
+                        }
+                        // Try to match if the decrement is not listed already.
+                        if (!offsets.containsOffset(dec) && matches16CPB(s, pos - dec, length, string, length16)) {
+                            if (dec == pos) {
+                                return 0; // Reached the start of the string.
+                            }
+                            offsets.addOffset(dec);
+                        }
+                        if (overlap == 0) {
+                            break;
+                        }
+                        --overlap;
+                        ++dec;
+                    }
+                }
+            } else /* SIMPLE */{
+                int maxDec = 0, maxOverlap = 0;
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[spanBackLengthsOffset + i];
+                    // For longest match, we do need to try to match even an all-contained string
+                    // to find the match from the latest end.
+
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string at pos-(length16-overlap)..pos-length16.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // Longest match: Need to match fully inside the code point span
+                        // to find the match from the latest end.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int dec = length16 - overlap; // Keep dec+overlap==length16.
+                    for (;;) {
+                        // Stop once this string can no longer beat the best match found so far.
+                        if (dec > pos || overlap < maxOverlap) {
+                            break;
+                        }
+                        // Try to match if the string is longer or ends later.
+                        if ((overlap > maxOverlap || /* redundant overlap==maxOverlap && */dec > maxDec)
+                                && matches16CPB(s, pos - dec, length, string, length16)) {
+                            maxDec = dec; // Longest match from latest end.
+                            maxOverlap = overlap;
+                            break;
+                        }
+                        --overlap;
+                        ++dec;
+                    }
+                }
+
+                if (maxDec != 0 || maxOverlap != 0) {
+                    // Longest-match algorithm, and there was a string match.
+                    // Simply continue before it.
+                    pos -= maxDec;
+                    if (pos == 0) {
+                        return 0; // Reached the start of the string.
+                    }
+                    spanLength = 0; // Match strings from before a string match.
+                    continue;
+                }
+            }
+            // Finished trying to match all strings at pos.
+
+            if (spanLength != 0 || pos == length) {
+                // The position is before an unlimited code point span (spanLength!=0),
+                // not before a string match.
+                // The only position where spanLength==0 before a span is pos==length.
+                // Otherwise, an unlimited code point span is only tried again when no
+                // strings match, and if such a non-initial span fails we stop.
+                if (offsets.isEmpty()) {
+                    return pos; // No strings matched before a span.
+                }
+                // Match strings from before the next string match.
+            } else {
+                // The position is before a string match (or a single code point).
+                if (offsets.isEmpty()) {
+                    // No more strings matched before a previous string match.
+                    // Try another code point span from before the last string match.
+                    int oldPos = pos;
+                    pos = spanSet.spanBack(s, oldPos, SpanCondition.CONTAINED);
+                    spanLength = oldPos - pos;
+                    if (pos == 0 || // Reached the start of the string, or
+                            spanLength == 0 // neither strings nor span progressed.
+                    ) {
+                        return pos;
+                    }
+                    continue; // spanLength>0: Match strings from before a span.
+                } else {
+                    // Try to match only one code point from before a string match if some
+                    // string matched beyond it, so that we try all possible positions
+                    // and don't overshoot.
+                    spanLength = spanOneBack(spanSet, s, pos);
+                    if (spanLength > 0) {
+                        if (spanLength == pos) {
+                            return 0; // Reached the start of the string.
+                        }
+                        // Match strings before this code point.
+                        // There cannot be any decrements below it because UnicodeSet strings
+                        // contain multiple code points.
+                        pos -= spanLength;
+                        offsets.shift(spanLength);
+                        spanLength = 0;
+                        continue; // Match strings from before a single code point.
+                    }
+                    // Match strings from before the next string match.
+                }
+            }
+            // Jump back to the nearest recorded string-match start and retry from there.
+            pos -= offsets.popMinimum();
+            spanLength = 0; // Match strings from before a string match.
+        }
+    }
+
+    /*
+     * Algorithm for spanNot()==span(SpanCondition.NOT_CONTAINED)
+     *
+     * Theoretical algorithm: - Iterate through the string, and at each code point boundary: + If the code point there
+     * is in the set, then return with the current position. + If a set string matches at the current position, then
+     * return with the current position.
+     *
+     * Optimized implementation:
+     *
+     * (Same assumption as for span() above.)
+     *
+     * Create and cache a spanNotSet which contains all of the single code points of the original set but none of its
+     * strings.
For each set string add its initial code point to the spanNotSet. (Also add its final code point for + * spanNotBack().) + * + * - Loop: + * + Do spanLength=spanNotSet.span(SpanCondition.NOT_CONTAINED). + * + If the current code point is in the original set, then return the current position. + * + If any set string matches at the current position, then return the current position. + * + If there is no match at the current position, neither for the code point + * there nor for any set string, then skip this code point and continue the loop. This happens for + * set-string-initial code points that were added to spanNotSet when there is not actually a match for such a set + * string. + * + * @return the length of the span + */ + private int spanNot(CharSequence s, int start, int length) { + int pos = start, rest = length; + int i, stringsLength = strings.size(); + do { + // Span until we find a code point from the set, + // or a code point that starts or ends some string. + i = spanNotSet.span(s.subSequence(pos, pos + rest), SpanCondition.NOT_CONTAINED); + if (i == rest) { + return length; // Reached the end of the string. + } + pos += i; + rest -= i; + + // Check whether the current code point is in the original set, + // without the string starts and ends. + int cpLength = spanOne(spanSet, s, pos, rest); + if (cpLength > 0) { + return pos - start; // There is a set element at pos. + } + + // Try to match the strings at pos. + for (i = 0; i < stringsLength; ++i) { + if (spanLengths[i] == ALL_CP_CONTAINED) { + continue; // Irrelevant string. + } + String string = strings.get(i); + + int length16 = string.length(); + if (length16 <= rest && matches16CPB(s, pos, length, string, length16)) { + return pos - start; // There is a set element at pos. + } + } + + // The span(while not contained) ended on a string start/end which is + // not in the original set. Skip this code point and continue. 
+ // cpLength<0 + pos -= cpLength; + rest += cpLength; + } while (rest != 0); + return length; // Reached the end of the string. + } + + private int spanNotBack(CharSequence s, int length) { + int pos = length; + int i, stringsLength = strings.size(); + do { + // Span until we find a code point from the set, + // or a code point that starts or ends some string. + pos = spanNotSet.spanBack(s, pos, SpanCondition.NOT_CONTAINED); + if (pos == 0) { + return 0; // Reached the start of the string. + } + + // Check whether the current code point is in the original set, + // without the string starts and ends. + int cpLength = spanOneBack(spanSet, s, pos); + if (cpLength > 0) { + return pos; // There is a set element at pos. + } + + // Try to match the strings at pos. + for (i = 0; i < stringsLength; ++i) { + // Use spanLengths rather than a spanLengths pointer because + // it is easier and we only need to know whether the string is irrelevant + // which is the same in either array. + if (spanLengths[i] == ALL_CP_CONTAINED) { + continue; // Irrelevant string. + } + String string = strings.get(i); + + int length16 = string.length(); + if (length16 <= pos && matches16CPB(s, pos - length16, length, string, length16)) { + return pos; // There is a set element at pos. + } + } + + // The span(while not contained) ended on a string start/end which is + // not in the original set. Skip this code point and continue. + // cpLength<0 + pos += cpLength; + } while (pos != 0); + return 0; // Reached the start of the string. + } + + static short makeSpanLengthByte(int spanLength) { + // 0xfe==UnicodeSetStringSpan::LONG_SPAN + return spanLength < LONG_SPAN ? (short) spanLength : LONG_SPAN; + } + + // Compare strings without any argument checks. Requires length>0. 
+ private static boolean matches16(CharSequence s, int start, final String t, int length) { + int end = start + length; + while (length-- > 0) { + if (s.charAt(--end) != t.charAt(length)) { + return false; + } + } + return true; + } + + /** + * Compare 16-bit Unicode strings (which may be malformed UTF-16) + * at code point boundaries. + * That is, each edge of a match must not be in the middle of a surrogate pair. + * @param start The start index of s. + * @param slength The length of s from start. + * @param tlength The length of t. + */ + static boolean matches16CPB(CharSequence s, int start, int slength, final String t, int tlength) { + return !(0 < start && com.ibm.icu.text.UTF16.isLeadSurrogate (s.charAt(start - 1)) && + com.ibm.icu.text.UTF16.isTrailSurrogate(s.charAt(start + 0))) + && !(tlength < slength && com.ibm.icu.text.UTF16.isLeadSurrogate (s.charAt(start + tlength - 1)) && + com.ibm.icu.text.UTF16.isTrailSurrogate(s.charAt(start + tlength))) + && matches16(s, start, t, tlength); + } + + // Does the set contain the next code point? + // If so, return its length; otherwise return its negative length. + static int spanOne(final UnicodeSet set, CharSequence s, int start, int length) { + char c = s.charAt(start); + if (c >= 0xd800 && c <= 0xdbff && length >= 2) { + char c2 = s.charAt(start + 1); + if (com.ibm.icu.text.UTF16.isTrailSurrogate(c2)) { + int supplementary = UCharacterProperty.getRawSupplementary(c, c2); + return set.contains(supplementary) ? 2 : -2; + } + } + return set.contains(c) ? 1 : -1; + } + + static int spanOneBack(final UnicodeSet set, CharSequence s, int length) { + char c = s.charAt(length - 1); + if (c >= 0xdc00 && c <= 0xdfff && length >= 2) { + char c2 = s.charAt(length - 2); + if (com.ibm.icu.text.UTF16.isLeadSurrogate(c2)) { + int supplementary = UCharacterProperty.getRawSupplementary(c2, c); + return set.contains(supplementary) ? 2 : -2; + } + } + return set.contains(c) ? 
1 : -1; + } + + + /* + * Helper class for UnicodeSetStringSpan. + * + * List of offsets from the current position from where to try matching a code point or a string. Store offsets rather + * than indexes to simplify the code and use the same list for both increments (in span()) and decrements (in + * spanBack()). + * + * Assumption: The maximum offset is limited, and the offsets that are stored at any one time are relatively dense, that + * is, there are normally no gaps of hundreds or thousands of offset values. + * + * The implementation uses a circular buffer of byte flags, each indicating whether the corresponding offset is in the + * list. This avoids inserting into a sorted list of offsets (or absolute indexes) and physically moving part of the + * list. + * + * Note: In principle, the caller should setMaxLength() to the maximum of the max string length and U16_LENGTH/U8_LENGTH + * to account for "long" single code points. + * + * Note: If maxLength were guaranteed to be no more than 32 or 64, the list could be stored as bit flags in a single + * integer. Rather than handling a circular buffer with a start list index, the integer would simply be shifted when + * lower offsets are removed. UnicodeSet does not have a limit on the lengths of strings. + */ + static class OffsetList { + private boolean[] list; + private int length; + private int start; + + public OffsetList() { + list = new boolean[16]; // default size + } + + public void setMaxLength(int maxLength) { + if (maxLength > list.length) { + list = new boolean[maxLength]; + } + clear(); + } + + public void clear() { + for (int i = list.length; i-- > 0;) { + list[i] = false; + } + start = length = 0; + } + + public boolean isEmpty() { + return (length == 0); + } + + // Reduce all stored offsets by delta, used when the current position + // moves by delta. + // There must not be any offsets lower than delta. + // If there is an offset equal to delta, it is removed. 
+ // delta=[1..maxLength] + public void shift(int delta) { + int i = start + delta; + if (i >= list.length) { + i -= list.length; + } + if (list[i]) { + list[i] = false; + --length; + } + start = i; + } + + // Add an offset. The list must not contain it yet. + // offset=[1..maxLength] + public void addOffset(int offset) { + int i = start + offset; + if (i >= list.length) { + i -= list.length; + } + list[i] = true; + ++length; + } + + // offset=[1..maxLength] + public boolean containsOffset(int offset) { + int i = start + offset; + if (i >= list.length) { + i -= list.length; + } + return list[i]; + } + + // Find the lowest stored offset from a non-empty list, remove it, + // and reduce all other offsets by this minimum. + // Returns [1..maxLength]. + public int popMinimum() { + // Look for the next offset in list[start+1..list.length-1]. + int i = start, result; + while (++i < list.length) { + if (list[i]) { + list[i] = false; + --length; + result = i - start; + start = i; + return result; + } + } + // i==list.length + + // Wrap around and look for the next offset in list[0..start]. + // Since the list is not empty, there will be one. + result = list.length - start; + i = 0; + while (!list[i]) { + ++i; + } + list[i] = false; + --length; + start = i; + return result += i; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/Utility.java b/main/classes/core/src/com/ibm/icu/impl/Utility.java new file mode 100644 index 00000000000..f514ebb9cea --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/Utility.java @@ -0,0 +1,1845 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.regex.Pattern; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.Replaceable; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeMatcher; + +public final class Utility { + + private static final char APOSTROPHE = '\''; + private static final char BACKSLASH = '\\'; + private static final int MAGIC_UNSIGNED = 0x80000000; + + /** + * Convenience utility to compare two Object[]s. + * Ought to be in System + */ + public final static boolean arrayEquals(Object[] source, Object target) { + if (source == null) return (target == null); + if (!(target instanceof Object[])) return false; + Object[] targ = (Object[]) target; + return (source.length == targ.length + && arrayRegionMatches(source, 0, targ, 0, source.length)); + } + + /** + * Convenience utility to compare two int[]s + * Ought to be in System + */ + public final static boolean arrayEquals(int[] source, Object target) { + if (source == null) return (target == null); + if (!(target instanceof int[])) return false; + int[] targ = (int[]) target; + return (source.length == targ.length + && arrayRegionMatches(source, 0, targ, 0, source.length)); + } + + /** + * Convenience utility to compare two double[]s + * Ought to be in System + */ + public final static boolean arrayEquals(double[] source, Object target) { + if (source == null) return (target == null); + if (!(target instanceof double[])) return false; + double[] targ = (double[]) target; + return (source.length == targ.length + && arrayRegionMatches(source, 0, targ, 0, source.length)); + } + public final static boolean arrayEquals(byte[] source, Object target) { + if (source == null) return (target == null); + if (!(target instanceof byte[])) return false; + byte[] targ = (byte[]) target; + return (source.length == targ.length + && 
arrayRegionMatches(source, 0, targ, 0, source.length)); + } + + /** + * Convenience utility to compare two Object[]s + * Ought to be in System + */ + public final static boolean arrayEquals(Object source, Object target) { + if (source == null) return (target == null); + // for some reason, the correct arrayEquals is not being called + // so do it by hand for now. + if (source instanceof Object[]) + return(arrayEquals((Object[]) source,target)); + if (source instanceof int[]) + return(arrayEquals((int[]) source,target)); + if (source instanceof double[]) + return(arrayEquals((int[]) source,target)); + if (source instanceof byte[]) + return(arrayEquals((byte[]) source,target)); + return source.equals(target); + } + + /** + * Convenience utility to compare two Object[]s + * Ought to be in System. + * @param len the length to compare. + * The start indices and start+len must be valid. + */ + public final static boolean arrayRegionMatches(Object[] source, int sourceStart, + Object[] target, int targetStart, + int len) + { + int sourceEnd = sourceStart + len; + int delta = targetStart - sourceStart; + for (int i = sourceStart; i < sourceEnd; i++) { + if (!arrayEquals(source[i],target[i + delta])) + return false; + } + return true; + } + + /** + * Convenience utility to compare two Object[]s + * Ought to be in System. + * @param len the length to compare. + * The start indices and start+len must be valid. + */ + public final static boolean arrayRegionMatches(char[] source, int sourceStart, + char[] target, int targetStart, + int len) + { + int sourceEnd = sourceStart + len; + int delta = targetStart - sourceStart; + for (int i = sourceStart; i < sourceEnd; i++) { + if (source[i]!=target[i + delta]) + return false; + } + return true; + } + + /** + * Convenience utility to compare two int[]s. + * @param len the length to compare. + * The start indices and start+len must be valid. 
+ * Ought to be in System + */ + public final static boolean arrayRegionMatches(int[] source, int sourceStart, + int[] target, int targetStart, + int len) + { + int sourceEnd = sourceStart + len; + int delta = targetStart - sourceStart; + for (int i = sourceStart; i < sourceEnd; i++) { + if (source[i] != target[i + delta]) + return false; + } + return true; + } + + /** + * Convenience utility to compare two arrays of doubles. + * @param len the length to compare. + * The start indices and start+len must be valid. + * Ought to be in System + */ + public final static boolean arrayRegionMatches(double[] source, int sourceStart, + double[] target, int targetStart, + int len) + { + int sourceEnd = sourceStart + len; + int delta = targetStart - sourceStart; + for (int i = sourceStart; i < sourceEnd; i++) { + if (source[i] != target[i + delta]) + return false; + } + return true; + } + public final static boolean arrayRegionMatches(byte[] source, int sourceStart, + byte[] target, int targetStart, int len){ + int sourceEnd = sourceStart + len; + int delta = targetStart - sourceStart; + for (int i = sourceStart; i < sourceEnd; i++) { + if (source[i] != target[i + delta]) + return false; + } + return true; + } + + /** + * Convenience utility. Does null checks on objects, then calls equals. + */ + public final static boolean objectEquals(Object a, Object b) { + return a == null ? + b == null ? true : false : + b == null ? false : a.equals(b); + } + + /** + * Convenience utility. Does null checks on objects, then calls compare. + */ + public static > int checkCompare(T a, T b) { + return a == null ? + b == null ? 0 : -1 : + b == null ? 1 : a.compareTo(b); + } + + /** + * Convenience utility. Does null checks on object, then calls hashCode. + */ + public static int checkHash(Object a) { + return a == null ? 0 : a.hashCode(); + } + + /** + * The ESCAPE character is used during run-length encoding. It signals + * a run of identical chars. 
+ */ + private static final char ESCAPE = '\uA5A5'; + + /** + * The ESCAPE_BYTE character is used during run-length encoding. It signals + * a run of identical bytes. + */ + static final byte ESCAPE_BYTE = (byte)0xA5; + + /** + * Construct a string representing an int array. Use run-length encoding. + * A character represents itself, unless it is the ESCAPE character. Then + * the following notations are possible: + * ESCAPE ESCAPE ESCAPE literal + * ESCAPE n c n instances of character c + * Since an encoded run occupies 3 characters, we only encode runs of 4 or + * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. + * If we encounter a run where n == ESCAPE, we represent this as: + * c ESCAPE n-1 c + * The ESCAPE value is chosen so as not to collide with commonly + * seen values. + */ + static public final String arrayToRLEString(int[] a) { + StringBuilder buffer = new StringBuilder(); + + appendInt(buffer, a.length); + int runValue = a[0]; + int runLength = 1; + for (int i=1; i 0 and n != ESCAPE and n <= 0xFFFF. + * If we encounter a run where n == ESCAPE, we represent this as: + * c ESCAPE n-1 c + * The ESCAPE value is chosen so as not to collide with commonly + * seen values. + */ + static public final String arrayToRLEString(short[] a) { + StringBuilder buffer = new StringBuilder(); + // for (int i=0; i> 16)); + buffer.append((char) a.length); + short runValue = a[0]; + int runLength = 1; + for (int i=1; i 0 and n != ESCAPE and n <= 0xFFFF. + * If we encounter a run where n == ESCAPE, we represent this as: + * c ESCAPE n-1 c + * The ESCAPE value is chosen so as not to collide with commonly + * seen values. + */ + static public final String arrayToRLEString(char[] a) { + StringBuilder buffer = new StringBuilder(); + buffer.append((char) (a.length >> 16)); + buffer.append((char) a.length); + char runValue = a[0]; + int runLength = 1; + for (int i=1; i 0 and n != ESCAPE_BYTE and n <= 0xFF. 
+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as: + * b ESCAPE_BYTE n-1 b + * The ESCAPE_BYTE value is chosen so as not to collide with commonly + * seen values. + */ + static public final String arrayToRLEString(byte[] a) { + StringBuilder buffer = new StringBuilder(); + buffer.append((char) (a.length >> 16)); + buffer.append((char) a.length); + byte runValue = a[0]; + int runLength = 1; + byte[] state = new byte[2]; + for (int i=1; i 0 && <= 0xFFFF. + */ + private static final void encodeRun(T buffer, int value, int length) { + if (length < 4) { + for (int j=0; j void appendInt(T buffer, int value) { + try { + buffer.append((char)(value >>> 16)); + buffer.append((char)(value & 0xFFFF)); + } catch (IOException e) { + throw new IllegalIcuArgumentException(e); + } + } + + /** + * Encode a run, possibly a degenerate run (of < 4 values). + * @param length The length of the run; must be > 0 && <= 0xFFFF. + */ + private static final void encodeRun(T buffer, short value, int length) { + try { + if (length < 4) { + for (int j=0; j 0 && <= 0xFF. + */ + private static final void encodeRun(T buffer, byte value, int length, + byte[] state) { + if (length < 4) { + for (int j=0; j void appendEncodedByte(T buffer, byte value, + byte[] state) { + try { + if (state[0] != 0) { + char c = (char) ((state[1] << 8) | (((int) value) & 0xFF)); + buffer.append(c); + state[0] = 0; + } + else { + state[0] = 1; + state[1] = value; + } + } catch (IOException e) { + throw new IllegalIcuArgumentException(e); + } + } + + /** + * Construct an array of ints from a run-length encoded string. 
+ */ + static public final int[] RLEStringToIntArray(String s) { + int length = getInt(s, 0); + int[] array = new int[length]; + int ai = 0, i = 1; + + int maxI = s.length() / 2; + while (ai < length && i < maxI) { + int c = getInt(s, i++); + + if (c == ESCAPE) { + c = getInt(s, i++); + if (c == ESCAPE) { + array[ai++] = c; + } else { + int runLength = c; + int runValue = getInt(s, i++); + for (int j=0; j> 8); + nextChar = false; + } + else { + b = (byte) (c & 0xFF); + nextChar = true; + } + + // This part of the loop is a tiny state machine which handles + // the parsing of the run-length encoding. This would be simpler + // if we could look ahead, but we can't, so we use 'node' to + // move between three nodes in the state machine. + switch (node) { + case 0: + // Normal idle node + if (b == ESCAPE_BYTE) { + node = 1; + } + else { + array[ai++] = b; + } + break; + case 1: + // We have seen one ESCAPE_BYTE; we expect either a second + // one, or a run length and value. + if (b == ESCAPE_BYTE) { + array[ai++] = ESCAPE_BYTE; + node = 0; + } + else { + runLength = b; + // Interpret signed byte as unsigned + if (runLength < 0) runLength += 0x100; + node = 2; + } + break; + case 2: + // We have seen an ESCAPE_BYTE and length byte. We interpret + // the next byte as the value to be repeated. 
+ for (int j=0; j 0) buffer.append('+').append(LINE_SEPARATOR); + buffer.append(" \""); + int count = 11; + while (i> 6]); // HEX_DIGIT works for octal + buffer.append(HEX_DIGIT[(c & 0070) >> 3]); + buffer.append(HEX_DIGIT[(c & 0007)]); + count += 4; + } + } + else if (c <= '\u007E') { + buffer.append(c); + count += 1; + } + else { + buffer.append("\\u"); + buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); + buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); + buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); + buffer.append(HEX_DIGIT[(c & 0x000F)]); + count += 6; + } + } + buffer.append('"'); + } + return buffer.toString(); + } + + static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7', + '8','9','A','B','C','D','E','F'}; + + /** + * Format a String for representation in a source file. Like + * formatForSource but does not do line breaking. + */ + static public final String format1ForSource(String s) { + StringBuilder buffer = new StringBuilder(); + buffer.append("\""); + for (int i=0; i> 6]); // HEX_DIGIT works for octal + buffer.append(HEX_DIGIT[(c & 0070) >> 3]); + buffer.append(HEX_DIGIT[(c & 0007)]); + } + } + else if (c <= '\u007E') { + buffer.append(c); + } + else { + buffer.append("\\u"); + buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]); + buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]); + buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]); + buffer.append(HEX_DIGIT[(c & 0x000F)]); + } + } + buffer.append('"'); + return buffer.toString(); + } + + /** + * Convert characters outside the range U+0020 to U+007F to + * Unicode escapes, and convert backslash to a double backslash. + */ + public static final String escape(String s) { + StringBuilder buf = new StringBuilder(); + for (int i=0; i= ' ' && c <= 0x007F) { + if (c == '\\') { + buf.append("\\\\"); // That is, "\\" + } else { + buf.append((char)c); + } + } else { + boolean four = c <= 0xFFFF; + buf.append(four ? "\\u" : "\\U"); + buf.append(hex(c, four ? 
4 : 8)); + } + } + return buf.toString(); + } + + /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ + static private final char[] UNESCAPE_MAP = { + /*" 0x22, 0x22 */ + /*' 0x27, 0x27 */ + /*? 0x3F, 0x3F */ + /*\ 0x5C, 0x5C */ + /*a*/ 0x61, 0x07, + /*b*/ 0x62, 0x08, + /*e*/ 0x65, 0x1b, + /*f*/ 0x66, 0x0c, + /*n*/ 0x6E, 0x0a, + /*r*/ 0x72, 0x0d, + /*t*/ 0x74, 0x09, + /*v*/ 0x76, 0x0b + }; + + /** + * Convert an escape to a 32-bit code point value. We attempt + * to parallel the icu4c unescapeAt() function. + * @param offset16 an array containing offset to the character + * after the backslash. Upon return offset16[0] will + * be updated to point after the escape sequence. + * @return character value from 0 to 10FFFF, or -1 on error. + */ + public static int unescapeAt(String s, int[] offset16) { + int c; + int result = 0; + int n = 0; + int minDig = 0; + int maxDig = 0; + int bitsPerDigit = 4; + int dig; + int i; + boolean braces = false; + + /* Check that offset is in range */ + int offset = offset16[0]; + int length = s.length(); + if (offset < 0 || offset >= length) { + return -1; + } + + /* Fetch first UChar after '\\' */ + c = Character.codePointAt(s, offset); + offset += UTF16.getCharCount(c); + + /* Convert hexadecimal and octal escapes */ + switch (c) { + case 'u': + minDig = maxDig = 4; + break; + case 'U': + minDig = maxDig = 8; + break; + case 'x': + minDig = 1; + if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) { + ++offset; + braces = true; + maxDig = 8; + } else { + maxDig = 2; + } + break; + default: + dig = UCharacter.digit(c, 8); + if (dig >= 0) { + minDig = 1; + maxDig = 3; + n = 1; /* Already have first octal digit */ + bitsPerDigit = 3; + result = dig; + } + break; + } + if (minDig != 0) { + while (offset < length && n < maxDig) { + c = UTF16.charAt(s, offset); + dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 
8 : 16); + if (dig < 0) { + break; + } + result = (result << bitsPerDigit) | dig; + offset += UTF16.getCharCount(c); + ++n; + } + if (n < minDig) { + return -1; + } + if (braces) { + if (c != 0x7D /*}*/) { + return -1; + } + ++offset; + } + if (result < 0 || result >= 0x110000) { + return -1; + } + // If an escape sequence specifies a lead surrogate, see + // if there is a trail surrogate after it, either as an + // escape or as a literal. If so, join them up into a + // supplementary. + if (offset < length && + UTF16.isLeadSurrogate((char) result)) { + int ahead = offset+1; + c = s.charAt(offset); // [sic] get 16-bit code unit + if (c == '\\' && ahead < length) { + int o[] = new int[] { ahead }; + c = unescapeAt(s, o); + ahead = o[0]; + } + if (UTF16.isTrailSurrogate((char) c)) { + offset = ahead; + result = UCharacterProperty.getRawSupplementary( + (char) result, (char) c); + } + } + offset16[0] = offset; + return result; + } + + /* Convert C-style escapes in table */ + for (i=0; i + * "0041". + */ + public static String hex(long ch) { + return hex(ch, 4); + } + + /** + * Supplies a zero-padded hex representation of an integer (without 0x) + */ + static public String hex(long i, int places) { + if (i == Long.MIN_VALUE) return "-8000000000000000"; + boolean negative = i < 0; + if (negative) { + i = -i; + } + String result = Long.toString(i, 16).toUpperCase(); + if (result.length() < places) { + result = "0000000000000000".substring(result.length(),places) + result; + } + if (negative) { + return '-' + result; + } + return result; + } + + /** + * Convert a string to comma-separated groups of 4 hex uppercase + * digits. E.g., hex('ab') => "0041,0042". + */ + public static String hex(CharSequence s) { + return hex(s, 4, ",", true, new StringBuilder()).toString(); + } + + /** + * Convert a string to separated groups of hex uppercase + * digits. E.g., hex('ab'...) => "0041,0042". Append the output + * to the given Appendable. 
+ */ + public static T hex(S s, int width, U separator, boolean useCodePoints, T result) { + try { + if (useCodePoints) { + int cp; + for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { + cp = Character.codePointAt(s, i); + if (i != 0) { + result.append(separator); + } + result.append(hex(cp,width)); + } + } else { + for (int i = 0; i < s.length(); ++i) { + if (i != 0) { + result.append(separator); + } + result.append(hex(s.charAt(i),width)); + } + } + return result; + } catch (IOException e) { + throw new IllegalIcuArgumentException(e); + } + } + + + /** + * Convert a string to comma-separated groups of 4 hex uppercase + * digits. E.g., hex('ab') => "0041,0042". + */ + public static String hex(S s, int width, S separator) { + return hex(s, width, separator, true, new StringBuilder()).toString(); + } + + /** + * Split a string into pieces based on the given divider character + * @param s the string to split + * @param divider the character on which to split. Occurrences of + * this character are not included in the output + * @param output an array to receive the substrings between + * instances of divider. It must be large enough on entry to + * accomodate all output. Adjacent instances of the divider + * character will place empty strings into output. Before + * returning, output is padded out with empty strings. + */ + public static void split(String s, char divider, String[] output) { + int last = 0; + int current = 0; + int i; + for (i = 0; i < s.length(); ++i) { + if (s.charAt(i) == divider) { + output[current++] = s.substring(last,i); + last = i+1; + } + } + output[current++] = s.substring(last,i); + while (current < output.length) { + output[current++] = ""; + } + } + + /** + * Split a string into pieces based on the given divider character + * @param s the string to split + * @param divider the character on which to split. 
Occurrences of + * this character are not included in the output + * @return output an array to receive the substrings between + * instances of divider. Adjacent instances of the divider + * character will place empty strings into output. + */ + public static String[] split(String s, char divider) { + int last = 0; + int i; + ArrayList output = new ArrayList(); + for (i = 0; i < s.length(); ++i) { + if (s.charAt(i) == divider) { + output.add(s.substring(last,i)); + last = i+1; + } + } + output.add( s.substring(last,i)); + return output.toArray(new String[output.size()]); + } + + /** + * Look up a given string in a string array. Returns the index at + * which the first occurrence of the string was found in the + * array, or -1 if it was not found. + * @param source the string to search for + * @param target the array of zero or more strings in which to + * look for source + * @return the index of target at which source first occurs, or -1 + * if not found + */ + public static int lookup(String source, String[] target) { + for (int i = 0; i < target.length; ++i) { + if (source.equals(target[i])) return i; + } + return -1; + } + + /** + * Skip over a sequence of zero or more white space characters + * at pos. Return the index of the first non-white-space character + * at or after pos, or str.length(), if there is none. + */ + public static int skipWhitespace(String str, int pos) { + while (pos < str.length()) { + int c = Character.codePointAt(str, pos); + if (!UCharacterProperty.isRuleWhiteSpace(c)) { + break; + } + pos += UTF16.getCharCount(c); + } + return pos; + } + + /** + * Skip over a sequence of zero or more white space characters + * at pos[0], advancing it. + */ + public static void skipWhitespace(String str, int[] pos) { + pos[0] = skipWhitespace(str, pos[0]); + } + + /** + * Remove all rule white space from a string. 
+ */ + public static String deleteRuleWhiteSpace(String str) { + StringBuilder buf = new StringBuilder(); + for (int i=0; i= the number of '#' + * signs in 'pattern'. + * @return the position after the last character parsed, or -1 if + * the parse failed + */ + @SuppressWarnings("fallthrough") + public static int parsePattern(String rule, int pos, int limit, + String pattern, int[] parsedInts) { + // TODO Update this to handle surrogates + int[] p = new int[1]; + int intCount = 0; // number of integers parsed + for (int i=0; i= limit) { + return -1; + } + c = rule.charAt(pos++); + if (!UCharacterProperty.isRuleWhiteSpace(c)) { + return -1; + } + // FALL THROUGH to skipWhitespace + case '~': + pos = skipWhitespace(rule, pos); + break; + case '#': + p[0] = pos; + parsedInts[intCount++] = parseInteger(rule, p, limit); + if (p[0] == pos) { + // Syntax error; failed to parse integer + return -1; + } + pos = p[0]; + break; + default: + if (pos >= limit) { + return -1; + } + c = (char) UCharacter.toLowerCase(rule.charAt(pos++)); + if (c != cpat) { + return -1; + } + break; + } + } + return pos; + } + + /** + * Parse a pattern string within the given Replaceable and a parsing + * pattern. Characters are matched literally and case-sensitively + * except for the following special characters: + * + * ~ zero or more uprv_isRuleWhiteSpace chars + * + * If end of pattern is reached with all matches along the way, + * pos is advanced to the first unparsed index and returned. + * Otherwise -1 is returned. + * @param pat pattern that controls parsing + * @param text text to be parsed, starting at index + * @param index offset to first character to parse + * @param limit offset after last character to parse + * @return index after last parsed character, or -1 on parse failure. 
+ */ + public static int parsePattern(String pat, + Replaceable text, + int index, + int limit) { + int ipat = 0; + + // empty pattern matches immediately + if (ipat == pat.length()) { + return index; + } + + int cpat = Character.codePointAt(pat, ipat); + + while (index < limit) { + int c = text.char32At(index); + + // parse \s* + if (cpat == '~') { + if (UCharacterProperty.isRuleWhiteSpace(c)) { + index += UTF16.getCharCount(c); + continue; + } else { + if (++ipat == pat.length()) { + return index; // success; c unparsed + } + // fall thru; process c again with next cpat + } + } + + // parse literal + else if (c == cpat) { + int n = UTF16.getCharCount(c); + index += n; + ipat += n; + if (ipat == pat.length()) { + return index; // success; c parsed + } + // fall thru; get next cpat + } + + // match failure of literal + else { + return -1; + } + + cpat = UTF16.charAt(pat, ipat); + } + + return -1; // text ended before end of pat + } + + /** + * Parse an integer at pos, either of the form \d+ or of the form + * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, + * or octal format. + * @param pos INPUT-OUTPUT parameter. On input, the first + * character to parse. On output, the character after the last + * parsed character. + */ + public static int parseInteger(String rule, int[] pos, int limit) { + int count = 0; + int value = 0; + int p = pos[0]; + int radix = 10; + + if (rule.regionMatches(true, p, "0x", 0, 2)) { + p += 2; + radix = 16; + } else if (p < limit && rule.charAt(p) == '0') { + p++; + count = 1; + radix = 8; + } + + while (p < limit) { + int d = UCharacter.digit(rule.charAt(p++), radix); + if (d < 0) { + --p; + break; + } + ++count; + int v = (value * radix) + d; + if (v <= value) { + // If there are too many input digits, at some point + // the value will go negative, e.g., if we have seen + // "0x8000000" already and there is another '0', when + // we parse the next 0 the value will go negative. 
+ return 0; + } + value = v; + } + if (count > 0) { + pos[0] = p; + } + return value; + } + + /** + * Parse a Unicode identifier from the given string at the given + * position. Return the identifier, or null if there is no + * identifier. + * @param str the string to parse + * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the + * first character to examine. It must be less than str.length(), + * and it must not point to a whitespace character. That is, must + * have pos[0] < str.length() and + * !UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0])). On + * OUTPUT, the position after the last parsed character. + * @return the Unicode identifier, or null if there is no valid + * identifier at pos[0]. + */ + public static String parseUnicodeIdentifier(String str, int[] pos) { + // assert(pos[0] < str.length()); + // assert(!UCharacterProperty.isRuleWhiteSpace(UTF16.charAt(str, pos[0]))); + StringBuilder buf = new StringBuilder(); + int p = pos[0]; + while (p < str.length()) { + int ch = Character.codePointAt(str, p); + if (buf.length() == 0) { + if (UCharacter.isUnicodeIdentifierStart(ch)) { + buf.appendCodePoint(ch); + } else { + return null; + } + } else { + if (UCharacter.isUnicodeIdentifierPart(ch)) { + buf.appendCodePoint(ch); + } else { + break; + } + } + p += UTF16.getCharCount(ch); + } + pos[0] = p; + return buf.toString(); + } + + static final char DIGITS[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z' + }; + + /** + * Append the digits of a positive integer to the given + * Appendable in the given radix. This is + * done recursively since it is easiest to generate the low- + * order digit first, but it must be appended last. 
+ * + * @param result is the Appendable to append to + * @param n is the positive integer + * @param radix is the radix, from 2 to 36 inclusive + * @param minDigits is the minimum number of digits to append. + */ + private static void recursiveAppendNumber(T result, int n, + int radix, int minDigits) + { + try { + int digit = n % radix; + + if (n >= radix || minDigits > 1) { + recursiveAppendNumber(result, n / radix, radix, minDigits - 1); + } + result.append(DIGITS[digit]); + } catch (IOException e) { + throw new IllegalIcuArgumentException(e); + } + } + + /** + * Append a number to the given Appendable in the given radix. + * Standard digits '0'-'9' are used and letters 'A'-'Z' for + * radices 11 through 36. + * @param result the digits of the number are appended here + * @param n the number to be converted to digits; may be negative. + * If negative, a '-' is prepended to the digits. + * @param radix a radix from 2 to 36 inclusive. + * @param minDigits the minimum number of digits, not including + * any '-', to produce. Values less than 2 have no effect. One + * digit is always emitted regardless of this parameter. + * @return a reference to result + */ + public static T appendNumber(T result, int n, + int radix, int minDigits) + { + try { + if (radix < 2 || radix > 36) { + throw new IllegalArgumentException("Illegal radix " + radix); + } + + + int abs = n; + + if (n < 0) { + abs = -n; + result.append("-"); + } + + recursiveAppendNumber(result, abs, radix, minDigits); + + return result; + } catch (IOException e) { + throw new IllegalIcuArgumentException(e); + } + + } + + /** + * Parse an unsigned 31-bit integer at the given offset. Use + * UCharacter.digit() to parse individual characters into digits. + * @param text the text to be parsed + * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the + * offset within text at which to start parsing; it should point + * to a valid digit. On exit, pos[0] is the offset after the last + * parsed character. 
If the parse failed, it will be unchanged on + * exit. Must be >= 0 on entry. + * @param radix the radix in which to parse; must be >= 2 and <= + * 36. + * @return a non-negative parsed number, or -1 upon parse failure. + * Parse fails if there are no digits, that is, if pos[0] does not + * point to a valid digit on entry, or if the number to be parsed + * does not fit into a 31-bit unsigned integer. + */ + public static int parseNumber(String text, int[] pos, int radix) { + // assert(pos[0] >= 0); + // assert(radix >= 2); + // assert(radix <= 36); + int n = 0; + int p = pos[0]; + while (p < text.length()) { + int ch = Character.codePointAt(text, p); + int d = UCharacter.digit(ch, radix); + if (d < 0) { + break; + } + n = radix*n + d; + // ASSUME that when a 32-bit integer overflows it becomes + // negative. E.g., 214748364 * 10 + 8 => negative value. + if (n < 0) { + return -1; + } + ++p; + } + if (p == pos[0]) { + return -1; + } + pos[0] = p; + return n; + } + + /** + * Return true if the character is NOT printable ASCII. The tab, + * newline and linefeed characters are considered unprintable. + */ + public static boolean isUnprintable(int c) { + //0x20 = 32 and 0x7E = 126 + return !(c >= 0x20 && c <= 0x7E); + } + + /** + * Escape unprintable characters using uxxxx notation + * for U+0000 to U+FFFF and Uxxxxxxxx for U+10000 and + * above. If the character is printable ASCII, then do nothing + * and return FALSE. Otherwise, append the escaped notation and + * return TRUE. 
+ */ + public static boolean escapeUnprintable(T result, int c) { + try { + if (isUnprintable(c)) { + result.append('\\'); + if ((c & ~0xFFFF) != 0) { + result.append('U'); + result.append(DIGITS[0xF&(c>>28)]); + result.append(DIGITS[0xF&(c>>24)]); + result.append(DIGITS[0xF&(c>>20)]); + result.append(DIGITS[0xF&(c>>16)]); + } else { + result.append('u'); + } + result.append(DIGITS[0xF&(c>>12)]); + result.append(DIGITS[0xF&(c>>8)]); + result.append(DIGITS[0xF&(c>>4)]); + result.append(DIGITS[0xF&c]); + return true; + } + return false; + } catch (IOException e) { + throw new IllegalIcuArgumentException(e); + } + } + + /** + * Returns the index of the first character in a set, ignoring quoted text. + * For example, in the string "abc'hide'h", the 'h' in "hide" will not be + * found by a search for "h". Unlike String.indexOf(), this method searches + * not for a single character, but for any character of the string + * setOfChars. + * @param text text to be searched + * @param start the beginning index, inclusive; 0 <= start + * <= limit. + * @param limit the ending index, exclusive; start <= limit + * <= text.length(). + * @param setOfChars string with one or more distinct characters + * @return Offset of the first character in setOfChars + * found, or -1 if not found. + * @see String#indexOf + */ + public static int quotedIndexOf(String text, int start, int limit, + String setOfChars) { + for (int i=start; i= 0) { + return i; + } + } + return -1; + } + + /** + * Append a character to a rule that is being built up. To flush + * the quoteBuf to rule, make one final call with isLiteral == true. + * If there is no final character, pass in (int)-1 as c. + * @param rule the string to append the character to + * @param c the character to append, or (int)-1 if none. + * @param isLiteral if true, then the given character should not be + * quoted or escaped. 
Usually this means it is a syntactic element + * such as > or $ + * @param escapeUnprintable if true, then unprintable characters + * should be escaped using escapeUnprintable(). These escapes will + * appear outside of quotes. + * @param quoteBuf a buffer which is used to build up quoted + * substrings. The caller should initially supply an empty buffer, + * and thereafter should not modify the buffer. The buffer should be + * cleared out by, at the end, calling this method with a literal + * character (which may be -1). + */ + public static void appendToRule(StringBuffer rule, + int c, + boolean isLiteral, + boolean escapeUnprintable, + StringBuffer quoteBuf) { + // If we are escaping unprintables, then escape them outside + // quotes. \\u and \\U are not recognized within quotes. The same + // logic applies to literals, but literals are never escaped. + if (isLiteral || + (escapeUnprintable && Utility.isUnprintable(c))) { + if (quoteBuf.length() > 0) { + // We prefer backslash APOSTROPHE to double APOSTROPHE + // (more readable, less similar to ") so if there are + // double APOSTROPHEs at the ends, we pull them outside + // of the quote. + + // If the first thing in the quoteBuf is APOSTROPHE + // (doubled) then pull it out. + while (quoteBuf.length() >= 2 && + quoteBuf.charAt(0) == APOSTROPHE && + quoteBuf.charAt(1) == APOSTROPHE) { + rule.append(BACKSLASH).append(APOSTROPHE); + quoteBuf.delete(0, 2); + } + // If the last thing in the quoteBuf is APOSTROPHE + // (doubled) then remove and count it and add it after. 
+ int trailingCount = 0; + while (quoteBuf.length() >= 2 && + quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE && + quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) { + quoteBuf.setLength(quoteBuf.length()-2); + ++trailingCount; + } + if (quoteBuf.length() > 0) { + rule.append(APOSTROPHE); + rule.append(quoteBuf); + rule.append(APOSTROPHE); + quoteBuf.setLength(0); + } + while (trailingCount-- > 0) { + rule.append(BACKSLASH).append(APOSTROPHE); + } + } + if (c != -1) { + /* Since spaces are ignored during parsing, they are + * emitted only for readability. We emit one here + * only if there isn't already one at the end of the + * rule. + */ + if (c == ' ') { + int len = rule.length(); + if (len > 0 && rule.charAt(len-1) != ' ') { + rule.append(' '); + } + } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) { + rule.appendCodePoint(c); + } + } + } + + // Escape ' and '\' and don't begin a quote just for them + else if (quoteBuf.length() == 0 && + (c == APOSTROPHE || c == BACKSLASH)) { + rule.append(BACKSLASH).append((char)c); + } + + // Specials (printable ascii that isn't [0-9a-zA-Z]) and + // whitespace need quoting. Also append stuff to quotes if we are + // building up a quoted substring already. + else if (quoteBuf.length() > 0 || + (c >= 0x0021 && c <= 0x007E && + !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || + (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || + (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || + UCharacterProperty.isRuleWhiteSpace(c)) { + quoteBuf.appendCodePoint(c); + // Double ' within a quote + if (c == APOSTROPHE) { + quoteBuf.append((char)c); + } + } + + // Otherwise just append + else { + rule.appendCodePoint(c); + } + } + + /** + * Append the given string to the rule. Calls the single-character + * version of appendToRule for each character. 
+ */ + public static void appendToRule(StringBuffer rule, + String text, + boolean isLiteral, + boolean escapeUnprintable, + StringBuffer quoteBuf) { + for (int i=0; i target) { + return 1; + } + return 0; + } + + /** + * Find the highest bit in a positive integer. This is done + * by doing a binary search through the bits. + * + * @param n is the integer + * + * @return the bit number of the highest bit, with 0 being + * the low order bit, or -1 if n is not positive + */ + public static final byte highBit(int n) + { + if (n <= 0) { + return -1; + } + + byte bit = 0; + + if (n >= 1 << 16) { + n >>= 16; + bit += 16; + } + + if (n >= 1 << 8) { + n >>= 8; + bit += 8; + } + + if (n >= 1 << 4) { + n >>= 4; + bit += 4; + } + + if (n >= 1 << 2) { + n >>= 2; + bit += 2; + } + + if (n >= 1 << 1) { + n >>= 1; + bit += 1; + } + + return bit; + } + /** + * Utility method to take a int[] containing codepoints and return + * a string representation with code units. + */ + public static String valueOf(int[]source){ + // TODO: Investigate why this method is not on UTF16 class + StringBuilder result = new StringBuilder(source.length); + for(int i=0; iTimeZone.getTimeZone() to construct the + * corresponding TimeZone object. + * @param country a two-letter ISO 3166 country code, or null + * to return zones not associated with any country + * @return an array of IDs for system TimeZones in the given + * country. If there are none, return a zero-length array. 
+ */ + public static synchronized String[] getAvailableIDs(String country) { + String[] ids = null; + + try{ + UResourceBundle top = (ICUResourceBundle)ICUResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, ZONEINFORESNAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle regions = top.get(kREGIONS); + + // Create a list of zones associated with the country + List countryZones = new ArrayList(); + + for (int i = 0; i < regions.getSize(); i++) { + if (country.equals(regions.getString(i))) { + String zoneName = getZoneID(i); + countryZones.add(zoneName); + } + } + if (countryZones.size() > 0) { + ids = countryZones.toArray(new String[countryZones.size()]); + } + } catch (MissingResourceException ex){ + //throw away the exception + } + + if (ids == null) { + ids = new String[0]; + } + return ids; + } + + public static synchronized String[] getAvailableIDs() { + String[] ids = getZoneIDs(); + if (ids == null) { + return new String[0]; + } + return ids.clone(); + } + + public static synchronized String[] getAvailableIDs(int offset){ + String[] ids = null; + String[] all = getZoneIDs(); + if (all != null) { + ArrayList zones = new ArrayList(); + for (String zid : all) { + // This is VERY inefficient. + TimeZone z = TimeZone.getTimeZone(zid); + // Make sure we get back the ID we wanted (if the ID is + // invalid we get back GMT). + if (z != null && z.getID().equals(zid) && z.getRawOffset() == offset) { + zones.add(zid); + } + } + if (zones.size() > 0) { + ids = zones.toArray(new String[zones.size()]); + } + } + if (ids == null) { + ids = new String[0]; + } + return ids; + } + + /** + * Returns the number of IDs in the equivalency group that + * includes the given ID. An equivalency group contains zones + * that behave identically to the given zone. + * + *

    If there are no equivalent zones, then this method returns + * 0. This means either the given ID is not a valid zone, or it + * is and there are no other equivalent zones. + * @param id a system time zone ID + * @return the number of zones in the equivalency group containing + * 'id', or zero if there are no equivalent zones. + * @see #getEquivalentID + */ + public static synchronized int countEquivalentIDs(String id) { + int count = 0; + try { + UResourceBundle res = openOlsonResource(null, id); + UResourceBundle links = res.get("links"); + int[] v = links.getIntVector(); + count = v.length; + } catch (MissingResourceException ex) { + // throw away + } + return count; + } + + /** + * Returns an ID in the equivalency group that includes the given + * ID. An equivalency group contains zones that behave + * identically to the given zone. + * + *

    The given index must be in the range 0..n-1, where n is the + * value returned by countEquivalentIDs(id). For + * some value of 'index', the returned value will be equal to the + * given id. If the given id is not a valid system time zone, or + * if 'index' is out of range, then returns an empty string. + * @param id a system time zone ID + * @param index a value from 0 to n-1, where n is the value + * returned by countEquivalentIDs(id) + * @return the ID of the index-th zone in the equivalency group + * containing 'id', or an empty string if 'id' is not a valid + * system ID or 'index' is out of range + * @see #countEquivalentIDs + */ + public static synchronized String getEquivalentID(String id, int index) { + String result = ""; + int zoneIdx = -1; + + if (index >= 0) { + try { + UResourceBundle res = openOlsonResource(null, id); + UResourceBundle links = res.get("links"); + int[] zones = links.getIntVector(); + if (index < zones.length) { + zoneIdx = zones[index]; + } + } catch (MissingResourceException ex) { + // throw away + zoneIdx = -1; + } + } + if (zoneIdx >= 0) { + String tmp = getZoneID(zoneIdx); + if (tmp != null) { + result = tmp; + } + } + return result; + } + + private static String[] ZONEIDS = null; + + /* + * ICU frequently refers the zone ID array in zoneinfo resource + */ + private static synchronized String[] getZoneIDs() { + if (ZONEIDS == null) { + try { + UResourceBundle top = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, ZONEINFORESNAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle names = top.get(kNAMES); + ZONEIDS = names.getStringArray(); + } catch (MissingResourceException ex) { + // throw away.. 
+ } + } + if (ZONEIDS == null) { + ZONEIDS = new String[0]; + } + return ZONEIDS; + } + + private static String getZoneID(int idx) { + if (idx >= 0) { + String[] ids = getZoneIDs(); + if (idx < ids.length) { + return ids[idx]; + } + } + return null; + } + + private static int getZoneIndex(String zid) { + int zoneIdx = -1; + + String[] all = getZoneIDs(); + if (all.length > 0) { + int start = 0; + int limit = all.length; + + int lastMid = Integer.MAX_VALUE; + for (;;) { + int mid = (start + limit) / 2; + if (lastMid == mid) { /* Have we moved? */ + break; /* We haven't moved, and it wasn't found. */ + } + lastMid = mid; + int r = zid.compareTo(all[mid]); + if (r == 0) { + zoneIdx = mid; + break; + } else if(r < 0) { + limit = mid; + } else { + start = mid; + } + } + } + + return zoneIdx; + } + + + private static ICUCache CANONICAL_ID_CACHE = new SimpleCache(); + private static ICUCache REGION_CACHE = new SimpleCache(); + private static ICUCache SINGLE_COUNTRY_CACHE = new SimpleCache(); + + /** + * Return the canonical id for this system tzid, which might be the id itself. + * If the given system tzid is not know, return null. 
+ */ + public static String getCanonicalSystemID(String tzid) { + String canonical = CANONICAL_ID_CACHE.get(tzid); + if (canonical == null) { + int zoneIdx = getZoneIndex(tzid); + if (zoneIdx >= 0) { + try { + UResourceBundle top = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, ZONEINFORESNAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle zones = top.get(kZONES); + UResourceBundle zone = zones.get(zoneIdx); + if (zone.getType() == UResourceBundle.INT) { + // resolve link + String tmp = getZoneID(zone.getInt()); + if (tmp != null) { + canonical = tmp; + } + } else { + canonical = tzid; + } + // check canonical mapping in CLDR + UResourceBundle keyTypeData = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, "keyTypeData", ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle typeAlias = keyTypeData.get("typeAlias"); + UResourceBundle aliasesForKey = typeAlias.get("timezone"); + String cldrCanonical = aliasesForKey.getString(canonical.replace('/', ':')); + if (cldrCanonical != null) { + canonical = cldrCanonical; + } + } catch (MissingResourceException e) { + // fall through + } + } + if (canonical != null) { + CANONICAL_ID_CACHE.put(tzid, canonical); + } + } + return canonical; + } + + /** + * Return the canonical country code for this tzid. If we have none, or if the time zone + * is not associated with a country, return null. 
+ */ + public static String getCanonicalCountry(String tzid) { + String region = REGION_CACHE.get(tzid); + if (region == null) { + int zoneIdx = getZoneIndex(tzid); + if (zoneIdx >= 0) { + try { + UResourceBundle top = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, ZONEINFORESNAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle regions = top.get(kREGIONS); + if (zoneIdx < regions.getSize()) { + region = regions.getString(zoneIdx); + } + } catch (MissingResourceException e) { + // throw away + } + if (region != null) { + REGION_CACHE.put(tzid, region); + } + } + } + if (region.equals("001")) { + return null; + } + return region; + } + + /** + * Return the country code if this is a 'single' time zone that can fallback to just + * the country, otherwise return null. (Note, one must also check the locale data + * to see that there is a localization for the country in order to implement + * tr#35 appendix J step 5.) + */ + public static String getSingleCountry(String tzid) { + String country = getCanonicalCountry(tzid); + if (country != null) { + Boolean isSingle = SINGLE_COUNTRY_CACHE.get(tzid); + if (isSingle == null) { + // This is not so efficient + boolean isSingleCountryZone = true; + String[] ids = TimeZone.getAvailableIDs(country); + if (ids.length > 1) { + // Check if there are multiple canonical zones included + String canonical = getCanonicalSystemID(ids[0]); + for (int i = 1; i < ids.length; i++) { + if (!canonical.equals(getCanonicalSystemID(ids[i]))) { + isSingleCountryZone = false; + break; + } + } + } + isSingle = Boolean.valueOf(isSingleCountryZone); + SINGLE_COUNTRY_CACHE.put(tzid, isSingle); + } + if (!isSingle) { + country = null; + } + } + return country; + } + + /** + * Returns a time zone location(region) format string defined by UTR#35. + * e.g. 
"Italy Time", "United States (Los Angeles) Time" + */ + public static String getLocationFormat(String tzid, String city, ULocale locale) { + String country_code = getCanonicalCountry(tzid); + if (country_code == null) { + // no location is associated + return null; + } + + String country = null; + try { + ICUResourceBundle rb = + (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_REGION_BASE_NAME, locale); +// +// TODO: There is a design bug in UResourceBundle and getLoadingStatus() does not work well. +// +// if (rb.getLoadingStatus() != ICUResourceBundle.FROM_ROOT && rb.getLoadingStatus() != ICUResourceBundle.FROM_DEFAULT) { +// country = ULocale.getDisplayCountry("xx_" + country_code, locale); +// } +// START WORKAROUND + ULocale rbloc = rb.getULocale(); + if (!rbloc.equals(ULocale.ROOT) && rbloc.getLanguage().equals(locale.getLanguage())) { + country = ULocale.getDisplayCountry("xx_" + country_code, locale); + } +// END WORKAROUND + } catch (MissingResourceException e) { + // fall through + } + if (country == null || country.length() == 0) { + country = country_code; + } + + // This is not behavior specified in tr35, but behavior added by Mark. + // TR35 says to display the country _only_ if there is a localization. 
+ if (getSingleCountry(tzid) != null) { // single country + String regPat = getTZLocalizationInfo(locale, REGION_FORMAT); + if (regPat == null) { + regPat = DEF_REGION_FORMAT; + } + MessageFormat mf = new MessageFormat(regPat); + return mf.format(new Object[] { country }); + } + + if (city == null) { + city = tzid.substring(tzid.lastIndexOf('/')+1).replace('_',' '); + } + + String flbPat = getTZLocalizationInfo(locale, FALLBACK_FORMAT); + if (flbPat == null) { + flbPat = DEF_FALLBACK_FORMAT; + } + MessageFormat mf = new MessageFormat(flbPat); + + return mf.format(new Object[] { city, country }); + } + + private static final String DEF_REGION_FORMAT = "{0}"; + private static final String DEF_FALLBACK_FORMAT = "{1} ({0})"; + + public static final String + HOUR = "hourFormat", + GMT = "gmtFormat", + REGION_FORMAT = "regionFormat", + FALLBACK_FORMAT = "fallbackFormat", + ZONE_STRINGS = "zoneStrings", + FORWARD_SLASH = "/"; + + /** + * Get the index'd tz datum for this locale. Index must be one of the + * values PREFIX, HOUR, GMT, REGION_FORMAT, FALLBACK_FORMAT + */ + public static String getTZLocalizationInfo(ULocale locale, String format) { + String result = null; + try { + ICUResourceBundle bundle = (ICUResourceBundle) ICUResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_ZONE_BASE_NAME, locale); + result = bundle.getStringWithFallback(ZONE_STRINGS+FORWARD_SLASH+format); + } catch (MissingResourceException e) { + result = null; + } + return result; + } + +// private static Set getValidIDs() { +// // Construct list of time zones that are valid, according +// // to the current underlying core JDK. We have to do this +// // at runtime since we don't know what we're running on. +// Set valid = new TreeSet(); +// valid.addAll(Arrays.asList(java.util.TimeZone.getAvailableIDs())); +// return valid; +// } + + + /** + * Given an ID and the top-level resource of the zoneinfo resource, + * open the appropriate resource for the given time zone. 
+ * Dereference links if necessary. + * @param top the top level resource of the zoneinfo resource or null. + * @param id zone id + * @return the corresponding zone resource or null if not found + */ + public static UResourceBundle openOlsonResource(UResourceBundle top, String id) + { + UResourceBundle res = null; + int zoneIdx = getZoneIndex(id); + if (zoneIdx >= 0) { + try { + if (top == null) { + top = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, ZONEINFORESNAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + } + UResourceBundle zones = top.get(kZONES); + UResourceBundle zone = zones.get(zoneIdx); + if (zone.getType() == UResourceBundle.INT) { + // resolve link + zone = zones.get(zone.getInt()); + } + res = zone; + } catch (MissingResourceException e) { + res = null; + } + } + return res; + } + + + private static ICUCache SYSTEM_ZONE_CACHE = new SimpleCache(); + + /** + * Lookup the given name in our system zone table. If found, + * instantiate a new zone of that name and return it. If not + * found, return 0. + */ + public static TimeZone getSystemTimeZone(String id) { + TimeZone z = SYSTEM_ZONE_CACHE.get(id); + if (z == null) { + try{ + UResourceBundle top = UResourceBundle.getBundleInstance( + ICUResourceBundle.ICU_BASE_NAME, ZONEINFORESNAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle res = openOlsonResource(top, id); + z = new OlsonTimeZone(top, res); + z.setID(id); + SYSTEM_ZONE_CACHE.put(id, z); + }catch(Exception ex){ + return null; + } + } + return (TimeZone)z.clone(); + } + + public static TimeZone getGMT(){ + TimeZone z = new SimpleTimeZone(0, kGMT_ID); + z.setID(kGMT_ID); + return z; + } + + // Maximum value of valid custom time zone hour/min + private static final int kMAX_CUSTOM_HOUR = 23; + private static final int kMAX_CUSTOM_MIN = 59; + private static final int kMAX_CUSTOM_SEC = 59; + + /** + * Parse a custom time zone identifier and return a corresponding zone. 
+ * @param id a string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or + * GMT[+-]hh. + * @return a newly created SimpleTimeZone with the given offset and + * no Daylight Savings Time, or null if the id cannot be parsed. + */ + public static TimeZone getCustomTimeZone(String id){ + int[] fields = new int[4]; + if (parseCustomID(id, fields)) { + String zid = formatCustomID(fields[1], fields[2], fields[3], fields[0] < 0); + int offset = fields[0] * ((fields[1] * 60 + fields[2]) * 60 + fields[3]) * 1000; + return new SimpleTimeZone(offset, zid); + } + return null; + } + + /** + * Parse a custom time zone identifier and return the normalized + * custom time zone identifier for the given custom id string. + * @param id a string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or + * GMT[+-]hh. + * @return The normalized custom id string. + */ + public static String getCustomID(String id) { + int[] fields = new int[4]; + if (parseCustomID(id, fields)) { + return formatCustomID(fields[1], fields[2], fields[3], fields[0] < 0); + } + return null; + } + + /* + * Parses the given custom time zone identifier + * @param id id A string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or + * GMT[+-]hh. + * @param fields An array of int (length = 4) to receive the parsed + * offset time fields. The sign is set to fields[0] (-1 or 1), + * hour is set to fields[1], minute is set to fields[2] and second is + * set to fields[3]. + * @return Returns true when the given custom id is valid. 
+ */ + static boolean parseCustomID(String id, int[] fields) { + NumberFormat numberFormat = null; + String idUppercase = id.toUpperCase(); + + if (id != null && id.length() > kGMT_ID.length() && + idUppercase.startsWith(kGMT_ID)) { + ParsePosition pos = new ParsePosition(kGMT_ID.length()); + int sign = 1; + int hour = 0; + int min = 0; + int sec = 0; + + if (id.charAt(pos.getIndex()) == 0x002D /*'-'*/) { + sign = -1; + } else if (id.charAt(pos.getIndex()) != 0x002B /*'+'*/) { + return false; + } + pos.setIndex(pos.getIndex() + 1); + + numberFormat = NumberFormat.getInstance(); + numberFormat.setParseIntegerOnly(true); + + // Look for either hh:mm, hhmm, or hh + int start = pos.getIndex(); + + Number n = numberFormat.parse(id, pos); + if (pos.getIndex() == start) { + return false; + } + hour = n.intValue(); + + if (pos.getIndex() < id.length()){ + if (pos.getIndex() - start > 2 + || id.charAt(pos.getIndex()) != 0x003A /*':'*/) { + return false; + } + // hh:mm + pos.setIndex(pos.getIndex() + 1); + int oldPos = pos.getIndex(); + n = numberFormat.parse(id, pos); + if ((pos.getIndex() - oldPos) != 2) { + // must be 2 digits + return false; + } + min = n.intValue(); + if (pos.getIndex() < id.length()) { + if (id.charAt(pos.getIndex()) != 0x003A /*':'*/) { + return false; + } + // [:ss] + pos.setIndex(pos.getIndex() + 1); + oldPos = pos.getIndex(); + n = numberFormat.parse(id, pos); + if (pos.getIndex() != id.length() + || (pos.getIndex() - oldPos) != 2) { + return false; + } + sec = n.intValue(); + } + } else { + // Supported formats are below - + // + // HHmmss + // Hmmss + // HHmm + // Hmm + // HH + // H + + int length = pos.getIndex() - start; + if (length <= 0 || 6 < length) { + // invalid length + return false; + } + switch (length) { + case 1: + case 2: + // already set to hour + break; + case 3: + case 4: + min = hour % 100; + hour /= 100; + break; + case 5: + case 6: + sec = hour % 100; + min = (hour/100) % 100; + hour /= 10000; + break; + } + } + + if (hour <= 
kMAX_CUSTOM_HOUR && min <= kMAX_CUSTOM_MIN && sec <= kMAX_CUSTOM_SEC) { + if (fields != null) { + if (fields.length >= 1) { + fields[0] = sign; + } + if (fields.length >= 2) { + fields[1] = hour; + } + if (fields.length >= 3) { + fields[2] = min; + } + if (fields.length >= 4) { + fields[3] = sec; + } + } + return true; + } + } + return false; + } + + /** + * Creates a custom zone for the offset + * @param offset GMT offset in milliseconds + * @return A custom TimeZone for the offset with normalized time zone id + */ + public static TimeZone getCustomTimeZone(int offset) { + boolean negative = false; + int tmp = offset; + if (offset < 0) { + negative = true; + tmp = -offset; + } + + int hour, min, sec, millis; + + millis = tmp % 1000; + if (ASSERT) { + Assert.assrt("millis!=0", millis != 0); + } + tmp /= 1000; + sec = tmp % 60; + tmp /= 60; + min = tmp % 60; + hour = tmp / 60; + + // Note: No millisecond part included in TZID for now + String zid = formatCustomID(hour, min, sec, negative); + + return new SimpleTimeZone(offset, zid); + } + + /* + * Returns the normalized custom TimeZone ID + */ + static String formatCustomID(int hour, int min, int sec, boolean negative) { + // Create normalized time zone ID - GMT[+|-]hh:mm[:ss] + StringBuilder zid = new StringBuilder(kCUSTOM_TZ_PREFIX); + if (hour != 0 || min != 0) { + if(negative) { + zid.append('-'); + } else { + zid.append('+'); + } + // Always use US-ASCII digits + if (hour < 10) { + zid.append('0'); + } + zid.append(hour); + zid.append(':'); + if (min < 10) { + zid.append('0'); + } + zid.append(min); + + if (sec != 0) { + // Optional second field + zid.append(':'); + if (sec < 10) { + zid.append('0'); + } + zid.append(sec); + } + } + return zid.toString(); + } + + /** + * Returns a CLDR metazone ID for the given Olson tzid and time. 
+ */ + public static String getMetazoneID(String olsonID, long date) { + String mzid = null; + List mappings = getOlsonToMatazones(olsonID); + if (mappings != null) { + for (int i = 0; i < mappings.size(); i++) { + OlsonToMetaMappingEntry mzm = mappings.get(i); + if (date >= mzm.from && date < mzm.to) { + mzid = mzm.mzid; + break; + } + } + } + return mzid; + } + + private static ICUCache> OLSON_TO_META_CACHE = + new SimpleCache>(); + + static class OlsonToMetaMappingEntry { + String mzid; + long from; + long to; + } + + static List getOlsonToMatazones(String tzid) { + List mzMappings = OLSON_TO_META_CACHE.get(tzid); + if (mzMappings == null) { + try { + UResourceBundle bundle = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, "metaZones"); + UResourceBundle metazoneInfoBundle = bundle.get("metazoneInfo"); + + String canonicalID = TimeZone.getCanonicalID(tzid); + if (canonicalID == null) { + return null; + } + String tzkey = canonicalID.replace('/', ':'); + UResourceBundle zoneBundle = metazoneInfoBundle.get(tzkey); + + mzMappings = new LinkedList(); + + for (int idx = 0; idx < zoneBundle.getSize(); idx++) { + UResourceBundle mz = zoneBundle.get(idx); + String mzid = mz.getString(0); + String from = "1970-01-01 00:00"; + String to = "9999-12-31 23:59"; + if (mz.getSize() == 3) { + from = mz.getString(1); + to = mz.getString(2); + } + OlsonToMetaMappingEntry mzmap = new OlsonToMetaMappingEntry(); + mzmap.mzid = mzid.intern(); + try { + mzmap.from = parseDate(from); + mzmap.to = parseDate(to); + } catch (IllegalArgumentException baddate) { + // skip this + continue; + } + // Add this mapping to the list + mzMappings.add(mzmap); + } + + } catch (MissingResourceException mre) { + // fall through + } + if (mzMappings != null) { + OLSON_TO_META_CACHE.put(tzid, mzMappings); + } + } + return mzMappings; + } + + /* + * Convert a date string used by metazone mappings to long. + * The format used by CLDR metazone mapping is "yyyy-MM-dd HH:mm". 
+ * We do not want to use SimpleDateFormat to parse the metazone + * mapping range strings in createOlsonToMeta, because it might be + * called from SimpleDateFormat initialization code. + */ + static long parseDate (String text) throws IllegalArgumentException { + int year = 0, month = 0, day = 0, hour = 0, min = 0; + int idx; + int n; + + // "yyyy" (0 - 3) + for (idx = 0; idx <= 3; idx++) { + n = text.charAt(idx) - '0'; + if (n >= 0 && n < 10) { + year = 10*year + n; + } else { + throw new IllegalArgumentException("Bad year"); + } + } + // "MM" (5 - 6) + for (idx = 5; idx <= 6; idx++) { + n = text.charAt(idx) - '0'; + if (n >= 0 && n < 10) { + month = 10*month + n; + } else { + throw new IllegalArgumentException("Bad month"); + } + } + // "dd" (8 - 9) + for (idx = 8; idx <= 9; idx++) { + n = text.charAt(idx) - '0'; + if (n >= 0 && n < 10) { + day = 10*day + n; + } else { + throw new IllegalArgumentException("Bad day"); + } + } + // "HH" (11 - 12) + for (idx = 11; idx <= 12; idx++) { + n = text.charAt(idx) - '0'; + if (n >= 0 && n < 10) { + hour = 10*hour + n; + } else { + throw new IllegalArgumentException("Bad hour"); + } + } + // "mm" (14 - 15) + for (idx = 14; idx <= 15; idx++) { + n = text.charAt(idx) - '0'; + if (n >= 0 && n < 10) { + min = 10*min + n; + } else { + throw new IllegalArgumentException("Bad minute"); + } + } + + long date = Grego.fieldsToDay(year, month - 1, day) * Grego.MILLIS_PER_DAY + + hour * Grego.MILLIS_PER_HOUR + min * Grego.MILLIS_PER_MINUTE; + return date; + } + + private static ICUCache> META_TO_OLSON_CACHE = + new SimpleCache>(); + + /** + * Returns an Olson ID for the ginve metazone and region + */ + public static String getZoneIdByMetazone(String metazoneID, String region) { + String tzid = null; + + // look up in the cache first + Map zoneMap = META_TO_OLSON_CACHE.get(metazoneID); + if (zoneMap == null) { + try { + // Create zone mappings for the metazone + UResourceBundle bundle = 
UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, "metaZones"); + UResourceBundle mapTimezones = bundle.get("mapTimezones"); + UResourceBundle territoryMap = mapTimezones.get(metazoneID); + zoneMap = new HashMap(); + Set territories = territoryMap.keySet(); + for (String territory : territories) { + String zone = territoryMap.getString(territory); + zoneMap.put(territory, zone); + } + // cache this + META_TO_OLSON_CACHE.put(metazoneID, zoneMap); + } catch (MissingResourceException e) { + // ignore + } + } + + if (zoneMap != null) { + tzid = zoneMap.get(region); + if (tzid == null) { + tzid = zoneMap.get("001"); // use the mapping for world as fallback + } + } + + return tzid; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/ZoneStringFormat.java b/main/classes/core/src/com/ibm/icu/impl/ZoneStringFormat.java new file mode 100644 index 00000000000..65375bd5485 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/ZoneStringFormat.java @@ -0,0 +1,1096 @@ +/* + ******************************************************************************* + * Copyright (C) 2007-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.MissingResourceException; +import java.util.Set; + +import com.ibm.icu.impl.ZoneMeta.OlsonToMetaMappingEntry; +import com.ibm.icu.text.MessageFormat; +import com.ibm.icu.util.BasicTimeZone; +import com.ibm.icu.util.Calendar; +import com.ibm.icu.util.TimeZone; +import com.ibm.icu.util.TimeZoneTransition; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/** + * @author yoshito + * + */ +public class ZoneStringFormat { + /** + * Constructs a ZoneStringFormat by zone strings array. 
+ * The internal structure of zoneStrings is compatible with + * the one used by getZoneStrings/setZoneStrings in DateFormatSymbols. + * + * @param zoneStrings zone strings + */ + public ZoneStringFormat(String[][] zoneStrings) { + tzidToStrings = new HashMap(); + zoneStringsTrie = new TextTrieMap(true); + for (int i = 0; i < zoneStrings.length; i++) { + String tzid = zoneStrings[i][0]; + String[] names = new String[ZSIDX_MAX]; + for (int j = 1; j < zoneStrings[i].length; j++) { + if (zoneStrings[i][j] != null) { + int typeIdx = getNameTypeIndex(j); + if (typeIdx != -1) { + names[typeIdx] = zoneStrings[i][j]; + + // Put the name into the trie + int type = getNameType(typeIdx); + ZoneStringInfo zsinfo = new ZoneStringInfo(tzid, zoneStrings[i][j], type); + zoneStringsTrie.put(zoneStrings[i][j], zsinfo); + } + + } + } + ZoneStrings zstrings = new ZoneStrings(names, true, null); + tzidToStrings.put(tzid, zstrings); + } + isFullyLoaded = true; + } + + /** + * Gets an instance of ZoneStringFormat for the specified locale + * @param locale the locale + * @return An instance of ZoneStringFormat for the locale + */ + public static ZoneStringFormat getInstance(ULocale locale) { + ZoneStringFormat tzf = TZFORMAT_CACHE.get(locale); + if (tzf == null) { + tzf = new ZoneStringFormat(locale); + TZFORMAT_CACHE.put(locale, tzf); + } + return tzf; + } + + public String[][] getZoneStrings() { + return getZoneStrings(System.currentTimeMillis()); + } + + // APIs used by SimpleDateFormat to get a zone string + public String getSpecificLongString(Calendar cal) { + if (cal.get(Calendar.DST_OFFSET) == 0) { + return getString(cal.getTimeZone().getID(), ZSIDX_LONG_STANDARD, cal.getTimeInMillis(), false /* not used */); + } + return getString(cal.getTimeZone().getID(), ZSIDX_LONG_DAYLIGHT, cal.getTimeInMillis(), false /* not used */); + } + + public String getSpecificShortString(Calendar cal, boolean commonlyUsedOnly) { + if (cal.get(Calendar.DST_OFFSET) == 0) { + return 
getString(cal.getTimeZone().getID(), ZSIDX_SHORT_STANDARD, cal.getTimeInMillis(), commonlyUsedOnly); + } + return getString(cal.getTimeZone().getID(), ZSIDX_SHORT_DAYLIGHT, cal.getTimeInMillis(), commonlyUsedOnly); + } + + public String getGenericLongString(Calendar cal) { + return getGenericString(cal, false /* long */, false /* not used */); + } + + public String getGenericShortString(Calendar cal, boolean commonlyUsedOnly) { + return getGenericString(cal, true /* long */, commonlyUsedOnly); + } + + public String getGenericLocationString(Calendar cal) { + return getString(cal.getTimeZone().getID(), ZSIDX_LOCATION, cal.getTimeInMillis(), false /* not used */); + } + + // APIs used by SimpleDateFormat to lookup a zone string + public static class ZoneStringInfo { + private String id; + private String str; + private int type; + + private ZoneStringInfo(String id, String str, int type) { + this.id = id; + this.str = str; + this.type = type; + } + + public String getID() { + return id; + } + + public String getString() { + return str; + } + + public boolean isStandard() { + if ((type & STANDARD_LONG) != 0 || (type & STANDARD_SHORT) != 0) { + return true; + } + return false; + } + + public boolean isDaylight() { + if ((type & DAYLIGHT_LONG) != 0 || (type & DAYLIGHT_SHORT) != 0) { + return true; + } + return false; + } + + public boolean isGeneric() { + return !isStandard() && !isDaylight(); + } + + private int getType() { + return type; + } + } + + public ZoneStringInfo findSpecificLong(String text, int start) { + return find(text, start, STANDARD_LONG | DAYLIGHT_LONG); + } + + public ZoneStringInfo findSpecificShort(String text, int start) { + return find(text, start, STANDARD_SHORT | DAYLIGHT_SHORT); + } + + public ZoneStringInfo findGenericLong(String text, int start) { + return find(text, start, GENERIC_LONG | STANDARD_LONG | LOCATION); + } + + public ZoneStringInfo findGenericShort(String text, int start) { + return find(text, start, GENERIC_SHORT | STANDARD_SHORT 
| LOCATION); + } + + public ZoneStringInfo findGenericLocation(String text, int start) { + return find(text, start, LOCATION); + } + + // Following APIs are not used by SimpleDateFormat, but public for testing purpose + public String getLongStandard(String tzid, long date) { + return getString(tzid, ZSIDX_LONG_STANDARD, date, false /* not used */); + } + + public String getLongDaylight(String tzid, long date) { + return getString(tzid, ZSIDX_LONG_DAYLIGHT, date, false /* not used */); + } + + public String getLongGenericNonLocation(String tzid, long date) { + return getString(tzid, ZSIDX_LONG_GENERIC, date, false /* not used */); + } + + public String getLongGenericPartialLocation(String tzid, long date) { + return getGenericPartialLocationString(tzid, false, date, false /* not used */); + } + + public String getShortStandard(String tzid, long date, boolean commonlyUsedOnly) { + return getString(tzid, ZSIDX_SHORT_STANDARD, date, commonlyUsedOnly); + } + + public String getShortDaylight(String tzid, long date, boolean commonlyUsedOnly) { + return getString(tzid, ZSIDX_SHORT_DAYLIGHT, date, commonlyUsedOnly); + } + + public String getShortGenericNonLocation(String tzid, long date, boolean commonlyUsedOnly) { + return getString(tzid, ZSIDX_SHORT_GENERIC, date, commonlyUsedOnly); + } + + public String getShortGenericPartialLocation(String tzid, long date, boolean commonlyUsedOnly) { + return getGenericPartialLocationString(tzid, true, date, commonlyUsedOnly); + } + + public String getGenericLocation(String tzid) { + return getString(tzid, ZSIDX_LOCATION, 0L /* not used */, false /* not used */); + } + + /** + * Constructs a ZoneStringFormat by locale. Because an instance of ZoneStringFormat + * is read-only, only one instance for a locale is sufficient. Thus, this + * constructor is protected and only called from getInstance(ULocale) to + * create one for a locale. 
+ * @param locale The locale + */ + protected ZoneStringFormat(ULocale locale) { + this.locale = locale; + tzidToStrings = new HashMap(); + mzidToStrings = new HashMap(); + zoneStringsTrie = new TextTrieMap(true); + } + + // Load only a single zone + private synchronized void loadZone(String id) { + if (isFullyLoaded) { + return; + } + String tzid = ZoneMeta.getCanonicalSystemID(id); + if (tzid == null || tzidToStrings.containsKey(tzid)) { + return; + } + + ICUResourceBundle zoneStringsBundle = null; + try { + ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_ZONE_BASE_NAME, locale); + zoneStringsBundle = bundle.getWithFallback("zoneStrings"); + } catch (MissingResourceException e) { + // If no locale bundles are available, zoneStringsBundle will be null. + // We still want to go through the rest of zone strings initialization, + // because generic location format is generated from tzid for the case. + // The rest of code should work even zoneStrings is null. + } + + String[] zstrarray = new String[ZSIDX_MAX]; + String[] mzstrarray = new String[ZSIDX_MAX]; + String[][] mzPartialLoc = new String[10][4]; // maximum 10 metazones per zone + + addSingleZone(tzid, zoneStringsBundle, + getFallbackFormat(locale), getRegionFormat(locale), + zstrarray, mzstrarray, mzPartialLoc); + } + + // Loading all zone strings + private synchronized void loadFull() { + if (isFullyLoaded) { + return; + } + ICUResourceBundle zoneStringsBundle = null; + try { + ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_ZONE_BASE_NAME, locale); + zoneStringsBundle = bundle.getWithFallback("zoneStrings"); + } catch (MissingResourceException e) { + // If no locale bundles are available, zoneStringsBundle will be null. + // We still want to go through the rest of zone strings initialization, + // because generic location format is generated from tzid for the case. 
+ // The rest of code should work even zoneStrings is null. + } + + String[] zoneIDs = TimeZone.getAvailableIDs(); + + String[] zstrarray = new String[ZSIDX_MAX]; + String[] mzstrarray = new String[ZSIDX_MAX]; + String[][] mzPartialLoc = new String[10][4]; // maximum 10 metazones per zone + + for (int i = 0; i < zoneIDs.length; i++) { + // Skip aliases + String tzid = ZoneMeta.getCanonicalSystemID(zoneIDs[i]); + if (tzid == null || !zoneIDs[i].equals(tzid)) { + continue; + } + + if (tzidToStrings.containsKey(tzid)) { + continue; + } + + addSingleZone(tzid, zoneStringsBundle, + getFallbackFormat(locale), getRegionFormat(locale), + zstrarray, mzstrarray, mzPartialLoc); + } + isFullyLoaded = true; + } + + // This internal initialization code must be called in a synchronized block + private void addSingleZone(String tzid, ICUResourceBundle zoneStringsBundle, + MessageFormat fallbackFmt, MessageFormat regionFmt, + String[] zstrarray, String[] mzstrarray, String[][] mzPartialLoc) { + + if (tzidToStrings.containsKey(tzid)) { + return; + } + + String zoneKey = tzid.replace('/', ':'); + zstrarray[ZSIDX_LONG_STANDARD] = getZoneStringFromBundle(zoneStringsBundle, zoneKey, RESKEY_LONG_STANDARD); + zstrarray[ZSIDX_SHORT_STANDARD] = getZoneStringFromBundle(zoneStringsBundle, zoneKey, RESKEY_SHORT_STANDARD); + zstrarray[ZSIDX_LONG_DAYLIGHT] = getZoneStringFromBundle(zoneStringsBundle, zoneKey, RESKEY_LONG_DAYLIGHT); + zstrarray[ZSIDX_SHORT_DAYLIGHT] = getZoneStringFromBundle(zoneStringsBundle, zoneKey, RESKEY_SHORT_DAYLIGHT); + zstrarray[ZSIDX_LONG_GENERIC] = getZoneStringFromBundle(zoneStringsBundle, zoneKey, RESKEY_LONG_GENERIC); + zstrarray[ZSIDX_SHORT_GENERIC] = getZoneStringFromBundle(zoneStringsBundle, zoneKey, RESKEY_SHORT_GENERIC); + + // Compose location format string + String countryCode = ZoneMeta.getCanonicalCountry(tzid); + String country = null; + String city = null; + if (countryCode != null) { + city = getZoneStringFromBundle(zoneStringsBundle, zoneKey, 
RESKEY_EXEMPLAR_CITY); + if (city == null) { + city = tzid.substring(tzid.lastIndexOf('/') + 1).replace('_', ' '); + } + country = getLocalizedCountry(countryCode, locale); + if (ZoneMeta.getSingleCountry(tzid) != null) { + // If the zone is the only zone in the country, do not add city + zstrarray[ZSIDX_LOCATION] = regionFmt.format(new Object[] {country}); + } else { + zstrarray[ZSIDX_LOCATION] = fallbackFmt.format(new Object[] {city, country}); + } + } else { + if (tzid.startsWith("Etc/")) { + // "Etc/xxx" is not associated with a specific location, so localized + // GMT format is always used as generic location format. + zstrarray[ZSIDX_LOCATION] = null; + } else { + // When a new time zone ID, which is actually associated with a specific + // location, is added in tzdata, but the current CLDR data does not have + // the information yet, ICU creates a generic location string based on + // the ID. This implementation supports canonical time zone round trip + // with format pattern "VVVV". See #6602 for the details. + String location = tzid; + int slashIdx = location.lastIndexOf('/'); + if (slashIdx == -1) { + // A time zone ID without slash in the tz database is not + // associated with a specific location. For instance, + // MET, CET, EET and WET fall into this category. + zstrarray[ZSIDX_LOCATION] = null; + } else { + location = tzid.substring(slashIdx + 1); + zstrarray[ZSIDX_LOCATION] = regionFmt.format(new Object[] {location}); + } + } + } + + boolean commonlyUsed = isCommonlyUsed(zoneStringsBundle, zoneKey); + + // Resolve metazones used by this zone + int mzPartialLocIdx = 0; + List metazoneMappings = ZoneMeta.getOlsonToMatazones(tzid); + if (metazoneMappings != null) { + Iterator it = metazoneMappings.iterator(); + while (it.hasNext()) { + ZoneMeta.OlsonToMetaMappingEntry mzmap = it.next(); + ZoneStrings mzStrings = mzidToStrings.get(mzmap.mzid); + if (mzStrings == null) { + // If the metazone strings are not yet processed, do it now. 
+ String mzkey = "meta:" + mzmap.mzid; + boolean mzCommonlyUsed = isCommonlyUsed(zoneStringsBundle, mzkey); + mzstrarray[ZSIDX_LONG_STANDARD] = getZoneStringFromBundle(zoneStringsBundle, mzkey, RESKEY_LONG_STANDARD); + mzstrarray[ZSIDX_SHORT_STANDARD] = getZoneStringFromBundle(zoneStringsBundle, mzkey, RESKEY_SHORT_STANDARD); + mzstrarray[ZSIDX_LONG_DAYLIGHT] = getZoneStringFromBundle(zoneStringsBundle, mzkey, RESKEY_LONG_DAYLIGHT); + mzstrarray[ZSIDX_SHORT_DAYLIGHT] = getZoneStringFromBundle(zoneStringsBundle, mzkey, RESKEY_SHORT_DAYLIGHT); + mzstrarray[ZSIDX_LONG_GENERIC] = getZoneStringFromBundle(zoneStringsBundle, mzkey, RESKEY_LONG_GENERIC); + mzstrarray[ZSIDX_SHORT_GENERIC] = getZoneStringFromBundle(zoneStringsBundle, mzkey, RESKEY_SHORT_GENERIC); + mzstrarray[ZSIDX_LOCATION] = null; + mzStrings = new ZoneStrings(mzstrarray, mzCommonlyUsed, null); + mzidToStrings.put(mzmap.mzid, mzStrings); + + // Add metazone strings to the zone string trie + String preferredIdForLocale = ZoneMeta.getZoneIdByMetazone(mzmap.mzid, getRegion()); + for (int j = 0; j < mzstrarray.length; j++) { + if (mzstrarray[j] != null) { + int type = getNameType(j); + ZoneStringInfo zsinfo = new ZoneStringInfo(preferredIdForLocale, mzstrarray[j], type); + zoneStringsTrie.put(mzstrarray[j], zsinfo); + } + } + } + // Compose generic partial location format + String lg = mzStrings.getString(ZSIDX_LONG_GENERIC); + String sg = mzStrings.getString(ZSIDX_SHORT_GENERIC); + if (lg != null || sg != null) { + boolean addMzPartialLocationNames = true; + for (int j = 0; j < mzPartialLocIdx; j++) { + if (mzPartialLoc[j][0].equals(mzmap.mzid)) { + // already added + addMzPartialLocationNames = false; + break; + } + } + if (addMzPartialLocationNames) { + String locationPart = null; + // Check if the zone is the preferred zone for the territory associated with the zone + String preferredID = ZoneMeta.getZoneIdByMetazone(mzmap.mzid, countryCode); + if (tzid.equals(preferredID)) { + // Use country for the 
location + locationPart = country; + } else { + // Use city for the location + locationPart = city; + } + // Row layout: [0] metazone id, [1] long name, [2] short name, [3] non-null marker when the short metazone name is commonly used + mzPartialLoc[mzPartialLocIdx][0] = mzmap.mzid; + mzPartialLoc[mzPartialLocIdx][1] = null; + mzPartialLoc[mzPartialLocIdx][2] = null; + mzPartialLoc[mzPartialLocIdx][3] = null; + if (locationPart != null) { + if (lg != null) { + mzPartialLoc[mzPartialLocIdx][1] = fallbackFmt.format(new Object[] {locationPart, lg}); + } + if (sg != null) { + mzPartialLoc[mzPartialLocIdx][2] = fallbackFmt.format(new Object[] {locationPart, sg}); + boolean shortMzCommonlyUsed = mzStrings.isShortFormatCommonlyUsed(); + if (shortMzCommonlyUsed) { + mzPartialLoc[mzPartialLocIdx][3] = "1"; + } + } + } + mzPartialLocIdx++; + } + } + } + } + String[][] genericPartialLocationNames = null; + if (mzPartialLocIdx != 0) { + // metazone generic partial location names are collected + genericPartialLocationNames = new String[mzPartialLocIdx][]; + for (int mzi = 0; mzi < mzPartialLocIdx; mzi++) { + genericPartialLocationNames[mzi] = mzPartialLoc[mzi].clone(); + } + } + // Finally, create ZoneStrings instance and put it into the tzidToStrings map + ZoneStrings zstrings = new ZoneStrings(zstrarray, commonlyUsed, genericPartialLocationNames); + tzidToStrings.put(tzid, zstrings); + + // Also add all available names to the zone string trie + if (zstrarray != null) { + for (int j = 0; j < zstrarray.length; j++) { + if (zstrarray[j] != null) { + int type = getNameType(j); + ZoneStringInfo zsinfo = new ZoneStringInfo(tzid, zstrarray[j], type); + zoneStringsTrie.put(zstrarray[j], zsinfo); + } + } + } + if (genericPartialLocationNames != null) { + for (int j = 0; j < genericPartialLocationNames.length; j++) { + ZoneStringInfo zsinfo; + if (genericPartialLocationNames[j][1] != null) { + zsinfo = new ZoneStringInfo(tzid, genericPartialLocationNames[j][1], GENERIC_LONG); + zoneStringsTrie.put(genericPartialLocationNames[j][1], zsinfo); + } + if (genericPartialLocationNames[j][2] != null) { + zsinfo = new 
ZoneStringInfo(tzid, genericPartialLocationNames[j][2], GENERIC_SHORT); + // Index 2 is the short partial location name; the info string must be the same text as the trie key + zoneStringsTrie.put(genericPartialLocationNames[j][2], zsinfo); + } + } + } + } + + // Name types, these bit flags are used for zone string lookup + private static final int LOCATION = 0x0001; + private static final int GENERIC_LONG = 0x0002; + private static final int GENERIC_SHORT = 0x0004; + private static final int STANDARD_LONG = 0x0008; + private static final int STANDARD_SHORT = 0x0010; + private static final int DAYLIGHT_LONG = 0x0020; + private static final int DAYLIGHT_SHORT = 0x0040; + + // Name type index, these constants are used for index in ZoneStrings.strings + private static final int ZSIDX_LOCATION = 0; + private static final int ZSIDX_LONG_STANDARD = 1; + private static final int ZSIDX_SHORT_STANDARD = 2; + private static final int ZSIDX_LONG_DAYLIGHT = 3; + private static final int ZSIDX_SHORT_DAYLIGHT = 4; + private static final int ZSIDX_LONG_GENERIC = 5; + private static final int ZSIDX_SHORT_GENERIC = 6; + + private static final int ZSIDX_MAX = ZSIDX_SHORT_GENERIC + 1; + + // ZoneStringFormat cache + private static ICUCache TZFORMAT_CACHE = new SimpleCache(); + + /* + * The translation type of the translated zone strings + */ + private static final String + RESKEY_SHORT_GENERIC = "sg", + RESKEY_SHORT_STANDARD = "ss", + RESKEY_SHORT_DAYLIGHT = "sd", + RESKEY_LONG_GENERIC = "lg", + RESKEY_LONG_STANDARD = "ls", + RESKEY_LONG_DAYLIGHT = "ld", + RESKEY_EXEMPLAR_CITY = "ec", + RESKEY_COMMONLY_USED = "cu"; + + // Window size used for DST check for a zone in a metazone + private static final long DST_CHECK_RANGE = 184L*(24*60*60*1000); + + // Map from zone id to ZoneStrings + private Map tzidToStrings; + + // Map from metazone id to ZoneStrings + private Map mzidToStrings; + + // Zone string dictionary, used for look up + private TextTrieMap zoneStringsTrie; + + // Locale used for initializing zone strings + private ULocale locale; + + // Region used for resolving a zone in a 
metazone, initialized by locale + private transient String region; + + // Loading status + private boolean isFullyLoaded = false; + + /* + * Private method to get a zone string except generic partial location types. + */ + private String getString(String tzid, int typeIdx, long date, boolean commonlyUsedOnly) { + if (!isFullyLoaded) { + // Lazy loading + loadZone(tzid); + } + + String result = null; + ZoneStrings zstrings = tzidToStrings.get(tzid); + if (zstrings == null) { + // ICU's own array does not have entries for aliases + String canonicalID = ZoneMeta.getCanonicalSystemID(tzid); + if (canonicalID != null && !canonicalID.equals(tzid)) { + // Canonicalize tzid here. The rest of operations + // require tzid to be canonicalized. + tzid = canonicalID; + zstrings = tzidToStrings.get(tzid); + } + } + if (zstrings != null) { + switch (typeIdx) { + case ZSIDX_LONG_STANDARD: + case ZSIDX_LONG_DAYLIGHT: + case ZSIDX_LONG_GENERIC: + case ZSIDX_LOCATION: + result = zstrings.getString(typeIdx); + break; + case ZSIDX_SHORT_STANDARD: + case ZSIDX_SHORT_DAYLIGHT: + case ZSIDX_SHORT_GENERIC: + if (!commonlyUsedOnly || zstrings.isShortFormatCommonlyUsed()) { + result = zstrings.getString(typeIdx); + } + break; + } + } + if (result == null && mzidToStrings != null && typeIdx != ZSIDX_LOCATION) { + // Try metazone + String mzid = ZoneMeta.getMetazoneID(tzid, date); + if (mzid != null) { + ZoneStrings mzstrings = mzidToStrings.get(mzid); + if (mzstrings != null) { + switch (typeIdx) { + case ZSIDX_LONG_STANDARD: + case ZSIDX_LONG_DAYLIGHT: + case ZSIDX_LONG_GENERIC: + result = mzstrings.getString(typeIdx); + break; + case ZSIDX_SHORT_STANDARD: + case ZSIDX_SHORT_DAYLIGHT: + case ZSIDX_SHORT_GENERIC: + if (!commonlyUsedOnly || mzstrings.isShortFormatCommonlyUsed()) { + result = mzstrings.getString(typeIdx); + } + break; + } + } + } + } + return result; + } + + /* + * Private method to get a generic string, with fallback logic involved, + * that is, + * + * 1. 
If a generic non-location string is available for the zone, return it. + * 2. If a generic non-location string is associated with a metazone and + * the zone never uses daylight time around the given date, use the standard + * string (if available). + * + * Note: In CLDR1.5.1, the same localization is used for generic and standard. + * In this case, we do not use the standard string and do the rest. + * + * 3. If a generic non-location string is associated with a metazone and + * the offset at the given time is different from the preferred zone for the + * current locale, then return the generic partial location string (if available) + * 4. If a generic non-location string is not available, use generic location + * string. + */ + private String getGenericString(Calendar cal, boolean isShort, boolean commonlyUsedOnly) { + String result = null; + TimeZone tz = cal.getTimeZone(); + String tzid = tz.getID(); + + if (!isFullyLoaded) { + // Lazy loading + loadZone(tzid); + } + + ZoneStrings zstrings = tzidToStrings.get(tzid); + if (zstrings == null) { + // ICU's own array does not have entries for aliases + String canonicalID = ZoneMeta.getCanonicalSystemID(tzid); + if (canonicalID != null && !canonicalID.equals(tzid)) { + // Canonicalize tzid here. The rest of operations + // require tzid to be canonicalized. 
+ tzid = canonicalID; + zstrings = tzidToStrings.get(tzid); + } + } + if (zstrings != null) { + if (isShort) { + if (!commonlyUsedOnly || zstrings.isShortFormatCommonlyUsed()) { + result = zstrings.getString(ZSIDX_SHORT_GENERIC); + } + } else { + result = zstrings.getString(ZSIDX_LONG_GENERIC); + } + } + if (result == null && mzidToStrings != null) { + // try metazone + long time = cal.getTimeInMillis(); + String mzid = ZoneMeta.getMetazoneID(tzid, time); + if (mzid != null) { + boolean useStandard = false; + if (cal.get(Calendar.DST_OFFSET) == 0) { + useStandard = true; + // Check if the zone actually uses daylight saving time around the time + if (tz instanceof BasicTimeZone) { + BasicTimeZone btz = (BasicTimeZone)tz; + TimeZoneTransition before = btz.getPreviousTransition(time, true); + if (before != null + && (time - before.getTime() < DST_CHECK_RANGE) + && before.getFrom().getDSTSavings() != 0) { + useStandard = false; + } else { + TimeZoneTransition after = btz.getNextTransition(time, false); + if (after != null + && (after.getTime() - time < DST_CHECK_RANGE) + && after.getTo().getDSTSavings() != 0) { + useStandard = false; + } + } + } else { + // If not BasicTimeZone... only if the instance is not an ICU's implementation. + // We may get a wrong answer in edge case, but it should practically work OK. + int[] offsets = new int[2]; + tz.getOffset(time - DST_CHECK_RANGE, false, offsets); + if (offsets[1] != 0) { + useStandard = false; + } else { + tz.getOffset(time + DST_CHECK_RANGE, false, offsets); + if (offsets[1] != 0){ + useStandard = false; + } + } + } + } + if (useStandard) { + result = getString(tzid, (isShort ? ZSIDX_SHORT_STANDARD : ZSIDX_LONG_STANDARD), + time, commonlyUsedOnly); + + // Note: + // In CLDR 1.5.1, a same localization is used for both generic and standard + // for some metazones in some locales. This is actually data bugs and should + // be resolved in later versions of CLDR. 
For now, we check if the standard + // name is different from its generic name below. + if (result != null) { + String genericNonLocation = getString(tzid, (isShort ? ZSIDX_SHORT_GENERIC : ZSIDX_LONG_GENERIC), + time, commonlyUsedOnly); + if (genericNonLocation != null && result.equalsIgnoreCase(genericNonLocation)) { + result = null; + } + } + } + if (result == null){ + ZoneStrings mzstrings = mzidToStrings.get(mzid); + if (mzstrings != null) { + if (isShort) { + if (!commonlyUsedOnly || mzstrings.isShortFormatCommonlyUsed()) { + result = mzstrings.getString(ZSIDX_SHORT_GENERIC); + } + } else { + result = mzstrings.getString(ZSIDX_LONG_GENERIC); + } + } + if (result != null) { + // Check if the offsets at the given time matches the preferred zone's offsets + String preferredId = ZoneMeta.getZoneIdByMetazone(mzid, getRegion()); + if (!tzid.equals(preferredId)) { + // Check if the offsets at the given time are identical with the preferred zone + int raw = cal.get(Calendar.ZONE_OFFSET); + int sav = cal.get(Calendar.DST_OFFSET); + TimeZone preferredZone = TimeZone.getTimeZone(preferredId); + int[] preferredOffsets = new int[2]; + // Check offset in preferred time zone with wall time. + // With getOffset(time, false, preferredOffsets), + // you may get incorrect results because of time overlap at DST->STD + // transition. 
+ preferredZone.getOffset(time + raw + sav, true, preferredOffsets); + if (raw != preferredOffsets[0] || sav != preferredOffsets[1]) { + // Use generic partial location string as fallback + result = zstrings.getGenericPartialLocationString(mzid, isShort, commonlyUsedOnly); + } + } + } + } + } + } + if (result == null) { + // Use location format as the final fallback + result = getString(tzid, ZSIDX_LOCATION, cal.getTimeInMillis(), false /* not used */); + } + return result; + } + + /* + * Private method to get a generic partial location string + */ + private String getGenericPartialLocationString(String tzid, boolean isShort, long date, boolean commonlyUsedOnly) { + if (!isFullyLoaded) { + // Lazy loading + loadZone(tzid); + } + + String result = null; + String mzid = ZoneMeta.getMetazoneID(tzid, date); + if (mzid != null) { + ZoneStrings zstrings = tzidToStrings.get(tzid); + if (zstrings != null) { + result = zstrings.getGenericPartialLocationString(mzid, isShort, commonlyUsedOnly); + } + } + return result; + } + + /* + * Gets zoneStrings compatible with DateFormatSymbols for the + * specified date. In CLDR 1.5, zone names can be changed + * from time to time. This method generates flat 2-dimensional + * String array including zone ids and their localized strings + * at the moment. Thus, even if you construct a new ZoneStringFormat + * by the zone strings array returned by this method, you will + * lose historic name changes. Also, commonly used flag for + * short types is not reflected in the result. 
+ */ + private String[][] getZoneStrings(long date) { + loadFull(); + + Set tzids = tzidToStrings.keySet(); + String[][] zoneStrings = new String[tzids.size()][8]; + int idx = 0; + for (String tzid : tzids) { + zoneStrings[idx][0] = tzid; + zoneStrings[idx][1] = getLongStandard(tzid, date); + zoneStrings[idx][2] = getShortStandard(tzid, date, false); + zoneStrings[idx][3] = getLongDaylight(tzid, date); + zoneStrings[idx][4] = getShortDaylight(tzid, date, false); + zoneStrings[idx][5] = getGenericLocation(tzid); + zoneStrings[idx][6] = getLongGenericNonLocation(tzid, date); + zoneStrings[idx][7] = getShortGenericNonLocation(tzid, date, false); + idx++; + } + return zoneStrings; + } + + /* + * ZoneStrings is an internal implementation class for + * holding localized name information for a zone/metazone + */ + private static class ZoneStrings { + private String[] strings; + private String[][] genericPartialLocationStrings; + private boolean commonlyUsed; + + private ZoneStrings(String[] zstrarray, boolean commonlyUsed, String[][] genericPartialLocationStrings) { + if (zstrarray != null) { + int lastIdx = -1; + for (int i = 0; i < zstrarray.length; i++) { + if (zstrarray[i] != null) { + lastIdx = i; + } + } + if (lastIdx != -1) { + strings = new String[lastIdx + 1]; + System.arraycopy(zstrarray, 0, strings, 0, lastIdx + 1); + } + } + this.commonlyUsed = commonlyUsed; + this.genericPartialLocationStrings = genericPartialLocationStrings; + } + + private String getString(int typeIdx) { + if (strings != null && typeIdx >= 0 && typeIdx < strings.length) { + return strings[typeIdx]; + } + return null; + } + + private boolean isShortFormatCommonlyUsed() { + return commonlyUsed; + } + + private String getGenericPartialLocationString(String mzid, boolean isShort, boolean commonlyUsedOnly) { + String result = null; + if (genericPartialLocationStrings != null) { + for (int i = 0; i < genericPartialLocationStrings.length; i++) { + if 
(genericPartialLocationStrings[i][0].equals(mzid)) { + if (isShort) { + if (!commonlyUsedOnly || genericPartialLocationStrings[i][3] != null) { + result = genericPartialLocationStrings[i][2]; + } + } else { + result = genericPartialLocationStrings[i][1]; + } + break; + } + } + } + return result; + } + } + + /* + * Returns a localized zone string from bundle. + */ + private static String getZoneStringFromBundle(ICUResourceBundle bundle, String key, String type) { + String zstring = null; + if (bundle != null) { + try { + zstring = bundle.getStringWithFallback(key + "/" + type); + } catch (MissingResourceException ex) { + // throw away the exception + } + } + return zstring; + } + + /* + * Returns if the short strings of the zone/metazone is commonly used. + */ + private static boolean isCommonlyUsed(ICUResourceBundle bundle, String key) { + boolean commonlyUsed = false; + if (bundle != null) { + try { + UResourceBundle cuRes = bundle.getWithFallback(key + "/" + RESKEY_COMMONLY_USED); + int cuValue = cuRes.getInt(); + commonlyUsed = (cuValue != 0); + } catch (MissingResourceException ex) { + // throw away the exception + } + } + return commonlyUsed; + } + + /* + * Returns a localized country string for the country code. If no actual + * localized string is found, countryCode itself is returned. + */ + private static String getLocalizedCountry(String countryCode, ULocale locale) { + String countryStr = null; + if (countryCode != null) { + ICUResourceBundle rb = + (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_REGION_BASE_NAME, locale); +// +// TODO: There is a design bug in UResourceBundle and getLoadingStatus() does not work well. 
+// +// if (rb.getLoadingStatus() != ICUResourceBundle.FROM_ROOT && rb.getLoadingStatus() != ICUResourceBundle.FROM_DEFAULT) { +// country = ULocale.getDisplayCountry("xx_" + country_code, locale); +// } +// START WORKAROUND + ULocale rbloc = rb.getULocale(); + if (!rbloc.equals(ULocale.ROOT) && rbloc.getLanguage().equals(locale.getLanguage())) { + countryStr = ULocale.getDisplayCountry("xx_" + countryCode, locale); + } +// END WORKAROUND + if (countryStr == null || countryStr.length() == 0) { + countryStr = countryCode; + } + } + return countryStr; + } + + /* + * Gets an instance of MessageFormat used for formatting zone fallback string + */ + private static MessageFormat getFallbackFormat(ULocale locale) { + String fallbackPattern = ZoneMeta.getTZLocalizationInfo(locale, ZoneMeta.FALLBACK_FORMAT); + if (fallbackPattern == null) { + fallbackPattern = "{1} ({0})"; + } + return new MessageFormat(fallbackPattern, locale); + } + + /* + * Gets an instance of MessageFormat used for formatting zone region string + */ + private static MessageFormat getRegionFormat(ULocale locale) { + String regionPattern = ZoneMeta.getTZLocalizationInfo(locale, ZoneMeta.REGION_FORMAT); + if (regionPattern == null) { + regionPattern = "{0}"; + } + return new MessageFormat(regionPattern, locale); + } + + /* + * Index value mapping between DateFormatSymbols's zoneStrings and + * the string types defined in this class. 
+ */ + private static final int[] INDEXMAP = { + -1, // 0 - zone id + ZSIDX_LONG_STANDARD, // 1 - long standard + ZSIDX_SHORT_STANDARD, // 2 - short standard + ZSIDX_LONG_DAYLIGHT, // 3 - long daylight + ZSIDX_SHORT_DAYLIGHT, // 4 - short daylight + ZSIDX_LOCATION, // 5 - generic location + ZSIDX_LONG_GENERIC, // 6 - long generic non-location + ZSIDX_SHORT_GENERIC // 7 - short generic non-location + }; + + /* + * Convert from zone string array index for zoneStrings used by DateFormatSymbols#get/setZoneStrings + * to the type constants defined by this class, such as ZSIDX_LONG_STANDARD. + */ + private static int getNameTypeIndex(int i) { + int idx = -1; + if (i >= 1 && i < INDEXMAP.length) { + idx = INDEXMAP[i]; + } + return idx; + } + + /* + * Mapping from name type index to name type + */ + private static final int[] NAMETYPEMAP = { + LOCATION, // ZSIDX_LOCATION + STANDARD_LONG, // ZSIDX_LONG_STANDARD + STANDARD_SHORT, // ZSIDX_SHORT_STANDARD + DAYLIGHT_LONG, // ZSIDX_LONG_DAYLIGHT + DAYLIGHT_SHORT, // ZSIDX_SHORT_DAYLIGHT + GENERIC_LONG, // ZSIDX_LONG_GENERIC + GENERIC_SHORT, // ZSIDX_SHORT_GENERIC + }; + + private static int getNameType(int typeIdx) { + int type = -1; + if (typeIdx >= 0 && typeIdx < NAMETYPEMAP.length) { + type = NAMETYPEMAP[typeIdx]; + } + return type; + } + + /* + * Returns region used for ZoneMeta#getZoneIdByMetazone. + */ + private String getRegion() { + if (region == null) { + if (locale != null) { + region = locale.getCountry(); + if (region.length() == 0) { + ULocale tmp = ULocale.addLikelySubtags(locale); + region = tmp.getCountry(); + } + } else { + region = ""; + } + } + return region; + } + + // This method does lazy zone string loading + private ZoneStringInfo find(String text, int start, int types) { + ZoneStringInfo result = subFind(text, start, types); + if (isFullyLoaded) { + return result; + } + // When zone string data is partially loaded, + // this method return the result only when + // the input text is fully consumed. 
+ if (result != null) { + int matchLen = result.getString().length(); + if (text.length() - start == matchLen) { + return result; + } + } + // Now load all zone strings + loadFull(); + return subFind(text, start, types); + } + + /* + * Find a prefix matching time zone for the given zone string types. + * @param text The text contains a time zone string + * @param start The start index within the text + * @param types The bit mask representing a set of requested types + * @return If any zone string matched for the requested types, returns a + * ZoneStringInfo for the longest match. If no matches are found for + * the requested types, returns a ZoneStringInfo for the longest match + * for any other types. If nothing matches at all, returns null. + */ + private ZoneStringInfo subFind(String text, int start, int types) { + ZoneStringInfo result = null; + ZoneStringSearchResultHandler handler = new ZoneStringSearchResultHandler(); + zoneStringsTrie.find(text, start, handler); + List list = handler.getMatchedZoneStrings(); + ZoneStringInfo fallback = null; + if (list != null && list.size() > 0) { + Iterator it = list.iterator(); + while (it.hasNext()) { + ZoneStringInfo tmp = it.next(); + if ((types & tmp.getType()) != 0) { + if (result == null || result.getString().length() < tmp.getString().length()) { + result = tmp; + } else if (result.getString().length() == tmp.getString().length()) { + // Tie breaker - there are some examples that a + // long standard name is identical with a location + // name - for example, "Uruguay Time". In this case, + // we interpret it as generic, not specific. 
+ if (tmp.isGeneric() && !result.isGeneric()) { + result = tmp; + } + } + } else if (result == null) { + if (fallback == null || fallback.getString().length() < tmp.getString().length()) { + fallback = tmp; + } else if (fallback.getString().length() == tmp.getString().length()) { + if (tmp.isGeneric() && !fallback.isGeneric()) { + fallback = tmp; + } + } + } + } + } + if (result == null && fallback != null) { + result = fallback; + } + return result; + } + + + + private static class ZoneStringSearchResultHandler implements TextTrieMap.ResultHandler { + + private ArrayList resultList; + + public boolean handlePrefixMatch(int matchLength, Iterator values) { + if (resultList == null) { + resultList = new ArrayList(); + } + while (values.hasNext()) { + ZoneStringInfo zsitem = values.next(); + if (zsitem == null) { + break; + } + int i = 0; + for (; i < resultList.size(); i++) { + ZoneStringInfo tmp = resultList.get(i); + if (zsitem.getType() == tmp.getType()) { + if (matchLength > tmp.getString().length()) { + resultList.set(i, zsitem); + } + break; + } + } + if (i == resultList.size()) { + // not found in the current list + resultList.add(zsitem); + } + } + return true; + } + + List getMatchedZoneStrings() { + if (resultList == null || resultList.size() == 0) { + return null; + } + return resultList; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules.java b/main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules.java new file mode 100644 index 00000000000..9cdb8d709be --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules.java @@ -0,0 +1,36 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +/** + * Default break-iterator rules. + * This resource tells which break iterator class type is instantiated + * for each of the standard (built-in) boundary types. + * + * Locales (Thai) needing a dictionary based iterator override this. + */ + +public class BreakIteratorRules extends ListResourceBundle { + public Object[][] getContents() { + return contents; + } + + static final Object[][] contents = { + // BreakIteratorClasses lists the class names to instantiate for each + // built-in type of BreakIterator + { "BreakIteratorClasses", + new String[] { "RuleBasedBreakIterator", // character-break iterator class + "RuleBasedBreakIterator", // word-break iterator class + "RuleBasedBreakIterator", // line-break iterator class + "RuleBasedBreakIterator", // sentence-break iterator class + "RuleBasedBreakIterator"} // Title-Case break iterator class + } + + }; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules_th.java b/main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules_th.java new file mode 100644 index 00000000000..a060f648644 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/BreakIteratorRules_th.java @@ -0,0 +1,41 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +import com.ibm.icu.impl.ICUData; + +public class BreakIteratorRules_th extends ListResourceBundle { + private static final String DATA_NAME = "data/th.brk"; + + public Object[][] getContents() { + final boolean exists = ICUData.exists(DATA_NAME); + + // if dictionary wasn't found, then this resource bundle doesn't have + // much to contribute... + if (!exists) { + return new Object[0][0]; + } + + return new Object[][] { + // names of classes to instantiate for the different kinds of break + // iterator. Notice we're now using DictionaryBasedBreakIterator + // for word and line breaking. + { "BreakIteratorClasses", + new String[] { "RuleBasedBreakIterator", // character-break iterator class + "DictionaryBasedBreakIterator", // word-break iterator class + "DictionaryBasedBreakIterator", // line-break iterator class + "RuleBasedBreakIterator" } // sentence-break iterator class + }, + + + { "WordBreakDictionary", DATA_NAME }, // now a path to ICU4J-specific resource + { "LineBreakDictionary", DATA_NAME } + }; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java new file mode 100644 index 00000000000..b6ae0e5167d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java @@ -0,0 +1,28 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle extends ListResourceBundle { + + // Normally, each HolidayBundle uses the holiday's US English name + // as the string key for looking up the localized name. This means + // that the key itself can be used if no name is found for the requested + // locale. + // + // For holidays where the key is _not_ the English name, e.g. in the + // case of conflicts, the English name must be given here. + // + static private final Object[][] fContents = { + { "", "" }, // Can't be empty! + }; + + public synchronized Object[][] getContents() { return fContents; } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java new file mode 100644 index 00000000000..b4ce7f525ae --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java @@ -0,0 +1,30 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_da extends ListResourceBundle +{ + static private final Object[][] fContents = + { + { "Armistice Day", "v\u00e5benhvile" }, + { "Ascension", "himmelfart" }, + { "Boxing Day", "anden juledag" }, + { "Christmas Eve", "juleaften" }, + { "Easter", "p\u00e5ske" }, + { "Epiphany", "helligtrekongersdag" }, + { "Good Friday", "langfredag" }, + { "Halloween", "allehelgensaften" }, + { "Maundy Thursday", "sk\u00e6rtorsdag" }, + { "Palm Sunday", "palmes\u00f8ndag" }, + { "Pentecost", "pinse" }, + { "Shrove Tuesday", "hvidetirsdag" }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java new file mode 100644 index 00000000000..ed59e448a8c --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java @@ -0,0 +1,38 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_da_DK extends ListResourceBundle +{ + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + new SimpleHoliday(Calendar.APRIL, 30, -Calendar.FRIDAY, "General Prayer Day"), + new SimpleHoliday(Calendar.JUNE, 5, "Constitution Day"), + SimpleHoliday.CHRISTMAS_EVE, + SimpleHoliday.CHRISTMAS, + SimpleHoliday.BOXING_DAY, + SimpleHoliday.NEW_YEARS_EVE, + + // Easter and related holidays + EasterHoliday.MAUNDY_THURSDAY, + EasterHoliday.GOOD_FRIDAY, + EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, + EasterHoliday.ASCENSION, + EasterHoliday.WHIT_MONDAY, + }; + + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java new file mode 100644 index 00000000000..35b10f20ca6 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java @@ -0,0 +1,67 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_de extends ListResourceBundle { + static private final Object[][] fContents = { + { "All Saints' Day", "Allerheiligen" }, + { "All Souls' Day", "Allerseelen" }, + { "Armistice Day", "Waffenstillstandstag" }, + { "Ascension", "Christi Himmelfahrt" }, + { "Ash Wednesday", "Aschermittwoch" }, + { "Assumption", "Mari\u00e4 Himmelfahrt" }, + { "Boxing Day", "2. Weihnachtstag" }, + { "Carnival", "Karneval" }, + { "Christmas", "Weihnachtstag" }, + { "Civic Holiday", "B\u00fcrgerfeiertag" }, + { "Constitution Day", "Verfassungstag" }, + { "Corpus Christi", "Fronleichnam" }, + { "Day of Prayer and Repentance", "Bu\u00df- und Bettag" }, + { "Easter Monday", "Ostermonntag" }, + { "Easter Sunday", "Ostersonntag" }, + { "Epiphany", "Heilige 3 K\u00f6nige" }, + { "Father's Day", "Vatertag" }, + { "Flag Day", "Jahrestag der Nationalflagge" }, + { "German Unity Day", "Tag der deutschen Einheit" }, + { "Good Friday", "Karfreitag" }, + { "Halloween", "Abend vor Allerheiligen" }, + { "Immaculate Conception", "Mari\u00e4 Empf\u00e4ngnis" }, + { "Independence Day", "Unabh\u00e4ngigkeitstag" }, + { "Labor Day", "Tag der Arbeit" }, + { "Liberation Day", "Befreiungstag" }, + { "Mardi Gras", "Faschingsdienstag" }, + { "Maundy Thursday", "Gr\u00fcndonnerstag" }, + { "May Day", "Maifeiertag" }, + { "Memorial Day", "Tag des Gedenkens" }, + { "Mother's Day", "Muttertag" }, + { "National Holiday", "Nationalfeiertag" }, + { "New Year's Day", "Neujahr" }, + { "New Year's Eve", "Silvesterabend" }, + { "Palm Sunday", "Palmsonntag" }, + { "Pentecost", "Pfingsten" }, + { "Presidents' Day", "Pr\u00e4sidentstag" }, + { "Remembrance Day", "Volkstrauertag" }, + { "Revolution Day", "Jahrestag der Revolution" }, + { "Rose Monday", "Rosenmontag" }, + { "St. 
Stephen's Day", "Stephanitag" }, + { "Shrove Tuesday", "Faschingsdienstag" }, + { "Spring Holiday", "Tag des Fr\u00fchlings" }, + { "Summer Bank Holiday", "Bankfeiertag" }, + { "Thanksgiving", "Dankfest" }, + { "Unity Day", "Einheitstag" }, + { "Veterans' Day", "Veteranstag" }, + { "Victory Day", "Tag der Befreiung" }, + { "Washington's Birthday", "Washingtons Geburtstag" }, + { "Whit Monday", "Pfingstmontag" }, + { "Whit Sunday", "Pfingstsonntag" }, + }; + + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java new file mode 100644 index 00000000000..3e6f714d15b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java @@ -0,0 +1,43 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_de_AT extends ListResourceBundle { + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + SimpleHoliday.EPIPHANY, + EasterHoliday.GOOD_FRIDAY, + EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, + EasterHoliday.ASCENSION, + EasterHoliday.WHIT_SUNDAY, + EasterHoliday.WHIT_MONDAY, + EasterHoliday.CORPUS_CHRISTI, + SimpleHoliday.ASSUMPTION, + SimpleHoliday.ALL_SAINTS_DAY, + SimpleHoliday.IMMACULATE_CONCEPTION, + SimpleHoliday.CHRISTMAS, + SimpleHoliday.ST_STEPHENS_DAY, + + new SimpleHoliday(Calendar.MAY, 1, 0, "National Holiday"), + new SimpleHoliday(Calendar.OCTOBER, 31, -Calendar.MONDAY, "National Holiday"), + }; + + static private final Object[][] fContents = { + { "holidays", fHolidays }, + + // Only holidays names different from those used in Germany are listed here + { "Christmas", "Christtag" }, + { "New Year's Day", "Neujahrstag" }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java new file mode 100644 index 00000000000..a39feee6cd6 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java @@ -0,0 +1,38 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_de_DE extends ListResourceBundle { + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + SimpleHoliday.MAY_DAY, + new SimpleHoliday(Calendar.JUNE, 15, Calendar.WEDNESDAY, "Memorial Day"), + new SimpleHoliday(Calendar.OCTOBER, 3, 0, "Unity Day"), + SimpleHoliday.ALL_SAINTS_DAY, + new SimpleHoliday(Calendar.NOVEMBER, 18, 0, "Day of Prayer and Repentance"), + SimpleHoliday.CHRISTMAS, + SimpleHoliday.BOXING_DAY, + + // Easter and related holidays + EasterHoliday.GOOD_FRIDAY, + EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, + EasterHoliday.ASCENSION, + EasterHoliday.WHIT_SUNDAY, + EasterHoliday.WHIT_MONDAY, + }; + + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java new file mode 100644 index 00000000000..f9d76ba731a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java @@ -0,0 +1,29 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_el extends ListResourceBundle { + static private final Object[][] fContents = + { + { "Assumption", "15 \u0391\u03cd\u03b3\u03bf\u03cd\u03c3\u03c4\u03bf\u03c5" }, + { "Boxing Day", "\u0394\u03b5\u03cd\u03c4\u03b5\u03c1\u03b7 \u03bc\u03ad\u03c1\u03b1 \u03c4\u03ce\u03bd \u03a7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c5\u03b3\u03ad\u03bd\u03bd\u03c9\u03bd" }, + { "Christmas", "\u03a7\u03c1\u03b9\u03c3\u03c4\u03bf\u03cd\u03b3\u03b5\u03bd\u03bd\u03b1" }, + { "Clean Monday", "\u039a\u03b1\u03b8\u03b1\u03c1\u03ae \u0394\u03b5\u03c5\u03c4\u03ad\u03c1\u03b1" }, + { "Easter Monday", "\u0394\u03b5\u03cd\u03c4\u03b5\u03c1\u03b7 \u03bc\u03ad\u03c1\u03b1 \u03c4\u03bf\u03cd \u03a0\u03ac\u03c3\u03c7\u03b1" }, + { "Epiphany", "\u0388\u03c0\u03b9\u03c6\u03ac\u03bd\u03b5\u03b9\u03b1" }, + { "Good Friday", "\u039c\u03b5\u03b3\u03ac\u03bb\u03b7 \u03a0\u03b1\u03c1\u03b1\u03c3\u03ba\u03b5\u03c5\u03ae" }, + { "May Day", "\u03a0\u03c1\u03c9\u03c4\u03bf\u03bc\u03b1\u03b3\u03b9\u03ac" }, + { "New Year's Day", "\u03a0\u03c1\u03c9\u03c4\u03bf\u03c7\u03c1\u03bf\u03bd\u03b9\u03ac" }, + { "Ochi Day", "28 \u038c\u03ba\u03c4\u03c9\u03b2\u03c1\u03af\u03bf\u03c5" }, + { "Whit Monday", "\u0394\u03b5\u03cd\u03c4\u03b5\u03c1\u03b7 \u03bc\u03ad\u03c1\u03b1 \u03c4\u03bf\u03cd \u03a0\u03b5\u03bd\u03c4\u03b7\u03ba\u03bf\u03c3\u03c4\u03ae" }, + + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java new file mode 100644 index 00000000000..9e1fabbe994 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java @@ -0,0 +1,39 @@ +/* + ******************************************************************************* + * Copyright 
(C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_el_GR extends ListResourceBundle { + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + SimpleHoliday.EPIPHANY, + + new SimpleHoliday(Calendar.MARCH, 25, 0, "Independence Day"), + + SimpleHoliday.MAY_DAY, + SimpleHoliday.ASSUMPTION, + + new SimpleHoliday(Calendar.OCTOBER, 28, 0, "Ochi Day"), + + SimpleHoliday.CHRISTMAS, + SimpleHoliday.BOXING_DAY, + + // Easter and related holidays in the Orthodox calendar + new EasterHoliday(-2, true, "Good Friday"), + new EasterHoliday( 0, true, "Easter Sunday"), + new EasterHoliday( 1, true, "Easter Monday"), + new EasterHoliday(50, true, "Whit Monday"), + }; + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java new file mode 100644 index 00000000000..abc51e7af2d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java @@ -0,0 +1,28 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_en extends ListResourceBundle { + + // Normally, each HolidayBundle uses the holiday's US English name + // as the string key for looking up the localized name. 
This means + // that the key itself can be used if no name is found for the requested + // locale. + // + // For holidays where the key is _not_ the English name, e.g. in the + // case of conflicts, the English name must be given here. + // + static private final Object[][] fContents = { + { "", "" }, // Can't be empty! + }; + + public synchronized Object[][] getContents() { return fContents; } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java new file mode 100644 index 00000000000..de9f8030fd1 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java @@ -0,0 +1,40 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_en_CA extends ListResourceBundle { + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + new SimpleHoliday(Calendar.MAY, 19, 0, "Victoria Day"), + new SimpleHoliday(Calendar.JULY, 1, 0, "Canada Day"), + new SimpleHoliday(Calendar.AUGUST, 1, Calendar.MONDAY, "Civic Holiday"), + new SimpleHoliday(Calendar.SEPTEMBER, 1, Calendar.MONDAY, "Labor Day"), + new SimpleHoliday(Calendar.OCTOBER, 8, Calendar.MONDAY, "Thanksgiving"), + new SimpleHoliday(Calendar.NOVEMBER, 11, 0, "Remembrance Day"), + SimpleHoliday.CHRISTMAS, + SimpleHoliday.BOXING_DAY, + SimpleHoliday.NEW_YEARS_EVE, + + // Easter and related holidays + //hey {jf} - where are these from? 
+// EasterHoliday.GOOD_FRIDAY, +// EasterHoliday.EASTER_SUNDAY, +// EasterHoliday.EASTER_MONDAY, + }; + + static private final Object[][] fContents = { + { "holidays", fHolidays }, + + { "Labor Day", "Labour Day" }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java new file mode 100644 index 00000000000..9a71420e675 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java @@ -0,0 +1,36 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_en_GB extends ListResourceBundle +{ + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + SimpleHoliday.MAY_DAY, + new SimpleHoliday(Calendar.MAY, 31, -Calendar.MONDAY, "Spring Holiday"), + new SimpleHoliday(Calendar.AUGUST, 31, -Calendar.MONDAY, "Summer Bank Holiday"), + SimpleHoliday.CHRISTMAS, + SimpleHoliday.BOXING_DAY, + new SimpleHoliday(Calendar.DECEMBER, 31, -Calendar.MONDAY, "Christmas Holiday"), + + // Easter and related holidays + EasterHoliday.GOOD_FRIDAY, + EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, + }; + static private final Object[][] fContents = { + { "holidays", fHolidays }, + + { "Labor Day", "Labour Day" }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java new file mode 100644 index 
00000000000..11d1531bf15 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java @@ -0,0 +1,46 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_en_US extends ListResourceBundle +{ + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + new SimpleHoliday(Calendar.JANUARY, 15, Calendar.MONDAY, "Martin Luther King Day", 1986), + + new SimpleHoliday(Calendar.FEBRUARY, 15, Calendar.MONDAY, "Presidents' Day", 1976), + new SimpleHoliday(Calendar.FEBRUARY, 22, "Washington's Birthday", 1776, 1975), + + EasterHoliday.GOOD_FRIDAY, + EasterHoliday.EASTER_SUNDAY, + + new SimpleHoliday(Calendar.MAY, 8, Calendar.SUNDAY, "Mother's Day", 1914), + + new SimpleHoliday(Calendar.MAY, 31, -Calendar.MONDAY, "Memorial Day", 1971), + new SimpleHoliday(Calendar.MAY, 30, "Memorial Day", 1868, 1970), + + new SimpleHoliday(Calendar.JUNE, 15, Calendar.SUNDAY, "Father's Day", 1956), + new SimpleHoliday(Calendar.JULY, 4, "Independence Day", 1776), + new SimpleHoliday(Calendar.SEPTEMBER, 1, Calendar.MONDAY, "Labor Day", 1894), + new SimpleHoliday(Calendar.NOVEMBER, 2, Calendar.TUESDAY, "Election Day"), + new SimpleHoliday(Calendar.OCTOBER, 8, Calendar.MONDAY, "Columbus Day", 1971), + new SimpleHoliday(Calendar.OCTOBER , 31, "Halloween"), + new SimpleHoliday(Calendar.NOVEMBER, 11, "Veterans' Day", 1918), + new SimpleHoliday(Calendar.NOVEMBER, 22, Calendar.THURSDAY, "Thanksgiving", 1863), + + SimpleHoliday.CHRISTMAS, + }; + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { 
return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java new file mode 100644 index 00000000000..14925640db1 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java @@ -0,0 +1,50 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_es extends ListResourceBundle { + static private final Object[][] fContents = { + { "All Saints' Day", "Todos los Santos" }, + { "Armistice Day", "D\u00eda del Armisticio" }, + { "Ascension", "Ascensi\u00f3n" }, + { "Benito Ju\u00e1rez Day", "D\u00eda de la Benito Ju\u00e1rez" }, + { "Boxing Day", "D\u00eda en que se dan Aguinaldos Navide\u00f1os" }, + { "Canada Day", "D\u00eda del Canad\u00e1" }, + { "Christmas Eve", "V\u00EDspera de Navidad" }, + { "Christmas", "Navidad" }, + { "Constitution Day", "D\u00eda de la Constituci\u00f3n" }, + { "Day of the Dead", "D\u00eda de los Muertos" }, + { "Easter Sunday", "Pascua" }, + { "Easter Monday", "Pascua Lunes" }, + { "Epiphany", "Epifan\u00eda" }, + { "Father's Day", "D\u00eda del Padre" }, + { "Flag Day", "D\u00eda de la Bandera" }, + { "Good Friday", "Viernes Santo" }, + { "Halloween", "v\u00edspera de Todos los Santos" }, + { "Independence Day", "D\u00eda de la Independencia" }, + { "Labor Day", "D\u00eda de Trabajadores" }, + { "Maundy Thursday", "Jueves Santo" }, + { "May Day", "Primero de Mayo" }, + { "Memorial Day", "D\u00eda de la Rememoraci\u00f3n" }, + { "Mother's Day", "D\u00eda de la Madre" }, + { "New Year's Day", "A\u00f1o Nuevo" }, + { "Palm Sunday", "Domingo de Ramos" }, + { "Pentecost", 
"Pentecost\u00e9s" }, + { "Presidents' Day", "D\u00eda de Presidentes" }, + { "Revolution Day", "D\u00eda de la Revoluci\u00f3n" }, + { "Shrove Tuesday", "Martes de Carnaval" }, + { "Thanksgiving", "D\u00eda de Acci\u00f3n de Gracias" }, + { "Veterans' Day", "D\u00eda de Veteranos" }, + { "Victoria Day", "D\u00eda de Victoria" }, + { "Whit Sunday", "Pentecost\u00e9s" }, + }; + + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java new file mode 100644 index 00000000000..dc52fcf5100 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java @@ -0,0 +1,34 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_es_MX extends ListResourceBundle { + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + new SimpleHoliday(Calendar.FEBRUARY, 5, 0, "Constitution Day"), + new SimpleHoliday(Calendar.MARCH, 21, 0, "Benito Ju\u00E1rez Day"), + SimpleHoliday.MAY_DAY, + new SimpleHoliday(Calendar.MAY, 5, 0, "Cinco de Mayo"), + new SimpleHoliday(Calendar.JUNE, 1, 0, "Navy Day"), + new SimpleHoliday(Calendar.SEPTEMBER, 16, 0, "Independence Day"), + new SimpleHoliday(Calendar.OCTOBER, 12, 0, "D\u00EDa de la Raza"), + SimpleHoliday.ALL_SAINTS_DAY, + new SimpleHoliday(Calendar.NOVEMBER, 2, 0, "Day of the Dead"), + new SimpleHoliday(Calendar.NOVEMBER, 20, 0, "Revolution Day"), + new SimpleHoliday(Calendar.DECEMBER, 12, 0, "Flag Day"), + SimpleHoliday.CHRISTMAS, + }; + static 
private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java new file mode 100644 index 00000000000..45647782152 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java @@ -0,0 +1,43 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_fr extends ListResourceBundle { + static private final Object[][] fContents = { + { "All Saints' Day", "Toussaint" }, + { "Armistice Day", "Jour de l'Armistice" }, + { "Ascension", "Ascension" }, + { "Bastille Day", "F\u00EAte de la Bastille" }, + { "Benito Ju\u00E1rez Day", "F\u00EAte de Benito Ju\u00E1rez" }, + { "Boxing Day", "Lendemain de No\u00EBl" }, + { "Christmas Eve", "Veille de No\u00EBl" }, + { "Christmas", "No\u00EBl" }, + { "Easter Monday", "P\u00E2ques lundi" }, + { "Easter Sunday", "P\u00E2ques" }, + { "Epiphany", "l'\u00C9piphanie" }, + { "Flag Day", "F\u00EAte du Drapeau" }, + { "Good Friday", "Vendredi Saint" }, + { "Halloween", "Veille de la Toussaint" }, + { "All Saints' Day", "Toussaint" }, + { "Independence Day", "F\u00EAte Ind\u00E9pendance" }, + { "Maundy Thursday", "Jeudi Saint" }, + { "Mother's Day", "F\u00EAte des m\u00E8res" }, + { "National Day", "F\u00EAte Nationale" }, + { "New Year's Day", "Jour de l'an" }, + { "Palm Sunday", "les Rameaux" }, + { "Pentecost", "Pentec\u00F4te" }, + { "Shrove Tuesday", "Mardi Gras" }, + { "St. 
Stephen's Day", "Saint-\u00C9tienne" }, + { "Victoria Day", "F\u00EAte de la Victoria" }, + { "Victory Day", "F\u00EAte de la Victoire" }, + }; + + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java new file mode 100644 index 00000000000..fd347771476 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java @@ -0,0 +1,37 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_fr_CA extends ListResourceBundle { + static private final Holiday[] fHolidays = { + new SimpleHoliday(Calendar.JANUARY, 1, 0, "New Year's Day"), + new SimpleHoliday(Calendar.MAY, 19, 0, "Victoria Day"), + new SimpleHoliday(Calendar.JUNE, 24, 0, "National Day"), + new SimpleHoliday(Calendar.JULY, 1, 0, "Canada Day"), + new SimpleHoliday(Calendar.AUGUST, 1, Calendar.MONDAY, "Civic Holiday"), + new SimpleHoliday(Calendar.SEPTEMBER, 1, Calendar.MONDAY, "Labour Day"), + new SimpleHoliday(Calendar.OCTOBER, 8, Calendar.MONDAY, "Thanksgiving"), + new SimpleHoliday(Calendar.NOVEMBER, 11, 0, "Remembrance Day"), + SimpleHoliday.CHRISTMAS, + SimpleHoliday.BOXING_DAY, + SimpleHoliday.NEW_YEARS_EVE, + + // Easter and related holidays + EasterHoliday.GOOD_FRIDAY, + EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, + }; + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git 
a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java new file mode 100644 index 00000000000..244a9b683cc --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java @@ -0,0 +1,36 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_fr_FR extends ListResourceBundle { + static private final Holiday[] fHolidays = { + SimpleHoliday.NEW_YEARS_DAY, + new SimpleHoliday(Calendar.MAY, 1, 0, "Labor Day"), + new SimpleHoliday(Calendar.MAY, 8, 0, "Victory Day"), + new SimpleHoliday(Calendar.JULY, 14, 0, "Bastille Day"), + SimpleHoliday.ASSUMPTION, + SimpleHoliday.ALL_SAINTS_DAY, + new SimpleHoliday(Calendar.NOVEMBER, 11, 0, "Armistice Day"), + SimpleHoliday.CHRISTMAS, + + // Easter and related holidays + EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, + EasterHoliday.ASCENSION, + EasterHoliday.WHIT_SUNDAY, + EasterHoliday.WHIT_MONDAY, + }; + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java new file mode 100644 index 00000000000..c12bdc742fc --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java @@ -0,0 +1,35 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. 
All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.util.ListResourceBundle; + +public class HolidayBundle_it extends ListResourceBundle { + static private final Object[][] fContents = + { + { "All Saints' Day", "Ognissanti" }, + { "Armistice Day", "armistizio" }, + { "Ascension", "ascensione" }, + { "Ash Wednesday", "mercoled\u00ec delle ceneri" }, + { "Boxing Day", "Santo Stefano" }, + { "Christmas", "natale" }, + { "Easter Sunday", "pasqua" }, + { "Epiphany", "Epifania" }, + { "Good Friday", "venerd\u00ec santo" }, + { "Halloween", "vigilia di Ognissanti" }, + { "Maundy Thursday", "gioved\u00ec santo" }, + { "New Year's Day", "anno nuovo" }, + { "Palm Sunday", "domenica delle palme" }, + { "Pentecost", "di Pentecoste" }, + { "Shrove Tuesday", "martedi grasso" }, + { "St. Stephen's Day", "Santo Stefano" }, + { "Thanksgiving", "Giorno del Ringraziamento" }, + + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java new file mode 100644 index 00000000000..919648f31a1 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java @@ -0,0 +1,35 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
*
+ *******************************************************************************
+ */
+
+package com.ibm.icu.impl.data;
+
+import com.ibm.icu.util.*;
+import java.util.Calendar;
+import java.util.ListResourceBundle;
+
+/**
+ * Resource bundle for the it_IT locale. Its single entry, keyed
+ * "holidays", is the array of Holiday objects listed below.
+ */
+public class HolidayBundle_it_IT extends ListResourceBundle {
+    static private final Holiday[] fHolidays = {
+        SimpleHoliday.NEW_YEARS_DAY,
+        SimpleHoliday.EPIPHANY,
+        // NOTE(review): Italy's Liberation Day is observed on April 25; the
+        // second argument (presumably the day of month) is 1 here, which looks
+        // like a data error -- confirm before changing.
+        new SimpleHoliday(Calendar.APRIL, 1, 0, "Liberation Day"),
+        new SimpleHoliday(Calendar.MAY, 1, 0, "Labor Day"),
+        SimpleHoliday.ASSUMPTION,
+        SimpleHoliday.ALL_SAINTS_DAY,
+        SimpleHoliday.IMMACULATE_CONCEPTION,
+        SimpleHoliday.CHRISTMAS,
+        // The translation bundles (HolidayBundle_it, HolidayBundle_fr) key this
+        // holiday as "St. Stephen's Day", with the apostrophe. Presumably the
+        // holiday name is what gets looked up there, so the spelling must match.
+        new SimpleHoliday(Calendar.DECEMBER, 26, 0, "St. Stephen's Day"),
+        SimpleHoliday.NEW_YEARS_EVE,
+
+        // Easter and related holidays
+        EasterHoliday.EASTER_SUNDAY,
+        EasterHoliday.EASTER_MONDAY,
+    };
+    static private final Object[][] fContents = {
+        { "holidays", fHolidays },
+    };
+
+    /** Returns the static contents table backing this bundle. */
+    public synchronized Object[][] getContents() { return fContents; }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java
new file mode 100644
index 00000000000..53ab7f650a3
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java
@@ -0,0 +1,20 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2005, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.impl.data;
+
+import java.util.ListResourceBundle;
+
+public class HolidayBundle_iw extends ListResourceBundle {
+
+    static private final Object[][] fContents = {
+        {   "",                 "" },       // Can't be empty!
+ }; + + public synchronized Object[][] getContents() { return fContents; } + +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java new file mode 100644 index 00000000000..7d60b086732 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java @@ -0,0 +1,28 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.ListResourceBundle; + +public class HolidayBundle_iw_IL extends ListResourceBundle { + static private final Holiday[] fHolidays = { + HebrewHoliday.ROSH_HASHANAH, + HebrewHoliday.YOM_KIPPUR, + HebrewHoliday.HANUKKAH, + HebrewHoliday.PURIM, + HebrewHoliday.PASSOVER, + HebrewHoliday.SHAVUOT, + HebrewHoliday.SELIHOT, + }; + + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java new file mode 100644 index 00000000000..e7ef8ff6a36 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java @@ -0,0 +1,22 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2005, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import com.ibm.icu.util.*; +import java.util.Calendar; +import java.util.ListResourceBundle; + +public class HolidayBundle_ja_JP extends ListResourceBundle { + static private final Holiday[] fHolidays = { + new SimpleHoliday(Calendar.FEBRUARY, 11, 0, "National Foundation Day"), + }; + static private final Object[][] fContents = { + { "holidays", fHolidays }, + }; + public synchronized Object[][] getContents() { return fContents; } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java b/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java new file mode 100644 index 00000000000..63fd084b37b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java @@ -0,0 +1,242 @@ +/** + ******************************************************************************* + * Copyright (C) 2001-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.data; + +import java.io.*; + +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.Utility; + +/** + * A reader for text resource data in the current package or the package + * of a given class object. The + * resource data is loaded through the class loader, so it will + * typically be a file in the same directory as the *.class files, or + * a file within a JAR file in the corresponding subdirectory. The + * file must be a text file in one of the supported encodings; when the + * resource is opened by constructing a ResourceReader + * object the encoding is specified. 
+ * + * @author Alan Liu + */ +public class ResourceReader { + private BufferedReader reader; + private String resourceName; + private String encoding; // null for default encoding + private Class root; + + /** + * The one-based line number. Has the special value -1 before the + * object is initialized. Has the special value 0 after initialization + * but before the first line is read. + */ + private int lineNo; + + /** + * Construct a reader object for the text file of the given name + * in this package, using the given encoding. + * @param resourceName the name of the text file located in this + * package's ".data" subpackage. + * @param encoding the encoding of the text file; if unsupported + * an exception is thrown + * @exception UnsupportedEncodingException if + * encoding is not supported by the JDK. + */ + public ResourceReader(String resourceName, String encoding) + throws UnsupportedEncodingException { + this(ICUData.class, "data/" + resourceName, encoding); + } + + /** + * Construct a reader object for the text file of the given name + * in this package, using the default encoding. + * @param resourceName the name of the text file located in this + * package's ".data" subpackage. + */ + public ResourceReader(String resourceName) { + this(ICUData.class, "data/" + resourceName); + } + + /** + * Construct a reader object for the text file of the given name + * in the given class's package, using the given encoding. + * @param resourceName the name of the text file located in the + * given class's package. + * @param encoding the encoding of the text file; if unsupported + * an exception is thrown + * @exception UnsupportedEncodingException if + * encoding is not supported by the JDK. 
+ */ + public ResourceReader(Class rootClass, String resourceName, String encoding) + throws UnsupportedEncodingException { + this.root = rootClass; + this.resourceName = resourceName; + this.encoding = encoding; + lineNo = -1; + _reset(); + } + + /** + * Construct a reader object for the input stream associated with + * the given resource name. + * @param is the input stream of the resource + * @param resourceName the name of the resource + */ + public ResourceReader(InputStream is, String resourceName, String encoding) { + this.root = null; + this.resourceName = resourceName; + this.encoding = encoding; + + this.lineNo = -1; + try { + InputStreamReader isr = (encoding == null) + ? new InputStreamReader(is) + : new InputStreamReader(is, encoding); + + this.reader = new BufferedReader(isr); + this.lineNo= 0; + } + catch (UnsupportedEncodingException e) { + } + } + + /** + * Construct a reader object for the input stream associated with + * the given resource name. + * @param is the input stream of the resource + * @param resourceName the name of the resource + */ + public ResourceReader(InputStream is, String resourceName) { + this(is, resourceName, null); + } + + /** + * Construct a reader object for the text file of the given name + * in the given class's package, using the default encoding. + * @param resourceName the name of the text file located in the + * given class's package. + */ + public ResourceReader(Class rootClass, String resourceName) { + this.root = rootClass; + this.resourceName = resourceName; + this.encoding = null; + lineNo = -1; + try { + _reset(); + } catch (UnsupportedEncodingException e) {} + } + + /** + * Read and return the next line of the file or null + * if the end of the file has been reached. 
+     */
+    public String readLine() throws IOException {
+        if (lineNo == 0) {
+            // First line after (re)initialization: strip a leading byte order
+            // mark if present. The line may be null (empty resource) or empty
+            // (blank first line), so guard before looking at its first char.
+            // NOTE(review): U+FFEF is not a BOM code point (possibly meant
+            // U+FFFE); the check is kept unchanged to preserve behavior.
+            ++lineNo;
+            String line = reader.readLine();
+            if (line != null && line.length() > 0 &&
+                (line.charAt(0) == '\uFFEF' ||
+                 line.charAt(0) == '\uFEFF')) {
+                line = line.substring(1);
+            }
+            return line;
+        }
+        ++lineNo;
+        return reader.readLine();
+    }
+
+    /**
+     * Read a line, ignoring blank lines and lines that start with
+     * '#'.
+     * @param trim if true then trim leading rule white space.
+     * @return the next non-blank, non-comment line, or null once
+     * readLine() returns null
+     */
+    public String readLineSkippingComments(boolean trim) throws IOException {
+        for (;;) {
+            String line = readLine();
+            if (line == null) {
+                return line;
+            }
+            // Skip over white space
+            int pos = Utility.skipWhitespace(line, 0);
+            // Ignore blank lines and comment lines
+            if (pos == line.length() || line.charAt(pos) == '#') {
+                continue;
+            }
+            // Process line
+            if (trim) line = line.substring(pos);
+            return line;
+        }
+    }
+
+
+    /**
+     * Read a line, ignoring blank lines and lines that start with
+     * '#'.  Do not trim leading rule white space.
+     */
+    public String readLineSkippingComments() throws IOException {
+        return readLineSkippingComments(false);
+    }
+
+    /**
+     * Return the one-based line number of the last line returned by
+     * readLine() or readLineSkippingComments(). Should only be called
+     * after a call to one of these methods; otherwise the return
+     * value is undefined.
+     */
+    public int getLineNumber() {
+        return lineNo;
+    }
+
+    /**
+     * Return a string description of the position of the last line
+     * returned by readLine() or readLineSkippingComments(), in the
+     * form resourceName ':' lineNumber.
+     */
+    public String describePosition() {
+        return resourceName + ':' + lineNo;
+    }
+
+    /**
+     * Reset this reader so that the next call to
+     * readLine() returns the first line of the file
+     * again.  This is a somewhat expensive call, however, calling
+     * reset() after calling it the first time does
+     * nothing if readLine() has not been called in
+     * between.
+ */ + public void reset() { + try { + _reset(); + } catch (UnsupportedEncodingException e) {} + // We swallow this exception, if there is one. If the encoding is + // invalid, the constructor will have thrown this exception already and + // the caller shouldn't use the object afterwards. + } + + /** + * Reset to the start by reconstructing the stream and readers. + * We could also use mark() and reset() on the stream or reader, + * but that would cause them to keep the stream data around in + * memory. We don't want that because some of the resource files + * are large, e.g., 400k. + */ + private void _reset() throws UnsupportedEncodingException { + if (lineNo == 0) { + return; + } + InputStream is = ICUData.getStream(root, resourceName); + if (is == null) { + throw new IllegalArgumentException("Can't open " + resourceName); + } + + InputStreamReader isr = + (encoding == null) ? new InputStreamReader(is) : + new InputStreamReader(is, encoding); + reader = new BufferedReader(isr); + lineNo = 0; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java b/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java new file mode 100644 index 00000000000..c99d2808323 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/TokenIterator.java @@ -0,0 +1,159 @@ +/* +********************************************************************** +* Copyright (c) 2004-2008, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: March 16 2004 +* Since: ICU 3.0 +********************************************************************** +*/ +package com.ibm.icu.impl.data; + +import java.io.IOException; + +import com.ibm.icu.impl.UCharacterProperty; +import com.ibm.icu.impl.Utility; +import com.ibm.icu.text.UTF16; + +/** + * An iterator class that returns successive string tokens from some + * source. 
String tokens are, in general, separated by rule white
+ * space in the source text.  Furthermore, they may be delimited by
+ * either single or double quotes (opening and closing quotes must
+ * match).  Escapes are processed using standard ICU unescaping.
+ */
+public class TokenIterator {
+
+    // Supplies the lines to tokenize.
+    private ResourceReader reader;
+    // Line currently being scanned; null means a new line must be read.
+    private String line;
+    // Accumulates the characters of the token being scanned.
+    private StringBuffer buf;
+    // Set once the reader has no more lines; next() then returns null.
+    private boolean done;
+    // Offset into 'line' at which the next scan starts.
+    private int pos;
+    // Offset into 'line' at which the most recent scan started.
+    private int lastpos;
+
+    /**
+     * Construct an iterator over the tokens returned by the given
+     * ResourceReader, ignoring blank lines and comment lines (first
+     * non-blank character is '#').  Note that trailing comments on a
+     * line, beginning with the first unquoted '#', are recognized.
+     */
+    public TokenIterator(ResourceReader r) {
+        reader = r;
+        line = null;
+        done = false;
+        buf = new StringBuffer();
+        pos = lastpos = -1;
+    }
+
+    /**
+     * Return the next token from this iterator, or null if the last
+     * token has been returned.
+     */
+    public String next() throws IOException {
+        if (done) {
+            return null;
+        }
+        for (;;) {
+            if (line == null) {
+                line = reader.readLineSkippingComments();
+                if (line == null) {
+                    done = true;
+                    return null;
+                }
+                pos = 0;
+            }
+            buf.setLength(0);
+            lastpos = pos;
+            pos = nextToken(pos);
+            if (pos < 0) {
+                // No token left on this line; fetch the next one.
+                line = null;
+                continue;
+            }
+            return buf.toString();
+        }
+    }
+
+    /**
+     * Return the one-based line number of the line of the last token returned by
+     * next(). Should only be called
+     * after a call to next(); otherwise the return
+     * value is undefined.
+     */
+    public int getLineNumber() {
+        return reader.getLineNumber();
+    }
+
+    /**
+     * Return a string description of the position of the last token
+     * returned by next(): the reader's position description followed
+     * by ':' and the one-based offset within the line at which the
+     * scan for that token began.
+     */
+    public String describePosition() {
+        return reader.describePosition() + ':' + (lastpos+1);
+    }
+
+    /**
+     * Read the next token from 'this.line' and append it to
+     * 'this.buf'.  Tokens are separated by rule white space.
Tokens + * may also be delimited by double or single quotes. The closing + * quote must match the opening quote. If a '#' is encountered, + * the rest of the line is ignored, unless it is backslash-escaped + * or within quotes. + * @param position the offset into the string + * @return offset to the next character to read from line, or if + * the end of the line is reached without scanning a valid token, + * -1 + */ + private int nextToken(int position) { + position = Utility.skipWhitespace(line, position); + if (position == line.length()) { + return -1; + } + int startpos = position; + char c = line.charAt(position++); + char quote = 0; + switch (c) { + case '"': + case '\'': + quote = c; + break; + case '#': + return -1; + default: + buf.append(c); + break; + } + int[] posref = null; + while (position < line.length()) { + c = line.charAt(position); // 16-bit ok + if (c == '\\') { + if (posref == null) { + posref = new int[1]; + } + posref[0] = position+1; + int c32 = Utility.unescapeAt(line, posref); + if (c32 < 0) { + throw new RuntimeException("Invalid escape at " + + reader.describePosition() + ':' + + position); + } + UTF16.append(buf, c32); + position = posref[0]; + } else if ((quote != 0 && c == quote) || + (quote == 0 && UCharacterProperty.isRuleWhiteSpace(c))) { + return ++position; + } else if (quote == 0 && c == '#') { + return position; // do NOT increment + } else { + buf.append(c); + ++position; + } + } + if (quote != 0) { + throw new RuntimeException("Unterminated quote at " + + reader.describePosition() + ':' + + startpos); + } + return position; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/data/package.html b/main/classes/core/src/com/ibm/icu/impl/data/package.html new file mode 100644 index 00000000000..4f9bcd30952 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/data/package.html @@ -0,0 +1,12 @@ + + + + + + +Resource data used by classes in com.ibm.text. 
+ + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormat.java b/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormat.java new file mode 100644 index 00000000000..2683ebc84f5 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormat.java @@ -0,0 +1,177 @@ +/* + ******************************************************************************* + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.duration; + +import java.text.FieldPosition; +import java.util.Date; + +import com.ibm.icu.text.DurationFormat; +import com.ibm.icu.util.ULocale; + +/** + * @author srl + */ +public class BasicDurationFormat extends DurationFormat { + + /** + * + */ + private static final long serialVersionUID = -3146984141909457700L; + + transient DurationFormatter formatter; + transient PeriodFormatter pformatter; + transient PeriodFormatterService pfs = null; + + public static BasicDurationFormat getInstance(ULocale locale) { + return new BasicDurationFormat(locale); + } + + private static boolean checkXMLDuration = true; + + public StringBuffer format(Object object, StringBuffer toAppend, FieldPosition pos) { + if(object instanceof Long) { + String res = formatDurationFromNow(((Long)object).longValue()); + return toAppend.append(res); + } else if(object instanceof Date) { + String res = formatDurationFromNowTo(((Date)object)); + return toAppend.append(res); + } + if(checkXMLDuration) try { + if(object instanceof javax.xml.datatype.Duration) { + String res = formatDuration(object); + return toAppend.append(res); + } + } catch ( NoClassDefFoundError ncdfe ) { + System.err.println("Skipping XML capability"); + checkXMLDuration = false; // don't try again + } + throw new IllegalArgumentException("Cannot format given Object as 
a Duration"); + + } + + public BasicDurationFormat() { + pfs = BasicPeriodFormatterService.getInstance(); + formatter = pfs.newDurationFormatterFactory().getFormatter(); + pformatter = pfs.newPeriodFormatterFactory().setDisplayPastFuture(false).getFormatter(); + } + /** + * + */ + public BasicDurationFormat(ULocale locale) { + super(locale); + pfs = BasicPeriodFormatterService.getInstance(); + formatter = pfs.newDurationFormatterFactory().setLocale(locale.getName()).getFormatter(); + pformatter = pfs.newPeriodFormatterFactory().setDisplayPastFuture(false).setLocale(locale.getName()).getFormatter(); + } + + /* (non-Javadoc) + * @see com.ibm.icu.text.DurationFormat#formatDurationFrom(long, long) + */ + public String formatDurationFrom(long duration, long referenceDate) { + return formatter.formatDurationFrom(duration, referenceDate); + } + + /* (non-Javadoc) + * @see com.ibm.icu.text.DurationFormat#formatDurationFromNow(long) + */ + public String formatDurationFromNow(long duration) { + return formatter.formatDurationFromNow(duration); + } + + /* (non-Javadoc) + * @see com.ibm.icu.text.DurationFormat#formatDurationFromNowTo(java.util.Date) + */ + public String formatDurationFromNowTo(Date targetDate) { + return formatter.formatDurationFromNowTo(targetDate); + } + + /** + * JDK 1.5+ only + * @param obj Object being passed. + * @return The PeriodFormatter object formatted to the object passed. 
+ * @see "http://java.sun.com/j2se/1.5.0/docs/api/javax/xml/datatype/Duration.html" + */ + public String formatDuration(Object obj) { + javax.xml.datatype.DatatypeConstants.Field inFields[] = { + javax.xml.datatype.DatatypeConstants.YEARS, + javax.xml.datatype.DatatypeConstants.MONTHS, + javax.xml.datatype.DatatypeConstants.DAYS, + javax.xml.datatype.DatatypeConstants.HOURS, + javax.xml.datatype.DatatypeConstants.MINUTES, + javax.xml.datatype.DatatypeConstants.SECONDS, + }; + TimeUnit outFields[] = { + TimeUnit.YEAR, + TimeUnit.MONTH, + TimeUnit.DAY, + TimeUnit.HOUR, + TimeUnit.MINUTE, + TimeUnit.SECOND, + }; + + javax.xml.datatype.Duration inDuration = (javax.xml.datatype.Duration)obj; + Period p = null; + javax.xml.datatype.Duration duration = inDuration; + boolean inPast = false; + if(inDuration.getSign()<0) { + duration = inDuration.negate(); + inPast = true; + } + // convert a Duration to a Period + boolean sawNonZero = false; // did we have a set, non-zero field? + for(int i=0;i 0.0) { + alternateUnit = TimeUnit.MILLISECOND; + alternateVal=(float)millis; + floatVal=(float)intSeconds; + } + } + + if(p == null) { + p = Period.at(floatVal, outFields[i]); + } else { + p = p.and(floatVal, outFields[i]); + } + + if(alternateUnit != null) { + p = p.and(alternateVal, alternateUnit); // add in MILLISECONDs + } + } + } + + if(p == null) { + // no fields set = 0 seconds + return formatDurationFromNow(0); + } else { + if(inPast) {// was negated, above. + p = p.inPast(); + } else { + p = p.inFuture(); + } + } + + // now, format it. 
+ return pformatter.format(p); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java b/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java new file mode 100644 index 00000000000..67ee04d2238 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java @@ -0,0 +1,118 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.Date; +import java.util.TimeZone; + +/** + * Core implementation class for DurationFormatter. + */ +class BasicDurationFormatter implements DurationFormatter { + private PeriodFormatter formatter; + private PeriodBuilder builder; + private DateFormatter fallback; + private long fallbackLimit; + private String localeName; + private TimeZone timeZone; + + /** + * Creates a basic duration formatter with the given formatter, + * builder, and fallback. It's up to the caller to ensure that + * the locales and timezones of these are in sync. + */ + public BasicDurationFormatter(PeriodFormatter formatter, + PeriodBuilder builder, + DateFormatter fallback, + long fallbackLimit) { + this.formatter = formatter; + this.builder = builder; + this.fallback = fallback; + this.fallbackLimit = fallbackLimit < 0 ? 
0 : fallbackLimit;
+    }
+
+    /**
+     * Creates a duration formatter that also records the locale name and
+     * time zone it was built for. Used by withLocale and withTimeZone.
+     * Unlike the public constructor, fallbackLimit is stored as given.
+     */
+    protected BasicDurationFormatter(PeriodFormatter formatter,
+                                     PeriodBuilder builder,
+                                     DateFormatter fallback,
+                                     long fallbackLimit,
+                                     String localeName,
+                                     TimeZone timeZone) {
+        this.formatter = formatter;
+        this.builder = builder;
+        this.fallback = fallback;
+        this.fallbackLimit = fallbackLimit;
+        this.localeName = localeName;
+        this.timeZone = timeZone;
+    }
+
+    /**
+     * Formats the duration from the current time to targetDate.
+     *
+     * @param targetDate the date the duration runs to; must not be null
+     * @return the formatted duration
+     */
+    public String formatDurationFromNowTo(Date targetDate) {
+        long now = System.currentTimeMillis();
+        // The duration is measured from the reference date (now) to the
+        // target, so that referenceDate + duration is the target instant.
+        // doFallback formats exactly that sum as the date.
+        long duration = targetDate.getTime() - now;
+        return formatDurationFrom(duration, now);
+    }
+
+    /**
+     * Formats a duration in milliseconds, using the current time as the
+     * reference date.
+     */
+    public String formatDurationFromNow(long duration) {
+        return formatDurationFrom(duration, System.currentTimeMillis());
+    }
+
+    /**
+     * Formats a duration relative to a reference date. The fallback
+     * formatter is tried first; if it does not apply, a Period is built
+     * from the duration and formatted.
+     *
+     * @param duration the duration in milliseconds
+     * @param referenceDate the date, in milliseconds, the duration is relative to
+     * @return the formatted duration
+     */
+    public String formatDurationFrom(long duration, long referenceDate) {
+        String s = doFallback(duration, referenceDate);
+        if (s == null) {
+            Period p = doBuild(duration, referenceDate);
+            s = doFormat(p);
+        }
+        return s;
+    }
+
+    /**
+     * Returns a formatter for the given locale name: this formatter if the
+     * name equals the current one, otherwise a new formatter whose period
+     * formatter, builder and fallback are re-derived for that locale.
+     *
+     * @param locName the locale name; must not be null
+     */
+    public DurationFormatter withLocale(String locName) {
+        if (!locName.equals(localeName)) {
+            PeriodFormatter newFormatter = formatter.withLocale(locName);
+            PeriodBuilder newBuilder = builder.withLocale(locName);
+            DateFormatter newFallback = fallback == null
+                ? null
+                : fallback.withLocale(locName);
+            return new BasicDurationFormatter(newFormatter, newBuilder,
+                                              newFallback, fallbackLimit,
+                                              locName, timeZone);
+        }
+        return this;
+    }
+
+    /**
+     * Returns a formatter for the given time zone: this formatter if the
+     * zone equals the current one, otherwise a new formatter whose builder
+     * and fallback are re-derived for that zone.
+     *
+     * @param tz the time zone; must not be null
+     */
+    public DurationFormatter withTimeZone(TimeZone tz) {
+        if (!tz.equals(timeZone)) {
+            PeriodBuilder newBuilder = builder.withTimeZone(tz);
+            DateFormatter newFallback = fallback == null
+                ?
null + : fallback.withTimeZone(tz); + return new BasicDurationFormatter(formatter, newBuilder, + newFallback, fallbackLimit, + localeName, tz); + } + return this; + } + + protected String doFallback(long duration, long referenceDate) { + if (fallback != null + && fallbackLimit > 0 + && Math.abs(duration) >= fallbackLimit) { + return fallback.format(referenceDate + duration); + } + return null; + } + + protected Period doBuild(long duration, long referenceDate) { + return builder.createWithReferenceDate(duration, referenceDate); + } + + protected String doFormat(Period period) { + if (!period.isSet()) { + throw new IllegalArgumentException("period is not set"); + } + return formatter.format(period); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java b/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java new file mode 100644 index 00000000000..2a95be76b7b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java @@ -0,0 +1,246 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.Locale; +import java.util.TimeZone; + +/** + * Abstract factory object used to create DurationFormatters. + * DurationFormatters are immutable once created. + *

    + * Setters on the factory mutate the factory and return it, + * for chaining. + *

    + * Subclasses override getFormatter to return a custom + * DurationFormatter. + */ +class BasicDurationFormatterFactory implements DurationFormatterFactory { + private BasicPeriodFormatterService ps; + private PeriodFormatter formatter; + private PeriodBuilder builder; + private DateFormatter fallback; + private long fallbackLimit; + private String localeName; + private TimeZone timeZone; + private BasicDurationFormatter f; // cache + + /** + * Create a default formatter for the current locale and time zone. + */ + BasicDurationFormatterFactory(BasicPeriodFormatterService ps) { + this.ps = ps; + this.localeName = Locale.getDefault().toString(); + this.timeZone = TimeZone.getDefault(); + } + + /** + * Set the period formatter used by the factory. New formatters created + * with this factory will use the given period formatter. + * + * @return this BasicDurationFormatterFactory + */ + public DurationFormatterFactory setPeriodFormatter( + PeriodFormatter formatter) { + if (formatter != this.formatter) { + this.formatter = formatter; + reset(); + } + return this; + } + + /** + * Set the builder used by the factory. New formatters created + * with this factory will use the given locale. + * + * @param builder the builder to use + * @return this BasicDurationFormatterFactory + */ + public DurationFormatterFactory setPeriodBuilder(PeriodBuilder builder) { + if (builder != this.builder) { + this.builder = builder; + reset(); + } + return this; + } + + /** + * Set a fallback formatter for durations over a given limit. + * + * @param fallback the fallback formatter to use, or null + * @return this BasicDurationFormatterFactory + */ + public DurationFormatterFactory setFallback(DateFormatter fallback) { + boolean doReset = fallback == null + ? this.fallback != null + : !fallback.equals(this.fallback); + if (doReset) { + this.fallback = fallback; + reset(); + } + return this; + } + + /** + * Set a fallback limit for durations over a given limit. 
+ * + * @param fallbackLimit the fallback limit to use, or 0 if none is desired. + * @return this BasicDurationFormatterFactory + */ + public DurationFormatterFactory setFallbackLimit(long fallbackLimit) { + if (fallbackLimit < 0) { + fallbackLimit = 0; + } + if (fallbackLimit != this.fallbackLimit) { + this.fallbackLimit = fallbackLimit; + reset(); + } + return this; + } + + /** + * Set the name of the locale that will be used when + * creating new formatters. + * + * @param localeName the name of the Locale + * @return this BasicDurationFormatterFactory + */ + public DurationFormatterFactory setLocale(String localeName) { + if (!localeName.equals(this.localeName)) { + this.localeName = localeName; + if (builder != null) { + builder = builder.withLocale(localeName); + } + if (formatter != null) { + formatter = formatter.withLocale(localeName); + } + reset(); + } + return this; + } + + /** + * Set the name of the locale that will be used when + * creating new formatters. + * + * @param timeZone The time zone to use. + * @return this BasicDurationFormatterFactory + */ + public DurationFormatterFactory setTimeZone(TimeZone timeZone) { + if (!timeZone.equals(this.timeZone)) { + this.timeZone = timeZone; + if (builder != null) { + builder = builder.withTimeZone(timeZone); + } + reset(); + } + return this; + } + + /** + * Return a formatter based on this factory's current settings. + * + * @return a BasicDurationFormatter + */ + public DurationFormatter getFormatter() { + if (f == null) { + if (fallback != null) { + fallback = fallback.withLocale(localeName).withTimeZone(timeZone); + } + formatter = getPeriodFormatter(); + builder = getPeriodBuilder(); + + f = createFormatter(); + } + return f; + } + + /** + * Return the current period formatter. 
+ * + * @return the current period formatter + */ + public PeriodFormatter getPeriodFormatter() { + if (formatter == null) { + formatter = ps.newPeriodFormatterFactory() + .setLocale(localeName) + .getFormatter(); + } + return formatter; + } + + /** + * Return the current builder. + * + * @return the current builder + */ + public PeriodBuilder getPeriodBuilder() { + if (builder == null) { + builder = ps.newPeriodBuilderFactory() + .setLocale(localeName) + .setTimeZone(timeZone) + .getSingleUnitBuilder(); + } + return builder; + } + + /** + * Return the current fallback formatter. + * + * @return the fallback formatter, or null if there is no fallback + * formatter + */ + public DateFormatter getFallback() { + return fallback; + } + + /** + * Return the current fallback formatter limit + * + * @return the limit, or 0 if there is no fallback. + */ + public long getFallbackLimit() { + return fallback == null ? 0 : fallbackLimit; + } + + /** + * Return the current locale name. + * + * @return the current locale name + */ + public String getLocaleName() { + return localeName; + } + + /** + * Return the current locale name. + * + * @return the current locale name + */ + public TimeZone getTimeZone() { + return timeZone; + } + + /** + * Create the formatter. All local fields are already initialized. + */ + protected BasicDurationFormatter createFormatter() { + return new BasicDurationFormatter(formatter, builder, fallback, + fallbackLimit, localeName, + timeZone); + } + + /** + * Clear the cached formatter. Subclasses must call this if their + * state has changed. 
This is automatically invoked by setBuilder, + * setFormatter, setFallback, setLocaleName, and setTimeZone + */ + protected void reset() { + f = null; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java new file mode 100644 index 00000000000..027aa8d58bc --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java @@ -0,0 +1,514 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import com.ibm.icu.impl.duration.impl.DataRecord; +import com.ibm.icu.impl.duration.impl.PeriodFormatterData; +import com.ibm.icu.impl.duration.impl.PeriodFormatterDataService; + +import java.util.TimeZone; + +/** + * Default implementation of PeriodBuilderFactory. This creates builders that + * use approximate durations. + */ +class BasicPeriodBuilderFactory implements PeriodBuilderFactory { + private PeriodFormatterDataService ds; + private Settings settings; + + private static final short allBits = 0xff; + + BasicPeriodBuilderFactory(PeriodFormatterDataService ds) { + this.ds = ds; + this.settings = new Settings(); + } + + static long approximateDurationOf(TimeUnit unit) { + return TimeUnit.approxDurations[unit.ordinal]; + } + + class Settings { + boolean inUse; + short uset = allBits; + TimeUnit maxUnit = TimeUnit.YEAR; + TimeUnit minUnit = TimeUnit.MILLISECOND; + int maxLimit; + int minLimit; + boolean allowZero = true; + boolean weeksAloneOnly; + boolean allowMillis = true; + + Settings setUnits(int uset) { + if (this.uset == uset) { + return this; + } + Settings result = inUse ? 
copy() : this; + + result.uset = (short)uset; + + if ((uset & allBits) == allBits) { + result.uset = allBits; + result.maxUnit = TimeUnit.YEAR; + result.minUnit = TimeUnit.MILLISECOND; + } else { + int lastUnit = -1; + for (int i = 0; i < TimeUnit.units.length; ++i) { + if (0 != (uset & (1 << i))) { + if (lastUnit == -1) { + result.maxUnit = TimeUnit.units[i]; + } + lastUnit = i; + } + } + if (lastUnit == -1) { + // currently empty, but this might be transient so no fail + result.minUnit = result.maxUnit = null; + } else { + result.minUnit = TimeUnit.units[lastUnit]; + } + } + + return result; + } + + short effectiveSet() { + if (allowMillis) { + return uset; + } + return (short)(uset & ~(1 << TimeUnit.MILLISECOND.ordinal)); + } + + TimeUnit effectiveMinUnit() { + if (allowMillis || minUnit != TimeUnit.MILLISECOND) { + return minUnit; + } + // -1 to skip millisecond + for (int i = TimeUnit.units.length - 1; --i >= 0;) { + if (0 != (uset & (1 << i))) { + return TimeUnit.units[i]; + } + } + return TimeUnit.SECOND; // default for pathological case + } + + Settings setMaxLimit(float maxLimit) { + int val = maxLimit <= 0 ? 0 : (int)(maxLimit*1000); + if (maxLimit == val) { + return this; + } + Settings result = inUse ? copy() : this; + result.maxLimit = val; + return result; + } + + Settings setMinLimit(float minLimit) { + int val = minLimit <= 0 ? 0 : (int)(minLimit*1000); + if (minLimit == val) { + return this; + } + Settings result = inUse ? copy() : this; + result.minLimit = val; + return result; + } + + Settings setAllowZero(boolean allow) { + if (this.allowZero == allow) { + return this; + } + Settings result = inUse ? copy() : this; + result.allowZero = allow; + return result; + } + + Settings setWeeksAloneOnly(boolean weeksAlone) { + if (this.weeksAloneOnly == weeksAlone) { + return this; + } + Settings result = inUse ? 
copy() : this; + result.weeksAloneOnly = weeksAlone; + return result; + } + + Settings setAllowMilliseconds(boolean allowMillis) { + if (this.allowMillis == allowMillis) { + return this; + } + Settings result = inUse ? copy() : this; + result.allowMillis = allowMillis; + return result; + } + + Settings setLocale(String localeName) { + PeriodFormatterData data = ds.get(localeName); + return this + .setAllowZero(data.allowZero()) + .setWeeksAloneOnly(data.weeksAloneOnly()) + .setAllowMilliseconds(data.useMilliseconds() != DataRecord.EMilliSupport.NO); + } + + Settings setInUse() { + inUse = true; + return this; + } + + Period createLimited(long duration, boolean inPast) { + if (maxLimit > 0) { + long maxUnitDuration = approximateDurationOf(maxUnit); + if (duration * 1000 > maxLimit * maxUnitDuration) { + return Period.moreThan(maxLimit/1000f, maxUnit).inPast(inPast); + } + } + + if (minLimit > 0) { + TimeUnit emu = effectiveMinUnit(); + long emud = approximateDurationOf(emu); + long eml = (emu == minUnit) ? 
minLimit : + Math.max(1000, (approximateDurationOf(minUnit) * minLimit) / emud); + if (duration * 1000 < eml * emud) { + return Period.lessThan(eml/1000f, emu).inPast(inPast); + } + } + return null; + } + + public Settings copy() { + Settings result = new Settings(); + result.inUse = inUse; + result.uset = uset; + result.maxUnit = maxUnit; + result.minUnit = minUnit; + result.maxLimit = maxLimit; + result.minLimit = minLimit; + result.allowZero = allowZero; + result.weeksAloneOnly = weeksAloneOnly; + result.allowMillis = allowMillis; + return result; + } + } + + public PeriodBuilderFactory setAvailableUnitRange(TimeUnit minUnit, + TimeUnit maxUnit) { + int uset = 0; + for (int i = maxUnit.ordinal; i <= minUnit.ordinal; ++i) { + uset |= 1 << i; + } + if (uset == 0) { + throw new IllegalArgumentException("range " + minUnit + " to " + maxUnit + " is empty"); + } + settings = settings.setUnits(uset); + return this; + } + + public PeriodBuilderFactory setUnitIsAvailable(TimeUnit unit, + boolean available) { + int uset = settings.uset; + if (available) { + uset |= 1 << unit.ordinal; + } else { + uset &= ~(1 << unit.ordinal); + } + settings = settings.setUnits(uset); + return this; + } + + public PeriodBuilderFactory setMaxLimit(float maxLimit) { + settings = settings.setMaxLimit(maxLimit); + return this; + } + + public PeriodBuilderFactory setMinLimit(float minLimit) { + settings = settings.setMinLimit(minLimit); + return this; + } + + public PeriodBuilderFactory setAllowZero(boolean allow) { + settings = settings.setAllowZero(allow); + return this; + } + + public PeriodBuilderFactory setWeeksAloneOnly(boolean aloneOnly) { + settings = settings.setWeeksAloneOnly(aloneOnly); + return this; + } + + public PeriodBuilderFactory setAllowMilliseconds(boolean allow) { + settings = settings.setAllowMilliseconds(allow); + return this; + } + + public PeriodBuilderFactory setLocale(String localeName) { + settings = settings.setLocale(localeName); + return this; + } + + public 
PeriodBuilderFactory setTimeZone(TimeZone timeZone) { + // ignore this + return this; + } + + private Settings getSettings() { + if (settings.effectiveSet() == 0) { + return null; + } + return settings.setInUse(); + } + + /** + * Return a builder that represents relative time in terms of the single + * given TimeUnit + * + * @param unit the single TimeUnit with which to represent times + * @return a builder + */ + public PeriodBuilder getFixedUnitBuilder(TimeUnit unit) { + return FixedUnitBuilder.get(unit, getSettings()); + } + + /** + * Return a builder that represents relative time in terms of the + * largest period less than or equal to the duration. + * + * @return a builder + */ + public PeriodBuilder getSingleUnitBuilder() { + return SingleUnitBuilder.get(getSettings()); + } + + /** + * Return a builder that formats the largest one or two periods, + * Starting with the largest period less than or equal to the duration. + * It formats two periods if the first period has a count < 2 + * and the next period has a count >= 1. + * + * @return a builder + */ + public PeriodBuilder getOneOrTwoUnitBuilder() { + return OneOrTwoUnitBuilder.get(getSettings()); + } + + /** + * Return a builder that formats the given number of periods, + * starting with the largest period less than or equal to the + * duration. 
+ * + * @return a builder + */ + public PeriodBuilder getMultiUnitBuilder(int periodCount) { + return MultiUnitBuilder.get(periodCount, getSettings()); + } +} + +abstract class PeriodBuilderImpl implements PeriodBuilder { + + protected BasicPeriodBuilderFactory.Settings settings; + + public Period create(long duration) { + return createWithReferenceDate(duration, System.currentTimeMillis()); + } + + public long approximateDurationOf(TimeUnit unit) { + return BasicPeriodBuilderFactory.approximateDurationOf(unit); + } + + public Period createWithReferenceDate(long duration, long referenceDate) { + boolean inPast = duration < 0; + if (inPast) { + duration = -duration; + } + Period ts = settings.createLimited(duration, inPast); + if (ts == null) { + ts = handleCreate(duration, referenceDate, inPast); + if (ts == null) { + ts = Period.lessThan(1, settings.effectiveMinUnit()).inPast(inPast); + } + } + return ts; + } + + public PeriodBuilder withTimeZone(TimeZone timeZone) { + // ignore the time zone + return this; + } + + public PeriodBuilder withLocale(String localeName) { + BasicPeriodBuilderFactory.Settings newSettings = settings.setLocale(localeName); + if (newSettings != settings) { + return withSettings(newSettings); + } + return this; + } + + protected abstract PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse); + + protected abstract Period handleCreate(long duration, long referenceDate, + boolean inPast); + + protected PeriodBuilderImpl(BasicPeriodBuilderFactory.Settings settings) { + this.settings = settings; + } +} + +class FixedUnitBuilder extends PeriodBuilderImpl { + private TimeUnit unit; + + public static FixedUnitBuilder get(TimeUnit unit, BasicPeriodBuilderFactory.Settings settingsToUse) { + if (settingsToUse != null && (settingsToUse.effectiveSet() & (1 << unit.ordinal)) != 0) { + return new FixedUnitBuilder(unit, settingsToUse); + } + return null; + } + + FixedUnitBuilder(TimeUnit unit, BasicPeriodBuilderFactory.Settings 
settings) { + super(settings); + this.unit = unit; + } + + protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { + return get(unit, settingsToUse); + } + + protected Period handleCreate(long duration, long referenceDate, + boolean inPast) { + if (unit == null) { + return null; + } + long unitDuration = approximateDurationOf(unit); + return Period.at((float)((double)duration/unitDuration), unit) + .inPast(inPast); + } +} + +class SingleUnitBuilder extends PeriodBuilderImpl { + SingleUnitBuilder(BasicPeriodBuilderFactory.Settings settings) { + super(settings); + } + + public static SingleUnitBuilder get(BasicPeriodBuilderFactory.Settings settings) { + if (settings == null) { + return null; + } + return new SingleUnitBuilder(settings); + } + + protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { + return SingleUnitBuilder.get(settingsToUse); + } + + protected Period handleCreate(long duration, long referenceDate, + boolean inPast) { + short uset = settings.effectiveSet(); + for (int i = 0; i < TimeUnit.units.length; ++i) { + if (0 != (uset & (1 << i))) { + TimeUnit unit = TimeUnit.units[i]; + long unitDuration = approximateDurationOf(unit); + if (duration >= unitDuration) { + return Period.at((float)((double)duration/unitDuration), unit) + .inPast(inPast); + } + } + } + return null; + } +} + +class OneOrTwoUnitBuilder extends PeriodBuilderImpl { + OneOrTwoUnitBuilder(BasicPeriodBuilderFactory.Settings settings) { + super(settings); + } + + public static OneOrTwoUnitBuilder get(BasicPeriodBuilderFactory.Settings settings) { + if (settings == null) { + return null; + } + return new OneOrTwoUnitBuilder(settings); + } + + protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { + return OneOrTwoUnitBuilder.get(settingsToUse); + } + + protected Period handleCreate(long duration, long referenceDate, + boolean inPast) { + Period period = null; + short uset = 
settings.effectiveSet(); + for (int i = 0; i < TimeUnit.units.length; ++i) { + if (0 != (uset & (1 << i))) { + TimeUnit unit = TimeUnit.units[i]; + long unitDuration = approximateDurationOf(unit); + if (duration >= unitDuration || period != null) { + double count = (double)duration/unitDuration; + if (period == null) { + if (count >= 2) { + period = Period.at((float)count, unit); + break; + } + period = Period.at(1, unit).inPast(inPast); + duration -= unitDuration; + } else { + if (count >= 1) { + period.and((float)count, unit); + } + break; + } + } + } + } + return period; + } +} + +class MultiUnitBuilder extends PeriodBuilderImpl { + private int nPeriods; + + MultiUnitBuilder(int nPeriods, BasicPeriodBuilderFactory.Settings settings) { + super(settings); + this.nPeriods = nPeriods; + } + + public static MultiUnitBuilder get(int nPeriods, BasicPeriodBuilderFactory.Settings settings) { + if (nPeriods > 0 && settings != null) { + return new MultiUnitBuilder(nPeriods, settings); + } + return null; + } + + protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { + return MultiUnitBuilder.get(nPeriods, settingsToUse); + } + + protected Period handleCreate(long duration, long referenceDate, + boolean inPast) { + Period period = null; + int n = 0; + short uset = settings.effectiveSet(); + for (int i = 0; i < TimeUnit.units.length; ++i) { + if (0 != (uset & (1 << i))) { + TimeUnit unit = TimeUnit.units[i]; + if (n == nPeriods) { + break; + } + long unitDuration = approximateDurationOf(unit); + if (duration >= unitDuration || n > 0) { + ++n; + double count = (double)duration / unitDuration; + if (n < nPeriods) { + count = Math.floor(count); + duration -= (long)(count * unitDuration); + } + if (period == null) { + period = Period.at((float)count, unit).inPast(inPast); + } else { + period.and((float)count, unit); + } + } + } + } + return period; + } +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java 
b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java new file mode 100644 index 00000000000..6ec213d1ae0 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java @@ -0,0 +1,192 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import com.ibm.icu.impl.duration.BasicPeriodFormatterFactory.Customizations; + +import com.ibm.icu.impl.duration.impl.DataRecord.*; +import com.ibm.icu.impl.duration.impl.PeriodFormatterData; + +/** + * Core implementation class for PeriodFormatter. + */ +class BasicPeriodFormatter implements PeriodFormatter { + private BasicPeriodFormatterFactory factory; + private String localeName; + private PeriodFormatterData data; + private Customizations customs; + + BasicPeriodFormatter(BasicPeriodFormatterFactory factory, + String localeName, + PeriodFormatterData data, + Customizations customs) { + this.factory = factory; + this.localeName = localeName; + this.data = data; + this.customs = customs; + } + + public String format(Period period) { + if (!period.isSet()) { + throw new IllegalArgumentException("period is not set"); + } + return format(period.timeLimit, period.inFuture, period.counts); + } + + public PeriodFormatter withLocale(String locName) { + if (!this.localeName.equals(locName)) { + PeriodFormatterData newData = factory.getData(locName); + return new BasicPeriodFormatter(factory, locName, newData, + customs); + } + return this; + } + + private String format(int tl, boolean inFuture, int[] counts) { + int mask = 0; + for (int i = 0; i < counts.length; ++i) { + if (counts[i] > 0) { + mask |= 1 << i; + } + } + + // if the data does not allow formatting of zero periods, + // remove these from consideration. 
If the result has no + // periods set, return null to indicate we could not format + // the duration. + if (!data.allowZero()) { + for (int i = 0, m = 1; i < counts.length; ++i, m <<= 1) { + if ((mask & m) != 0 && counts[i] == 1) { + mask &= ~m; + } + } + if (mask == 0) { + return null; + } + } + + // if the data does not allow milliseconds but milliseconds are + // set, merge them with seconds and force display of seconds to + // decimal with 3 places. + boolean forceD3Seconds = false; + if (data.useMilliseconds() != EMilliSupport.YES && + (mask & (1 << TimeUnit.MILLISECOND.ordinal)) != 0) { + int sx = TimeUnit.SECOND.ordinal; + int mx = TimeUnit.MILLISECOND.ordinal; + int sf = 1 << sx; + int mf = 1 << mx; + switch (data.useMilliseconds()) { + case EMilliSupport.WITH_SECONDS: { + // if there are seconds, merge with seconds, otherwise leave alone + if ((mask & sf) != 0) { + counts[sx] += (counts[mx]-1)/1000; + mask &= ~mf; + forceD3Seconds = true; + } + } break; + case EMilliSupport.NO: { + // merge with seconds, reset seconds before use just in case + if ((mask & sf) == 0) { + mask |= sf; + counts[sx] = 1; + } + counts[sx] += (counts[mx]-1)/1000; + mask &= ~mf; + forceD3Seconds = true; + } break; + } + } + + // get the first and last units that are set. + int first = 0; + int last = counts.length - 1; + while (first < counts.length && (mask & (1 << first)) == 0) ++first; + while (last > first && (mask & (1 << last)) == 0) --last; + + // determine if there is any non-zero unit + boolean isZero = true; + for (int i = first; i <= last; ++i) { + if (((mask & (1 << i)) != 0) && counts[i] > 1) { + isZero = false; + break; + } + } + + StringBuffer sb = new StringBuffer(); + + // if we've been requested to not display a limit, or there are + // no non-zero units, do not display the limit. 
+ if (!customs.displayLimit || isZero) { + tl = ETimeLimit.NOLIMIT; + } + + // if we've been requested to not display the direction, or there + // are no non-zero units, do not display the direction. + int td; + if (!customs.displayDirection || isZero) { + td = ETimeDirection.NODIRECTION; + } else { + td = inFuture ? ETimeDirection.FUTURE : ETimeDirection.PAST; + } + + // format the initial portion of the string before the units. + // record whether we need to use a digit prefix (because the + // initial portion forces it) + boolean useDigitPrefix = data.appendPrefix(tl, td, sb); + + // determine some formatting params and initial values + boolean multiple = first != last; + boolean wasSkipped = true; // no initial skip marker + boolean skipped = false; + boolean countSep = customs.separatorVariant != ESeparatorVariant.NONE; + + // loop for formatting the units + for (int i = first, j = i; i <= last; i = j) { + if (skipped) { + // we didn't format the previous unit + data.appendSkippedUnit(sb); + skipped = false; + wasSkipped = true; + } + + while (++j < last && (mask & (1 << j)) == 0) { + skipped = true; // skip + } + + TimeUnit unit = TimeUnit.units[i]; + int count = counts[i] - 1; + + int cv = customs.countVariant; + if (i == last) { + if (forceD3Seconds) { + cv = ECountVariant.DECIMAL3; + } + // else leave unchanged + } else { + cv = ECountVariant.INTEGER; + } + boolean isLast = i == last; + boolean mustSkip = data.appendUnit(unit, count, cv, customs.unitVariant, + countSep, useDigitPrefix, multiple, isLast, wasSkipped, sb); + skipped |= mustSkip; + wasSkipped = false; + + if (customs.separatorVariant != ESeparatorVariant.NONE && j <= last) { + boolean afterFirst = i == first; + boolean beforeLast = j == last; + boolean fullSep = customs.separatorVariant == ESeparatorVariant.FULL; + useDigitPrefix = data.appendUnitSeparator(unit, fullSep, afterFirst, beforeLast, sb); + } else { + useDigitPrefix = false; + } + } + data.appendSuffix(tl, td, sb); + + return 
sb.toString(); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java new file mode 100644 index 00000000000..91a9c3e0b9d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java @@ -0,0 +1,234 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import com.ibm.icu.impl.duration.impl.PeriodFormatterData; +import com.ibm.icu.impl.duration.impl.PeriodFormatterDataService; + +import com.ibm.icu.impl.duration.impl.DataRecord.EUnitVariant; +import com.ibm.icu.impl.duration.impl.DataRecord.ECountVariant; +import com.ibm.icu.impl.duration.impl.DataRecord.ESeparatorVariant; + +import java.util.Locale; + +/** + * An implementation of PeriodFormatterFactory that provides customization of + * formatting behavior. Instances of this factory are created by + * BasicPeriodFormatterService. + * + * The settings on BasicPeriodFormatterFactory are: + *

      + * + *
    • setDisplayLimit controls whether phrases like 'more than' + * or 'less than' will be displayed when the Period has a defined + * limit. Default is to display them.
    • + * + *
    • setDisplayPastFuture controls whether phrases like 'ago' + * or 'from now' will be displayed to indicate past or future + * time. Default is to display them.
    • + * + *
    • setSeparatorVariant controls how separators (between + * count and period, and multiple periods) will be displayed, when + * appropriate for the language. Default is to use full + * separators.
    • + * + *
    • setUnitVariant controls which of various types of + * unit names to use. PLURALIZED indicates that full names will be + * used. MEDIUM indicates that medium-length (usually 2-3 character) + * names will be used. SHORT indicates that short (usually single + * character) names will be used. If there is no localization data + * available for either the SHORT or MEDIUM names, the other will be + * used; if neither is available, the PLURALIZED names will be used. + * Default is PLURALIZED.
    • + * + *
    • setCountVariant controls how the count for the smallest + * unit will be formatted: either as an integer, a fraction to the + * smallest half, or as a decimal with 1, 2, or 3 decimal places.
    • + * Counts for higher units will be formatted as integers. + * + *
    + */ +public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { + private final PeriodFormatterDataService ds; + private PeriodFormatterData data; + private Customizations customizations; + private boolean customizationsInUse; + private String localeName; + + // package-only constructor + BasicPeriodFormatterFactory(PeriodFormatterDataService ds) { + this.ds = ds; + this.customizations = new Customizations(); + this.localeName = Locale.getDefault().toString(); + } + + /** + * Return the default rdf factory as a BasicPeriodFormatterFactory. + * + * @return a default BasicPeriodFormatterFactory + */ + public static BasicPeriodFormatterFactory getDefault() { + return (BasicPeriodFormatterFactory) + BasicPeriodFormatterService.getInstance().newPeriodFormatterFactory(); + } + + /** + * Set the locale for this factory. + */ + public PeriodFormatterFactory setLocale(String localeName) { + data = null; + this.localeName = localeName; + return this; + } + + /** + * Set whether limits will be displayed. + * + * @param display true if limits will be displayed + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setDisplayLimit(boolean display) { + updateCustomizations().displayLimit = display; + return this; + } + + /** + * Return true if limits will be displayed. + * + * @return true if limits will be displayed + */ + public boolean getDisplayLimit() { + return customizations.displayLimit; + } + + /** + * Set whether past and future will be displayed. + * + * @param display true if past and future will be displayed + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setDisplayPastFuture(boolean display) { + updateCustomizations().displayDirection = display; + return this; + } + + /** + * Return true if past and future will be displayed. 
+ * + * @return true if past and future will be displayed + */ + public boolean getDisplayPastFuture() { + return customizations.displayDirection; + } + + /** + * Set how separators will be displayed. + * + * @param variant the variant indicating separators will be displayed + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setSeparatorVariant(int variant) { + updateCustomizations().separatorVariant = (byte) variant; + return this; + } + + /** + * Return the variant indicating how separators will be displayed. + * + * @return the variant + */ + public int getSeparatorVariant() { + return customizations.separatorVariant; + } + + /** + * Set the variant of the time unit names to use. + * + * @param variant the variant to use + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setUnitVariant(int variant) { + updateCustomizations().unitVariant = (byte) variant; + return this; + } + + /** + * Return the unit variant. + * + * @return the unit variant + */ + public int getUnitVariant() { + return customizations.unitVariant; + } + + /** + * Set the variant of the count to use. + * + * @param variant the variant to use + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setCountVariant(int variant) { + updateCustomizations().countVariant = (byte) variant; + return this; + } + + /** + * Return the count variant. 
+ * + * @return the count variant + */ + public int getCountVariant() { + return customizations.countVariant; + } + + public PeriodFormatter getFormatter() { + customizationsInUse = true; + return new BasicPeriodFormatter(this, localeName, getData(), + customizations); + } + + private Customizations updateCustomizations() { + if (customizationsInUse) { + customizations = customizations.copy(); + customizationsInUse = false; + } + return customizations; + } + + // package access only + PeriodFormatterData getData() { + if (data == null) { + data = ds.get(localeName); + } + return data; + } + + // package access for use by BasicPeriodFormatter + PeriodFormatterData getData(String locName) { + return ds.get(locName); + } + + // package access for use by BasicPeriodFormatter + static class Customizations { + boolean displayLimit = true; + boolean displayDirection = true; + byte separatorVariant = ESeparatorVariant.FULL; + byte unitVariant = EUnitVariant.PLURALIZED; + byte countVariant = ECountVariant.INTEGER; + + public Customizations copy() { + Customizations result = new Customizations(); + result.displayLimit = displayLimit; + result.displayDirection = displayDirection; + result.separatorVariant = separatorVariant; + result.unitVariant = unitVariant; + result.countVariant = countVariant; + return result; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java new file mode 100644 index 00000000000..46c178fde06 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java @@ -0,0 +1,62 @@ +/* + ****************************************************************************** + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ****************************************************************************** + */ + +package com.ibm.icu.impl.duration; + +import com.ibm.icu.impl.duration.impl.PeriodFormatterDataService; +import com.ibm.icu.impl.duration.impl.ResourceBasedPeriodFormatterDataService; + +import java.util.Collection; + +/** + * An implementation of PeriodFormatterService that constructs a + * BasicPeriodFormatterFactory. + */ +public class BasicPeriodFormatterService implements PeriodFormatterService { + private static BasicPeriodFormatterService instance; + private PeriodFormatterDataService ds; + + /** + * Return the default service instance. This uses the default data service. + * + * @return the shared BasicPeriodFormatterService, created on first call + */ + public static BasicPeriodFormatterService getInstance() { + if (instance == null) { + PeriodFormatterDataService ds = ResourceBasedPeriodFormatterDataService + .getInstance(); + instance = new BasicPeriodFormatterService(ds); + } + return instance; + } + + /** + * Construct a BasicPeriodFormatterService using the given + * PeriodFormatterDataService. 
+ * + * @param ds the data service to use + */ + public BasicPeriodFormatterService(PeriodFormatterDataService ds) { + this.ds = ds; + } + + public DurationFormatterFactory newDurationFormatterFactory() { + return new BasicDurationFormatterFactory(this); + } + + public PeriodFormatterFactory newPeriodFormatterFactory() { + return new BasicPeriodFormatterFactory(ds); + } + + public PeriodBuilderFactory newPeriodBuilderFactory() { + return new BasicPeriodBuilderFactory(ds); + } + + public Collection getAvailableLocaleNames() { + return ds.getAvailableLocales(); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/DateFormatter.java b/main/classes/core/src/com/ibm/icu/impl/duration/DateFormatter.java new file mode 100644 index 00000000000..e0232f7858b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/DateFormatter.java @@ -0,0 +1,50 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.Date; +import java.util.TimeZone; + +/** + * Abstract formatter for dates. Differs from DateFormat in that it + * provides withLocale and withTimeZone methods. + */ +public interface DateFormatter { + + /** + * Format the date, provided as a java Date object. + * + * @param date the date + * @return the formatted time + */ + String format(Date date); + + /** + * Format the date, provided as milliseconds. + * + * @param date the date in milliseconds + * @return the formatted time + */ + String format(long date); + + /** + * Returns a new DateFormatter that uses data for a new locale. 
+ * + * @param localeName the name of the new locale to use + * @return a new formatter for the given locale + */ + DateFormatter withLocale(String localeName); + + /** + * Returns a new DateFormatter that uses the new time zone. + * + * @param tz the new time zone + * @return a new formatter for the given time zone + */ + DateFormatter withTimeZone(TimeZone tz); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatter.java b/main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatter.java new file mode 100644 index 00000000000..d78d80c7976 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatter.java @@ -0,0 +1,76 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.Date; +import java.util.TimeZone; + +/** + * Formatter for durations in milliseconds. + */ +public interface DurationFormatter { + + /** + * Formats the duration between now and a target date. + *

    + * This is a convenience method that calls + * formatDurationFrom(long, long) using now + * as the reference date, and the difference between now and + * targetDate.getTime() as the duration. + * + * @param targetDate the ending date + * @return the formatted time + */ + String formatDurationFromNowTo(Date targetDate); + + /** + * Formats a duration expressed in milliseconds. + *

    + * This is a convenience method that calls formatDurationFrom + * using the current system time as the reference date. + * + * @param duration the duration in milliseconds + * @return the formatted time + */ + String formatDurationFromNow(long duration); + + /** + * Formats a duration expressed in milliseconds from a reference date. + *

    + * The reference date allows formatters to use actual durations of + * variable-length periods (like months) if they wish. + *

    + * The duration is expressed as the number of milliseconds in the + * past (negative values) or future (positive values) with respect + * to a reference date (expressed as milliseconds in epoch). + * + * @param duration the duration in milliseconds + * @param referenceDate the date from which to compute the duration + * @return the formatted time + */ + String formatDurationFrom(long duration, long referenceDate); + + /** + * Returns a new DurationFormatter that's the same as this one + * but formats for a new locale. + * + * @param localeName the name of the new locale + * @return a new formatter for the given locale + */ + DurationFormatter withLocale(String localeName); + + /** + * Returns a new DurationFormatter that's the same as this one but + * uses a different time zone. + * + * @param tz the time zone in which to compute durations. + * @return a new formatter for the given time zone + */ + DurationFormatter withTimeZone(TimeZone tz); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatterFactory.java b/main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatterFactory.java new file mode 100644 index 00000000000..2b1dda15744 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/DurationFormatterFactory.java @@ -0,0 +1,79 @@ +/* +****************************************************************************** +* Copyright (C) 2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.TimeZone; + +/** + * Factory used to construct DurationFormatters. + * Formatters are immutable once created. + *

    + * Setters on the factory mutate the factory and return it, + * for chaining. + */ +public interface DurationFormatterFactory { + + /** + * Set the period formatter used by the factory. New formatters created + * with this factory will use the given period formatter. + * + * @param formatter the formatter to use + * @return this DurationFormatterFactory + */ + public DurationFormatterFactory setPeriodFormatter(PeriodFormatter formatter); + + /** + * Set the builder used by the factory. New formatters created + * with this factory will use the given builder. + * + * @param builder the builder to use + * @return this DurationFormatterFactory + */ + public DurationFormatterFactory setPeriodBuilder(PeriodBuilder builder); + + /** + * Set a fallback formatter for durations over a given limit. + * + * @param fallback the fallback formatter to use, or null + * @return this DurationFormatterFactory + */ + public DurationFormatterFactory setFallback(DateFormatter fallback); + + /** + * Set a fallback limit for durations over a given limit. + * + * @param fallbackLimit the fallback limit to use, or 0 if none is desired. + * @return this DurationFormatterFactory + */ + public DurationFormatterFactory setFallbackLimit(long fallbackLimit); + + /** + * Set the name of the locale that will be used when + * creating new formatters. + * + * @param localeName the name of the Locale + * @return this DurationFormatterFactory + */ + public DurationFormatterFactory setLocale(String localeName); + + /** + * Set the time zone that will be used when + * creating new formatters. + * + * @param timeZone The time zone to set. + * @return this DurationFormatterFactory + */ + public DurationFormatterFactory setTimeZone(TimeZone timeZone); + + /** + * Return a formatter based on this factory's current settings. 
+ * + * @return a DurationFormatter + */ + public DurationFormatter getFormatter(); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/Period.java b/main/classes/core/src/com/ibm/icu/impl/duration/Period.java new file mode 100644 index 00000000000..427de068644 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/Period.java @@ -0,0 +1,373 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import com.ibm.icu.impl.duration.impl.DataRecord.ETimeLimit; + +/** + * Represents an approximate duration in multiple TimeUnits. Each unit, + * if set, has a count (which can be fractional and must be non-negative). + * In addition Period can either represent the duration as being into the past + * or future, and as being more or less than the defined value. + *

    + * Use a PeriodFormatter to convert a Period to a String. + *

    + * Periods are immutable. Mutating operations return the new + * result leaving the original unchanged. + *

    + * Example:

    + * Period p1 = Period.at(3, WEEK).and(2, DAY).inFuture();
    + * Period p2 = p1.and(12, HOUR);
    + */ +public final class Period { + final byte timeLimit; + final boolean inFuture; + final int[] counts; + + /** + * Constructs a Period representing a duration of + * count units extending into the past. + * @param count the number of units, must be non-negative + * @param unit the unit + * @return the new Period + */ + public static Period at(float count, TimeUnit unit) { + checkCount(count); + return new Period(ETimeLimit.NOLIMIT, false, count, unit); + } + + /** + * Constructs a Period representing a duration more than + * count units extending into the past. + * @param count the number of units. must be non-negative + * @param unit the unit + * @return the new Period + */ + public static Period moreThan(float count, TimeUnit unit) { + checkCount(count); + return new Period(ETimeLimit.MT, false, count, unit); + } + + /** + * Constructs a Period representing a duration + * less than count units extending into the past. + * @param count the number of units. must be non-negative + * @param unit the unit + * @return the new Period + */ + public static Period lessThan(float count, TimeUnit unit) { + checkCount(count); + return new Period(ETimeLimit.LT, false, count, unit); + } + + /** + * Set the given unit to have the given count. Marks the + * unit as having been set. This can be used to set + * multiple units, or to reset a unit to have a new count. + * This does not add the count to an existing count + * for this unit. + * + * @param count the number of units. must be non-negative + * @param unit the unit + * @return the new Period + */ + public Period and(float count, TimeUnit unit) { + checkCount(count); + return setTimeUnitValue(unit, count); + } + + /** + * Mark the given unit as not being set. + * + * @param unit the unit to unset + * @return the new Period + */ + public Period omit(TimeUnit unit) { + return setTimeUnitInternalValue(unit, 0); + } + + /** + * Mark the duration as being at the defined duration. 
+ * + * @return the new Period + */ + public Period at() { + return setTimeLimit(ETimeLimit.NOLIMIT); + } + + /** + * Mark the duration as being more than the defined duration. + * + * @return the new Period + */ + public Period moreThan() { + return setTimeLimit(ETimeLimit.MT); + } + + /** + * Mark the duration as being less than the defined duration. + * + * @return the new Period + */ + public Period lessThan() { + return setTimeLimit(ETimeLimit.LT); + } + + /** + * Mark the time as being in the future. + * + * @return the new Period + */ + public Period inFuture() { + return setFuture(true); + } + + /** + * Mark the duration as extending into the past. + * + * @return the new Period + */ + public Period inPast() { + return setFuture(false); + } + + /** + * Mark the duration as extending into the future if + * future is true, and into the past otherwise. + * + * @param future true if the time is in the future + * @return the new Period + */ + public Period inFuture(boolean future) { + return setFuture(future); + } + + /** + * Mark the duration as extending into the past if + * past is true, and into the future otherwise. + * + * @param past true if the time is in the past + * @return the new Period + */ + public Period inPast(boolean past) { + return setFuture(!past); + } + + /** + * Returns true if any unit is set. + * @return true if any unit is set + */ + public boolean isSet() { + for (int i = 0; i < counts.length; ++i) { + if (counts[i] != 0) { + return true; + } + } + return false; + } + + /** + * Returns true if the given unit is set. + * @param unit the unit to test + * @return true if the given unit is set. + */ + public boolean isSet(TimeUnit unit) { + return counts[unit.ordinal] > 0; + } + + /** + * Returns the count for the specified unit. If the + * unit is not set, returns 0. 
+ * @param unit the unit to test + * @return the count + */ + public float getCount(TimeUnit unit) { + int ord = unit.ordinal; + if (counts[ord] == 0) { + return 0; + } + return (counts[ord] - 1)/1000f; + } + + /** + * Returns true if this represents a + * duration into the future. + * @return true if this represents a + * duration into the future. + */ + public boolean isInFuture() { + return inFuture; + } + + /** + * Returns true if this represents a + * duration into the past + * @return true if this represents a + * duration into the past + */ + public boolean isInPast () { + return !inFuture; + } + + /** + * Returns true if this represents a duration in + * excess of the defined duration. + * @return true if this represents a duration in + * excess of the defined duration. + */ + public boolean isMoreThan() { + return timeLimit == ETimeLimit.MT; + } + + /** + * Returns true if this represents a duration + * less than the defined duration. + * @return true if this represents a duration + * less than the defined duration. + */ + public boolean isLessThan() { + return timeLimit == ETimeLimit.LT; + } + + /** + * Returns true if rhs extends Period and + * the two Periods are equal. + * @param rhs the object to compare to + * @return true if rhs is a Period and is equal to this + */ + public boolean equals(Object rhs) { + try { + return equals((Period)rhs); + } + catch (ClassCastException e) { + return false; + } + } + + /** + * Returns true if the same units are defined with + * the same counts, both extend into the future or both into the + * past, and if the limits (at, more than, less than) are the same. + * Note that this means that a period of 1000ms and a period of 1sec + * will not compare equal. 
+ * + * @param rhs the period to compare to + * @return true if the two periods are equal + */ + public boolean equals(Period rhs) { + if (rhs != null && + this.timeLimit == rhs.timeLimit && + this.inFuture == rhs.inFuture) { + for (int i = 0; i < counts.length; ++i) { + if (counts[i] != rhs.counts[i]) { + return false; + } + } + return true; + } + return false; + } + + /** + * Returns the hashCode. + * @return the hashCode + */ + public int hashCode() { + int hc = (timeLimit << 1) | (inFuture ? 1 : 0); + for (int i = 0; i < counts.length; ++i) { + hc = (hc << 2) ^ counts[i]; + } + return hc; + } + + /** + * Private constructor used by static factory methods. + */ + private Period(int limit, boolean future, float count, TimeUnit unit) { + this.timeLimit = (byte) limit; + this.inFuture = future; + this.counts = new int[TimeUnit.units.length]; + this.counts[unit.ordinal] = (int)(count * 1000) + 1; + } + + /** + * Package private constructor used by setters and factory. + */ + Period(int timeLimit, boolean inFuture, int[] counts) { + this.timeLimit = (byte) timeLimit; + this.inFuture = inFuture; + this.counts = counts; + } + + /** + * Set the unit's internal value, converting from float to int. + */ + private Period setTimeUnitValue(TimeUnit unit, float value) { + if (value < 0) { + throw new IllegalArgumentException("value: " + value); + } + return setTimeUnitInternalValue(unit, (int)(value * 1000) + 1); + } + + /** + * Sets the period to have the provided value, 1/1000 of the + * unit plus 1. Thus unset values are '0', '1' is the set value '0', + * 2 is the set value '1/1000', 3 is the set value '2/1000' etc. + * @param unit the unit whose internal value is set + * @param value the int value as described above. + * @return the new Period object, or this Period if the value is unchanged. 
+ */ + private Period setTimeUnitInternalValue(TimeUnit unit, int value) { + int ord = unit.ordinal; + if (counts[ord] != value) { + int[] newCounts = new int[counts.length]; + for (int i = 0; i < counts.length; ++i) { + newCounts[i] = counts[i]; + } + newCounts[ord] = value; + return new Period(timeLimit, inFuture, newCounts); + } + return this; + } + + /** + * Sets whether this defines a future time. + * @param future true if the time is in the future + * @return the new Period, or this Period if the direction is unchanged + */ + private Period setFuture(boolean future) { + if (this.inFuture != future) { + return new Period(timeLimit, future, counts); + } + return this; + } + + /** + * Sets whether this is more than, less than, or + * 'about' the specified time. + * @param limit the kind of limit + * @return the new Period, or this Period if the limit is unchanged + */ + private Period setTimeLimit(byte limit) { + if (this.timeLimit != limit) { + return new Period(limit, inFuture, counts); + + } + return this; + } + + /** + * Validate count; throws IllegalArgumentException if it is negative. + */ + private static void checkCount(float count) { + if (count < 0) { + throw new IllegalArgumentException("count (" + count + + ") cannot be negative"); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilder.java b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilder.java new file mode 100644 index 00000000000..188b352e4b7 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilder.java @@ -0,0 +1,53 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.TimeZone; + +/** + * Constructs a Period given a base time and a duration in milliseconds. + *

    + * PeriodBuilder may be used alone or be set on a DurationFormatter + * to customize how that formatter constructs a Period for formatting. + *

    + * None of the operations on PeriodBuilder change the current builder. + */ +public interface PeriodBuilder { + /** + * Create a period of the given duration using the current system + * time as the reference time. + * + * @param duration the duration in milliseconds from the current time + * to the target time. A negative duration indicates a time in the past + * @return a Period that represents the duration + */ + Period create(long duration); + + /** + * Create a period of the given duration using the provided reference date. + * + * @param duration the duration in milliseconds from the reference time + * to the target time. A negative duration indicates a time before the + * reference time + * @param referenceDate the reference date from which to compute the period + * @return a Period that represents the duration + */ + Period createWithReferenceDate(long duration, long referenceDate); + + /** + * Returns a new PeriodBuilder that uses the provided locale to + * determine what periods are available for use. + */ + PeriodBuilder withLocale(String localeName); + + /** + * Returns a new PeriodBuilder that computes periods starting at + * dates in the provided time zone. + */ + PeriodBuilder withTimeZone(TimeZone tz); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilderFactory.java b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilderFactory.java new file mode 100644 index 00000000000..5b022ffada8 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodBuilderFactory.java @@ -0,0 +1,131 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2009, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +import java.util.TimeZone; + +/** + * Factory used to create PeriodBuilders. + */ +public interface PeriodBuilderFactory { + + /** + * Sets the time units available for use. Default is all units. + * @param minUnit the smallest time unit available for use + * @param maxUnit the largest time unit available for use + * @return this factory + */ + PeriodBuilderFactory setAvailableUnitRange(TimeUnit minUnit, + TimeUnit maxUnit); + + /** + * Sets whether the time unit is available for use. + * @param unit the time unit + * @param available true if the unit is available for use + * @return this factory + */ + PeriodBuilderFactory setUnitIsAvailable(TimeUnit unit, boolean available); + + /** + * Sets the maximum value for the largest available time unit (as + * set in setAvailableUnitRange). Periods that represent a longer duration than + * this will be pinned to this value of that time unit and return + * true for 'isMoreThan'. Default is no limit. Setting a value of + * zero restores the default. + */ + PeriodBuilderFactory setMaxLimit(float maxLimit); + + /** + * Sets the minimum value for the smallest available time unit (as + * set in setAvailableUnitRange). Periods that represent a shorter duration than + * this will be pinned to this value of that time unit and return + * true for 'isLessThan'. Default is no limit. Setting a value of + * zero restores the default. + */ + PeriodBuilderFactory setMinLimit(float minLimit); + + /** + * Sets whether units with a value of zero are represented in a + * period when 'gaps' appear between time units, e.g. + * '2 hours, 0 minutes, and 33 seconds'. Default is to + * not represent these explicitly ('2 hours and 33 seconds'). 
+ */ + PeriodBuilderFactory setAllowZero(boolean allow); + + /** + * Sets whether weeks are used with other units, or only when + * weeks are the only unit. For example '3 weeks and 2 days' + * versus '23 days'. Default is to use them alone only. 
+ */ + PeriodBuilderFactory setWeeksAloneOnly(boolean aloneOnly); + + /** + * Sets whether milliseconds are allowed. This is only examined + * when milliseconds are an available field. The default is to allow + * milliseconds to display normally. + *

    + * This is intended to be used to set locale-specific behavior. Typically clients will + * not call this API and instead call {@link #setLocale}. + * + * @param allow whether milliseconds should be allowed. + * @return this factory + */ + PeriodBuilderFactory setAllowMilliseconds(boolean allow); + + /** + * Sets the locale for the factory. Setting the locale can adjust + * the values for some or all of the other properties to reflect + * language or cultural conventions. Default is to use + * the default locale. + */ + PeriodBuilderFactory setLocale(String localeName); + + /** + * Sets the time zone for the factory. This can affect the timezone + * used for date computations. + * @param timeZone the timeZone + * @return this factory + */ + PeriodBuilderFactory setTimeZone(TimeZone timeZone); + /** + * Returns a builder that represents durations in terms of the single + * given TimeUnit. If the factory settings don't make the given unit + * available, this will return null. + * + * @param unit the single TimeUnit with which to represent times + * @return a builder + */ + PeriodBuilder getFixedUnitBuilder(TimeUnit unit); + + /** + * Returns a builder that represents durations in terms of the + * single largest period less than or equal to the duration. + * + * @return a builder + */ + PeriodBuilder getSingleUnitBuilder(); + + /** + * Returns a builder that formats the largest one or two time units, + * starting with the largest period less than or equal to the duration. + * It formats two periods if the first period has a count < 2 + * and the next period has a count >= 1. + * + * @return a builder + */ + PeriodBuilder getOneOrTwoUnitBuilder(); + + /** + * Returns a builder that formats up to the given number of time units, + * starting with the largest unit less than or equal to the + * duration. 
+ * + * @return a builder + */ + PeriodBuilder getMultiUnitBuilder(int unitCount); +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatter.java b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatter.java new file mode 100644 index 00000000000..a3363f180a2 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatter.java @@ -0,0 +1,41 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +/** + * Formats a Period, such as '2 hours 23 minutes'. + * The Period defines the fields to format and their + * values, and the formatter defines how to format them. + *

    + * PeriodFormatters are immutable. + *

    + * PeriodFormatter can be instantiated using a PeriodFormatterFactory. + * + * @see Period + * @see PeriodBuilder + * @see PeriodFormatterFactory + */ +public interface PeriodFormatter { + /** + * Format a Period. + * + * @param period the Period to format + * @return the formatted time + */ + String format(Period period); + + /** + * Return a new PeriodFormatter with the same customizations but + * using data for a new locale. Some locales impose limits on the + * fields that can be directly formatted. + * + * @param localeName the name of the new locale + * @return a new formatter for the given locale + */ + PeriodFormatter withLocale(String localeName); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterFactory.java b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterFactory.java new file mode 100644 index 00000000000..2110d454360 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterFactory.java @@ -0,0 +1,74 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +/** + * Abstract factory interface used to create PeriodFormatters. + * PeriodFormatters are immutable once created. + *

    + * Setters on the factory mutate the factory and return it, + * for chaining. + */ +public interface PeriodFormatterFactory { + + /** + * Set the name of the locale that will be used when + * creating new formatters. + * + * @param localeName the name of the Locale + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setLocale(String localeName); + + /** + * Set whether limits will be displayed. + * + * @param display true if limits will be displayed + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setDisplayLimit(boolean display); + + /** + * Set whether past and future will be displayed. + * + * @param display true if past and future will be displayed + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setDisplayPastFuture(boolean display); + + /** + * Set how separators will be displayed. + * + * @param variant the variant indicating how separators will be displayed + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setSeparatorVariant(int variant); + + /** + * Set the variant of the time unit names to use. + * + * @param variant the variant to use + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setUnitVariant(int variant); + + /** + * Set the variant of the count to use. + * + * @param variant the variant to use + * @return this PeriodFormatterFactory + */ + public PeriodFormatterFactory setCountVariant(int variant); + + /** + * Return a formatter based on this factory's current settings. 
+ * + * @return a PeriodFormatter + */ + public PeriodFormatter getFormatter(); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterService.java b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterService.java new file mode 100644 index 00000000000..81353250a3e --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/PeriodFormatterService.java @@ -0,0 +1,46 @@ +/* + ****************************************************************************** + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +package com.ibm.icu.impl.duration; + +import java.util.Collection; + +/** + * Provider of Factory instances for building PeriodBuilders, PeriodFormatters, + * and DurationFormatters. + */ +public interface PeriodFormatterService { + + /** + * Creates a new factory for creating DurationFormatters. + * + * @return a new DurationFormatterFactory. + */ + DurationFormatterFactory newDurationFormatterFactory(); + + /** + * Creates a new factory for creating PeriodFormatters. + * + * @return a new PeriodFormatterFactory + */ + PeriodFormatterFactory newPeriodFormatterFactory(); + + /** + * Creates a new factory for creating PeriodBuilders. + * + * @return a new PeriodBuilderFactory + */ + PeriodBuilderFactory newPeriodBuilderFactory(); + + /** + * Return the names of locales supported by factories produced by this + * service. 
+ * + * @return a collection of String (locale names) + */ + Collection getAvailableLocaleNames(); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java b/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java new file mode 100644 index 00000000000..a9f1e01d920 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java @@ -0,0 +1,85 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +/** + * 'Enum' for individual time units. Not an actual enum so that it can be + * used by Java 1.4. + */ +public final class TimeUnit { + /** The name of this unit, a key, not for localization. */ + final String name; + + /** The ordinal of the unit, in order from largest to smallest. */ + final byte ordinal; + + /** Private constructor */ + private TimeUnit(String name, int ordinal) { + this.name = name; + this.ordinal = (byte) ordinal; + } + + public String toString() { + return name; + } + + /** Represents a year. */ + public static final TimeUnit YEAR = new TimeUnit("year", 0); + + /** Represents a month. */ + public static final TimeUnit MONTH = new TimeUnit("month", 1); + + /** Represents a week. */ + public static final TimeUnit WEEK = new TimeUnit("week", 2); + + /** Represents a day. */ + public static final TimeUnit DAY = new TimeUnit("day", 3); + + /** Represents an hour. */ + public static final TimeUnit HOUR = new TimeUnit("hour", 4); + + /** Represents a minute. */ + public static final TimeUnit MINUTE = new TimeUnit("minute", 5); + + /** Represents a second. */ + public static final TimeUnit SECOND = new TimeUnit("second", 6); + + /** Represents a millisecond. 
*/ + public static final TimeUnit MILLISECOND = new TimeUnit("millisecond", 7); + + /** Returns the next larger time unit, or null if this is the largest. */ + public TimeUnit larger() { + return ordinal == 0 ? null : units[ordinal - 1]; + } + + /** Returns the next smaller time unit, or null if this is the smallest. */ + public TimeUnit smaller() { + return ordinal == units.length - 1 ? null : units[ordinal + 1]; + } + + /** The list of units, in order from largest to smallest. */ + static final TimeUnit[] units = { + YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND + }; + + /** Returns the ordinal value of this time unit, largest is 0. **/ + public int ordinal() { + return ordinal; + } + + /** Approximate durations for the units, independent of the time at which + they are measured */ + + // hack, initialization long array using expressions with 'L' at end doesn't + // compute entire expression using 'long'. differs from initialization of + // a single constant + static final long[] approxDurations = { + 36525L*24*60*60*10, 3045*24*60*60*10L, 7*24*60*60*1000L, 24*60*60*1000L, + 60*60*1000L, 60*1000L, 1000L, 1L + }; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnitConstants.java b/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnitConstants.java new file mode 100644 index 00000000000..9de023b1850 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnitConstants.java @@ -0,0 +1,37 @@ +/* +****************************************************************************** +* Copyright (C) 2007, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration; + +/** + * Easy access to time units for Java 1.4, which doesn't have static imports. + */ +public interface TimeUnitConstants { + /** Represents a year.
*/ + public static final TimeUnit YEAR = TimeUnit.YEAR; + + /** Represents a month. */ + public static final TimeUnit MONTH = TimeUnit.MONTH; + + /** Represents a week. */ + public static final TimeUnit WEEK = TimeUnit.WEEK; + + /** Represents a day. */ + public static final TimeUnit DAY = TimeUnit.DAY; + + /** Represents an hour. */ + public static final TimeUnit HOUR = TimeUnit.HOUR; + + /** Represents a minute. */ + public static final TimeUnit MINUTE = TimeUnit.MINUTE; + + /** Represents a second. */ + public static final TimeUnit SECOND = TimeUnit.SECOND; + + /** Represents a millisecond. */ + public static final TimeUnit MILLISECOND = TimeUnit.MILLISECOND; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/DataRecord.java b/main/classes/core/src/com/ibm/icu/impl/duration/impl/DataRecord.java new file mode 100644 index 00000000000..d1e55a8d58b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/DataRecord.java @@ -0,0 +1,311 @@ +/* + ****************************************************************************** + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +package com.ibm.icu.impl.duration.impl; + +import java.util.ArrayList; +import java.util.List; + +/** + * DataRecord contains the data used by PeriodFormatterData. Fields are + * package-private for ease of access. This is a struct, it knows how to read + * and write itself to/from simple XML, that's all. 
+ */ +public class DataRecord { + byte pl; + String[][] pluralNames; + byte[] genders; // EGender + String[] singularNames; + String[] halfNames; + String[] numberNames; + String[] mediumNames; + String[] shortNames; + String[] measures; + String[] rqdSuffixes; + String[] optSuffixes; + String[] halves; + byte[] halfPlacements; // EHalfPlacement + byte[] halfSupport; // EHalfSupport + String fifteenMinutes; + String fiveMinutes; + boolean requiresDigitSeparator; + String digitPrefix; + String countSep; + String shortUnitSep; + String[] unitSep; + boolean[] unitSepRequiresDP; + boolean[] requiresSkipMarker; + byte numberSystem; // ENumberSystem + char zero; + char decimalSep; + boolean omitSingularCount; + boolean omitDualCount; + byte zeroHandling; // EZeroHandling + byte decimalHandling; // EDecimalHandling + byte fractionHandling; // EFractionHandling + String skippedUnitMarker; + boolean allowZero; + boolean weeksAloneOnly; + byte useMilliseconds; // EMilliSupport + ScopeData[] scopeData; + + public static DataRecord read(String ln, RecordReader in) { + if (in.open("DataRecord")) { + DataRecord record = new DataRecord(); + record.pl = in.namedIndex("pl", EPluralization.names); + record.pluralNames = in.stringTable("pluralName"); + record.genders = in.namedIndexArray("gender", EGender.names); + record.singularNames = in.stringArray("singularName"); + record.halfNames = in.stringArray("halfName"); + record.numberNames = in.stringArray("numberName"); + record.mediumNames = in.stringArray("mediumName"); + record.shortNames = in.stringArray("shortName"); + record.measures = in.stringArray("measure"); + record.rqdSuffixes = in.stringArray("rqdSuffix"); + record.optSuffixes = in.stringArray("optSuffix"); + record.halves = in.stringArray("halves"); + record.halfPlacements = in.namedIndexArray("halfPlacement", + EHalfPlacement.names); + record.halfSupport = in.namedIndexArray("halfSupport", + EHalfSupport.names); + record.fifteenMinutes = in.string("fifteenMinutes"); + 
record.fiveMinutes = in.string("fiveMinutes"); + record.requiresDigitSeparator = in.bool("requiresDigitSeparator"); + record.digitPrefix = in.string("digitPrefix"); + record.countSep = in.string("countSep"); + record.shortUnitSep = in.string("shortUnitSep"); + record.unitSep = in.stringArray("unitSep"); + record.unitSepRequiresDP = in.boolArray("unitSepRequiresDP"); + record.requiresSkipMarker = in.boolArray("requiresSkipMarker"); + record.numberSystem = in.namedIndex("numberSystem", + ENumberSystem.names); + record.zero = in.character("zero"); + record.decimalSep = in.character("decimalSep"); + record.omitSingularCount = in.bool("omitSingularCount"); + record.omitDualCount = in.bool("omitDualCount"); + record.zeroHandling = in.namedIndex("zeroHandling", + EZeroHandling.names); + record.decimalHandling = in.namedIndex("decimalHandling", + EDecimalHandling.names); + record.fractionHandling = in.namedIndex("fractionHandling", + EFractionHandling.names); + record.skippedUnitMarker = in.string("skippedUnitMarker"); + record.allowZero = in.bool("allowZero"); + record.weeksAloneOnly = in.bool("weeksAloneOnly"); + record.useMilliseconds = in.namedIndex("useMilliseconds", + EMilliSupport.names); + if (in.open("ScopeDataList")) { + List<ScopeData> list = new ArrayList<ScopeData>(); // of ScopeData + ScopeData data; + while (null != (data = ScopeData.read(in))) { + list.add(data); + } + if (in.close()) { + record.scopeData = list.toArray(new ScopeData[list.size()]); + } + } + + if (in.close()) { + return record; + } + } else { + throw new InternalError("did not find DataRecord while reading " + + ln); + } + throw new InternalError("null data read while reading " + ln); + // Thread.dumpStack(); + // return null; + } + + public void write(RecordWriter out) { + out.open("DataRecord"); + out.namedIndex("pl", EPluralization.names, pl); + out.stringTable("pluralName", pluralNames); + out.namedIndexArray("gender", EGender.names, genders); + out.stringArray("singularName", singularNames);
out.stringArray("halfName", halfNames); + out.stringArray("numberName", numberNames); + out.stringArray("mediumName", mediumNames); + out.stringArray("shortName", shortNames); + out.stringArray("measure", measures); + out.stringArray("rqdSuffix", rqdSuffixes); + out.stringArray("optSuffix", optSuffixes); + out.stringArray("halves", halves); + out.namedIndexArray("halfPlacement", EHalfPlacement.names, + halfPlacements); + out.namedIndexArray("halfSupport", EHalfSupport.names, halfSupport); + out.string("fifteenMinutes", fifteenMinutes); + out.string("fiveMinutes", fiveMinutes); + out.bool("requiresDigitSeparator", requiresDigitSeparator); + out.string("digitPrefix", digitPrefix); + out.string("countSep", countSep); + out.string("shortUnitSep", shortUnitSep); + out.stringArray("unitSep", unitSep); + out.boolArray("unitSepRequiresDP", unitSepRequiresDP); + out.boolArray("requiresSkipMarker", requiresSkipMarker); + out.namedIndex("numberSystem", ENumberSystem.names, numberSystem); + out.character("zero", zero); + out.character("decimalSep", decimalSep); + out.bool("omitSingularCount", omitSingularCount); + out.bool("omitDualCount", omitDualCount); + out.namedIndex("zeroHandling", EZeroHandling.names, zeroHandling); + out.namedIndex("decimalHandling", EDecimalHandling.names, + decimalHandling); + out.namedIndex("fractionHandling", EFractionHandling.names, + fractionHandling); + out.string("skippedUnitMarker", skippedUnitMarker); + out.bool("allowZero", allowZero); + out.bool("weeksAloneOnly", weeksAloneOnly); + out.namedIndex("useMilliseconds", EMilliSupport.names, useMilliseconds); + if (scopeData != null) { + out.open("ScopeDataList"); + for (int i = 0; i < scopeData.length; ++i) { + scopeData[i].write(out); + } + out.close(); + } + out.close(); + } + + public static class ScopeData { + String prefix; + boolean requiresDigitPrefix; + String suffix; + + public void write(RecordWriter out) { + out.open("ScopeData"); + out.string("prefix", prefix); + 
out.bool("requiresDigitPrefix", requiresDigitPrefix); + out.string("suffix", suffix); + out.close(); + } + + public static ScopeData read(RecordReader in) { + if (in.open("ScopeData")) { + ScopeData scope = new ScopeData(); + scope.prefix = in.string("prefix"); + scope.requiresDigitPrefix = in.bool("requiresDigitPrefix"); + scope.suffix = in.string("suffix"); + if (in.close()) { + return scope; + } + } + return null; + } + } + + public static interface ETimeLimit { + public static final byte NOLIMIT = 0; + public static final byte LT = 1; + public static final byte MT = 2; + public static final String[] names = { "NOLIMIT", "LT", "MT" }; + } + + public static interface ETimeDirection { + public static final byte NODIRECTION = 0; + public static final byte PAST = 1; + public static final byte FUTURE = 2; + public static final String[] names = { "NODIRECTION", "PAST", "FUTURE" }; + } + + public static interface EUnitVariant { + public static final byte PLURALIZED = 0; + public static final byte MEDIUM = 1; + public static final byte SHORT = 2; + public static final String[] names = { "PLURALIZED", "MEDIUM", "SHORT" }; + } + + public static interface ECountVariant { + public static final byte INTEGER = 0; + public static final byte INTEGER_CUSTOM = 1; + public static final byte HALF_FRACTION = 2; + public static final byte DECIMAL1 = 3; + public static final byte DECIMAL2 = 4; + public static final byte DECIMAL3 = 5; + public static final String[] names = { "INTEGER", "INTEGER_CUSTOM", + "HALF_FRACTION", "DECIMAL1", "DECIMAL2", "DECIMAL3" }; + } + + public static interface EPluralization { + public static final byte NONE = 0; + public static final byte PLURAL = 1; + public static final byte DUAL = 2; + public static final byte PAUCAL = 3; + public static final byte HEBREW = 4; + public static final byte ARABIC = 5; + public static final String[] names = { "NONE", "PLURAL", "DUAL", + "PAUCAL", "HEBREW", "ARABIC" }; + } + + public static interface EHalfPlacement { + 
public static final byte PREFIX = 0; + public static final byte AFTER_FIRST = 1; + public static final byte LAST = 2; + public static final String[] names = { "PREFIX", "AFTER_FIRST", "LAST" }; + } + + public static interface ENumberSystem { + public static final byte DEFAULT = 0; + public static final byte CHINESE_TRADITIONAL = 1; + public static final byte CHINESE_SIMPLIFIED = 2; + public static final byte KOREAN = 3; + public static final String[] names = { "DEFAULT", + "CHINESE_TRADITIONAL", "CHINESE_SIMPLIFIED", "KOREAN" }; + } + + public static interface EZeroHandling { + public static final byte ZPLURAL = 0; + public static final byte ZSINGULAR = 1; + public static final String[] names = { "ZPLURAL", "ZSINGULAR" }; + } + + public static interface EDecimalHandling { + public static final byte DPLURAL = 0; + public static final byte DSINGULAR = 1; + public static final byte DSINGULAR_SUBONE = 2; + public static final byte DPAUCAL = 3; + public static final String[] names = { "DPLURAL", "DSINGULAR", + "DSINGULAR_SUBONE", "DPAUCAL" }; + } + + public static interface EFractionHandling { + public static final byte FPLURAL = 0; + public static final byte FSINGULAR_PLURAL = 1; + public static final byte FSINGULAR_PLURAL_ANDAHALF = 2; + public static final byte FPAUCAL = 3; + public static final String[] names = { "FPLURAL", "FSINGULAR_PLURAL", + "FSINGULAR_PLURAL_ANDAHALF", "FPAUCAL" }; + } + + public static interface EHalfSupport { + public static final byte YES = 0; + public static final byte NO = 1; + public static final byte ONE_PLUS = 2; + public static final String[] names = { "YES", "NO", "ONE_PLUS" }; + } + + public static interface EMilliSupport { + public static final byte YES = 0; + public static final byte NO = 1; + public static final byte WITH_SECONDS = 2; + public static final String[] names = { "YES", "NO", "WITH_SECONDS" }; + } + + public static interface ESeparatorVariant { + public static final byte NONE = 0; + public static final byte SHORT = 1; 
+ public static final byte FULL = 2; + public static final String[] names = { "NONE", "SHORT", "FULL" }; + } + + public static interface EGender { + public static final byte M = 0; + public static final byte F = 1; + public static final byte N = 2; + public static final String[] names = { "M", "F", "N" }; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterData.java b/main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterData.java new file mode 100644 index 00000000000..7183990c53c --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterData.java @@ -0,0 +1,661 @@ +/* +****************************************************************************** +* Copyright (C) 2009, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration.impl; + +import com.ibm.icu.impl.duration.TimeUnit; + +import com.ibm.icu.impl.duration.impl.DataRecord.*; + + +/** + * PeriodFormatterData provides locale-specific data used to format + * relative dates and times, and convenience api to access it. + * + * An instance of PeriodFormatterData is usually created by requesting + * data for a given locale from an PeriodFormatterDataService. 
+ */ +public class PeriodFormatterData { + final DataRecord dr; + String localeName; + + // debug + public static boolean trace = false; + + public PeriodFormatterData(String localeName, DataRecord dr) { + this.dr = dr; + this.localeName = localeName; + if(localeName == null) { + throw new NullPointerException("localename is null"); + } +// System.err.println("** localeName is " + localeName); + if (dr == null) { +// Thread.dumpStack(); + throw new NullPointerException("data record is null"); + } + } + + // none - chinese (all forms the same) + // plural - english, special form for 1 + // dual - special form for 1 and 2 + // paucal - russian, special form for 1, for 2-4 and n > 20 && n % 10 == 2-4 + // rpt_dual_few - slovenian, special form for 1, 2, 3-4 and n as above + // hebrew, dual plus singular form for years > 11 + // arabic, dual, plus singular form for all terms > 10 + + /** + * Return the pluralization format used by this locale. + * @return the pluralization format + */ + public int pluralization() { + return dr.pl; + } + + /** + * Return true if zeros are allowed in the display. + * @return true if zeros should be allowed + */ + public boolean allowZero() { + return dr.allowZero; + } + + public boolean weeksAloneOnly() { + return dr.weeksAloneOnly; + } + + public int useMilliseconds() { + return dr.useMilliseconds; + } + + /** + * Append the appropriate prefix to the string builder, depending on whether and + * how a limit and direction are to be displayed. 
+ * + * @param tl how and whether to display the time limit + * @param td how and whether to display the time direction + * @param sb the string builder to which to append the text + * @return true if a following digit will require a digit prefix + */ + public boolean appendPrefix(int tl, int td, StringBuffer sb) { + if (dr.scopeData != null) { + int ix = tl * 3 + td; + ScopeData sd = dr.scopeData[ix]; + if (sd != null) { + String prefix = sd.prefix; + if (prefix != null) { + sb.append(prefix); + return sd.requiresDigitPrefix; + } + } + } + return false; + } + + /** + * Append the appropriate suffix to the string builder, depending on whether and + * how a limit and direction are to be displayed. + * + * @param tl how and whether to display the time limit + * @param td how and whether to display the time direction + * @param sb the string builder to which to append the text + */ + public void appendSuffix(int tl, int td, StringBuffer sb) { + if (dr.scopeData != null) { + int ix = tl * 3 + td; + ScopeData sd = dr.scopeData[ix]; + if (sd != null) { + String suffix = sd.suffix; + if (suffix != null) { + if (trace) { + System.out.println("appendSuffix '" + suffix + "'"); + } + sb.append(suffix); + } + } + } + } + + /** + * Append the count and unit to the string builder. 
+ * + * @param unit the unit to append + * @param count the count of units, * 1000 + * @param cv the format to use for displaying the count + * @param uv the format to use for displaying the unit + * @param useCountSep if false, force no separator between count and unit + * @param useDigitPrefix if true, use the digit prefix + * @param multiple true if there are multiple units in this string + * @param last true if this is the last unit + * @param wasSkipped true if the unit(s) before this were skipped + * @param sb the string builder to which to append the text + * @return true if will require skip marker + */ + @SuppressWarnings("fallthrough") + public boolean appendUnit(TimeUnit unit, int count, int cv, + int uv, boolean useCountSep, + boolean useDigitPrefix, boolean multiple, + boolean last, boolean wasSkipped, + StringBuffer sb) { + int px = unit.ordinal(); + + boolean willRequireSkipMarker = false; + if (dr.requiresSkipMarker != null && dr.requiresSkipMarker[px] && + dr.skippedUnitMarker != null) { + if (!wasSkipped && last) { + sb.append(dr.skippedUnitMarker); + } + willRequireSkipMarker = true; + } + + if (uv != EUnitVariant.PLURALIZED) { + boolean useMedium = uv == EUnitVariant.MEDIUM; + String[] names = useMedium ? dr.mediumNames : dr.shortNames; + if (names == null || names[px] == null) { + names = useMedium ? dr.shortNames : dr.mediumNames; + } + if (names != null && names[px] != null) { + appendCount(unit, false, false, count, cv, useCountSep, + names[px], last, sb); // omit suffix, ok? 
+ return false; // omit skip marker + } + } + + // check cv + if (cv == ECountVariant.HALF_FRACTION && dr.halfSupport != null) { + switch (dr.halfSupport[px]) { + case EHalfSupport.YES: break; + case EHalfSupport.ONE_PLUS: + if (count > 1000) { + break; + } + // else fall through to decimal + case EHalfSupport.NO: { + count = (count / 500) * 500; // round to 1/2 + cv = ECountVariant.DECIMAL1; + } break; + } + } + + String name = null; + int form = computeForm(unit, count, cv, multiple && last); + if (form == FORM_SINGULAR_SPELLED) { + if (dr.singularNames == null) { + form = FORM_SINGULAR; + name = dr.pluralNames[px][form]; + } else { + name = dr.singularNames[px]; + } + } else if (form == FORM_SINGULAR_NO_OMIT) { + name = dr.pluralNames[px][FORM_SINGULAR]; + } else if (form == FORM_HALF_SPELLED) { + name = dr.halfNames[px]; + } else { + try { + name = dr.pluralNames[px][form]; + } catch (NullPointerException e) { + System.out.println("Null Pointer in PeriodFormatterData["+localeName+"].au px: " + px + " form: " + form + " pn: " + dr.pluralNames); + throw e; + } + } + if (name == null) { + form = FORM_PLURAL; + name = dr.pluralNames[px][form]; + } + + boolean omitCount = + (form == FORM_SINGULAR_SPELLED || form == FORM_HALF_SPELLED) || + (dr.omitSingularCount && form == FORM_SINGULAR) || + (dr.omitDualCount && form == FORM_DUAL); + + int suffixIndex = appendCount(unit, omitCount, useDigitPrefix, count, cv, + useCountSep, name, last, sb); + if (last && suffixIndex >= 0) { + String suffix = null; + if (dr.rqdSuffixes != null && suffixIndex < dr.rqdSuffixes.length) { + suffix = dr.rqdSuffixes[suffixIndex]; + } + if (suffix == null && dr.optSuffixes != null && + suffixIndex < dr.optSuffixes.length) { + suffix = dr.optSuffixes[suffixIndex]; + } + if (suffix != null) { + sb.append(suffix); + } + } + return willRequireSkipMarker; + } + + /** + * Append a count to the string builder. 
+ * + * @param unit the unit + * @param count the count + * @param cv the format to use for displaying the count + * @param useSep whether to use the count separator, if available + * @param name the term name + * @param last true if this is the last unit to be formatted + * @param sb the string builder to which to append the text + * @return index to use if might have required or optional suffix, or -1 if none required + */ + public int appendCount(TimeUnit unit, boolean omitCount, + boolean useDigitPrefix, + int count, int cv, boolean useSep, + String name, boolean last, StringBuffer sb) { + if (cv == ECountVariant.HALF_FRACTION && dr.halves == null) { + cv = ECountVariant.INTEGER; + } + + if (!omitCount && useDigitPrefix && dr.digitPrefix != null) { + sb.append(dr.digitPrefix); + } + + int index = unit.ordinal(); + switch (cv) { + case ECountVariant.INTEGER: { + if (!omitCount) { + appendInteger(count/1000, 1, 10, sb); + } + } break; + + case ECountVariant.INTEGER_CUSTOM: { + int val = count / 1000; + // only custom names we have for now + if (unit == TimeUnit.MINUTE && + (dr.fiveMinutes != null || dr.fifteenMinutes != null)) { + if (val != 0 && val % 5 == 0) { + if (dr.fifteenMinutes != null && (val == 15 || val == 45)) { + val = val == 15 ? 1 : 3; + if (!omitCount) appendInteger(val, 1, 10, sb); + name = dr.fifteenMinutes; + index = 8; // hack + break; + } + if (dr.fiveMinutes != null) { + val = val / 5; + if (!omitCount) appendInteger(val, 1, 10, sb); + name = dr.fiveMinutes; + index = 9; // hack + break; + } + } + } + if (!omitCount) appendInteger(val, 1, 10, sb); + } break; + + case ECountVariant.HALF_FRACTION: { + // 0, 1/2, 1, 1-1/2... + int v = count / 500; + if (v != 1) { + if (!omitCount) appendCountValue(count, 1, 0, sb); + } + if ((v & 0x1) == 1) { + // hack, using half name + if (v == 1 && dr.halfNames != null && dr.halfNames[index] != null) { + sb.append(name); + return last ? index : -1; + } + + int solox = v == 1 ? 
0 : 1; + if (dr.genders != null && dr.halves.length > 2) { + if (dr.genders[index] == EGender.F) { + solox += 2; + } + } + int hp = dr.halfPlacements == null + ? EHalfPlacement.PREFIX + : dr.halfPlacements[solox & 0x1]; + String half = dr.halves[solox]; + String measure = dr.measures == null ? null : dr.measures[index]; + switch (hp) { + case EHalfPlacement.PREFIX: + sb.append(half); + break; + case EHalfPlacement.AFTER_FIRST: { + if (measure != null) { + sb.append(measure); + sb.append(half); + if (useSep && !omitCount) { + sb.append(dr.countSep); + } + sb.append(name); + } else { // ignore sep completely + sb.append(name); + sb.append(half); + return last ? index : -1; // might use suffix + } + } return -1; // exit early + case EHalfPlacement.LAST: { + if (measure != null) { + sb.append(measure); + } + if (useSep && !omitCount) { + sb.append(dr.countSep); + } + sb.append(name); + sb.append(half); + } return last ? index : -1; // might use suffix + } + } + } break; + default: { + int decimals = 1; + switch (cv) { + case ECountVariant.DECIMAL2: decimals = 2; break; + case ECountVariant.DECIMAL3: decimals = 3; break; + default: break; + } + if (!omitCount) appendCountValue(count, 1, decimals, sb); + } break; + } + if (!omitCount && useSep) { + sb.append(dr.countSep); + } + if (!omitCount && dr.measures != null && index < dr.measures.length) { + String measure = dr.measures[index]; + if (measure != null) { + sb.append(measure); + } + } + sb.append(name); + return last ? index : -1; + } + + /** + * Append a count value to the builder. 
+ * + * @param count the count + * @param integralDigits the number of integer digits to display + * @param decimalDigits the number of decimal digits to display, <= 3 + * @param sb the string builder to which to append the text + */ + public void appendCountValue(int count, int integralDigits, + int decimalDigits, StringBuffer sb) { + int ival = count / 1000; + if (decimalDigits == 0) { + appendInteger(ival, integralDigits, 10, sb); + return; + } + + if (dr.requiresDigitSeparator && sb.length() > 0) { + sb.append(' '); + } + appendDigits(ival, integralDigits, 10, sb); + int dval = count % 1000; + if (decimalDigits == 1) { + dval /= 100; + } else if (decimalDigits == 2) { + dval /= 10; + } + sb.append(dr.decimalSep); + appendDigits(dval, decimalDigits, decimalDigits, sb); + if (dr.requiresDigitSeparator) { + sb.append(' '); + } + } + + public void appendInteger(int num, int mindigits, int maxdigits, + StringBuffer sb) { + if (dr.numberNames != null && num < dr.numberNames.length) { + String name = dr.numberNames[num]; + if (name != null) { + sb.append(name); + return; + } + } + + if (dr.requiresDigitSeparator && sb.length() > 0) { + sb.append(' '); + } + switch (dr.numberSystem) { + case ENumberSystem.DEFAULT: appendDigits(num, mindigits, maxdigits, sb); break; + case ENumberSystem.CHINESE_TRADITIONAL: sb.append( + Utils.chineseNumber(num, Utils.ChineseDigits.TRADITIONAL)); break; + case ENumberSystem.CHINESE_SIMPLIFIED: sb.append( + Utils.chineseNumber(num, Utils.ChineseDigits.SIMPLIFIED)); break; + case ENumberSystem.KOREAN: sb.append( + Utils.chineseNumber(num, Utils.ChineseDigits.KOREAN)); break; + } + if (dr.requiresDigitSeparator) { + sb.append(' '); + } + } + + /** + * Append digits to the string builder, using this.zero for '0' etc. 
+ * + * @param num the integer to append + * @param mindigits the minimum number of digits to append + * @param maxdigits the maximum number of digits to append + * @param sb the string builder to which to append the text + */ + public void appendDigits(long num, int mindigits, int maxdigits, + StringBuffer sb) { + char[] buf = new char[maxdigits]; + int ix = maxdigits; + while (ix > 0 && num > 0) { + buf[--ix] = (char)(dr.zero + (num % 10)); + num /= 10; + } + for (int e = maxdigits - mindigits; ix > e;) { + buf[--ix] = dr.zero; + } + sb.append(buf, ix, maxdigits - ix); + } + + /** + * Append a marker for skipped units internal to a string. + * @param sb the string builder to which to append the text + */ + public void appendSkippedUnit(StringBuffer sb) { + if (dr.skippedUnitMarker != null) { + sb.append(dr.skippedUnitMarker); + } + } + + /** + * Append the appropriate separator between units + * + * @param unit the unit to which to append the separator + * @param afterFirst true if this separator follows the first unit formatted + * @param beforeLast true if this is the next-to-last unit to be formatted + * @param sb the string builder to which to append the text + * @return true if a prefix will be required before a following unit + */ + public boolean appendUnitSeparator(TimeUnit unit, boolean longSep, + boolean afterFirst, boolean beforeLast, + StringBuffer sb) { + // long seps + // false, false "...b', '...d" + // false, true "...', and 'c" + // true, false - "a', '...c" + // true, true - "a' and 'b" + if ((longSep && dr.unitSep != null) || dr.shortUnitSep != null) { + if (longSep && dr.unitSep != null) { + int ix = (afterFirst ? 2 : 0) + (beforeLast ?
1 : 0); + sb.append(dr.unitSep[ix]); + return dr.unitSepRequiresDP != null && dr.unitSepRequiresDP[ix]; + } + sb.append(dr.shortUnitSep); // todo: investigate whether DP is required + } + return false; + } + + private static final int + FORM_PLURAL = 0, + FORM_SINGULAR = 1, + FORM_DUAL = 2, + FORM_PAUCAL = 3, + FORM_SINGULAR_SPELLED = 4, // following are not in the pluralization list + FORM_SINGULAR_NO_OMIT = 5, // a hack + FORM_HALF_SPELLED = 6; + + private int computeForm(TimeUnit unit, int count, int cv, + boolean lastOfMultiple) { + // first check if a particular form is forced by the countvariant. if + // SO, just return that. otherwise convert the count to an integer + // and use pluralization rules to determine which form to use. + // careful, can't assume any forms but plural exist. + + if (trace) { + System.err.println("pfd.cf unit: " + unit + " count: " + count + " cv: " + cv + " dr.pl: " + dr.pl); + Thread.dumpStack(); + } + if (dr.pl == EPluralization.NONE) { + return FORM_PLURAL; + } + // otherwise, assume we have at least a singular and plural form + + int val = count/1000; + + switch (cv) { + case ECountVariant.INTEGER: + case ECountVariant.INTEGER_CUSTOM: { + // do more analysis based on floor of count + } break; + case ECountVariant.HALF_FRACTION: { + switch (dr.fractionHandling) { + case EFractionHandling.FPLURAL: + return FORM_PLURAL; + + case EFractionHandling.FSINGULAR_PLURAL_ANDAHALF: + case EFractionHandling.FSINGULAR_PLURAL: { + // if half-floor is 1/2, use singular + // else if half-floor is not integral, use plural + // else do more analysis + int v = count / 500; + if (v == 1) { + if (dr.halfNames != null && dr.halfNames[unit.ordinal()] != null) { + return FORM_HALF_SPELLED; + } + return FORM_SINGULAR_NO_OMIT; + } + if ((v & 0x1) == 1) { + if (dr.pl == EPluralization.ARABIC && v > 21) { // hack + return FORM_SINGULAR_NO_OMIT; + } + if (v == 3 && dr.pl == EPluralization.PLURAL && + dr.fractionHandling != 
EFractionHandling.FSINGULAR_PLURAL_ANDAHALF) { + return FORM_PLURAL; + } + } + + // it will display like an integer, so do more analysis + } break; + + case EFractionHandling.FPAUCAL: { + int v = count / 500; + if (v == 1 || v == 3) { + return FORM_PAUCAL; + } + // else use integral form + } break; + + default: + throw new IllegalStateException(); + } + } break; + default: { // for all decimals + switch (dr.decimalHandling) { + case EDecimalHandling.DPLURAL: break; + case EDecimalHandling.DSINGULAR: return FORM_SINGULAR_NO_OMIT; + case EDecimalHandling.DSINGULAR_SUBONE: + if (count < 1000) { + return FORM_SINGULAR_NO_OMIT; + } + break; + case EDecimalHandling.DPAUCAL: + if (dr.pl == EPluralization.PAUCAL) { + return FORM_PAUCAL; + } + break; + default: + break; + } + return FORM_PLURAL; + } + } + + // select among pluralization forms + if (trace && count == 0) { + System.err.println("EZeroHandling = " + dr.zeroHandling); + } + if (count == 0 && dr.zeroHandling == EZeroHandling.ZSINGULAR) { + return FORM_SINGULAR_SPELLED; + } + + int form = FORM_PLURAL; + switch(dr.pl) { + case EPluralization.NONE: break; // never get here + case EPluralization.PLURAL: { + if (val == 1) { + form = FORM_SINGULAR_SPELLED; // defaults to form_singular if no spelled forms + } + } break; + case EPluralization.DUAL: { + if (val == 2) { + form = FORM_DUAL; + } else if (val == 1) { + form = FORM_SINGULAR; + } + } break; + case EPluralization.PAUCAL: { + int v = val; + v = v % 100; + if (v > 20) { + v = v % 10; + } + if (v == 1) { + form = FORM_SINGULAR; + } else if (v > 1 && v < 5) { + form = FORM_PAUCAL; + } + } break; + /* + case EPluralization.RPT_DUAL_FEW: { + int v = val; + if (v > 20) { + v = v % 10; + } + if (v == 1) { + form = FORM_SINGULAR; + } else if (v == 2) { + form = FORM_DUAL; + } else if (v > 2 && v < 5) { + form = FORM_PAUCAL; + } + } break; + */ + case EPluralization.HEBREW: { + if (val == 2) { + form = FORM_DUAL; + } else if (val == 1) { + if (lastOfMultiple) { + form = 
FORM_SINGULAR_SPELLED; + } else { + form = FORM_SINGULAR; + } + } else if (unit == TimeUnit.YEAR && val > 11) { + form = FORM_SINGULAR_NO_OMIT; + } + } break; + case EPluralization.ARABIC: { + if (val == 2) { + form = FORM_DUAL; + } else if (val == 1) { + form = FORM_SINGULAR; + } else if (val > 10) { + form = FORM_SINGULAR_NO_OMIT; + } + } break; + default: + System.err.println("dr.pl is " + dr.pl); + throw new IllegalStateException(); + } + + return form; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterDataService.java b/main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterDataService.java new file mode 100644 index 00000000000..4bdcd99e567 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/PeriodFormatterDataService.java @@ -0,0 +1,31 @@ +/* + ****************************************************************************** + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +package com.ibm.icu.impl.duration.impl; + +import java.util.Collection; + +/** + * Abstract service for PeriodFormatterData, which defines the localization data + * used by period formatters. + */ +public abstract class PeriodFormatterDataService { + /** + * Returns a PeriodFormatterData for the given locale name. + * + * @param localeName the name of the locale + * @return a PeriodFormatterData object + */ + public abstract PeriodFormatterData get(String localeName); + + /** + * Returns a collection of all the locale names supported by this service. 
/**
 * Reader side of the record serialization used for period formatter data;
 * each method reads one named value of a particular type.
 *
 * NOTE(review): the contract below is inferred from XMLRecordReader, the
 * implementation in this package; other implementations may differ, so
 * confirm before relying on the "missing value" results.
 */
interface RecordReader {
    /**
     * Starts reading the record with the given title.
     * XMLRecordReader returns true only when the current tag equals title.
     */
    boolean open(String title);

    /**
     * Finishes the most recently opened record.
     * XMLRecordReader returns true only when the matching end tag is current.
     */
    boolean close();

    /** Reads the named boolean; XMLRecordReader yields false when it is absent. */
    boolean bool(String name);

    /** Reads the named boolean array; XMLRecordReader yields null when it is absent. */
    boolean[] boolArray(String name);

    /** Reads the named character; XMLRecordReader yields '\uffff' when it is absent. */
    char character(String name);

    /** Reads the named character array; XMLRecordReader yields null when it is absent. */
    char[] characterArray(String name);

    /**
     * Reads the named value and maps it to a byte using the supplied names.
     * Presumably the result is the index of the value within names; verify
     * against the implementation.
     */
    byte namedIndex(String name, String[] names);

    /** Array form of {@link #namedIndex(String, String[])}. */
    byte[] namedIndexArray(String name, String[] names);

    /** Reads the named string. */
    String string(String name);

    /** Reads the named array of strings. */
    String[] stringArray(String name);

    /** Reads the named two-dimensional table of strings. */
    String[][] stringTable(String name);
}
* +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration.impl; + +interface RecordWriter { + boolean open(String title); + boolean close(); + + void bool(String name, boolean value); + void boolArray(String name, boolean[] values); + void character(String name, char value); + void characterArray(String name, char[] values); + void namedIndex(String name, String[] names, int value); + void namedIndexArray(String name, String[] names, byte[] values); + void string(String name, String value); + void stringArray(String name, String[] values); + void stringTable(String name, String[][] values); +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java b/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java new file mode 100644 index 00000000000..71326836dee --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java @@ -0,0 +1,162 @@ +/* + ****************************************************************************** + * Copyright (C) 2007-2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +package com.ibm.icu.impl.duration.impl; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.MissingResourceException; + +import com.ibm.icu.impl.ICUData; + +/** + * A PeriodFormatterDataService that serves PeriodFormatterData objects based on + * data files stored as resources in this directory. 
These are text files named + * after the locale, for example, 'pfd_he_IL.txt' specifies an period formatter + * data file for Hebrew as spoken in Israel. Data is in a JSON-like format. + */ +public class ResourceBasedPeriodFormatterDataService extends + PeriodFormatterDataService { + private Collection availableLocales; // of String + + private PeriodFormatterData lastData = null; + private String lastLocale = null; + private Map cache = new HashMap(); // String -> PeriodFormatterData + // private PeriodFormatterData fallbackFormatterData; + + private static final String PATH = "data/"; + + private static final ResourceBasedPeriodFormatterDataService singleton = new ResourceBasedPeriodFormatterDataService(); + + /** + * Returns the singleton instance of this class. + */ + public static ResourceBasedPeriodFormatterDataService getInstance() { + return singleton; + } + + /** + * Constructs the service. + */ + private ResourceBasedPeriodFormatterDataService() { + List localeNames = new ArrayList(); // of String + InputStream is = ICUData.getRequiredStream(getClass(), PATH + + "index.txt"); + try { + BufferedReader br = new BufferedReader(new InputStreamReader(is, + "UTF-8")); + String string = null; + while (null != (string = br.readLine())) { + string = string.trim(); + if (string.startsWith("#") || string.length() == 0) { + continue; + } + localeNames.add(string); + } + } catch (IOException e) { + throw new IllegalStateException("IO Error reading " + PATH + + "index.txt: " + e.toString()); + } + availableLocales = Collections.unmodifiableList(localeNames); + } + + public PeriodFormatterData get(String localeName) { + // remove tag info including calendar, we don't use the calendar + int x = localeName.indexOf('@'); + if (x != -1) { + localeName = localeName.substring(0, x); + } + + synchronized (this) { + if (lastLocale != null && lastLocale.equals(localeName)) { + return lastData; + } + + PeriodFormatterData ld = cache.get(localeName); + if (ld == null) { + String 
ln = localeName; + while (!availableLocales.contains(ln)) { + int ix = ln.lastIndexOf("_"); + if (ix > -1) { + ln = ln.substring(0, ix); + } else if (!"test".equals(ln)) { + ln = "test"; + } else { + ln = null; + break; + } + } + if (ln != null) { + String name = PATH + "pfd_" + ln + ".xml"; + try { + InputStream is = ICUData.getStream(getClass(), name); + if (is == null) { + throw new MissingResourceException( + "no resource named " + name, name, ""); + } else { + DataRecord dr = DataRecord.read(ln, + new XMLRecordReader(new InputStreamReader( + is, "UTF-8"))); + if (dr != null) { + // debug + // if (false && ln.equals("ar_EG")) { + // OutputStreamWriter osw = new + // OutputStreamWriter(System.out, "UTF-8"); + // XMLRecordWriter xrw = new + // XMLRecordWriter(osw); + // dr.write(xrw); + // osw.flush(); + // } + ld = new PeriodFormatterData(localeName, dr); + } + } + } catch (UnsupportedEncodingException e) { + throw new MissingResourceException( + "Unhandled Encoding for resource " + name, + name, ""); + } + } else { + throw new MissingResourceException( + "Duration data not found for " + localeName, PATH, + localeName); + } + + // if (ld == null) { + // ld = getFallbackFormatterData(); + // } + cache.put(localeName, ld); + } + lastData = ld; + lastLocale = localeName; + + return ld; + } + } + + public Collection getAvailableLocales() { + return availableLocales; + } + + // PeriodFormatterData getFallbackFormatterData() { + // synchronized (this) { + // if (fallbackFormatterData == null) { + // DataRecord dr = new DataRecord(); // hack, no default, will die if used + // fallbackFormatterData = new PeriodFormatterData(null, dr); + // } + // return fallbackFormatterData; + // } + // } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/Utils.java b/main/classes/core/src/com/ibm/icu/impl/duration/impl/Utils.java new file mode 100644 index 00000000000..d2ac08a36e6 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/Utils.java @@ 
/**
 * Helpers for the duration formatting implementation: parsing locale names
 * and spelling out numbers with Chinese-style digit, unit and level symbols.
 */
public class Utils {
    /**
     * Splits a name of the form language[_region[_variant]] at the first two
     * underscores and builds a Locale from the parts. Missing parts are
     * passed as empty strings.
     *
     * @param s the locale name, must not be null
     * @return the corresponding Locale
     */
    public static final java.util.Locale localeFromString(String s) {
        String language = s;
        String region = "";
        String variant = "";

        int x = language.indexOf("_");
        if (x != -1) {
            region = language.substring(x+1);
            language = language.substring(0, x);
        }
        x = region.indexOf("_");
        if (x != -1) {
            variant = region.substring(x+1);
            region = region.substring(0, x);
        }
        return new java.util.Locale(language, region, variant);
    }

    /**
     * Spells out the magnitude of n using the symbols in zh. The sign of n is
     * ignored.
     *
     * Interesting features of chinese numbers:
     * - Each digit is followed by a unit symbol (10's, 100's, 1000's).
     * - Units repeat in levels of 10,000, there are symbols for each level too (except 1's).
     * - The digit 2 has a special form before the 10 symbol and at the end of the number.
     * - If the first digit in the number is 1 and its unit is 10, the 1 is omitted.
     * - Sequences of 0 digits and their units are replaced by a single 0 and no unit.
     * - If there are two such sequences of 0 digits in a level (1000's and 10's), the 1000's 0 is also omitted.
     * - The 1000's 0 is also omitted in alternating levels, such that it is omitted in the rightmost
     *   level with a 10's 0, or if none, in the rightmost level.
     * - Level symbols are omitted if all of their units are omitted
     *
     * @param n the number to spell out; its absolute value is used
     * @param zh the digit, unit and level symbols to use
     * @return the spelled-out number
     * @throws IllegalArgumentException if n is Long.MIN_VALUE (its magnitude
     *         is not representable as a long), or if the magnitude has more
     *         decimal digits than the level symbols in zh can express
     *         (four digits per level symbol, plus the lowest four)
     */
    public static String chineseNumber(long n, ChineseDigits zh) {
        // -Long.MIN_VALUE overflows back to Long.MIN_VALUE, which would then
        // be treated as a small number and silently produce the zero digit.
        if (n == Long.MIN_VALUE) {
            throw new IllegalArgumentException("number out of range: " + n);
        }
        if (n < 0) {
            n = -n;
        }
        if (n <= 10) {
            if (n == 2) {
                return String.valueOf(zh.liang);
            }
            return String.valueOf(zh.digits[(int)n]);
        }

        char[] digits = String.valueOf(n).toCharArray();

        // Every block of four digits above the lowest one needs a level
        // symbol; reject numbers that would run past the available levels
        // instead of failing with an index error half way through.
        if (digits.length > 4 * (zh.levels.length + 1)) {
            throw new IllegalArgumentException("number out of range: " + n);
        }

        // Large enough for the longest accepted number; '*' marks positions
        // that are removed in the final compression step.
        char[] buf = new char[40];

        // first, generate all the digits in place
        // convert runs of zeros into a single zero, but keep places
        //
        boolean inZero = true; // true if we should zap zeros in this block, resets at start of block
        boolean forcedZero = false; // true if we have a 0 in tens's place
        int x = buf.length;
        for (int i = digits.length, u = -1, l = -1; --i >= 0;) {
            if (u == -1) {
                if (l != -1) {
                    buf[--x] = zh.levels[l];
                    inZero = true;
                    forcedZero = false;
                }
                ++u;
            } else {
                buf[--x] = zh.units[u++];
                if (u == 3) {
                    u = -1;
                    ++l;
                }
            }
            int d = digits[i] - '0';
            if (d == 0) {
                if (x < buf.length-1 && u != 0) {
                    // a zero digit carries no unit: zap the unit just written
                    buf[x] = '*';
                }
                if (inZero || forcedZero) {
                    buf[--x] = '*';
                } else {
                    buf[--x] = zh.digits[0];
                    inZero = true;
                    forcedZero = u == 1;
                }
            } else {
                inZero = false;
                buf[--x] = zh.digits[d];
            }
        }

        // scanning from right, find first required 'ling'
        // we only care if n > 101,0000 as this is the first case where
        // it might shift. remove optional lings in alternating blocks.
        if (n > 1000000) {
            boolean last = true;
            int i = buf.length - 3;
            do {
                // NOTE(review): this compares against the literal '0', which
                // is the zero digit only in ChineseDigits.DEBUG; with the
                // other digit sets this test never matches. Left unchanged to
                // preserve existing output -- confirm whether zh.digits[0]
                // was intended.
                if (buf[i] == '0') {
                    break;
                }
                i -= 8;
                last = !last;
            } while (i > x);

            i = buf.length - 7;
            do {
                if (buf[i] == zh.digits[0] && !last) {
                    buf[i] = '*';
                }
                i -= 8;
                last = !last;
            } while (i > x);

            // remove levels for empty blocks
            if (n >= 100000000) {
                i = buf.length - 8;
                do {
                    boolean empty = true;
                    for (int j = i-1, e = Math.max(x-1, i-8); j > e; --j) {
                        if (buf[j] != '*') {
                            empty = false;
                            break;
                        }
                    }
                    if (empty) {
                        if (buf[i+1] != '*' && buf[i+1] != zh.digits[0]) {
                            buf[i] = zh.digits[0];
                        } else {
                            buf[i] = '*';
                        }
                    }
                    i -= 8;
                } while (i > x);
            }
        }

        // replace er by liang except before or after shi or after ling
        for (int i = x; i < buf.length; ++i) {
            if (buf[i] != zh.digits[2]) continue;
            if (i < buf.length - 1 && buf[i+1] == zh.units[0]) continue;
            if (i > x && (buf[i-1] == zh.units[0] || buf[i-1] == zh.digits[0] || buf[i-1] == '*')) continue;

            buf[i] = zh.liang;
        }

        // eliminate leading 1 if following unit is shi
        if (buf[x] == zh.digits[1] && (zh.ko || buf[x+1] == zh.units[0])) {
            ++x;
        }

        // now, compress out the '*'
        int w = x;
        for (int r = x; r < buf.length; ++r) {
            if (buf[r] != '*') {
                buf[w++] = buf[r];
            }
        }
        return new String(buf, x, w-x);
    }

    /**
     * Debugging aid: prints each argument, parsed as a long, next to its
     * spelling in the ASCII DEBUG symbol set.
     */
    public static void main(String[] args) {
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            System.out.print(arg);
            System.out.print(" > ");
            long n = Long.parseLong(arg);
            System.out.println(chineseNumber(n, ChineseDigits.DEBUG));
        }
    }

    /**
     * The symbols used by {@link #chineseNumber(long, ChineseDigits)}.
     */
    public static class ChineseDigits {
        /** Symbols for the values 0 through 10, indexed by value. */
        final char[] digits;
        /** Unit symbols for 10, 100 and 1000. */
        final char[] units;
        /** Level symbols for successive groups of four digits (10^4, 10^8, ...). */
        final char[] levels;
        /** Alternate symbol for the digit 2. */
        final char liang;
        /** If true, a leading 1 is dropped no matter which symbol follows it. */
        final boolean ko;

        ChineseDigits(String digits, String units, String levels, char liang, boolean ko) {
            this.digits = digits.toCharArray();
            this.units = units.toCharArray();
            this.levels = levels.toCharArray();
            this.liang = liang;
            this.ko = ko;
        }

        /** ASCII stand-ins, convenient for reading debug output. */
        public static final ChineseDigits DEBUG =
            new ChineseDigits("0123456789s", "sbq", "WYZ", 'L', false);

        public static final ChineseDigits TRADITIONAL =
            new ChineseDigits("\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341", // to shi
                              "\u5341\u767e\u5343", // shi, bai, qian
                              "\u842c\u5104\u5146", // wan, yi, zhao
                              '\u5169', false); // liang

        public static final ChineseDigits SIMPLIFIED =
            new ChineseDigits("\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341", // to shi
                              "\u5341\u767e\u5343", // shi, bai, qian
                              "\u4e07\u4ebf\u5146", // wan, yi, zhao
                              '\u4e24', false); // liang

        // no 1 before first unit no matter what it is
        // not sure if there are 'ling' units
        // NOTE(review): the third level symbol below is a literal '?'
        // placeholder -- confirm the intended character for 10^12.
        public static final ChineseDigits KOREAN =
            new ChineseDigits("\uc601\uc77c\uc774\uc0bc\uc0ac\uc624\uc721\uce60\ud314\uad6c\uc2ed", // to ten
                              "\uc2ed\ubc31\ucc9c", // 10, 100, 1000
                              "\ub9cc\uc5b5?", // 10^4, 10^8, 10^12
                              '\uc774', true);
    }
}
* +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration.impl; + +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.List; + +import com.ibm.icu.lang.UCharacter; + +public class XMLRecordReader implements RecordReader { + private Reader r; + + private List<String> nameStack; // names of the currently open elements, innermost last + + private boolean atTag; + + private String tag; // cache + + public XMLRecordReader(Reader r) { + this.r = r; + this.nameStack = new ArrayList<String>(); + + // skip XML prologue + if (getTag().startsWith("?xml")) { + advance(); + } + + // skip FIRST comment + if (getTag().startsWith("!--")) { + advance(); + } + } + + public boolean open(String title) { + if (getTag().equals(title)) { + nameStack.add(title); + advance(); + return true; + } + return false; + } + + public boolean close() { + int ix = nameStack.size() - 1; + String name = ix < 0 ? null : nameStack.get(ix); // nothing open: report failure instead of throwing + if (name != null && getTag().equals("/" + name)) { + nameStack.remove(ix); + advance(); + return true; + } + return false; + } + + public boolean bool(String name) { + String s = string(name); + if (s != null) { + return "true".equals(s); + } + return false; + } + + public boolean[] boolArray(String name) { + String[] sa = stringArray(name); + if (sa != null) { + boolean[] result = new boolean[sa.length]; + for (int i = 0; i < sa.length; ++i) { + result[i] = "true".equals(sa[i]); + } + return result; + } + return null; + } + + public char character(String name) { + String s = string(name); + if (s != null) { + return s.charAt(0); + } + return '\uffff'; + } + + public char[] characterArray(String name) { + String[] sa = stringArray(name); + if (sa != null) { + char[] result = new char[sa.length]; + for (int i = 0; i < sa.length; ++i) { + result[i] = sa[i] == null ? '\uffff' : sa[i].charAt(0); // stringArray() turns "Null" entries into null + } + return result; + } + return null; + } + + public byte namedIndex(String name, String[] names) { + String sa = string(name); + if (sa != null) { + for (int i = 0; i < names.length; ++i) { + if 
(sa.equals(names[i])) { + return (byte) i; + } + } + } + return (byte) -1; + } + + public byte[] namedIndexArray(String name, String[] names) { + String[] sa = stringArray(name); + if (sa != null) { + byte[] result = new byte[sa.length]; + loop: for (int i = 0; i < sa.length; ++i) { + String s = sa[i]; + for (int j = 0; j < names.length; ++j) { + if (names[j].equals(s)) { + result[i] = (byte) j; + continue loop; + } + } + result[i] = (byte) -1; + } + return result; + } + return null; + } + + public String string(String name) { + if (match(name)) { + String result = readData(); + if (match("/" + name)) { + return result; + } + } + return null; + } + + public String[] stringArray(String name) { + if (match(name + "List")) { + List<String> list = new ArrayList<String>(); // typed so toArray(String[]) yields String[] + String s; + while (null != (s = string(name))) { + if ("Null".equals(s)) { + s = null; + } + list.add(s); + } + if (match("/" + name + "List")) { + return list.toArray(new String[list.size()]); + } + } + return null; + } + + public String[][] stringTable(String name) { + if (match(name + "Table")) { + List<String[]> list = new ArrayList<String[]>(); // typed so toArray(String[][]) yields String[][] + String[] sa; + while (null != (sa = stringArray(name))) { + list.add(sa); + } + if (match("/" + name + "Table")) { + return list.toArray(new String[list.size()][]); + } + } + return null; + } + + private boolean match(String target) { + if (target.equals(getTag())) { // getTag() is null once readNextTag() finds no further tag + // System.out.println("match '" + target + "'"); + advance(); + return true; + } + return false; + } + + private String getTag() { + if (tag == null) { + tag = readNextTag(); + } + return tag; + } + + private void advance() { + tag = null; + } + + private String readData() { + StringBuilder sb = new StringBuilder(); + boolean inWhitespace = false; + // boolean inAmp = false; + while (true) { + int c = readChar(); + if (c == -1 || c == '<') { + atTag = c == '<'; + break; + } + if (c == '&') { + c = readChar(); + if (c == '#') { + StringBuilder numBuf = new StringBuilder(); + int radix = 10; + c = readChar(); + if (c == 'x') 
{ + radix = 16; + c = readChar(); + } + while (c != ';' && c != -1) { + numBuf.append((char) c); + c = readChar(); + } + try { + int num = Integer.parseInt(numBuf.toString(), radix); + c = (char) num; + } catch (NumberFormatException ex) { + System.err.println("numbuf: " + numBuf.toString() + + " radix: " + radix); + throw ex; + } + } else { + StringBuilder charBuf = new StringBuilder(); + while (c != ';' && c != -1) { + charBuf.append((char) c); + c = readChar(); + } + String charName = charBuf.toString(); + if (charName.equals("lt")) { + c = '<'; + } else if (charName.equals("gt")) { + c = '>'; + } else if (charName.equals("quot")) { + c = '"'; + } else if (charName.equals("apos")) { + c = '\''; + } else if (charName.equals("amp")) { + c = '&'; + } else { + System.err.println("unrecognized character entity: '" + + charName + "'"); + continue; + } + } + } + + if (UCharacter.isWhitespace(c)) { + if (inWhitespace) { + continue; + } + c = ' '; + inWhitespace = true; + } else { + inWhitespace = false; + } + sb.append((char) c); + } + //System.err.println("read data: '" + sb.toString() + "'"); + return sb.toString(); + } + + private String readNextTag() { + int c = '\0'; + while (!atTag) { + c = readChar(); + if (c == '<' || c == -1) { + if (c == '<') { + atTag = true; + } + break; + } + if (!UCharacter.isWhitespace(c)) { + System.err.println("Unexpected non-whitespace character " + + Integer.toHexString(c)); + break; + } + } + + if (atTag) { + atTag = false; + StringBuilder sb = new StringBuilder(); + while (true) { + c = readChar(); + if (c == '>' || c == -1) { + break; + } + sb.append((char) c); + } + // System.err.println("read tag: '" + sb.toString() + "'"); + return sb.toString(); + } + return null; + } + + int readChar() { + try { + return r.read(); + } catch (IOException e) { + // assume end of input + } + return -1; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java 
b/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java new file mode 100644 index 00000000000..7d63ae2ef95 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java @@ -0,0 +1,250 @@ +/* + ****************************************************************************** + * Copyright (C) 2007-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** + */ + +package com.ibm.icu.impl.duration.impl; + +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; + +import com.ibm.icu.lang.UCharacter; + +public class XMLRecordWriter implements RecordWriter { + private Writer w; + private List<String> nameStack; // names of the currently open elements, innermost last + + public XMLRecordWriter(Writer w) { + this.w = w; + this.nameStack = new ArrayList<String>(); + } + + public boolean open(String title) { + newline(); + writeString("<" + title + ">"); + nameStack.add(title); + return true; + } + + public boolean close() { + int ix = nameStack.size() - 1; + if (ix >= 0) { + String name = nameStack.remove(ix); + newline(); + writeString("</" + name + ">"); // closing tag; XMLRecordReader.close() matches "/" + name + return true; + } + return false; + } + + public void flush() { + try { + if (w != null) { w.flush(); } // writeString() sets w to null after a failed write + } catch (IOException e) { + } + } + + public void bool(String name, boolean value) { + internalString(name, String.valueOf(value)); + } + + public void boolArray(String name, boolean[] values) { + if (values != null) { + String[] stringValues = new String[values.length]; + for (int i = 0; i < values.length; ++i) { + stringValues[i] = String.valueOf(values[i]); + } + stringArray(name, stringValues); + } + } + + private static String ctos(char value) { + if (value == '<') { + return "&lt;"; // a raw '<' would be read back as the start of a tag + } + if (value == '&') { + return "&amp;"; // a raw '&' would be read back as the start of an entity + } + return String.valueOf(value); + } + + public void character(String name, char value) { + if (value != '\uffff') { + internalString(name, ctos(value)); + } + } + + public void 
characterArray(String name, char[] values) { + if (values != null) { + String[] stringValues = new String[values.length]; + for (int i = 0; i < values.length; ++i) { + char value = values[i]; + if (value == '\uffff') { + stringValues[i] = NULL_NAME; + } else { + stringValues[i] = ctos(value); + } + } + internalStringArray(name, stringValues); + } + } + + public void namedIndex(String name, String[] names, int value) { + if (value >= 0) { + internalString(name, names[value]); + } + } + + public void namedIndexArray(String name, String[] names, byte[] values) { + if (values != null) { + String[] stringValues = new String[values.length]; + for (int i = 0; i < values.length; ++i) { + int value = values[i]; + if (value < 0) { + stringValues[i] = NULL_NAME; + } else { + stringValues[i] = names[value]; + } + } + internalStringArray(name, stringValues); + } + } + + public static String normalize(String str) { + if (str == null) { + return null; + } + StringBuilder sb = null; + boolean inWhitespace = false; + char c = '\0'; + boolean special = false; + for (int i = 0; i < str.length(); ++i) { + c = str.charAt(i); + if (UCharacter.isWhitespace(c)) { + if (sb == null && (inWhitespace || c != ' ')) { + sb = new StringBuilder(str.substring(0, i)); + } + if (inWhitespace) { + continue; + } + inWhitespace = true; + special = false; + c = ' '; + } else { + inWhitespace = false; + special = c == '<' || c == '&'; + if (special && sb == null) { + sb = new StringBuilder(str.substring(0, i)); + } + } + if (sb != null) { + if (special) { + sb.append(c == '<' ? 
"&lt;" : "&amp;"); // escape markup characters as XML entities + } else { + sb.append(c); + } + } + } + if (sb != null) { + /* + * if (c == ' ') { int len = sb.length(); if (len == 0) { return + * " "; } if (len > 1 && c == ' ') { sb.deleteCharAt(len - 1); } } + */ + return sb.toString(); + } + return str; + } + + private void internalString(String name, String normalizedValue) { + if (normalizedValue != null) { + newline(); + writeString("<" + name + ">" + normalizedValue + "</" + name + ">"); // value is written as-is: callers pass escaped text + } + } + + private void internalStringArray(String name, String[] normalizedValues) { + if (normalizedValues != null) { + push(name + "List"); + for (int i = 0; i < normalizedValues.length; ++i) { + String value = normalizedValues[i]; + if (value == null) { + value = NULL_NAME; + } + internalString(name, value); // values are already normalized; string() would escape their '&' a second time + } + pop(); + } + } + + public void string(String name, String value) { + internalString(name, normalize(value)); + } + + public void stringArray(String name, String[] values) { + if (values != null) { + push(name + "List"); + for (int i = 0; i < values.length; ++i) { + String value = normalize(values[i]); + if (value == null) { + value = NULL_NAME; + } + internalString(name, value); + } + pop(); + } + } + + public void stringTable(String name, String[][] values) { + if (values != null) { + push(name + "Table"); + for (int i = 0; i < values.length; ++i) { + String[] rowValues = values[i]; + if (rowValues == null) { + internalString(name + "List", NULL_NAME); + } else { + stringArray(name, rowValues); + } + } + pop(); + } + } + + private void push(String name) { + newline(); + writeString("<" + name + ">"); + nameStack.add(name); + } + + private void pop() { + int ix = nameStack.size() - 1; + String name = nameStack.remove(ix); + newline(); + writeString("</" + name + ">"); // closing tag for the element opened by push() + } + + private void newline() { + writeString("\n"); + for (int i = 0; i < nameStack.size(); ++i) { + writeString(INDENT); + } + } + + private void writeString(String str) { + if (w != null) { + try { + w.write(str); + } catch (IOException e) { + // if there's a problem, record it 
and stop writing + System.err.println(e.getMessage()); + w = null; + } + } + } + + static final String NULL_NAME = "Null"; + private static final String INDENT = " "; +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/YMDDateFormatter.java b/main/classes/core/src/com/ibm/icu/impl/duration/impl/YMDDateFormatter.java new file mode 100644 index 00000000000..6845e17850b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/YMDDateFormatter.java @@ -0,0 +1,98 @@ +/* +****************************************************************************** +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.impl.duration.impl; + +import com.ibm.icu.impl.duration.DateFormatter; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; + +/** + * A DateFormatter that formats the requested date fields. + */ +public class YMDDateFormatter implements DateFormatter { + private String requestedFields; + private String localeName; + private TimeZone timeZone; + private SimpleDateFormat df; // cache + + /** + * Creates a new formatter that formats the requested + * fields. The formatter defaults to the current locale + * and time zone. + * + * @param requestedFields the requested fields + */ + public YMDDateFormatter(String requestedFields) { + this(requestedFields, Locale.getDefault().toString(), + TimeZone.getDefault()); + } + + /** + * Creates a new formatter that formats the requested + * fields using the provided locale and time zone. 
+ * + * @param requestedFields the requested fields + * @param localeName the locale to use + * @param timeZone the time zone to use + */ + public YMDDateFormatter(String requestedFields, String localeName, + TimeZone timeZone) { + this.requestedFields = requestedFields; + this.localeName = localeName; + this.timeZone = timeZone; + + Locale locale = Utils.localeFromString(localeName); + this.df = new SimpleDateFormat("yyyy/MM/dd", locale); // MM = month in year; lowercase mm is minute in hour + this.df.setTimeZone(timeZone); + } + + /** + * Returns a string representing the formatted date. + * @param date the date in milliseconds + */ + public String format(long date) { + return format(new Date(date)); + } + + /** + * Returns a string representing the formatted date. + * @param date the date + */ + public String format(Date date) { + synchronized (this) { + if (df == null) { + // ignores requested fields + // todo: make this really work + } + return df.format(date); // SimpleDateFormat is not thread-safe, so format while holding the lock + } + } + + /** + * Returns a version of this formatter customized to the provided locale. + */ + public DateFormatter withLocale(String locName) { + if (!locName.equals(localeName)) { + return new YMDDateFormatter(requestedFields, locName, timeZone); + } + return this; + } + + /** + * Returns a version of this formatter customized to the provided time zone. + */ + public DateFormatter withTimeZone(TimeZone tz) { + if (!tz.equals(timeZone)) { + return new YMDDateFormatter(requestedFields, localeName, tz); + } + return this; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/index.txt b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/index.txt new file mode 100644 index 00000000000..ead798812f7 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/index.txt @@ -0,0 +1,19 @@ +#****************************************************************************** +#* Copyright (C) 2007-2008 International Business Machines Corporation and * +#* others. All Rights Reserved. 
* +#****************************************************************************** +ar_EG +en +es +fr +he_IL +hi +it +ja +ko +ru +th +zh_Hans +zh_Hans_SG +zh_Hant +zh_Hant_HK diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml new file mode 100644 index 00000000000..f78c305f223 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml @@ -0,0 +1,118 @@ + + + + ARABIC + + + سنوات + سنة + سنتين + + + شهور + شهر + شهرين + + + أسابيع + أسبوع + أسبوعين + + + أيام + يوم + يومين + + + ساعات + ساعة + ساعتين + + + دقائق + دقيقة + دقيقتين + + + ثواني + ثانية + ثانيتين + + + أجزاء من الثانية + جزء من الثانية + جزئين من الثانية + + + + نصف + ونصف + + + PREFIX + LAST + + false + + + + ، و + ، و + ، و + و + + DEFAULT + ٠ + ٫ + true + true + DSINGULAR + FSINGULAR_PLURAL + true + false + YES + + + false + + + منذ + false + + + false + بعد الآن + + + أقل من + false + + + منذ أقل من + false + + + أقل من + false + بعد الآن + + + أكثر من + false + + + منذ أكثر من + false + + + أكثر من + false + بعد الآن + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml.escaped b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml.escaped new file mode 100644 index 00000000000..63bc4f92a76 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml.escaped @@ -0,0 +1,118 @@ + + + + ARABIC + + + \u0633\u0646\u0648\u0627\u062a + \u0633\u0646\u0629 + \u0633\u0646\u062a\u064a\u0646 + + + \u0634\u0647\u0648\u0631 + \u0634\u0647\u0631 + \u0634\u0647\u0631\u064a\u0646 + + + \u0623\u0633\u0627\u0628\u064a\u0639 + \u0623\u0633\u0628\u0648\u0639 + \u0623\u0633\u0628\u0648\u0639\u064a\u0646 + + + \u0623\u064a\u0627\u0645 + \u064a\u0648\u0645 + \u064a\u0648\u0645\u064a\u0646 + + + \u0633\u0627\u0639\u0627\u062a + \u0633\u0627\u0639\u0629 + \u0633\u0627\u0639\u062a\u064a\u0646 + + + 
\u062f\u0642\u0627\u0626\u0642 + \u062f\u0642\u064a\u0642\u0629 + \u062f\u0642\u064a\u0642\u062a\u064a\u0646 + + + \u062b\u0648\u0627\u0646\u064a + \u062b\u0627\u0646\u064a\u0629 + \u062b\u0627\u0646\u064a\u062a\u064a\u0646 + + + \u0623\u062c\u0632\u0627\u0621 \u0645\u0646 \u0627\u0644\u062b\u0627\u0646\u064a\u0629 + \u062c\u0632\u0621 \u0645\u0646 \u0627\u0644\u062b\u0627\u0646\u064a\u0629 + \u062c\u0632\u0626\u064a\u0646 \u0645\u0646 \u0627\u0644\u062b\u0627\u0646\u064a\u0629 + + + + \u0646\u0635\u0641 + \u0648\u0646\u0635\u0641 + + + PREFIX + LAST + + false + + + + \u060c \u0648 + \u060c \u0648 + \u060c \u0648 + \u0648 + + DEFAULT + \u0660 + \u066b + true + true + DSINGULAR + FSINGULAR_PLURAL + true + false + YES + + + false + + + \u0645\u0646\u0630 + false + + + false + \u0628\u0639\u062f \u0627\u0644\u0622\u0646 + + + \u0623\u0642\u0644 \u0645\u0646 + false + + + \u0645\u0646\u0630 \u0623\u0642\u0644 \u0645\u0646 + false + + + \u0623\u0642\u0644 \u0645\u0646 + false + \u0628\u0639\u062f \u0627\u0644\u0622\u0646 + + + \u0623\u0643\u062b\u0631 \u0645\u0646 + false + + + \u0645\u0646\u0630 \u0623\u0643\u062b\u0631 \u0645\u0646 + false + + + \u0623\u0643\u062b\u0631 \u0645\u0646 + false + \u0628\u0639\u062f \u0627\u0644\u0622\u0646 + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_en.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_en.xml new file mode 100644 index 00000000000..49b24f70323 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_en.xml @@ -0,0 +1,128 @@ + + + + PLURAL + + + years + year + + + months + month + + + weeks + week + + + days + day + + + hours + hour + + + minutes + minute + + + seconds + second + + + milliseconds + millisecond + + + + yr + mnth + wk + dy + hr + min + sec + ms + + + y + m + w + d + h + m + s + x + + + ½ + ½ + + false + + + + , + , and + , + and + + DEFAULT + 0 + . 
+ false + false + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + false + ago + + + false + from now + + + less than + false + + + less than + false + ago + + + less than + false + from now + + + more than + false + + + more than + false + ago + + + more than + false + from now + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_es.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_es.xml new file mode 100644 index 00000000000..b1473e3b6d9 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_es.xml @@ -0,0 +1,150 @@ + + + + PLURAL + + + años + año + + + meses + mes + + + semanas + semana + + + días + día + + + horas + hora + + + minutos + minuto + + + segundos + segundo + + + milisegundos + milisegundo + + + + M + M + F + M + F + M + M + M + + + un año + un mes + una semana + un día + una hora + un minuto + un segundo + un milisegundo + + + Null + Null + Null + d + hr + min + seg + mseg + + + a + m + s + d + h + m + s + ms + + + medio + y medio + media + y media + + + PREFIX + LAST + + false + + + + , + y + , + y + + DEFAULT + 0 + , + false + false + DPLURAL + FSINGULAR_PLURAL_ANDAHALF + true + false + YES + + + false + + + hace + false + + + dentro de + false + + + menos de + false + + + hace menos de + false + + + dentro de menos de + false + + + más de + false + + + hace más de + false + + + dentro más de + false + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_fr.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_fr.xml new file mode 100644 index 00000000000..0809dd2be65 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_fr.xml @@ -0,0 +1,105 @@ + + + + PLURAL + + + ans + an + + + mois + mois + + + semaines + semaine + + + jours + jour + + + heures + heure + + + minutes + minute + + + secondes + seconde + + + millisecondes + milliseconde + + + + ½ + ½ + + false + + + + , + et + , + et + + 
DEFAULT + 0 + . + false + false + ZSINGULAR + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + il y a + false + + + dans + false + + + moins de + false + + + il y a moins de + false + + + dans moins de + false + + + plus de + false + + + il y a plus de + false + + + dans plus de + false + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_he_IL.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_he_IL.xml new file mode 100644 index 00000000000..4018d0d6c1b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_he_IL.xml @@ -0,0 +1,129 @@ + + + + HEBREW + + + שנים + שנה + שנתיים + + + חודשים + חודש + חודשיים + + + שבועות + שבוע + שבועיים + + + ימים + יום + יומיים + + + שעות + שעה + שעתיים + + + דקות + דקה + Null + + + שניות + שנייה + Null + + + מילישניות + מילישניה + Null + + + + שנה אחת + חודש אחד + שבוע אחד + יום אחד + שעה אחת + דקה אחת + שנייה אחת + מילישניה אחת + + + ½ + ½ + + false + ־ + + + + , + ו + , + ו + + + false + true + false + true + + DEFAULT + 0 + . + true + true + DSINGULAR_SUBONE + FSINGULAR_PLURAL + true + false + YES + + + false + + + לפני + false + + + אחרי + false + + + יותר מ + true + + + לפני יותר מ + true + + + אחרי יותר מ + true + + + פחות מ + true + + + לפני פחות מ + true + + + אחרי פחות מ + true + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_hi.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_hi.xml new file mode 100644 index 00000000000..6222ef5259f --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_hi.xml @@ -0,0 +1,105 @@ + + + + PLURAL + + + साल + साल + + + महीने + महीना + + + सप्ताह + सप्ताह + + + दिन + दिन + + + घंटे + घंटा + + + मिनट + मिनट + + + सेकण्ड + सेकण्ड + + + मिली सेकण्ड + मिली सेकण्ड + + + + ½ + ½ + + false + + + + , + और + , + और + + DEFAULT + + . 
+ false + false + DPLURAL + FPLURAL + true + false + YES + + + false + + + false + पहले + + + अभी से + false + बाद + + + false + से कम + + + false + से कम पहले + + + false + से कम from now + + + false + से ज़्यादा + + + false + से ज़्यादा ago + + + false + से ज़्यादा from now + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_it.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_it.xml new file mode 100644 index 00000000000..245a317931b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_it.xml @@ -0,0 +1,175 @@ + + + + PLURAL + + + anni + anno + + + mesi + mese + + + settimane + settimana + + + giorni + giorno + + + ore + ora + + + minuti + minuto + + + secondi + secondo + + + millisecondi + millisecondo + + + + M + M + F + M + F + M + M + M + + + un anno + un mese + una settimana + un giorno + un'ora + un minuto + un secondo + un millisecondo + + + Null + Null + Null + Null + mezz'ora + Null + Null + Null + + + zero + Null + due + tre + quattro + cinque + sei + sette + otto + nove + dieci + + + ann. + mes. + sett. + gg. + or. + min. + sec. + msec. + + + A + M + S + G + H + M + S + Null + + + mezzo + e mezzo + mezza + e mezza + + + PREFIX + LAST + + false + + + + , + e + , + e + + DEFAULT + 0 + . 
+ false + false + DPLURAL + FSINGULAR_PLURAL_ANDAHALF + true + false + YES + + + false + + + false + fa + + + fra + false + + + meno di + false + + + meno di + false + fa + + + fra meno di + false + + + oltre + false + + + oltre + false + fa + + + fra oltre + false + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ja.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ja.xml new file mode 100644 index 00000000000..a309ce8f2a0 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ja.xml @@ -0,0 +1,128 @@ + + + + NONE + + + + + + + + + 週間 + + + + + + 時間 + + + + + + + + + ミリ秒 + + + + Null + + Null + Null + Null + Null + Null + Null + + + + + + + PREFIX + LAST + + + YES + YES + NO + YES + YES + ONE_PLUS + YES + NO + + false + + + false + false + true + false + false + false + false + false + + DEFAULT + 0 + . + false + true + DPLURAL + FSINGULAR_PLURAL + + false + true + WITH_SECONDS + + + false + + + false + + + + 今から + false + + + + false + 以内 + + + 過去 + false + 以内 + + + 今から + false + 以内 + + + false + 以上 + + + false + 以上前 + + + 今から + false + 以上後 + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ko.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ko.xml new file mode 100644 index 00000000000..32de9a78850 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ko.xml @@ -0,0 +1,93 @@ + + + + NONE + + + + + + 개월 + + + + + + + + + 시간 + + + + + + + + + 밀리세컨드 + + + + ½ + ½ + + false + + + + + + + + DEFAULT + 0 + . 
+ false + false + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + false + + + + 지금부터 + false + + + false + + + false + + + + 지금부터 + false + + + false + + + false + + + + 지금부터 + false + + + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ru.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ru.xml new file mode 100644 index 00000000000..55dc35202d5 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_ru.xml @@ -0,0 +1,143 @@ + + + + PAUCAL + + + лет + год + Null + года + + + месяцев + месяц + Null + месяца + + + недель + неделя + Null + недели + + + дней + день + Null + дня + + + часов + час + Null + часа + + + минут + минута + Null + минуты + + + секунд + секунда + Null + секунды + + + миллисекунд + миллисекунда + Null + миллисекунды + + + + г + мес + нед + дн + ч + мин + с + мс + мкс + + + г + m + н + д + ч + м + с + x + + + ½ + ½ + + false + + + + , + и + , + и + + DEFAULT + 0 + . + false + true + DPAUCAL + FPAUCAL + true + false + YES + + + false + + + false + назад + + + через + false + + + меньше, чем + false + + + меньше, чем + false + назад + + + через + false + + + больше, чем + false + + + больше, чем + false + назад + + + через + false + + + diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_th.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_th.xml new file mode 100644 index 00000000000..f45365178e1 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_th.xml @@ -0,0 +1,118 @@ + + + + NONE + + + ปี + + + เดือน + + + อาทิตย์ + + + วัน + + + ชั่วโมง + + + นาที + + + วินาที + + + มิลลิวินาที + + + + Null + Null + Null + Null + ชม. + Null + Null + Null + + + ครึ่ง + ครึ่ง + + + PREFIX + LAST + + + YES + YES + YES + YES + YES + YES + NO + NO + + true + + + + DEFAULT + 0 + . 
+ false + false + DPLURAL + FPLURAL + true + false + NO + + + false + + + false + ที่แล้ว + + + อีก + false + + + น้อยกว่า + false + + + น้อยกว่า + false + ที่แล้ว + + + ไม่ถึงอีก + false + + + มากกว่า + false + + + มากกว่า + false + ที่แล้ว + + + อีัก + false + กว่าๆ + + + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans.xml new file mode 100644 index 00000000000..b3f60aefe0d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans.xml @@ -0,0 +1,132 @@ + + + + NONE + + + + + + + + + + + + + + + 小时 + + + + + + + + + 毫秒 + + + + Null + + Null + Null + + Null + Null + Null + Null + + + + Null + Null + Null + Null + Null + + Null + Null + + + Null + Null + Null + Null + Null + Null + + Null + + Null + + + + + + + PREFIX + AFTER_FIRST + + + false + + CHINESE_SIMPLIFIED + 0 + . + false + true + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + false + 以前 + + + false + 以后 + + + 不到 + false + + + 不到 + false + 以前 + + + 不到 + false + 以后 + + + 超过 + false + + + 超过 + false + 以前 + + + 超过 + false + 以后 + + + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans_SG.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans_SG.xml new file mode 100644 index 00000000000..bb8eca8b84b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans_SG.xml @@ -0,0 +1,130 @@ + + + + NONE + + + + + + + + + + + + + + + 小时 + + + + + + + + + 毫秒 + + + + Null + + Null + Null + + Null + Null + Null + Null + + + + Null + Null + Null + Null + Null + + Null + Null + + + Null + Null + Null + Null + Null + Null + + Null + + + + + + + PREFIX + AFTER_FIRST + + + false + + CHINESE_SIMPLIFIED + 0 + . 
+ false + true + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + false + 以前 + + + false + 以后 + + + 不到 + false + + + 不到 + false + 以前 + + + 不到 + false + 以后 + + + 超过 + false + + + 超过 + false + 以前 + + + 超过 + false + 以后 + + + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant.xml new file mode 100644 index 00000000000..8b84f235af1 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant.xml @@ -0,0 +1,130 @@ + + + + NONE + + + + + + + + + + + + + + + 小時 + + + + + + + + + 毫秒 + + + + Null + + Null + Null + + Null + Null + Null + + + Null + Null + Null + Null + Null + + Null + Null + + + Null + Null + Null + Null + Null + Null + + Null + + Null + + + + + + + PREFIX + AFTER_FIRST + + + false + + CHINESE_TRADITIONAL + 0 + . + false + true + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + false + 以前 + + + false + 以後 + + + 不到 + false + + + 不到 + false + 以前 + + + 不到 + false + 以後 + + + 超過 + false + + + 超過 + false + 以前 + + + 超過 + false + 以後 + + + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant_HK.xml b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant_HK.xml new file mode 100644 index 00000000000..e474dee3cee --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant_HK.xml @@ -0,0 +1,116 @@ + + + + NONE + + + + + + + + + + + + + + + 小時 + + + + + + + + + 毫秒 + + + + Null + + Null + Null + Null + Null + Null + Null + + + Null + Null + Null + Null + Null + + Null + Null + + + + + + + PREFIX + AFTER_FIRST + + false + + DEFAULT + 0 + . 
+ false + true + DPLURAL + FSINGULAR_PLURAL + true + false + YES + + + false + + + false + 之前 + + + false + 之後 + + + 少於 + false + + + false + 以內 + + + 即時起 + false + 以內 + + + 超過 + false + + + 超過 + false + 之前 + + + 即時起 + false + 之後 + + + \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java b/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java new file mode 100644 index 00000000000..78759fe1e3a --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java @@ -0,0 +1,180 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +public final class AsciiUtil { + public static boolean caseIgnoreMatch(String s1, String s2) { + if (s1 == s2) { + return true; + } + int len = s1.length(); + if (len != s2.length()) { + return false; + } + int i = 0; + while (i < len) { + char c1 = s1.charAt(i); + char c2 = s2.charAt(i); + if (c1 != c2 && toLower(c1) != toLower(c2)) { + break; + } + i++; + } + return (i == len); + } + + public static int caseIgnoreCompare(String s1, String s2) { + if (s1 == s2) { + return 0; + } + return AsciiUtil.toLowerString(s1).compareTo(AsciiUtil.toLowerString(s2)); + } + + + public static char toUpper(char c) { + if (c >= 'a' && c <= 'z') { + c -= 0x20; + } + return c; + } + + public static char toLower(char c) { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + return c; + } + + public static String toLowerString(String s) { + int idx = 0; + for (; idx < s.length(); idx++) { + char c = s.charAt(idx); + if (c >= 'A' && c <= 'Z') { + break; + } + } + if (idx == s.length()) { + return s; + } + StringBuilder buf = new StringBuilder(s.substring(0, idx)); + for (; idx < s.length(); idx++) { + 
buf.append(toLower(s.charAt(idx))); + } + return buf.toString(); + } + + public static String toUpperString(String s) { + int idx = 0; + for (; idx < s.length(); idx++) { + char c = s.charAt(idx); + if (c >= 'a' && c <= 'z') { + break; + } + } + if (idx == s.length()) { + return s; + } + StringBuilder buf = new StringBuilder(s.substring(0, idx)); + for (; idx < s.length(); idx++) { + buf.append(toUpper(s.charAt(idx))); + } + return buf.toString(); + } + + public static String toTitleString(String s) { + if (s.length() == 0) { + return s; + } + int idx = 0; + char c = s.charAt(idx); + if (!(c >= 'a' && c <= 'z')) { + for (idx = 1; idx < s.length(); idx++) { + if (c >= 'A' && c <= 'Z') { + break; + } + } + } + if (idx == s.length()) { + return s; + } + StringBuilder buf = new StringBuilder(s.substring(0, idx)); + if (idx == 0) { + buf.append(toUpper(s.charAt(idx))); + idx++; + } + for (; idx < s.length(); idx++) { + buf.append(toLower(s.charAt(idx))); + } + return buf.toString(); + } + + public static boolean isAlpha(char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } + + public static boolean isAlphaString(String s) { + boolean b = true; + for (int i = 0; i < s.length(); i++) { + if (!isAlpha(s.charAt(i))) { + b = false; + break; + } + } + return b; + } + + public static boolean isNumeric(char c) { + return (c >= '0' && c <= '9'); + } + + public static boolean isNumericString(String s) { + boolean b = true; + for (int i = 0; i < s.length(); i++) { + if (!isNumeric(s.charAt(i))) { + b = false; + break; + } + } + return b; + } + + public static boolean isAlphaNumeric(char c) { + return isAlpha(c) || isNumeric(c); + } + + public static boolean isAlphaNumericString(String s) { + boolean b = true; + for (int i = 0; i < s.length(); i++) { + if (!isAlphaNumeric(s.charAt(i))) { + b = false; + break; + } + } + return b; + } + + public static class CaseInsensitiveKey { + private String _key; + private int _hash; + + public CaseInsensitiveKey(String key) { 
+ _key = key; + _hash = AsciiUtil.toLowerString(key).hashCode(); + } + + public boolean equals(Object o) { + if (o instanceof CaseInsensitiveKey) { + return AsciiUtil.caseIgnoreMatch(_key, ((CaseInsensitiveKey)o)._key); + } + return false; + } + + public int hashCode() { + return _hash; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java b/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java new file mode 100644 index 00000000000..f7f0927bb76 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java @@ -0,0 +1,221 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.impl.locale; + + +public final class BaseLocale { + + private static final boolean JDKIMPL = false; + + private String _language = ""; + private String _script = ""; + private String _region = ""; + private String _variant = ""; + + private transient volatile int _hash = 0; + + private static final LocaleObjectCache BASELOCALE_CACHE + = new LocaleObjectCache(); + + public static final BaseLocale ROOT = BaseLocale.getInstance("", "", "", ""); + + private BaseLocale(String language, String script, String region, String variant) { + if (language != null) { + _language = AsciiUtil.toLowerString(language).intern(); + } + if (script != null) { + _script = AsciiUtil.toTitleString(script).intern(); + } + if (region != null) { + _region = AsciiUtil.toUpperString(region).intern(); + } + if (variant != null) { + if (JDKIMPL) { + // preserve upper/lower cases + _variant = variant.intern(); + } else { + _variant = AsciiUtil.toUpperString(variant).intern(); + } + } + } + + public static BaseLocale getInstance(String language, String script, String region, String variant) { + if (JDKIMPL) { + // JDK 
uses deprecated ISO639.1 language codes for he, yi and id + if (AsciiUtil.caseIgnoreMatch(language, "he")) { + language = "iw"; + } else if (AsciiUtil.caseIgnoreMatch(language, "yi")) { + language = "ji"; + } else if (AsciiUtil.caseIgnoreMatch(language, "id")) { + language = "in"; + } + } + Key key = new Key(language, script, region, variant); + BaseLocale baseLocale = BASELOCALE_CACHE.get(key); + if (baseLocale == null) { + baseLocale = new BaseLocale(language, script, region, variant); + baseLocale = BASELOCALE_CACHE.put(baseLocale.createKey(), baseLocale); + } + return baseLocale; + } + + public String getLanguage() { + return _language; + } + + public String getScript() { + return _script; + } + + public String getRegion() { + return _region; + } + + public String getVariant() { + return _variant; + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + if (_language.length() > 0) { + buf.append("language="); + buf.append(_language); + } + if (_script.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("script="); + buf.append(_script); + } + if (_region.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("region="); + buf.append(_region); + } + if (_variant.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("variant="); + buf.append(_variant); + } + return buf.toString(); + } + + public int hashCode() { + int h = _hash; + if (h == 0) { + // Generating a hash value from language, script, region and variant + for (int i = 0; i < _language.length(); i++) { + h = 31*h + _language.charAt(i); + } + for (int i = 0; i < _script.length(); i++) { + h = 31*h + _script.charAt(i); + } + for (int i = 0; i < _region.length(); i++) { + h = 31*h + _region.charAt(i); + } + for (int i = 0; i < _variant.length(); i++) { + h = 31*h + _variant.charAt(i); + } + _hash = h; + } + return h; + } + + private Key createKey() { + return new Key(_language, _script, _region, _variant); + 
} + + private static class Key implements Comparable { + private String _lang = ""; + private String _scrt = ""; + private String _regn = ""; + private String _vart = ""; + + private volatile int _hash; // Default to 0 + + public Key(String language, String script, String region, String variant) { + if (language != null) { + _lang = language; + } + if (script != null) { + _scrt = script; + } + if (region != null) { + _regn = region; + } + if (variant != null) { + _vart = variant; + } + } + + public boolean equals(Object obj) { + if (JDKIMPL) { + return (this == obj) || + (obj instanceof Key) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn) + && ((Key)obj)._vart.equals(_vart); // variant is case sensitive in JDK! + } + return (this == obj) || + (obj instanceof Key) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._vart, this._vart); + } + + public int compareTo(Key other) { + int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang); + if (res == 0) { + res = AsciiUtil.caseIgnoreCompare(this._scrt, other._scrt); + if (res == 0) { + res = AsciiUtil.caseIgnoreCompare(this._regn, other._regn); + if (res == 0) { + if (JDKIMPL) { + res = this._vart.compareTo(other._vart); + } else { + res = AsciiUtil.caseIgnoreCompare(this._vart, other._vart); + } + } + } + } + return res; + } + + public int hashCode() { + int h = _hash; + if (h == 0) { + // Generating a hash value from language, script, region and variant + for (int i = 0; i < _lang.length(); i++) { + h = 31*h + AsciiUtil.toLower(_lang.charAt(i)); + } + for (int i = 0; i < _scrt.length(); i++) { + h = 31*h + AsciiUtil.toLower(_scrt.charAt(i)); + } + for (int i = 0; i < _regn.length(); i++) { + h = 31*h + 
AsciiUtil.toLower(_regn.charAt(i)); + } + for (int i = 0; i < _vart.length(); i++) { + if (JDKIMPL) { + h = 31*h + _vart.charAt(i); + } else { + h = 31*h + AsciiUtil.toLower(_vart.charAt(i)); + } + } + _hash = h; + } + return h; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java b/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java new file mode 100644 index 00000000000..938e1e387db --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java @@ -0,0 +1,114 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import com.ibm.icu.impl.locale.LanguageTag.ParseStatus; + +public class Extension { + private char _key; + protected String _value; + + protected Extension(char key) { + _key = key; + } + + public char getKey() { + return _key; + } + + public String getValue() { + return _value; + } + + public String getID() { + return _key + LanguageTag.SEP + _value; + } + + public String toString() { + return getID(); + } + + public static Extension create(StringTokenIterator itr, ParseStatus sts) { + if (sts.isError() || itr.isDone()) { + return null; + } + + Extension ext = null; + String key = itr.current(); + if (LanguageTag.isExtensionSingleton(key) || LanguageTag.isPrivateuseSingleton(key)) { + itr.next(); + ext = create(key.charAt(0), itr, sts); + } + + return ext; + } + + public static Extension create(char key, StringTokenIterator val, ParseStatus sts) { + if (sts.isError()) { + return null; + } + if (val.isDone()) { + sts.errorIndex = val.currentStart(); + sts.errorMsg = "Missing extension subtag for extension :" + key; + return null; + } + + Extension ext = null; + key = AsciiUtil.toLower(key); + + switch (key) { + case 
UnicodeLocaleExtension.SINGLETON: + ext = new UnicodeLocaleExtension(); + break; + case PrivateuseExtension.SINGLETON: + ext = new PrivateuseExtension(); + break; + default: + ext = new Extension(key); + break; + } + + ext.setExtensionValue(val, sts); + + if (ext.getValue() == null) { + // return null only when nothing parsed. + return null; + } + + return ext; + } + + protected void setExtensionValue(StringTokenIterator itr, ParseStatus sts) { + if (sts.isError() || itr.isDone()) { + _value = null; + return; + } + + StringBuilder buf = new StringBuilder(); + while (!itr.isDone()) { + String s = itr.current(); + if (!LanguageTag.isExtensionSubtag(s)) { + break; + } + s = LanguageTag.canonicalizeExtensionSubtag(s); + if (buf.length() != 0) { + buf.append(LanguageTag.SEP); + } + buf.append(s); + sts.parseLength = itr.currentEnd(); + itr.next(); + } + + if (buf.length() == 0) { + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Invalid extension subtag: " + itr.current(); + _value = null; + } else { + _value = buf.toString(); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java b/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java new file mode 100644 index 00000000000..f1d09ce44e6 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java @@ -0,0 +1,284 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.List; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import com.ibm.icu.impl.locale.LanguageTag.ParseStatus; + +public final class InternalLocaleBuilder { + + private String _language = ""; + private String _script = ""; + private String _region = ""; + private String _variant = ""; + private SortedMap _extMap; + + private final boolean _lenientVariant; + + private static final String LOCALESEP = "_"; + + public InternalLocaleBuilder() { + this(false); + } + + public InternalLocaleBuilder(boolean lenientVariant) { + _lenientVariant = lenientVariant; + } + + public boolean isLenientVariant() { + return _lenientVariant; + } + + public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException { + String newval = ""; + if (language.length() > 0) { + if (!LanguageTag.isLanguage(language)) { + throw new LocaleSyntaxException("Ill-formed language: " + language, 0); + } + newval = LanguageTag.canonicalizeLanguage(language); + } + _language = newval; + return this; + } + + public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException { + String newval = ""; + if (script.length() > 0) { + if (!LanguageTag.isScript(script)) { + throw new LocaleSyntaxException("Ill-formed script: " + script, 0); + } + newval = LanguageTag.canonicalizeScript(script); + } + _script = newval; + return this; + } + + public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException { + String newval = ""; + if (region.length() > 0) { + if (!LanguageTag.isRegion(region)) { + throw new LocaleSyntaxException("Ill-formed region: " + region); + } + newval = LanguageTag.canonicalizeRegion(region); + } + _region = newval; + return this; + } + + public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException { + String newval = ""; + if 
(variant.length() > 0) { + if (_lenientVariant) { + newval = variant; + } else { + newval = processVariant(variant); + } + } + _variant = newval; + return this; + } + + public InternalLocaleBuilder setUnicodeLocaleExtension(String key, String type) throws LocaleSyntaxException { + if (key.length() == 0) { + throw new LocaleSyntaxException("Empty Unicode locale extension key"); + } + if (!UnicodeLocaleExtension.isKey(key)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale extension key: " + key, 0); + } + + key = UnicodeLocaleExtension.canonicalizeKey(key); + + UnicodeLocaleExtension ulext = null; + if (_extMap != null) { + ulext = (UnicodeLocaleExtension)_extMap.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + } + + if (type.length() == 0) { + if (ulext != null) { + ulext.remove(key); + if (ulext.isEmpty()) { + _extMap.remove(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + } + } + } else { + StringBuilder buf = new StringBuilder(); + StringTokenIterator sti = new StringTokenIterator(type, LanguageTag.SEP); + for (String subtag = sti.first(); !sti.isDone(); subtag = sti.next()) { + if (!UnicodeLocaleExtension.isTypeSubtag(subtag)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale extension type: " + type, sti.currentStart()); + } + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(UnicodeLocaleExtension.canonicalizeTypeSubtag(subtag)); + } + if (ulext == null) { + SortedMap ktmap = new TreeMap(); + ktmap.put(key, buf.toString()); + ulext = new UnicodeLocaleExtension(ktmap); + if (_extMap == null) { + _extMap = new TreeMap(); + } + _extMap.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), ulext); + } else { + ulext.put(key, buf.toString()); + } + } + + return this; + } + + public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException { + String strSingleton = String.valueOf(singleton); + if (!LanguageTag.isExtensionSingleton(strSingleton) && 
!LanguageTag.isPrivateuseSingleton(strSingleton)) { + throw new LocaleSyntaxException("Ill-formed extension key: " + singleton); + } + + strSingleton = LanguageTag.canonicalizeExtensionSingleton(strSingleton); + Character key = Character.valueOf(strSingleton.charAt(0)); + + if (value.length() == 0) { + if (_extMap != null) { + _extMap.remove(key); + } + } else { + StringTokenIterator sti = new StringTokenIterator(value, LanguageTag.SEP); + ParseStatus sts = new ParseStatus(); + + Extension ext = Extension.create(key.charValue(), sti, sts); + if (sts.isError()) { + throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex); + } + if (sts.parseLength != value.length() || ext == null) { + throw new LocaleSyntaxException("Ill-formed extension value: " + value, sti.currentStart()); + } + if (_extMap == null) { + _extMap = new TreeMap(); + } + _extMap.put(key, ext); + } + return this; + } + + public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException { + String language = base.getLanguage(); + String script = base.getScript(); + String region = base.getRegion(); + String variant = base.getVariant(); + + // Validate base locale fields before updating internal state. + // LocaleExtensions always store validated/canonicalized values, + // so no checks are necessary. 
+ if (language.length() > 0) { + if (!LanguageTag.isLanguage(language)) { + throw new LocaleSyntaxException("Ill-formed language: " + language); + } + language = LanguageTag.canonicalizeLanguage(language); + } + if (script.length() > 0) { + if (!LanguageTag.isScript(script)) { + throw new LocaleSyntaxException("Ill-formed script: " + script); + } + script = LanguageTag.canonicalizeScript(script); + } + if (region.length() > 0) { + if (!LanguageTag.isRegion(region)) { + throw new LocaleSyntaxException("Ill-formed region: " + region); + } + region = LanguageTag.canonicalizeRegion(region); + } + if (_lenientVariant) { + // In lenient variant mode, parse special private use value + // reserved for Java Locale. + String privuse = extensions.getExtensionValue(Character.valueOf(LanguageTag.PRIVATEUSE.charAt(0))); + if (privuse != null) { + variant = LanguageTag.getJavaCompatibleVariant(variant, privuse); + } + } else { + if (variant.length() > 0) { + variant = processVariant(variant); + } + } + + // update builder's internal fields + _language = language; + _script = script; + _region = region; + _variant = variant; + + // empty extensions + if (_extMap == null) { + _extMap = new TreeMap(); + } else { + _extMap.clear(); + } + + Set extKeys = extensions.getKeys(); + for (Character key : extKeys) { + Extension ext = extensions.getExtension(key); + if (_lenientVariant && (ext instanceof PrivateuseExtension)) { + String modPrivuse = LanguageTag.getJavaCompatiblePrivateuse(ext.getValue()); + if (!modPrivuse.equals(ext.getValue())) { + ext = new PrivateuseExtension(modPrivuse); + } + } + _extMap.put(key, ext); + } + + return this; + } + + public InternalLocaleBuilder clear() { + _language = ""; + _script = ""; + _region = ""; + _variant = ""; + removeLocaleExtensions(); + return this; + } + + public InternalLocaleBuilder removeLocaleExtensions() { + if (_extMap != null) { + _extMap.clear(); + } + return this; + } + + public BaseLocale getBaseLocale() { + return 
BaseLocale.getInstance(_language, _script, _region, _variant); + } + + public LocaleExtensions getLocaleExtensions() { + if (_extMap != null && _extMap.size() > 0) { + return LocaleExtensions.getInstance(_extMap); + } + return LocaleExtensions.EMPTY_EXTENSIONS; + } + + private String processVariant(String variant) throws LocaleSyntaxException { + StringTokenIterator sti = new StringTokenIterator(variant, LOCALESEP); + ParseStatus sts = new ParseStatus(); + + List variants = LanguageTag.DEFAULT_PARSER.parseVariants(sti, sts); + if (sts.parseLength != variant.length()) { + throw new LocaleSyntaxException("Ill-formed variant: " + variant, sti.currentStart()); + } + + StringBuilder buf = new StringBuilder(); + for (String var : variants) { + if (buf.length() != 0) { + buf.append(LOCALESEP); + } + buf.append(var); + } + return buf.toString(); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java b/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java new file mode 100644 index 00000000000..39e549b1aed --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java @@ -0,0 +1,897 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.Map.Entry; + +public class LanguageTag { + + private static final boolean JDKIMPL = false; + + // + // static fields + // + public static final String SEP = "-"; + public static final String PRIVATEUSE = "x"; + public static String UNDETERMINED = "und"; + + private static final String JAVAVARIANT = "variant"; + private static final String JAVASEP = "_"; + + private static final SortedMap EMPTY_EXTENSION_MAP = new TreeMap(); + + // + // Language tag parser instances + // + public static final Parser DEFAULT_PARSER = new Parser(false); + public static final Parser JAVA_VARIANT_COMPATIBLE_PARSER = new Parser(true); + + // + // Language subtag fields + // + private String _grandfathered = ""; // grandfathered tag + private String _language = ""; // language subtag + private String _script = ""; // script subtag + private String _region = ""; // region subtag + private String _privateuse = ""; // privateuse, not including leading "x-" + private List _extlangs = Collections.emptyList(); // extlang subtags + private List _variants = Collections.emptyList(); // variant subtags + private SortedMap _extensions = EMPTY_EXTENSION_MAP; // extension key/value pairs + + private boolean _javaCompatVariants = false; + + // Map contains grandfathered tags and its preferred mappings from + // http://www.ietf.org/rfc/rfc5646.txt + private static final Map GRANDFATHERED = + new HashMap(); + + static { + // grandfathered = irregular ; non-redundant tags registered + // / regular ; during the RFC 3066 era + // + // irregular = "en-GB-oed" ; irregular tags do not match + // / "i-ami" ; the 'langtag' production and + // / "i-bnn" ; would 
not otherwise be + // / "i-default" ; considered 'well-formed' + // / "i-enochian" ; These tags are all valid, + // / "i-hak" ; but most are deprecated + // / "i-klingon" ; in favor of more modern + // / "i-lux" ; subtags or subtag + // / "i-mingo" ; combination + // / "i-navajo" + // / "i-pwn" + // / "i-tao" + // / "i-tay" + // / "i-tsu" + // / "sgn-BE-FR" + // / "sgn-BE-NL" + // / "sgn-CH-DE" + // + // regular = "art-lojban" ; these tags match the 'langtag' + // / "cel-gaulish" ; production, but their subtags + // / "no-bok" ; are not extended language + // / "no-nyn" ; or variant subtags: their meaning + // / "zh-guoyu" ; is defined by their registration + // / "zh-hakka" ; and all of these are deprecated + // / "zh-min" ; in favor of a more modern + // / "zh-min-nan" ; subtag or sequence of subtags + // / "zh-xiang" + + final String[][] entries = { + //{"tag", "preferred"}, + {"art-lojban", "jbo"}, + {"cel-gaulish", "cel-gaulish"}, // gaulish is parsed as a variant + {"en-GB-oed", "en-GB"}, // oed (Oxford English Dictionary spelling) is ignored + {"i-ami", "ami"}, + {"i-bnn", "bnn"}, + {"i-default", UNDETERMINED}, // fallback + {"i-enochian", UNDETERMINED}, // fallback + {"i-hak", "hak"}, + {"i-klingon", "tlh"}, + {"i-lux", "lb"}, + {"i-mingo", UNDETERMINED}, // fallback + {"i-navajo", "nv"}, + {"i-pwn", "pwn"}, + {"i-tao", "tao"}, + {"i-tay", "tay"}, + {"i-tsu", "tsu"}, + {"no-bok", "nb"}, + {"no-nyn", "nn"}, + {"sgn-BE-FR", "sfb"}, + {"sgn-BE-NL", "vgt"}, + {"sgn-CH-DE", "sgg"}, + {"zh-guoyu", "cmn"}, + {"zh-hakka", "hak"}, + {"zh-min", "zh"}, // fallback + {"zh-min-nan", "nan"}, + {"zh-xiang", "hsn"}, + }; + for (String[] e : entries) { + GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e); + } + } + + private LanguageTag() { + } + + // + // Getter methods for language subtag fields + // + + public String getLanguage() { + return _language; + } + + public List getExtlangs() { + return Collections.unmodifiableList(_extlangs); + } + + public String 
getScript() { + return _script; + } + + public String getRegion() { + return _region; + } + + public List getVariants() { + return Collections.unmodifiableList(_variants); + } + + public SortedMap getExtensions() { + return Collections.unmodifiableSortedMap(_extensions); + } + + public String getPrivateuse() { + return _privateuse; + } + + public String getGrandfathered() { + return _grandfathered; + } + + private String getJavaVariant() { + StringBuilder buf = new StringBuilder(); + for (String var : _variants) { + if (buf.length() > 0) { + buf.append(JAVASEP); + } + buf.append(var); + } + if (_javaCompatVariants) { + return getJavaCompatibleVariant(buf.toString(), _privateuse); + } + + return buf.toString(); + } + + private String getJavaPrivateuse() { + if (_javaCompatVariants) { + return getJavaCompatiblePrivateuse(_privateuse); + } + return _privateuse; + } + + static String getJavaCompatibleVariant(String bcpVariants, String bcpPrivuse) { + StringBuilder buf = new StringBuilder(bcpVariants); + if (bcpPrivuse.length() > 0) { + int idx = -1; + if (bcpPrivuse.startsWith(JAVAVARIANT + SEP)) { + idx = (JAVAVARIANT + SEP).length(); + } else { + idx = bcpPrivuse.indexOf(SEP + JAVAVARIANT + SEP); + if (idx != -1) { + idx += (SEP + JAVAVARIANT + SEP).length(); + } + } + if (idx != -1) { + if (buf.length() != 0) { + buf.append(JAVASEP); + } + buf.append(bcpPrivuse.substring(idx).replace(SEP, JAVASEP)); + } + } + return buf.toString(); + } + + static String getJavaCompatiblePrivateuse(String bcpPrivuse) { + if (bcpPrivuse.length() > 0) { + int idx = -1; + if (bcpPrivuse.startsWith(JAVAVARIANT + SEP)) { + idx = 0; + } else { + idx = bcpPrivuse.indexOf(SEP + JAVAVARIANT + SEP); + } + if (idx != -1) { + return bcpPrivuse.substring(0, idx); + } + } + return bcpPrivuse; + } + + public BaseLocale getBaseLocale() { + String lang = _language; + if (_extlangs.size() > 0) { + // Extended language subtags are used for various historical + // and compatibility reasons. 
Each extended language subtag + // has a "Preferred-Value', that is exactly same with the extended + // language subtag itself. For example, + // + // Type: extlang + // Subtag: aao + // Description: Algerian Saharan Arabic + // Added: 2009-07-29 + // Preferred-Value: aao + // Prefix: ar + // Macrolanguage: ar + // + // For example, language tag "ar-aao-DZ" is equivalent to + // "aao-DZ". + // + // Strictly speaking, the mapping requires prefix validation + // (e.g. primary language must be "ar" in the example above). + // However, this implementation does not check the prefix + // and simply use the first extlang value as locale's language. + lang = _extlangs.get(0); + } + if (lang.equals(UNDETERMINED)) { + lang = ""; + } + return BaseLocale.getInstance(lang, _script, _region, getJavaVariant()); + } + + public LocaleExtensions getLocaleExtensions() { + String javaPrivuse = getJavaPrivateuse(); + if (_extensions == null && javaPrivuse.length() == 0) { + return LocaleExtensions.EMPTY_EXTENSIONS; + } + SortedMap exts = new TreeMap(); + if (_extensions != null) { + exts.putAll(_extensions); + } + if (javaPrivuse.length() > 0) { + PrivateuseExtension pext = new PrivateuseExtension(javaPrivuse); + exts.put(Character.valueOf(PrivateuseExtension.SINGLETON), pext); + } + return LocaleExtensions.getInstance(exts); + } + + public String getID() { + if (_grandfathered.length() > 0) { + return _grandfathered; + } + StringBuilder buf = new StringBuilder(); + if (_language.length() > 0) { + buf.append(_language); + if (_extlangs.size() > 0) { + for (String el : _extlangs) { + buf.append(SEP); + buf.append(el); + } + } + if (_script.length() > 0) { + buf.append(SEP); + buf.append(_script); + } + if (_region.length() > 0) { + buf.append(SEP); + buf.append(_region); + } + if (_variants.size() > 0) { + for (String var : _variants) { + buf.append(SEP); + buf.append(var); + } + } + if (_extensions.size() > 0) { + Set> exts = _extensions.entrySet(); + for (Entry ext : exts) { + 
buf.append(SEP); + buf.append(ext.getKey()); + buf.append(SEP); + buf.append(ext.getValue().getValue()); + } + } + } + if (_privateuse.length() > 0) { + if (buf.length() > 0) { + buf.append(SEP); + } + buf.append(PRIVATEUSE); + buf.append(SEP); + buf.append(_privateuse); + } + return buf.toString(); + } + + public String toString() { + return getID(); + } + + // + // Language subtag syntax checking methods + // + + public static boolean isLanguage(String s) { + // language = 2*3ALPHA ; shortest ISO 639 code + // ["-" extlang] ; sometimes followed by + // ; extended language subtags + // / 4ALPHA ; or reserved for future use + // / 5*8ALPHA ; or registered language subtag + return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s); + } + + public static boolean isExtlang(String s) { + // extlang = 3ALPHA ; selected ISO 639 codes + // *2("-" 3ALPHA) ; permanently reserved + return (s.length() == 3) && AsciiUtil.isAlphaString(s); + } + + public static boolean isScript(String s) { + // script = 4ALPHA ; ISO 15924 code + return (s.length() == 4) && AsciiUtil.isAlphaString(s); + } + + public static boolean isRegion(String s) { + // region = 2ALPHA ; ISO 3166-1 code + // / 3DIGIT ; UN M.49 code + return ((s.length() == 2) && AsciiUtil.isAlphaString(s)) + || ((s.length() == 3) && AsciiUtil.isNumericString(s)); + } + + public static boolean isVariant(String s) { + // variant = 5*8alphanum ; registered variants + // / (DIGIT 3alphanum) + int len = s.length(); + if (len >= 5 && len <= 8) { + return AsciiUtil.isAlphaNumericString(s); + } + if (len == 4) { + return AsciiUtil.isNumeric(s.charAt(0)) + && AsciiUtil.isAlphaNumeric(s.charAt(1)) + && AsciiUtil.isAlphaNumeric(s.charAt(2)) + && AsciiUtil.isAlphaNumeric(s.charAt(3)); + } + return false; + } + + public static boolean isExtensionSingleton(String s) { + // singleton = DIGIT ; 0 - 9 + // / %x41-57 ; A - W + // / %x59-5A ; Y - Z + // / %x61-77 ; a - w + // / %x79-7A ; y - z + + return (s.length() == 1) + && 
AsciiUtil.isAlphaString(s) + && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); + } + + public static boolean isExtensionSubtag(String s) { + // extension = singleton 1*("-" (2*8alphanum)) + return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isPrivateuseSingleton(String s) { + // privateuse = "x" 1*("-" (1*8alphanum)) + return (s.length() == 1) + && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); + } + + public static boolean isPrivateuseSubtag(String s) { + // privateuse = "x" 1*("-" (1*8alphanum)) + return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + // + // Language subtag canonicalization methods + // + + public static String canonicalizeLanguage(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtlang(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeScript(String s) { + return AsciiUtil.toTitleString(s); + } + + public static String canonicalizeRegion(String s) { + return AsciiUtil.toUpperString(s); + } + + public static String canonicalizeVariant(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtensionSingleton(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtensionSubtag(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizePrivateuseSubtag(String s) { + return AsciiUtil.toLowerString(s); + } + + + public static LanguageTag parse(String str, boolean javaCompatVar) { + LanguageTag tag = new LanguageTag(); + tag.parseString(str, javaCompatVar); + return tag; + } + + public static LanguageTag parseStrict(String str, boolean javaCompatVar) throws LocaleSyntaxException { + LanguageTag tag = new LanguageTag(); + ParseStatus sts = tag.parseString(str, javaCompatVar); + if (sts.isError()) { + throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex); + } + return tag; + 
} + + public static LanguageTag parseLocale(BaseLocale base, LocaleExtensions locExts) { + LanguageTag tag = new LanguageTag(); + tag._javaCompatVariants = true; + + String language = base.getLanguage(); + String script = base.getScript(); + String region = base.getRegion(); + String variant = base.getVariant(); + + String privuseVar = null; // store ill-formed variant subtags + + if (language.length() > 0 && isLanguage(language)) { + // Convert a deprecated language code used by Java to + // a new code + language = canonicalizeLanguage(language); + if (language.equals("iw")) { + language = "he"; + } else if (language.equals("ji")) { + language = "yi"; + } else if (language.equals("in")) { + language = "id"; + } + tag._language = language; + } + if (script.length() > 0 && isScript(script)) { + tag._script = canonicalizeScript(script); + } + if (region.length() > 0 && isRegion(region)) { + tag._region = canonicalizeRegion(region); + } + if (variant.length() > 0) { + List variants = null; + StringTokenIterator varitr = new StringTokenIterator(variant, JAVASEP); + while (!varitr.isDone()) { + String var = varitr.current(); + if (!isVariant(var)) { + break; + } + if (variants == null) { + variants = new ArrayList(); + } + if (JDKIMPL) { + variants.add(var); // Do not canonicalize! 
+ } else { + variants.add(canonicalizeVariant(var)); + } + varitr.next(); + } + if (variants != null) { + tag._variants = variants; + } + if (!varitr.isDone()) { + // ill-formed variant subtags + StringBuilder buf = new StringBuilder(); + while (!varitr.isDone()) { + String prvv = varitr.current(); + if (!isPrivateuseSubtag(prvv)) { + // cannot use private use subtag - truncated + break; + } + if (buf.length() > 0) { + buf.append(SEP); + } + if (!JDKIMPL) { + prvv = AsciiUtil.toLowerString(prvv); + } + buf.append(prvv); + varitr.next(); + } + if (buf.length() > 0) { + privuseVar = buf.toString(); + } + } + } + + TreeMap extensions = null; + String privateuse = null; + + Set locextKeys = locExts.getKeys(); + for (Character locextKey : locextKeys) { + Extension ext = locExts.getExtension(locextKey); + if (ext instanceof PrivateuseExtension) { + privateuse = ext.getValue(); + } else { + if (extensions == null) { + extensions = new TreeMap(); + } + extensions.put(locextKey, ext); + } + } + + if (extensions != null) { + tag._extensions = extensions; + } + + // append ill-formed variant subtags to private use + if (privuseVar != null) { + if (privateuse == null) { + privateuse = JAVAVARIANT + SEP + privuseVar; + } else { + privateuse = privateuse + SEP + JAVAVARIANT + SEP + privuseVar.replace(JAVASEP, SEP); + } + } + + if (privateuse != null) { + tag._privateuse = privateuse; + } else if (tag._language.length() == 0) { + // use "und" if neither language nor privateuse is available + tag._language = UNDETERMINED; + } + + return tag; + } + + private ParseStatus parseString(String str, boolean javaCompatVar) { + // Check if the tag is grandfathered + String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(str)); + ParseStatus sts; + if (gfmap != null) { + _grandfathered = gfmap[0]; + sts = parseLanguageTag(gfmap[1], javaCompatVar); + sts.parseLength = str.length(); + } else { + _grandfathered = ""; + sts = parseLanguageTag(str, javaCompatVar); + } + return sts; 
+ } + + /* + * Parse Language-Tag, except grandfathered. + * + * BNF in RFC5646 + * + * Language-Tag = langtag ; normal language tags + * / privateuse ; private use tag + * / grandfathered ; grandfathered tags + * + * + * langtag = language + * ["-" script] + * ["-" region] + * *("-" variant) + * *("-" extension) + * ["-" privateuse] + * + * language = 2*3ALPHA ; shortest ISO 639 code + * ["-" extlang] ; sometimes followed by + * ; extended language subtags + * / 4ALPHA ; or reserved for future use + * / 5*8ALPHA ; or registered language subtag + * + * extlang = 3ALPHA ; selected ISO 639 codes + * *2("-" 3ALPHA) ; permanently reserved + * + * script = 4ALPHA ; ISO 15924 code + * + * region = 2ALPHA ; ISO 3166-1 code + * / 3DIGIT ; UN M.49 code + * + * variant = 5*8alphanum ; registered variants + * / (DIGIT 3alphanum) + * + * extension = singleton 1*("-" (2*8alphanum)) + * + * ; Single alphanumerics + * ; "x" reserved for private use + * singleton = DIGIT ; 0 - 9 + * / %x41-57 ; A - W + * / %x59-5A ; Y - Z + * / %x61-77 ; a - w + * / %x79-7A ; y - z + * + * privateuse = "x" 1*("-" (1*8alphanum)) + * + */ + private ParseStatus parseLanguageTag(String langtag, boolean javaCompat) { + ParseStatus sts = new ParseStatus(); + StringTokenIterator itr = new StringTokenIterator(langtag, SEP); + Parser parser = javaCompat ? 
JAVA_VARIANT_COMPATIBLE_PARSER : DEFAULT_PARSER; + + _javaCompatVariants = javaCompat; + + // langtag must start with either language or privateuse + _language = parser.parseLanguage(itr, sts); + if (_language.length() > 0) { + _extlangs = parser.parseExtlangs(itr, sts); + _script = parser.parseScript(itr, sts); + _region = parser.parseRegion(itr, sts); + _variants = parser.parseVariants(itr, sts); + _extensions = parser.parseExtensions(itr, sts); + } + _privateuse = parser.parsePrivateuse(itr, sts); + + if (!itr.isDone() && !sts.isError()) { + String s = itr.current(); + sts.errorIndex = itr.currentStart(); + if (s.length() == 0) { + sts.errorMsg = "Empty subtag"; + } else { + sts.errorMsg = "Invalid subtag: " + s; + } + } + + return sts; + } + + public static class ParseStatus { + int parseLength = 0; + int errorIndex = -1; + String errorMsg = null; + + public void reset() { + parseLength = 0; + errorIndex = -1; + errorMsg = null; + } + + boolean isError() { + return (errorIndex >= 0); + } + } + + static class Parser { + private boolean _javaCompatVar; + + Parser(boolean javaCompatVar) { + _javaCompatVar = javaCompatVar; + } + + // + // Language subtag parsers + // + + public String parseLanguage(StringTokenIterator itr, ParseStatus sts) { + String language = ""; + + if (itr.isDone() || sts.isError()) { + return language; + } + + String s = itr.current(); + if (isLanguage(s)) { + language = canonicalizeLanguage(s); + sts.parseLength = itr.currentEnd(); + itr.next(); + } + return language; + } + + public List parseExtlangs(StringTokenIterator itr, ParseStatus sts) { + List extlangs = null; + + if (itr.isDone() || sts.isError()) { + return Collections.emptyList(); + } + + while (!itr.isDone()) { + String s = itr.current(); + if (!isExtlang(s)) { + break; + } + if (extlangs == null) { + extlangs = new ArrayList(3); + } + extlangs.add(canonicalizeExtlang(s)); + sts.parseLength = itr.currentEnd(); + itr.next(); + + if (extlangs.size() == 3) { + // Maximum 3 extlangs + 
break; + } + } + + if (extlangs == null) { + return Collections.emptyList(); + } + + return extlangs; + } + + public String parseScript(StringTokenIterator itr, ParseStatus sts) { + String script = ""; + + if (itr.isDone() || sts.isError()) { + return script; + } + + String s = itr.current(); + if (isScript(s)) { + script = canonicalizeScript(s); + sts.parseLength = itr.currentEnd(); + itr.next(); + } + + return script; + } + + public String parseRegion(StringTokenIterator itr, ParseStatus sts) { + String region = ""; + + if (itr.isDone() || sts.isError()) { + return region; + } + + String s = itr.current(); + if (isRegion(s)) { + region = canonicalizeRegion(s); + sts.parseLength = itr.currentEnd(); + itr.next(); + } + + return region; + } + + public List parseVariants(StringTokenIterator itr, ParseStatus sts) { + List variants = null; + + if (itr.isDone() || sts.isError()) { + return Collections.emptyList(); + } + + while (!itr.isDone()) { + String s = itr.current(); + if (!isVariant(s)) { + break; + } + if (variants == null) { + variants = new ArrayList(3); + } + if (_javaCompatVar) { + // preserve casing when Java compatibility option + // is enabled + variants.add(s); + } else { + variants.add(canonicalizeVariant(s)); + } + sts.parseLength = itr.currentEnd(); + itr.next(); + } + + if (variants == null) { + return Collections.emptyList(); + } + + return variants; + } + + public SortedMap parseExtensions(StringTokenIterator itr, ParseStatus sts) { + SortedMap extensionMap = null; + + if (itr.isDone() || sts.isError()) { + return EMPTY_EXTENSION_MAP; + } + + while (!itr.isDone()) { + String s = itr.current(); + if (!isExtensionSingleton(s)) { + break; + } + if (!itr.hasNext()) { + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Missing extension subtag for extension :" + s; + break; + } + + if (extensionMap == null) { + extensionMap = new TreeMap(); + } + + String singletonStr = canonicalizeExtensionSingleton(s); + Character singleton = 
Character.valueOf(singletonStr.charAt(0)); + + if (extensionMap.containsKey(singleton)) { + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Duplicated extension: " + s; + break; + } + + itr.next(); + Extension ext = Extension.create(singleton.charValue(), itr, sts); + if (ext != null) { + extensionMap.put(singleton, ext); + } + if (sts.isError()) { + break; + } + } + + if (extensionMap == null || extensionMap.size() == 0) { + return EMPTY_EXTENSION_MAP; + } + + return extensionMap; + } + + public String parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { + String privateuse = ""; + + if (itr.isDone() || sts.isError()) { + return privateuse; + } + + String s = itr.current(); + if (isPrivateuseSingleton(s)) { + StringBuilder buf = new StringBuilder(); + int singletonOffset = itr.currentStart(); + boolean preserveCasing = false; + itr.next(); + + while (!itr.isDone()) { + s = itr.current(); + if (!isPrivateuseSubtag(s)) { + break; + } + if (buf.length() != 0) { + buf.append(SEP); + } + if (!preserveCasing) { + s = canonicalizePrivateuseSubtag(s); + } + buf.append(s); + sts.parseLength = itr.currentEnd(); + + if (_javaCompatVar && s.equals(JAVAVARIANT)) { + // preserve casing after the special + // java reserved private use subtag + // when java compatibility variant option + // is enabled. 
+ preserveCasing = true; + } + itr.next(); + } + + if (buf.length() == 0) { + // need at least 1 private subtag + sts.errorIndex = singletonOffset; + sts.errorMsg = "Incomplete privateuse"; + } else { + privateuse = buf.toString(); + } + } + + return privateuse; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java b/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java new file mode 100644 index 00000000000..83f479a486f --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java @@ -0,0 +1,190 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.Collections; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.Map.Entry; + +import com.ibm.icu.impl.locale.LanguageTag.ParseStatus; + + +public class LocaleExtensions { + + private SortedMap _map = EMPTY_MAP; + private String _id = ""; + + private static final SortedMap EMPTY_MAP = + Collections.unmodifiableSortedMap(new TreeMap()); + + private static final LocaleObjectCache LOCALEEXTENSIONS_CACHE = + new LocaleObjectCache(); + + + public static LocaleExtensions EMPTY_EXTENSIONS = new LocaleExtensions(); + + public static final LocaleExtensions CALENDAR_JAPANESE; + public static final LocaleExtensions NUMBER_THAI; + + static { + CALENDAR_JAPANESE = new LocaleExtensions(); + CALENDAR_JAPANESE._id = UnicodeLocaleExtension.CA_JAPANESE.getID(); + CALENDAR_JAPANESE._map = new TreeMap(); + CALENDAR_JAPANESE._map.put(Character.valueOf(UnicodeLocaleExtension.CA_JAPANESE.getKey()), UnicodeLocaleExtension.CA_JAPANESE); + LOCALEEXTENSIONS_CACHE.put(CALENDAR_JAPANESE._id, CALENDAR_JAPANESE); + + NUMBER_THAI = new 
LocaleExtensions(); + NUMBER_THAI._id = UnicodeLocaleExtension.NU_THAI.getID(); + NUMBER_THAI._map = new TreeMap(); + NUMBER_THAI._map.put(Character.valueOf(UnicodeLocaleExtension.NU_THAI.getKey()), UnicodeLocaleExtension.NU_THAI); + LOCALEEXTENSIONS_CACHE.put(NUMBER_THAI._id, NUMBER_THAI); + } + + + private LocaleExtensions() { + } + + public static LocaleExtensions getInstance(String str) throws LocaleSyntaxException { + if (str == null || str.length() == 0) { + return EMPTY_EXTENSIONS; + } + LocaleExtensions exts = LOCALEEXTENSIONS_CACHE.get(str); + if (exts == null) { + StringTokenIterator itr = new StringTokenIterator(str, LanguageTag.SEP); + ParseStatus sts = new ParseStatus(); + TreeMap map = new TreeMap(); + + while (!itr.isDone()) { + int startOffset = itr.currentEnd(); + Extension ext = Extension.create(itr, sts); + if (sts.isError()) { + throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex); + } + if (ext == null) { + throw new LocaleSyntaxException("Invalid extension subtag: " + itr.current(), startOffset); + } + + Character keyChar = Character.valueOf(ext.getKey()); + if (map.containsKey(keyChar)) { + throw new LocaleSyntaxException("Duplicated extension: " + keyChar, startOffset); + } + + map.put(keyChar, ext); + } + + String id = toID(map); + // check the cache with canonicalized ID + exts = LOCALEEXTENSIONS_CACHE.get(id); + if (exts == null) { + exts = new LocaleExtensions(); + exts._map = map; + exts._id = id; + + exts = LOCALEEXTENSIONS_CACHE.put(id, exts); + } + } + return exts; + } + + static LocaleExtensions getInstance(SortedMap map) { + if (map == null || map.isEmpty()) { + return EMPTY_EXTENSIONS; + } + String id = toID(map); + LocaleExtensions exts = LOCALEEXTENSIONS_CACHE.get(id); + if (exts == null) { + exts = new LocaleExtensions(); + exts._map = new TreeMap(map); + exts._id = id; + + exts = LOCALEEXTENSIONS_CACHE.put(id, exts); + } + return exts; + } + + private static String toID(SortedMap map) { + StringBuilder buf = new 
StringBuilder(); + Extension privuse = null; + if (map != null && !map.isEmpty()) { + Set> entries = map.entrySet(); + for (Entry entry : entries) { + Character key = entry.getKey(); + if (key.charValue() == LanguageTag.PRIVATEUSE.charAt(0)) { + privuse = entry.getValue(); + continue; + } + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(entry.getKey()); + buf.append(LanguageTag.SEP); + buf.append(entry.getValue().getValue()); + } + } + if (privuse != null) { + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(LanguageTag.PRIVATEUSE); + buf.append(LanguageTag.SEP); + buf.append(privuse.getValue()); + } + return buf.toString(); + } + + public Set getKeys() { + return Collections.unmodifiableSet(_map.keySet()); + } + + public Extension getExtension(Character key) { + return _map.get(key); + } + + public String getExtensionValue(Character key) { + Extension ext = _map.get(key); + if (ext == null) { + return ""; + } + return ext.getValue(); + } + + public Set getUnicodeLocaleKeys() { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return Collections.emptySet(); + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getKeys(); + } + + public String getUnicodeLocaleType(String unicodeLocaleKey) { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return ""; + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getType(unicodeLocaleKey); + } + + public String toString() { + return _id; + } + + public String getID() { + return _id; + } + + public int hashCode() { + return _id.hashCode(); + } + + public static boolean isValidKey(String key) { + return LanguageTag.isExtensionSingleton(key) || LanguageTag.isPrivateuseSingleton(key); + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/LocaleObjectCache.java 
b/main/classes/core/src/com/ibm/icu/impl/locale/LocaleObjectCache.java new file mode 100644 index 00000000000..c30c872aa29 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/LocaleObjectCache.java @@ -0,0 +1,78 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.lang.ref.Reference; +import java.lang.ref.ReferenceQueue; +import java.lang.ref.WeakReference; +import java.util.concurrent.ConcurrentHashMap; + +public class LocaleObjectCache { + + private ConcurrentHashMap> _map = new ConcurrentHashMap>(); + private ReferenceQueue _rq = new ReferenceQueue(); + + public LocaleObjectCache() { + } + + public V get(Object key) { + expungeStaleEntries(); + WeakValueRef ref = _map.get(key); + if (ref != null) { + return ref.get(); + } + return null; + } + + /* + * Unlike Map#put, this method returns non-null value actually + * in the cache, even no values for the key was not available + * before. + */ + public V put(K key, V value) { + expungeStaleEntries(); + WeakValueRef ref = _map.get(key); + if (ref != null) { + // Make sure if another thread put the new value + V valInCache = ref.get(); + if (valInCache != null) { + return valInCache; + } + } + // We do not synchronize the internal map here. + // In the worst case, another thread may put the new + // value with the same contents, but it should not cause + // any serious problem. 
+ _map.put(key, new WeakValueRef(key, value, _rq)); + return value; + } + + private void expungeStaleEntries() { + Reference val; + while ((val = _rq.poll()) != null) { + Object key = ((WeakValueRef)val).getKey(); + _map.remove(key); + } + } + + private static class WeakValueRef extends WeakReference { + private Object _key; + + public WeakValueRef(Object key, V value, ReferenceQueue rq) { + super(value, rq); + _key = key; + } + + public V get() { + return super.get(); + } + + public Object getKey() { + return _key; + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java b/main/classes/core/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java new file mode 100644 index 00000000000..14d1130e3db --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/LocaleSyntaxException.java @@ -0,0 +1,27 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +public class LocaleSyntaxException extends Exception { + + private static final long serialVersionUID = 1L; + + private int _index = -1; + + public LocaleSyntaxException(String msg) { + this(msg, 0); + } + + public LocaleSyntaxException(String msg, int errorIndex) { + super(msg); + _index = errorIndex; + } + + public int getErrorIndex() { + return _index; + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/PrivateuseExtension.java b/main/classes/core/src/com/ibm/icu/impl/locale/PrivateuseExtension.java new file mode 100644 index 00000000000..00ab5ee395b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/PrivateuseExtension.java @@ -0,0 +1,53 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import com.ibm.icu.impl.locale.LanguageTag.ParseStatus; + +public class PrivateuseExtension extends Extension { + public static final char SINGLETON = 'x'; + + protected PrivateuseExtension() { + super(SINGLETON); + } + + /* + * package local constructor only used by LanguageTag implementation + */ + PrivateuseExtension(String privuse) { + super(SINGLETON); + _value = privuse; + } + + protected void setExtensionValue(StringTokenIterator itr, ParseStatus sts) { + if (sts.isError() || itr.isDone()) { + _value = null; + return; + } + + StringBuilder buf = new StringBuilder(); + while (!itr.isDone()) { + String s = itr.current(); + if (!LanguageTag.isPrivateuseSubtag(s)) { + break; + } + s = LanguageTag.canonicalizePrivateuseSubtag(s); + if (buf.length() != 0) { + buf.append(LanguageTag.SEP); + } + buf.append(s); + sts.parseLength = itr.currentEnd(); + itr.next(); 
+ } + + if (buf.length() == 0) { + _value = null; + } else { + _value = buf.toString(); + } + } +} diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/StringTokenIterator.java b/main/classes/core/src/com/ibm/icu/impl/locale/StringTokenIterator.java new file mode 100644 index 00000000000..13cad08d85b --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/StringTokenIterator.java @@ -0,0 +1,93 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +public class StringTokenIterator { + private String _text; + private String _dlms; + + private String _token; + private int _start; + private int _end; + private boolean _done; + + public StringTokenIterator(String text, String dlms) { + _text = text; + _dlms = dlms; + setStart(0); + } + + public String first() { + setStart(0); + return _token; + } + + public String current() { + return _token; + } + + public int currentStart() { + return _start; + } + + public int currentEnd() { + return _end; + } + + public boolean isDone() { + return _done; + } + + public String next() { + if (hasNext()) { + _start = _end + 1; + _end = nextDelimiter(_start); + _token = _text.substring(_start, _end); + } else { + _start = _end; + _token = null; + _done = true; + } + return _token; + } + + public boolean hasNext() { + return (_end < _text.length()); + } + + public StringTokenIterator setStart(int offset) { + if (offset > _text.length()) { + throw new IndexOutOfBoundsException(); + } + _start = offset; + _end = nextDelimiter(_start); + _token = _text.substring(_start, _end); + _done = false; + return this; + } + + public StringTokenIterator setText(String text) { + _text = text; + setStart(0); + return this; + } + + private int nextDelimiter(int start) { + int 
idx = start; + outer: while (idx < _text.length()) { + char c = _text.charAt(idx); + for (int i = 0; i < _dlms.length(); i++) { + if (c == _dlms.charAt(i)) { + break outer; + } + } + idx++; + } + return idx; + } +} + diff --git a/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java b/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java new file mode 100644 index 00000000000..bbb5709d42d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java @@ -0,0 +1,207 @@ +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.impl.locale; + +import java.util.Collections; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.Map.Entry; + +import com.ibm.icu.impl.locale.LanguageTag.ParseStatus; + +public class UnicodeLocaleExtension extends Extension { + public static final char SINGLETON = 'u'; + + public static final UnicodeLocaleExtension CA_JAPANESE = new UnicodeLocaleExtension().put("ca", "japanese"); + public static final UnicodeLocaleExtension NU_THAI = new UnicodeLocaleExtension().put("nu", "thai"); + + private SortedMap _keyTypeMap; + + protected UnicodeLocaleExtension() { + super(SINGLETON); + } + + /* + * Package local constructor only used by InternalLocaleBuilder + */ + UnicodeLocaleExtension(SortedMap keyTypeMap) { + super(SINGLETON); + _keyTypeMap = keyTypeMap; + updateStringValue(); + } + + protected void setExtensionValue(StringTokenIterator itr, ParseStatus sts) { + if (sts.isError() || itr.isDone()) { + _value = null; + return; + } + + SortedMap keyTypeMap = new TreeMap(); + String ukey = null; + StringBuilder buf = new StringBuilder(); + int typeEnd = -1; + + while (!itr.isDone()) { + String s = 
itr.current(); + + if (isTypeSubtag(s)) { + if (ukey == null) { + // key is expected + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Invalid Unicode locale extension key: " + s; + break; + } + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(canonicalizeTypeSubtag(s)); + typeEnd = itr.currentEnd(); + + if (!itr.hasNext()) { + // emit the last key/type + keyTypeMap.put(ukey, buf.toString()); + sts.parseLength = typeEnd; + itr.next(); + break; + } + } else { + // key or others + if (ukey != null) { + if (buf.length() > 0) { + // emit previous key and value + keyTypeMap.put(ukey, buf.toString()); + sts.parseLength = typeEnd; + } else { + // type is expected + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Invalid Unicode locale extension type: " + s; + break; + } + } + if (isKey(s)) { + if (itr.hasNext()) { + ukey = canonicalizeKey(s); + if (keyTypeMap.containsKey(ukey)) { + // duplicated key + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Duplicate Unicode locale extension key: " + s; + break; + } + buf.setLength(0); + typeEnd = -1; + } else { + // missing type + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Missing subtag for Unicode locale extension: " + s; + itr.next(); + break; + } + } else { + // others + if (keyTypeMap.size() == 0) { + // key is expected + sts.errorIndex = itr.currentStart(); + sts.errorMsg = "Invalid Unicode locale extension key: " + s; + } + break; + } + } + itr.next(); + } + + if (keyTypeMap.size() == 0) { + _value = null; + return; + } + + _keyTypeMap = keyTypeMap; + updateStringValue(); + } + + public Set getKeys() { + if (_keyTypeMap == null) { + return Collections.emptySet(); + } + return Collections.unmodifiableSet(_keyTypeMap.keySet()); + } + + public String getType(String key) { + String type = null; + if (_keyTypeMap != null) { + type = _keyTypeMap.get(canonicalizeKey(key)); + } + + return (type == null ? 
"" : type); + } + + public static boolean isKey(String s) { + // 2alphanum + return (s.length() == 2) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isTypeSubtag(String s) { + // 3*8alphanum + return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + public static String canonicalizeKey(String s) { + return LanguageTag.canonicalizeExtensionSubtag(s); + } + + public static String canonicalizeTypeSubtag(String s) { + return LanguageTag.canonicalizeExtensionSubtag(s); + } + + // These methods are only used by InterlaLocaleBuilder + UnicodeLocaleExtension remove(String key) { + if (_keyTypeMap != null) { + _keyTypeMap.remove(key); + updateStringValue(); + } + return this; + } + + UnicodeLocaleExtension put(String key, String type) { + if (_keyTypeMap == null) { + _keyTypeMap = new TreeMap(); + } + _keyTypeMap.put(key, type); + updateStringValue(); + return this; + } + + boolean isEmpty() { + return (_keyTypeMap.size() == 0); + } + + private void updateStringValue() { + _value = null; + + if (_keyTypeMap != null) { + // re-construct string representation + StringBuilder valBuf = new StringBuilder(); + Set> entries = _keyTypeMap.entrySet(); + boolean isFirst = true; + for (Entry e : entries) { + if (isFirst) { + isFirst = false; + } else { + valBuf.append(LanguageTag.SEP); + } + valBuf.append(e.getKey()); + valBuf.append(LanguageTag.SEP); + valBuf.append(e.getValue()); + } + + if (valBuf.length() > 0) { + _value = valBuf.toString(); + } + } + } +} + diff --git a/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/main/classes/core/src/com/ibm/icu/lang/UCharacter.java new file mode 100644 index 00000000000..8424ea93e3f --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UCharacter.java @@ -0,0 +1,6404 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ + +package com.ibm.icu.lang; + +import java.lang.ref.SoftReference; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import com.ibm.icu.impl.IllegalIcuArgumentException; +import com.ibm.icu.impl.Norm2AllModes; +import com.ibm.icu.impl.Normalizer2Impl; +import com.ibm.icu.impl.UBiDiProps; +import com.ibm.icu.impl.UCaseProps; +import com.ibm.icu.impl.UCharacterName; +import com.ibm.icu.impl.UCharacterNameChoice; +import com.ibm.icu.impl.UCharacterProperty; +import com.ibm.icu.impl.UCharacterUtility; +import com.ibm.icu.impl.UPropertyAliases; +import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; +import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; +import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.util.RangeValueIterator; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.ValueIterator; +import com.ibm.icu.util.VersionInfo; + +/** + * {@icuenhanced java.lang.Character}.{@icu _usage_} + * + *

    The UCharacter class provides extensions to the + * + * java.lang.Character class. These extensions provide support for + * more Unicode properties and together with the UTF16 + * class, provide support for supplementary characters (those with code + * points above U+FFFF). + * Each ICU release supports the latest version of Unicode available at that time. + * + *

    Code points are represented in these APIs using ints. While it would be + * more convenient in Java to have a separate primitive datatype for them, + * ints suffice in the meantime. + * + *

    To use this class please add the jar file name icu4j.jar to the + * class path, since it contains data files which supply the information used + * by this file.
    + * E.g. In Windows
    + * set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar.
    + * Otherwise, another method would be to copy the files uprops.dat and + * unames.icu from the icu4j source subdirectory + * $ICU4J_SRC/src/com.ibm.icu.impl.data to your class directory + * $ICU4J_CLASS/com.ibm.icu.impl.data. + * + *

    Aside from the additions for UTF-16 support, and the updated Unicode + * properties, the main differences between UCharacter and Character are: + *

      + *
    • UCharacter is not designed to be a char wrapper and does not have + * APIs that involve management of that single char.
      + * These include: + *
        + *
      • char charValue(), + *
      • int compareTo(java.lang.Character, java.lang.Character), etc. + *
      + *
    • UCharacter does not include Character APIs that are deprecated, nor + * does it include the Java-specific character information, such as + * boolean isJavaIdentifierPart(char ch). + *
    • Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric + * values '10' - '35'. UCharacter also does this in digit and + * getNumericValue, to adhere to the java semantics of these + * methods. New methods unicodeDigit, and + * getUnicodeNumericValue do not treat the above code points + * as having numeric values. This is a semantic change from ICU4J 1.3.1. + *
    + *

    + * Further detail on differences can be determined using the program + * + * com.ibm.icu.dev.test.lang.UCharacterCompare + *

    + *

    + * In addition to Java compatibility functions, which calculate derived properties, + * this API provides low-level access to the Unicode Character Database. + *

    + *

    + * Unicode assigns each code point (not just assigned character) values for + * many properties. + * Most of them are simple boolean flags, or constants from a small enumerated list. + * For some properties, values are strings or other relatively more complex types. + *

    + *

    + * For more information see + * "About the Unicode Character Database" + * (http://www.unicode.org/ucd/) + * and the ICU + * User Guide chapter on Properties + * (http://www.icu-project.org/userguide/properties.html). + *

    + *

    + * There are also functions that provide easy migration from C/POSIX functions + * like isblank(). Their use is generally discouraged because the C/POSIX + * standards do not define their semantics beyond the ASCII range, which means + * that different implementations exhibit very different behavior. + * Instead, Unicode properties should be used directly. + *

    + *

    + * There are also only a few, broad C/POSIX character classes, and they tend + * to be used for conflicting purposes. For example, the "isalpha()" class + * is sometimes used to determine word boundaries, while a more sophisticated + * approach would at least distinguish initial letters from continuation + * characters (the latter including combining marks). + * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) + * Another example: There is no "istitle()" class for titlecase characters. + *

    + *

    + * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. + * ICU implements them according to the Standard Recommendations in + * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions + * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). + *

    + *

    + * API access for C/POSIX character classes is as follows: + *

    {@code
    + * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
    + * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
    + * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
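    + * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
    + *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
    + *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
    + * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
    + * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
    + * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
    + * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
    + * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
    + * - cntrl:     getType(c)==CONTROL
    + * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
    + * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}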
    + * 

    + *

    + * The C/POSIX character classes are also available in UnicodeSet patterns, + * using patterns like [:graph:] or \p{graph}. + *

    + * + * {@icunote} There are several ICU (and Java) whitespace functions. + * Comparison:
      + *
    • isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; + * most of general categories "Z" (separators) + most whitespace ISO controls + * (including no-break spaces, but excluding IS1..IS4 and ZWSP) + *
    • isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces + *
    • isSpaceChar: just Z (including no-break spaces)
    + *

    + *

    + * This class is not subclassable. + *

    + * @author Syn Wee Quek + * @stable ICU 2.1 + * @see com.ibm.icu.lang.UCharacterEnums + */ + +public final class UCharacter implements ECharacterCategory, ECharacterDirection +{ + // public inner classes ---------------------------------------------- + + /** + * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} + * + * A family of character subsets representing the character blocks in the + * Unicode specification, generated from Unicode Data file Blocks.txt. + * Character blocks generally define characters used for a specific script + * or purpose. A character is contained by at most one Unicode block. + * + * {@icunote} All fields named XXX_ID are specific to ICU. + * + * @stable ICU 2.4 + */ + public static final class UnicodeBlock extends Character.Subset + { + // block id corresponding to icu4c ----------------------------------- + + /** + * @stable ICU 2.4 + */ + public static final int INVALID_CODE_ID = -1; + /** + * @stable ICU 2.4 + */ + public static final int BASIC_LATIN_ID = 1; + /** + * @stable ICU 2.4 + */ + public static final int LATIN_1_SUPPLEMENT_ID = 2; + /** + * @stable ICU 2.4 + */ + public static final int LATIN_EXTENDED_A_ID = 3; + /** + * @stable ICU 2.4 + */ + public static final int LATIN_EXTENDED_B_ID = 4; + /** + * @stable ICU 2.4 + */ + public static final int IPA_EXTENSIONS_ID = 5; + /** + * @stable ICU 2.4 + */ + public static final int SPACING_MODIFIER_LETTERS_ID = 6; + /** + * @stable ICU 2.4 + */ + public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; + /** + * Unicode 3.2 renames this block to "Greek and Coptic". 
+ * @stable ICU 2.4 + */ + public static final int GREEK_ID = 8; + /** + * @stable ICU 2.4 + */ + public static final int CYRILLIC_ID = 9; + /** + * @stable ICU 2.4 + */ + public static final int ARMENIAN_ID = 10; + /** + * @stable ICU 2.4 + */ + public static final int HEBREW_ID = 11; + /** + * @stable ICU 2.4 + */ + public static final int ARABIC_ID = 12; + /** + * @stable ICU 2.4 + */ + public static final int SYRIAC_ID = 13; + /** + * @stable ICU 2.4 + */ + public static final int THAANA_ID = 14; + /** + * @stable ICU 2.4 + */ + public static final int DEVANAGARI_ID = 15; + /** + * @stable ICU 2.4 + */ + public static final int BENGALI_ID = 16; + /** + * @stable ICU 2.4 + */ + public static final int GURMUKHI_ID = 17; + /** + * @stable ICU 2.4 + */ + public static final int GUJARATI_ID = 18; + /** + * @stable ICU 2.4 + */ + public static final int ORIYA_ID = 19; + /** + * @stable ICU 2.4 + */ + public static final int TAMIL_ID = 20; + /** + * @stable ICU 2.4 + */ + public static final int TELUGU_ID = 21; + /** + * @stable ICU 2.4 + */ + public static final int KANNADA_ID = 22; + /** + * @stable ICU 2.4 + */ + public static final int MALAYALAM_ID = 23; + /** + * @stable ICU 2.4 + */ + public static final int SINHALA_ID = 24; + /** + * @stable ICU 2.4 + */ + public static final int THAI_ID = 25; + /** + * @stable ICU 2.4 + */ + public static final int LAO_ID = 26; + /** + * @stable ICU 2.4 + */ + public static final int TIBETAN_ID = 27; + /** + * @stable ICU 2.4 + */ + public static final int MYANMAR_ID = 28; + /** + * @stable ICU 2.4 + */ + public static final int GEORGIAN_ID = 29; + /** + * @stable ICU 2.4 + */ + public static final int HANGUL_JAMO_ID = 30; + /** + * @stable ICU 2.4 + */ + public static final int ETHIOPIC_ID = 31; + /** + * @stable ICU 2.4 + */ + public static final int CHEROKEE_ID = 32; + /** + * @stable ICU 2.4 + */ + public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; + /** + * @stable ICU 2.4 + */ + public static final 
int OGHAM_ID = 34; + /** + * @stable ICU 2.4 + */ + public static final int RUNIC_ID = 35; + /** + * @stable ICU 2.4 + */ + public static final int KHMER_ID = 36; + /** + * @stable ICU 2.4 + */ + public static final int MONGOLIAN_ID = 37; + /** + * @stable ICU 2.4 + */ + public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; + /** + * @stable ICU 2.4 + */ + public static final int GREEK_EXTENDED_ID = 39; + /** + * @stable ICU 2.4 + */ + public static final int GENERAL_PUNCTUATION_ID = 40; + /** + * @stable ICU 2.4 + */ + public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; + /** + * @stable ICU 2.4 + */ + public static final int CURRENCY_SYMBOLS_ID = 42; + /** + * Unicode 3.2 renames this block to "Combining Diacritical Marks for + * Symbols". + * @stable ICU 2.4 + */ + public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; + /** + * @stable ICU 2.4 + */ + public static final int LETTERLIKE_SYMBOLS_ID = 44; + /** + * @stable ICU 2.4 + */ + public static final int NUMBER_FORMS_ID = 45; + /** + * @stable ICU 2.4 + */ + public static final int ARROWS_ID = 46; + /** + * @stable ICU 2.4 + */ + public static final int MATHEMATICAL_OPERATORS_ID = 47; + /** + * @stable ICU 2.4 + */ + public static final int MISCELLANEOUS_TECHNICAL_ID = 48; + /** + * @stable ICU 2.4 + */ + public static final int CONTROL_PICTURES_ID = 49; + /** + * @stable ICU 2.4 + */ + public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; + /** + * @stable ICU 2.4 + */ + public static final int ENCLOSED_ALPHANUMERICS_ID = 51; + /** + * @stable ICU 2.4 + */ + public static final int BOX_DRAWING_ID = 52; + /** + * @stable ICU 2.4 + */ + public static final int BLOCK_ELEMENTS_ID = 53; + /** + * @stable ICU 2.4 + */ + public static final int GEOMETRIC_SHAPES_ID = 54; + /** + * @stable ICU 2.4 + */ + public static final int MISCELLANEOUS_SYMBOLS_ID = 55; + /** + * @stable ICU 2.4 + */ + public static final int DINGBATS_ID = 56; + /** + * @stable ICU 2.4 + */ + public static final int 
BRAILLE_PATTERNS_ID = 57; + /** + * @stable ICU 2.4 + */ + public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; + /** + * @stable ICU 2.4 + */ + public static final int KANGXI_RADICALS_ID = 59; + /** + * @stable ICU 2.4 + */ + public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; + /** + * @stable ICU 2.4 + */ + public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; + /** + * @stable ICU 2.4 + */ + public static final int HIRAGANA_ID = 62; + /** + * @stable ICU 2.4 + */ + public static final int KATAKANA_ID = 63; + /** + * @stable ICU 2.4 + */ + public static final int BOPOMOFO_ID = 64; + /** + * @stable ICU 2.4 + */ + public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; + /** + * @stable ICU 2.4 + */ + public static final int KANBUN_ID = 66; + /** + * @stable ICU 2.4 + */ + public static final int BOPOMOFO_EXTENDED_ID = 67; + /** + * @stable ICU 2.4 + */ + public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; + /** + * @stable ICU 2.4 + */ + public static final int CJK_COMPATIBILITY_ID = 69; + /** + * @stable ICU 2.4 + */ + public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; + /** + * @stable ICU 2.4 + */ + public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; + /** + * @stable ICU 2.4 + */ + public static final int YI_SYLLABLES_ID = 72; + /** + * @stable ICU 2.4 + */ + public static final int YI_RADICALS_ID = 73; + /** + * @stable ICU 2.4 + */ + public static final int HANGUL_SYLLABLES_ID = 74; + /** + * @stable ICU 2.4 + */ + public static final int HIGH_SURROGATES_ID = 75; + /** + * @stable ICU 2.4 + */ + public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; + /** + * @stable ICU 2.4 + */ + public static final int LOW_SURROGATES_ID = 77; + /** + * Same as public static final int PRIVATE_USE. + * Until Unicode 3.1.1; the corresponding block name was "Private Use"; + * and multiple code point ranges had this block. 
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" + * and adds separate blocks for the supplementary PUAs. + * @stable ICU 2.4 + */ + public static final int PRIVATE_USE_AREA_ID = 78; + /** + * Same as public static final int PRIVATE_USE_AREA. + * Until Unicode 3.1.1; the corresponding block name was "Private Use"; + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" + * and adds separate blocks for the supplementary PUAs. + * @stable ICU 2.4 + */ + public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; + /** + * @stable ICU 2.4 + */ + public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; + /** + * @stable ICU 2.4 + */ + public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; + /** + * @stable ICU 2.4 + */ + public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; + /** + * @stable ICU 2.4 + */ + public static final int COMBINING_HALF_MARKS_ID = 82; + /** + * @stable ICU 2.4 + */ + public static final int CJK_COMPATIBILITY_FORMS_ID = 83; + /** + * @stable ICU 2.4 + */ + public static final int SMALL_FORM_VARIANTS_ID = 84; + /** + * @stable ICU 2.4 + */ + public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; + /** + * @stable ICU 2.4 + */ + public static final int SPECIALS_ID = 86; + /** + * @stable ICU 2.4 + */ + public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; + /** + * @stable ICU 2.4 + */ + public static final int OLD_ITALIC_ID = 88; + /** + * @stable ICU 2.4 + */ + public static final int GOTHIC_ID = 89; + /** + * @stable ICU 2.4 + */ + public static final int DESERET_ID = 90; + /** + * @stable ICU 2.4 + */ + public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; + /** + * @stable ICU 2.4 + */ + public static final int MUSICAL_SYMBOLS_ID = 92; + /** + * @stable ICU 2.4 + */ + public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; + /** + * @stable ICU 2.4 + */ + public static final int 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; + /** + * @stable ICU 2.4 + */ + public static final int + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; + /** + * @stable ICU 2.4 + */ + public static final int TAGS_ID = 96; + + // New blocks in Unicode 3.2 + + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". + * @stable ICU 2.4 + */ + public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". + * @stable ICU 3.0 + */ + + public static final int CYRILLIC_SUPPLEMENT_ID = 97; + /** + * @stable ICU 2.4 + */ + public static final int TAGALOG_ID = 98; + /** + * @stable ICU 2.4 + */ + public static final int HANUNOO_ID = 99; + /** + * @stable ICU 2.4 + */ + public static final int BUHID_ID = 100; + /** + * @stable ICU 2.4 + */ + public static final int TAGBANWA_ID = 101; + /** + * @stable ICU 2.4 + */ + public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; + /** + * @stable ICU 2.4 + */ + public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; + /** + * @stable ICU 2.4 + */ + public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; + /** + * @stable ICU 2.4 + */ + public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; + /** + * @stable ICU 2.4 + */ + public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; + /** + * @stable ICU 2.4 + */ + public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; + /** + * @stable ICU 2.4 + */ + public static final int VARIATION_SELECTORS_ID = 108; + /** + * @stable ICU 2.4 + */ + public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; + /** + * @stable ICU 2.4 + */ + public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; + + /** + * @stable ICU 2.6 + */ + public static final int LIMBU_ID = 111; /*[1900]*/ + /** + * @stable ICU 2.6 + */ + public static final int TAI_LE_ID = 112; /*[1950]*/ + /** + * @stable ICU 2.6 + */ + public static 
final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ + /** + * @stable ICU 2.6 + */ + public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ + /** + * @stable ICU 2.6 + */ + public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ + /** + * @stable ICU 2.6 + */ + public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ + /** + * @stable ICU 2.6 + */ + public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ + /** + * @stable ICU 2.6 + */ + public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ + /** + * @stable ICU 2.6 + */ + public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ + /** + * @stable ICU 2.6 + */ + public static final int UGARITIC_ID = 120; /*[10380]*/ + /** + * @stable ICU 2.6 + */ + public static final int SHAVIAN_ID = 121; /*[10450]*/ + /** + * @stable ICU 2.6 + */ + public static final int OSMANYA_ID = 122; /*[10480]*/ + /** + * @stable ICU 2.6 + */ + public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ + /** + * @stable ICU 2.6 + */ + public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ + /** + * @stable ICU 2.6 + */ + public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ + + /* New blocks in Unicode 4.1 */ + + /** + * @stable ICU 3.4 + */ + public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ + + /** + * @stable ICU 3.4 + */ + public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ + + /** + * @stable ICU 3.4 + */ + public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ + + /** + * @stable ICU 3.4 + */ + public static final int BUGINESE_ID = 129; /*[1A00]*/ + + /** + * @stable ICU 3.4 + */ + public static final int CJK_STROKES_ID = 130; /*[31C0]*/ + + /** + * @stable ICU 3.4 + */ + public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ + + /** + * @stable ICU 3.4 + */ + public static final int COPTIC_ID = 132; /*[2C80]*/ + + /** + * @stable ICU 3.4 + */ + public 
static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ + + /** + * @stable ICU 3.4 + */ + public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ + + /** + * @stable ICU 3.4 + */ + public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ + + /** + * @stable ICU 3.4 + */ + public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ + + /** + * @stable ICU 3.4 + */ + public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ + + /** + * @stable ICU 3.4 + */ + public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ + + /** + * @stable ICU 3.4 + */ + public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ + + /** + * @stable ICU 3.4 + */ + public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ + + /** + * @stable ICU 3.4 + */ + public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ + + /** + * @stable ICU 3.4 + */ + public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ + + /** + * @stable ICU 3.4 + */ + public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ + + /** + * @stable ICU 3.4 + */ + public static final int TIFINAGH_ID = 144; /*[2D30]*/ + + /** + * @stable ICU 3.4 + */ + public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ + + /* New blocks in Unicode 5.0 */ + + /** + * @stable ICU 3.6 + */ + public static final int NKO_ID = 146; /*[07C0]*/ + /** + * @stable ICU 3.6 + */ + public static final int BALINESE_ID = 147; /*[1B00]*/ + /** + * @stable ICU 3.6 + */ + public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ + /** + * @stable ICU 3.6 + */ + public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ + /** + * @stable ICU 3.6 + */ + public static final int PHAGS_PA_ID = 150; /*[A840]*/ + /** + * @stable ICU 3.6 + */ + public static final int PHOENICIAN_ID = 151; /*[10900]*/ + /** + * @stable ICU 3.6 + */ + public static final int CUNEIFORM_ID = 152; /*[12000]*/ + /** + * @stable ICU 3.6 + */ + public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ + 
/** + * @stable ICU 3.6 + */ + public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ + + /** + * @stable ICU 4.0 + */ + public static final int SUNDANESE_ID = 155; /* [1B80] */ + + /** + * @stable ICU 4.0 + */ + public static final int LEPCHA_ID = 156; /* [1C00] */ + + /** + * @stable ICU 4.0 + */ + public static final int OL_CHIKI_ID = 157; /* [1C50] */ + + /** + * @stable ICU 4.0 + */ + public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ + + /** + * @stable ICU 4.0 + */ + public static final int VAI_ID = 159; /* [A500] */ + + /** + * @stable ICU 4.0 + */ + public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ + + /** + * @stable ICU 4.0 + */ + public static final int SAURASHTRA_ID = 161; /* [A880] */ + + /** + * @stable ICU 4.0 + */ + public static final int KAYAH_LI_ID = 162; /* [A900] */ + + /** + * @stable ICU 4.0 + */ + public static final int REJANG_ID = 163; /* [A930] */ + + /** + * @stable ICU 4.0 + */ + public static final int CHAM_ID = 164; /* [AA00] */ + + /** + * @stable ICU 4.0 + */ + public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ + + /** + * @stable ICU 4.0 + */ + public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ + + /** + * @stable ICU 4.0 + */ + public static final int LYCIAN_ID = 167; /* [10280] */ + + /** + * @stable ICU 4.0 + */ + public static final int CARIAN_ID = 168; /* [102A0] */ + + /** + * @stable ICU 4.0 + */ + public static final int LYDIAN_ID = 169; /* [10920] */ + + /** + * @stable ICU 4.0 + */ + public static final int MAHJONG_TILES_ID = 170; /* [1F000] */ + + /** + * @stable ICU 4.0 + */ + public static final int DOMINO_TILES_ID = 171; /* [1F030] */ + + /* New blocks in Unicode 5.2 */ + + /** @stable ICU 4.4 */ + public static final int SAMARITAN_ID = 172; /*[0800]*/ + /** @stable ICU 4.4 */ + public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ + /** @stable ICU 4.4 */ + public static final int TAI_THAM_ID = 174; /*[1A20]*/ 
+ /** @stable ICU 4.4 */ + public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ + /** @stable ICU 4.4 */ + public static final int LISU_ID = 176; /*[A4D0]*/ + /** @stable ICU 4.4 */ + public static final int BAMUM_ID = 177; /*[A6A0]*/ + /** @stable ICU 4.4 */ + public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ + /** @stable ICU 4.4 */ + public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ + /** @stable ICU 4.4 */ + public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ + /** @stable ICU 4.4 */ + public static final int JAVANESE_ID = 181; /*[A980]*/ + /** @stable ICU 4.4 */ + public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ + /** @stable ICU 4.4 */ + public static final int TAI_VIET_ID = 183; /*[AA80]*/ + /** @stable ICU 4.4 */ + public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ + /** @stable ICU 4.4 */ + public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ + /** @stable ICU 4.4 */ + public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ + /** @stable ICU 4.4 */ + public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ + /** @stable ICU 4.4 */ + public static final int AVESTAN_ID = 188; /*[10B00]*/ + /** @stable ICU 4.4 */ + public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ + /** @stable ICU 4.4 */ + public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ + /** @stable ICU 4.4 */ + public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ + /** @stable ICU 4.4 */ + public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ + /** @stable ICU 4.4 */ + public static final int KAITHI_ID = 193; /*[11080]*/ + /** @stable ICU 4.4 */ + public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ + /** @stable ICU 4.4 */ + public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ + /** @stable ICU 4.4 */ + public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ + /** @stable ICU 4.4 
*/ + public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ + + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 198; + + // blocks objects --------------------------------------------------- + + /** + * Array of UnicodeBlocks, for easy access in getInstance(int) + */ + private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; + + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock NO_BLOCK + = new UnicodeBlock("NO_BLOCK", 0); + + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BASIC_LATIN + = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LATIN_1_SUPPLEMENT + = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LATIN_EXTENDED_A + = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LATIN_EXTENDED_B + = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock IPA_EXTENSIONS + = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SPACING_MODIFIER_LETTERS + = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS + = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); + /** + * Unicode 3.2 renames this block to "Greek and Coptic". 
+ * @stable ICU 2.4 + */ + public static final UnicodeBlock GREEK + = new UnicodeBlock("GREEK", GREEK_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CYRILLIC + = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ARMENIAN + = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HEBREW + = new UnicodeBlock("HEBREW", HEBREW_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ARABIC + = new UnicodeBlock("ARABIC", ARABIC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SYRIAC + = new UnicodeBlock("SYRIAC", SYRIAC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock THAANA + = new UnicodeBlock("THAANA", THAANA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock DEVANAGARI + = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BENGALI + = new UnicodeBlock("BENGALI", BENGALI_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock GURMUKHI + = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock GUJARATI + = new UnicodeBlock("GUJARATI", GUJARATI_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ORIYA + = new UnicodeBlock("ORIYA", ORIYA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock TAMIL + = new UnicodeBlock("TAMIL", TAMIL_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock TELUGU + = new UnicodeBlock("TELUGU", TELUGU_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock KANNADA + = new UnicodeBlock("KANNADA", KANNADA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MALAYALAM + = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SINHALA + = new UnicodeBlock("SINHALA", 
SINHALA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock THAI + = new UnicodeBlock("THAI", THAI_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LAO + = new UnicodeBlock("LAO", LAO_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock TIBETAN + = new UnicodeBlock("TIBETAN", TIBETAN_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MYANMAR + = new UnicodeBlock("MYANMAR", MYANMAR_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock GEORGIAN + = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HANGUL_JAMO + = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ETHIOPIC + = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CHEROKEE + = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS + = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", + UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock OGHAM + = new UnicodeBlock("OGHAM", OGHAM_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock RUNIC + = new UnicodeBlock("RUNIC", RUNIC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock KHMER + = new UnicodeBlock("KHMER", KHMER_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MONGOLIAN + = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL + = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock GREEK_EXTENDED + = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); + /** + * @stable ICU 2.4 + */ + public static 
final UnicodeBlock GENERAL_PUNCTUATION + = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS + = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CURRENCY_SYMBOLS + = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); + /** + * Unicode 3.2 renames this block to "Combining Diacritical Marks for + * Symbols". + * @stable ICU 2.4 + */ + public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS + = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LETTERLIKE_SYMBOLS + = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock NUMBER_FORMS + = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ARROWS + = new UnicodeBlock("ARROWS", ARROWS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MATHEMATICAL_OPERATORS + = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MISCELLANEOUS_TECHNICAL + = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CONTROL_PICTURES + = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION + = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ENCLOSED_ALPHANUMERICS + = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock 
BOX_DRAWING + = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BLOCK_ELEMENTS + = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock GEOMETRIC_SHAPES + = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MISCELLANEOUS_SYMBOLS + = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock DINGBATS + = new UnicodeBlock("DINGBATS", DINGBATS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BRAILLE_PATTERNS + = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT + = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock KANGXI_RADICALS + = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS + = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", + IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION + = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HIRAGANA + = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock KATAKANA + = new UnicodeBlock("KATAKANA", KATAKANA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BOPOMOFO + = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO + = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 
HANGUL_COMPATIBILITY_JAMO_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock KANBUN + = new UnicodeBlock("KANBUN", KANBUN_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BOPOMOFO_EXTENDED + = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS + = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", + ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_COMPATIBILITY + = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A + = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS + = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock YI_SYLLABLES + = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock YI_RADICALS + = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HANGUL_SYLLABLES + = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HIGH_SURROGATES + = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES + = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock LOW_SURROGATES + = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); + /** + * Same block as {@link #PRIVATE_USE}; that constant is assigned from this one. 
+ * Until Unicode 3.1.1; the corresponding block name was "Private Use"; + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" + * and adds separate blocks for the supplementary PUAs. + * @stable ICU 2.4 + */ + public static final UnicodeBlock PRIVATE_USE_AREA + = new UnicodeBlock("PRIVATE_USE_AREA", 78); /* NOTE(review): the only id in this section passed as a literal; every other constant here passes a *_ID constant -- confirm whether a PRIVATE_USE_AREA_ID constant exists and should be used */ + /** + * Alias of {@link #PRIVATE_USE_AREA}; both fields refer to the same UnicodeBlock object. + * Until Unicode 3.1.1; the corresponding block name was "Private Use"; + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" + * and adds separate blocks for the supplementary PUAs. + * @stable ICU 2.4 + */ + public static final UnicodeBlock PRIVATE_USE + = PRIVATE_USE_AREA; + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS + = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS + = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A + = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock COMBINING_HALF_MARKS + = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_COMPATIBILITY_FORMS + = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SMALL_FORM_VARIANTS + = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B + = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); + /** + * 
@stable ICU 2.4 + */ + public static final UnicodeBlock SPECIALS + = new UnicodeBlock("SPECIALS", SPECIALS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS + = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock OLD_ITALIC + = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock GOTHIC + = new UnicodeBlock("GOTHIC", GOTHIC_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock DESERET + = new UnicodeBlock("DESERET", DESERET_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS + = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MUSICAL_SYMBOLS + = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS + = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", + MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B + = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT + = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock TAGS + = new UnicodeBlock("TAGS", TAGS_ID); + + // New blocks in Unicode 3.2 + + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". This constant keeps the older name; see {@link #CYRILLIC_SUPPLEMENT}. 
+ * @stable ICU 2.4 + */ + public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY + = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". This constant carries the newer name and is a separate instance from {@link #CYRILLIC_SUPPLEMENTARY}. + * @stable ICU 3.0 + */ + public static final UnicodeBlock CYRILLIC_SUPPLEMENT + = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock TAGALOG + = new UnicodeBlock("TAGALOG", TAGALOG_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock HANUNOO + = new UnicodeBlock("HANUNOO", HANUNOO_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock BUHID + = new UnicodeBlock("BUHID", BUHID_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock TAGBANWA + = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A + = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", + MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A + = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B + = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B + = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", + MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS + = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", + SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS + = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); + 
/** + * @stable ICU 2.4 + */ + public static final UnicodeBlock VARIATION_SELECTORS + = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A + = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", + SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B + = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", + SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); + /* NOTE(review): the constants tagged @stable ICU 2.6 below presumably are the blocks added in Unicode 4.0 -- confirm before relying on this grouping */ + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock LIMBU + = new UnicodeBlock("LIMBU", LIMBU_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock TAI_LE + = new UnicodeBlock("TAI_LE", TAI_LE_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock KHMER_SYMBOLS + = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); + + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock PHONETIC_EXTENSIONS + = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); + + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS + = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", + MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS + = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock LINEAR_B_SYLLABARY + = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock LINEAR_B_IDEOGRAMS + = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock AEGEAN_NUMBERS + = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock UGARITIC + = new UnicodeBlock("UGARITIC", 
UGARITIC_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock SHAVIAN + = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock OSMANYA + = new UnicodeBlock("OSMANYA", OSMANYA_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock CYPRIOT_SYLLABARY + = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS + = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); + + /** + * @stable ICU 2.6 + */ + public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT + = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); + + /* New blocks in Unicode 4.1 */ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = + new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", + ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = + new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock ARABIC_SUPPLEMENT = + new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock BUGINESE = + new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock CJK_STROKES = + new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = + new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", + COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ + + /** + * @stable ICU 3.4 + */ + 
public static final UnicodeBlock ETHIOPIC_EXTENDED = + new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = + new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock GEORGIAN_SUPPLEMENT = + new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock GLAGOLITIC = + new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock KHAROSHTHI = + new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock MODIFIER_TONE_LETTERS = + new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock NEW_TAI_LUE = + new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock OLD_PERSIAN = + new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = + new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", + PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = + new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock SYLOTI_NAGRI = + new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock TIFINAGH = + new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ + + /** + * @stable ICU 3.4 + */ + public static final UnicodeBlock VERTICAL_FORMS = + new UnicodeBlock("VERTICAL_FORMS", 
VERTICAL_FORMS_ID); /*[FE10]*/ + + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock BALINESE = + new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock LATIN_EXTENDED_C = + new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock LATIN_EXTENDED_D = + new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock PHAGS_PA = + new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock PHOENICIAN = + new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock CUNEIFORM = + new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = + new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", + CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ + /** + * @stable ICU 3.6 + */ + public static final UnicodeBlock COUNTING_ROD_NUMERALS = + new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock SUNDANESE = + new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock LEPCHA = + new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock OL_CHIKI = + new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock CYRILLIC_EXTENDED_A = + new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ + + /** + * @stable ICU 4.0 + 
*/ + public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock CYRILLIC_EXTENDED_B = + new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock SAURASHTRA = + new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock KAYAH_LI = + new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock REJANG = + new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock CHAM = + new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock ANCIENT_SYMBOLS = + new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock PHAISTOS_DISC = + new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock LYCIAN = + new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock CARIAN = + new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock LYDIAN = + new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock MAHJONG_TILES = + new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ + + /** + * @stable ICU 4.0 + */ + public static final UnicodeBlock DOMINO_TILES = + new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ + + /* New blocks in Unicode 5.2 */ + + /** @stable ICU 4.4 */ + public static final UnicodeBlock SAMARITAN = + new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ + /** @stable ICU 4.4 */ 
+ public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = + new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", + UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock TAI_THAM = + new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock VEDIC_EXTENSIONS = + new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock LISU = + new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock BAMUM = + new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = + new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock DEVANAGARI_EXTENDED = + new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = + new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock JAVANESE = + new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock MYANMAR_EXTENDED_A = + new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock TAI_VIET = + new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock MEETEI_MAYEK = + new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = + new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ + /** @stable ICU 4.4 */ + public static 
final UnicodeBlock IMPERIAL_ARAMAIC = + new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock OLD_SOUTH_ARABIAN = + new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock AVESTAN = + new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = + new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = + new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock OLD_TURKIC = + new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = + new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock KAITHI = + new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = + new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = + new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", + ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = + new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", + ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ + /** @stable ICU 4.4 */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = + new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ + + /** + * @stable ICU 2.4 + */ + public static final UnicodeBlock 
INVALID_CODE + = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); + + static { + for (int blockId = 0; blockId < COUNT; ++blockId) { + if (BLOCKS_[blockId] == null) { + throw new java.lang.IllegalStateException( + "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); + } + } + } + + // public methods -------------------------------------------------- + + /** + * {@icu} Returns the only instance of the UnicodeBlock with the argument ID. + * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. + * @param id UnicodeBlock ID + * @return the only instance of the UnicodeBlock with the argument ID + * if it exists, otherwise a INVALID_CODE UnicodeBlock will be + * returned. + * @stable ICU 2.4 + */ + public static UnicodeBlock getInstance(int id) + { + if (id >= 0 && id < BLOCKS_.length) { + return BLOCKS_[id]; + } + return INVALID_CODE; + } + + /** + * Returns the Unicode allocation block that contains the code point, + * or null if the code point is not a member of a defined block. + * @param ch code point to be tested + * @return the Unicode allocation block that contains the code point + * @stable ICU 2.4 + */ + public static UnicodeBlock of(int ch) + { + if (ch > MAX_VALUE) { + return INVALID_CODE; + } + + return UnicodeBlock.getInstance((UCharacterProperty.INSTANCE.getAdditional(ch, 0) + & BLOCK_MASK_) >> BLOCK_SHIFT_); + } + + /* + * Internal function returning of(ch).getID(). + * + * @param ch + * @return numeric block value + */ + static int idOf(int ch) { + if (ch < 0 || ch > MAX_VALUE) { + return -1; + } + + return (UCharacterProperty.INSTANCE.getAdditional(ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_; + } + + /** + * Cover the JDK 1.5 API. Return the Unicode block with the + * given name. {@icunote} Unlike JDK 1.5, this only matches + * against the official UCD name and the Java block name + * (ignoring case). 
+ * @param blockName the name of the block to match + * @return the UnicodeBlock with that name + * @throws IllegalArgumentException if the blockName could not be matched + * @stable ICU 3.0 + */ + public static final UnicodeBlock forName(String blockName) { + // Lazily build (or rebuild, once the soft reference has been cleared) + // the map from normalized long block names to UnicodeBlock instances. + Map<String, UnicodeBlock> m = null; + if (mref != null) { + m = mref.get(); + } + if (m == null) { + m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); + for (int i = 0; i < BLOCKS_.length; ++i) { + UnicodeBlock b = BLOCKS_[i]; + String name = trimBlockName( + getPropertyValueName(UProperty.BLOCK, b.getID(), + UProperty.NameChoice.LONG)); + m.put(name, b); + } + mref = new SoftReference<Map<String, UnicodeBlock>>(m); + } + UnicodeBlock b = m.get(trimBlockName(blockName)); + if (b == null) { + throw new IllegalArgumentException("Unknown Unicode block name: " + blockName); + } + return b; + } + + /** + * Softly referenced cache mapping normalized block names to their + * UnicodeBlock; populated on demand by forName(). + */ + private static SoftReference<Map<String, UnicodeBlock>> mref; + + /** + * Normalizes a block name for lookup: uppercases it and drops + * spaces, underscores and hyphens. + * @param name block name to normalize + * @return the normalized name + */ + private static String trimBlockName(String name) { + // Use a fixed locale so that the result does not depend on the + // default locale (e.g. Turkish uppercases 'i' to U+0130, not 'I'). + String upper = name.toUpperCase(java.util.Locale.ENGLISH); + StringBuilder result = new StringBuilder(upper.length()); + for (int i = 0; i < upper.length(); i++) { + char c = upper.charAt(i); + if (c != ' ' && c != '_' && c != '-') { + result.append(c); + } + } + return result.toString(); + } + + /** + * {@icu} Returns the type ID of this Unicode block + * @return integer type ID of this Unicode block + * @stable ICU 2.4 + */ + public int getID() + { + return m_id_; + } + + // private data members --------------------------------------------- + + /** + * Identification code for this UnicodeBlock + */ + private int m_id_; + + // private constructor ---------------------------------------------- + + /** + * UnicodeBlock constructor + * @param name name of this UnicodeBlock + * @param id unique id of this UnicodeBlock + * @exception NullPointerException if name is null + */ + private UnicodeBlock(String name, int id) + { + super(name); + m_id_ = id; + if (id >= 0) { + BLOCKS_[id] = this; + } + } + } + + /** + * East Asian Width constants.
+ * @see UProperty#EAST_ASIAN_WIDTH + * @see UCharacter#getIntPropertyValue + * @stable ICU 2.4 + */ + public static interface EastAsianWidth + { + /** + * @stable ICU 2.4 + */ + public static final int NEUTRAL = 0; + /** + * @stable ICU 2.4 + */ + public static final int AMBIGUOUS = 1; + /** + * @stable ICU 2.4 + */ + public static final int HALFWIDTH = 2; + /** + * @stable ICU 2.4 + */ + public static final int FULLWIDTH = 3; + /** + * @stable ICU 2.4 + */ + public static final int NARROW = 4; + /** + * @stable ICU 2.4 + */ + public static final int WIDE = 5; + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 6; + } + + /** + * Decomposition Type constants. + * @see UProperty#DECOMPOSITION_TYPE + * @stable ICU 2.4 + */ + public static interface DecompositionType + { + /** + * @stable ICU 2.4 + */ + public static final int NONE = 0; + /** + * @stable ICU 2.4 + */ + public static final int CANONICAL = 1; + /** + * @stable ICU 2.4 + */ + public static final int COMPAT = 2; + /** + * @stable ICU 2.4 + */ + public static final int CIRCLE = 3; + /** + * @stable ICU 2.4 + */ + public static final int FINAL = 4; + /** + * @stable ICU 2.4 + */ + public static final int FONT = 5; + /** + * @stable ICU 2.4 + */ + public static final int FRACTION = 6; + /** + * @stable ICU 2.4 + */ + public static final int INITIAL = 7; + /** + * @stable ICU 2.4 + */ + public static final int ISOLATED = 8; + /** + * @stable ICU 2.4 + */ + public static final int MEDIAL = 9; + /** + * @stable ICU 2.4 + */ + public static final int NARROW = 10; + /** + * @stable ICU 2.4 + */ + public static final int NOBREAK = 11; + /** + * @stable ICU 2.4 + */ + public static final int SMALL = 12; + /** + * @stable ICU 2.4 + */ + public static final int SQUARE = 13; + /** + * @stable ICU 2.4 + */ + public static final int SUB = 14; + /** + * @stable ICU 2.4 + */ + public static final int SUPER = 15; + /** + * @stable ICU 2.4 + */ + public static final int VERTICAL = 16; + /** + * @stable ICU 2.4 
+ */ + public static final int WIDE = 17; + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 18; + } + + /** + * Joining Type constants. + * @see UProperty#JOINING_TYPE + * @stable ICU 2.4 + */ + public static interface JoiningType + { + /** + * @stable ICU 2.4 + */ + public static final int NON_JOINING = 0; + /** + * @stable ICU 2.4 + */ + public static final int JOIN_CAUSING = 1; + /** + * @stable ICU 2.4 + */ + public static final int DUAL_JOINING = 2; + /** + * @stable ICU 2.4 + */ + public static final int LEFT_JOINING = 3; + /** + * @stable ICU 2.4 + */ + public static final int RIGHT_JOINING = 4; + /** + * @stable ICU 2.4 + */ + public static final int TRANSPARENT = 5; + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 6; + } + + /** + * Joining Group constants. + * @see UProperty#JOINING_GROUP + * @stable ICU 2.4 + */ + public static interface JoiningGroup + { + /** + * @stable ICU 2.4 + */ + public static final int NO_JOINING_GROUP = 0; + /** + * @stable ICU 2.4 + */ + public static final int AIN = 1; + /** + * @stable ICU 2.4 + */ + public static final int ALAPH = 2; + /** + * @stable ICU 2.4 + */ + public static final int ALEF = 3; + /** + * @stable ICU 2.4 + */ + public static final int BEH = 4; + /** + * @stable ICU 2.4 + */ + public static final int BETH = 5; + /** + * @stable ICU 2.4 + */ + public static final int DAL = 6; + /** + * @stable ICU 2.4 + */ + public static final int DALATH_RISH = 7; + /** + * @stable ICU 2.4 + */ + public static final int E = 8; + /** + * @stable ICU 2.4 + */ + public static final int FEH = 9; + /** + * @stable ICU 2.4 + */ + public static final int FINAL_SEMKATH = 10; + /** + * @stable ICU 2.4 + */ + public static final int GAF = 11; + /** + * @stable ICU 2.4 + */ + public static final int GAMAL = 12; + /** + * @stable ICU 2.4 + */ + public static final int HAH = 13; + /** + * @stable ICU 2.4 + */ + public static final int HAMZA_ON_HEH_GOAL = 14; + /** + * @stable ICU 2.4 + */ + public static 
final int HE = 15; + /** + * @stable ICU 2.4 + */ + public static final int HEH = 16; + /** + * @stable ICU 2.4 + */ + public static final int HEH_GOAL = 17; + /** + * @stable ICU 2.4 + */ + public static final int HETH = 18; + /** + * @stable ICU 2.4 + */ + public static final int KAF = 19; + /** + * @stable ICU 2.4 + */ + public static final int KAPH = 20; + /** + * @stable ICU 2.4 + */ + public static final int KNOTTED_HEH = 21; + /** + * @stable ICU 2.4 + */ + public static final int LAM = 22; + /** + * @stable ICU 2.4 + */ + public static final int LAMADH = 23; + /** + * @stable ICU 2.4 + */ + public static final int MEEM = 24; + /** + * @stable ICU 2.4 + */ + public static final int MIM = 25; + /** + * @stable ICU 2.4 + */ + public static final int NOON = 26; + /** + * @stable ICU 2.4 + */ + public static final int NUN = 27; + /** + * @stable ICU 2.4 + */ + public static final int PE = 28; + /** + * @stable ICU 2.4 + */ + public static final int QAF = 29; + /** + * @stable ICU 2.4 + */ + public static final int QAPH = 30; + /** + * @stable ICU 2.4 + */ + public static final int REH = 31; + /** + * @stable ICU 2.4 + */ + public static final int REVERSED_PE = 32; + /** + * @stable ICU 2.4 + */ + public static final int SAD = 33; + /** + * @stable ICU 2.4 + */ + public static final int SADHE = 34; + /** + * @stable ICU 2.4 + */ + public static final int SEEN = 35; + /** + * @stable ICU 2.4 + */ + public static final int SEMKATH = 36; + /** + * @stable ICU 2.4 + */ + public static final int SHIN = 37; + /** + * @stable ICU 2.4 + */ + public static final int SWASH_KAF = 38; + /** + * @stable ICU 2.4 + */ + public static final int SYRIAC_WAW = 39; + /** + * @stable ICU 2.4 + */ + public static final int TAH = 40; + /** + * @stable ICU 2.4 + */ + public static final int TAW = 41; + /** + * @stable ICU 2.4 + */ + public static final int TEH_MARBUTA = 42; + /** + * @stable ICU 2.4 + */ + public static final int TETH = 43; + /** + * @stable ICU 2.4 + */ + public static 
final int WAW = 44; + /** + * @stable ICU 2.4 + */ + public static final int YEH = 45; + /** + * @stable ICU 2.4 + */ + public static final int YEH_BARREE = 46; + /** + * @stable ICU 2.4 + */ + public static final int YEH_WITH_TAIL = 47; + /** + * @stable ICU 2.4 + */ + public static final int YUDH = 48; + /** + * @stable ICU 2.4 + */ + public static final int YUDH_HE = 49; + /** + * @stable ICU 2.4 + */ + public static final int ZAIN = 50; + /** + * @stable ICU 2.6 + */ + public static final int FE = 51; + /** + * @stable ICU 2.6 + */ + public static final int KHAPH = 52; + /** + * @stable ICU 2.6 + */ + public static final int ZHAIN = 53; + /** + * @stable ICU 4.0 + */ + public static final int BURUSHASKI_YEH_BARREE = 54; + /** @stable ICU 4.4 */ + public static final int FARSI_YEH = 55; + /** @stable ICU 4.4 */ + public static final int NYA = 56; + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 57; + } + + /** + * Grapheme Cluster Break constants. + * @see UProperty#GRAPHEME_CLUSTER_BREAK + * @stable ICU 3.4 + */ + public static interface GraphemeClusterBreak { + /** + * @stable ICU 3.4 + */ + public static final int OTHER = 0; + /** + * @stable ICU 3.4 + */ + public static final int CONTROL = 1; + /** + * @stable ICU 3.4 + */ + public static final int CR = 2; + /** + * @stable ICU 3.4 + */ + public static final int EXTEND = 3; + /** + * @stable ICU 3.4 + */ + public static final int L = 4; + /** + * @stable ICU 3.4 + */ + public static final int LF = 5; + /** + * @stable ICU 3.4 + */ + public static final int LV = 6; + /** + * @stable ICU 3.4 + */ + public static final int LVT = 7; + /** + * @stable ICU 3.4 + */ + public static final int T = 8; + /** + * @stable ICU 3.4 + */ + public static final int V = 9; + /** + * @stable ICU 4.0 + */ + public static final int SPACING_MARK = 10; + /** + * @stable ICU 4.0 + */ + public static final int PREPEND = 11; + /** + * @stable ICU 3.4 + */ + public static final int COUNT = 12; + } + + /** + * Word Break 
constants. + * @see UProperty#WORD_BREAK + * @stable ICU 3.4 + */ + public static interface WordBreak { + /** + * @stable ICU 3.8 + */ + public static final int OTHER = 0; + /** + * @stable ICU 3.8 + */ + public static final int ALETTER = 1; + /** + * @stable ICU 3.8 + */ + public static final int FORMAT = 2; + /** + * @stable ICU 3.8 + */ + public static final int KATAKANA = 3; + /** + * @stable ICU 3.8 + */ + public static final int MIDLETTER = 4; + /** + * @stable ICU 3.8 + */ + public static final int MIDNUM = 5; + /** + * @stable ICU 3.8 + */ + public static final int NUMERIC = 6; + /** + * @stable ICU 3.8 + */ + public static final int EXTENDNUMLET = 7; + /** + * @stable ICU 4.0 + */ + public static final int CR = 8; + /** + * @stable ICU 4.0 + */ + public static final int EXTEND = 9; + /** + * @stable ICU 4.0 + */ + public static final int LF = 10; + /** + * @stable ICU 4.0 + */ + public static final int MIDNUMLET = 11; + /** + * @stable ICU 4.0 + */ + public static final int NEWLINE = 12; + /** + * @stable ICU 4.0 + */ + public static final int COUNT = 13; + } + + /** + * Sentence Break constants. 
+ * @see UProperty#SENTENCE_BREAK + * @stable ICU 3.4 + */ + public static interface SentenceBreak { + /** + * @stable ICU 3.8 + */ + public static final int OTHER = 0; + /** + * @stable ICU 3.8 + */ + public static final int ATERM = 1; + /** + * @stable ICU 3.8 + */ + public static final int CLOSE = 2; + /** + * @stable ICU 3.8 + */ + public static final int FORMAT = 3; + /** + * @stable ICU 3.8 + */ + public static final int LOWER = 4; + /** + * @stable ICU 3.8 + */ + public static final int NUMERIC = 5; + /** + * @stable ICU 3.8 + */ + public static final int OLETTER = 6; + /** + * @stable ICU 3.8 + */ + public static final int SEP = 7; + /** + * @stable ICU 3.8 + */ + public static final int SP = 8; + /** + * @stable ICU 3.8 + */ + public static final int STERM = 9; + /** + * @stable ICU 3.8 + */ + public static final int UPPER = 10; + /** + * @stable ICU 4.0 + */ + public static final int CR = 11; + /** + * @stable ICU 4.0 + */ + public static final int EXTEND = 12; + /** + * @stable ICU 4.0 + */ + public static final int LF = 13; + /** + * @stable ICU 4.0 + */ + public static final int SCONTINUE = 14; + /** + * @stable ICU 4.0 + */ + public static final int COUNT = 15; + } + + /** + * Line Break constants. 
+ * @see UProperty#LINE_BREAK + * @stable ICU 2.4 + */ + public static interface LineBreak + { + /** + * @stable ICU 2.4 + */ + public static final int UNKNOWN = 0; + /** + * @stable ICU 2.4 + */ + public static final int AMBIGUOUS = 1; + /** + * @stable ICU 2.4 + */ + public static final int ALPHABETIC = 2; + /** + * @stable ICU 2.4 + */ + public static final int BREAK_BOTH = 3; + /** + * @stable ICU 2.4 + */ + public static final int BREAK_AFTER = 4; + /** + * @stable ICU 2.4 + */ + public static final int BREAK_BEFORE = 5; + /** + * @stable ICU 2.4 + */ + public static final int MANDATORY_BREAK = 6; + /** + * @stable ICU 2.4 + */ + public static final int CONTINGENT_BREAK = 7; + /** + * @stable ICU 2.4 + */ + public static final int CLOSE_PUNCTUATION = 8; + /** + * @stable ICU 2.4 + */ + public static final int COMBINING_MARK = 9; + /** + * @stable ICU 2.4 + */ + public static final int CARRIAGE_RETURN = 10; + /** + * @stable ICU 2.4 + */ + public static final int EXCLAMATION = 11; + /** + * @stable ICU 2.4 + */ + public static final int GLUE = 12; + /** + * @stable ICU 2.4 + */ + public static final int HYPHEN = 13; + /** + * @stable ICU 2.4 + */ + public static final int IDEOGRAPHIC = 14; + /** + * @see #INSEPARABLE + * @stable ICU 2.4 + */ + public static final int INSEPERABLE = 15; + /** + * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
+ * @stable ICU 3.0 + */ + public static final int INSEPARABLE = 15; + /** + * @stable ICU 2.4 + */ + public static final int INFIX_NUMERIC = 16; + /** + * @stable ICU 2.4 + */ + public static final int LINE_FEED = 17; + /** + * @stable ICU 2.4 + */ + public static final int NONSTARTER = 18; + /** + * @stable ICU 2.4 + */ + public static final int NUMERIC = 19; + /** + * @stable ICU 2.4 + */ + public static final int OPEN_PUNCTUATION = 20; + /** + * @stable ICU 2.4 + */ + public static final int POSTFIX_NUMERIC = 21; + /** + * @stable ICU 2.4 + */ + public static final int PREFIX_NUMERIC = 22; + /** + * @stable ICU 2.4 + */ + public static final int QUOTATION = 23; + /** + * @stable ICU 2.4 + */ + public static final int COMPLEX_CONTEXT = 24; + /** + * @stable ICU 2.4 + */ + public static final int SURROGATE = 25; + /** + * @stable ICU 2.4 + */ + public static final int SPACE = 26; + /** + * @stable ICU 2.4 + */ + public static final int BREAK_SYMBOLS = 27; + /** + * @stable ICU 2.4 + */ + public static final int ZWSPACE = 28; + + /** + * @stable ICU 2.6 + */ + public static final int NEXT_LINE = 29; /*[NL]*/ + + /* from here on: new in Unicode 4/ICU 2.6 */ + + /** + * @stable ICU 2.6 + */ + public static final int WORD_JOINER = 30; /*[WJ]*/ + + /* from here on: new in Unicode 4.1/ICU 3.4 */ + + /** + * @stable ICU 3.4 + */ + public static final int H2 = 31; + /** + * @stable ICU 3.4 + */ + public static final int H3 = 32; + /** + * @stable ICU 3.4 + */ + public static final int JL = 33; + /** + * @stable ICU 3.4 + */ + public static final int JT = 34; + /** + * @stable ICU 3.4 + */ + public static final int JV = 35; + /** @stable ICU 4.4 */ + public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ + + /* new in Unicode 5.2/ICU 4.4 */ + + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 37; + } + + /** + * Numeric Type constants. 
+ * @see UProperty#NUMERIC_TYPE + * @stable ICU 2.4 + */ + public static interface NumericType + { + /** + * @stable ICU 2.4 + */ + public static final int NONE = 0; + /** + * @stable ICU 2.4 + */ + public static final int DECIMAL = 1; + /** + * @stable ICU 2.4 + */ + public static final int DIGIT = 2; + /** + * @stable ICU 2.4 + */ + public static final int NUMERIC = 3; + /** + * @stable ICU 2.4 + */ + public static final int COUNT = 4; + } + + /** + * Hangul Syllable Type constants. + * + * @see UProperty#HANGUL_SYLLABLE_TYPE + * @stable ICU 2.6 + */ + public static interface HangulSyllableType + { + /** + * @stable ICU 2.6 + */ + public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ + /** + * @stable ICU 2.6 + */ + public static final int LEADING_JAMO = 1; /*[L]*/ + /** + * @stable ICU 2.6 + */ + public static final int VOWEL_JAMO = 2; /*[V]*/ + /** + * @stable ICU 2.6 + */ + public static final int TRAILING_JAMO = 3; /*[T]*/ + /** + * @stable ICU 2.6 + */ + public static final int LV_SYLLABLE = 4; /*[LV]*/ + /** + * @stable ICU 2.6 + */ + public static final int LVT_SYLLABLE = 5; /*[LVT]*/ + /** + * @stable ICU 2.6 + */ + public static final int COUNT = 6; + } + + // public data members ----------------------------------------------- + + /** + * The lowest Unicode code point value. + * @stable ICU 2.1 + */ + public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE; + + /** + * The highest Unicode code point value (scalar value) according to the + * Unicode Standard. + * This is a 21-bit value (21 bits, rounded up).
    + * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE + * @stable ICU 2.1 + */ + public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE; + + /** + * The minimum value for Supplementary code points + * @stable ICU 2.1 + */ + public static final int SUPPLEMENTARY_MIN_VALUE = + UTF16.SUPPLEMENTARY_MIN_VALUE; + + /** + * Unicode value used when translating into Unicode encoding form and there + * is no existing character. + * @stable ICU 2.1 + */ + public static final int REPLACEMENT_CHAR = '\uFFFD'; + + /** + * Special value that is returned by getUnicodeNumericValue(int) when no + * numeric value is defined for a code point. + * @stable ICU 2.4 + * @see #getUnicodeNumericValue + */ + public static final double NO_NUMERIC_VALUE = -123456789; + + /** + * Compatibility constant for Java Character's MIN_RADIX. + * @stable ICU 3.4 + */ + public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; + + /** + * Compatibility constant for Java Character's MAX_RADIX. + * @stable ICU 3.4 + */ + public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; + + /** + * Do not lowercase non-initial parts of words when titlecasing. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will titlecase the first cased character + * of a word and lowercase all other characters. + * With this option, the other characters will not be modified. + * + * @see #toTitleCase + * @stable ICU 3.8 + */ + public static final int TITLECASE_NO_LOWERCASE = 0x100; + + /** + * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; + * titlecase exactly the characters at breaks from the iterator. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will take each break iterator index, + * adjust it by looking for the next cased character, and titlecase that one. + * Other characters are lowercased. 
+ * + * This follows Unicode 4 & 5 section 3.13 Default Case Operations: + * + * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex + * #29, "Text Boundaries." Between each pair of word boundaries, find the first + * cased character F. If F exists, map F to default_title(F); then map each + * subsequent character C to default_lower(C). + * + * @see #toTitleCase + * @see #TITLECASE_NO_LOWERCASE + * @stable ICU 3.8 + */ + public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; + + // public methods ---------------------------------------------------- + + /** + * Returns the numeric value of a decimal digit code point. + *
    This method observes the semantics of + * java.lang.Character.digit(). Note that this + * will return positive values for code points for which isDigit + * returns false, just like java.lang.Character. + *
    Semantic Change: In release 1.3.1 and + * prior, this did not treat the European letters as having a + * digit value, and also treated numeric letters and other numbers as + * digits. + * This has been changed to conform to the java semantics. + *
    A code point is a valid digit if and only if: + *
      + *
    • ch is a decimal digit or one of the european letters, and + *
    • the value of ch is less than the specified radix. + *
    + * @param ch the code point to query + * @param radix the radix + * @return the numeric value represented by the code point in the + * specified radix, or -1 if the code point is not a decimal digit + * or if its value is too large for the radix + * @stable ICU 2.1 + */ + public static int digit(int ch, int radix) + { + if (2 <= radix && radix <= 36) { + int value = digit(ch); + if (value < 0) { + // ch is not a decimal digit, try latin letters + value = getEuropeanDigit(ch); + } + return (value < radix) ? value : -1; + } else { + return -1; // invalid radix + } + } + + /** + * Returns the numeric value of a decimal digit code point. + *
    This is a convenience overload of digit(int, int) + * that provides a decimal radix. + *
    Semantic Change: In release 1.3.1 and prior, this + * treated numeric letters and other numbers as digits. This has + * been changed to conform to the java semantics. + * @param ch the code point to query + * @return the numeric value represented by the code point, + * or -1 if the code point is not a decimal digit or if its + * value is too large for a decimal radix + * @stable ICU 2.1 + */ + public static int digit(int ch) + { + int props = getProperty(ch); + int value = getNumericTypeValue(props) - NTV_DECIMAL_START_; + if(value<=9) { + return value; + } else { + return -1; + } + } + + /** + * Returns the numeric value of the code point as a nonnegative + * integer. + *
    If the code point does not have a numeric value, then -1 is returned. + *
    + * If the code point has a numeric value that cannot be represented as a + * nonnegative integer (for example, a fractional value), then -2 is + * returned. + * @param ch the code point to query + * @return the numeric value of the code point, or -1 if it has no numeric + * value, or -2 if it has a numeric value that cannot be represented as a + * nonnegative integer + * @stable ICU 2.1 + */ + public static int getNumericValue(int ch) + { + // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit() + int props = UCharacterProperty.INSTANCE.getProperty(ch); + int ntv = getNumericTypeValue(props); + + if(ntv==NTV_NONE_) { + return getEuropeanDigit(ch); + } else if(ntv>5)-14; + int exp=(ntv&0x1f)+2; + if(exp<9 || (exp==9 && mant<=2)) { + int numValue=mant; + do { + numValue*=10; + } while(--exp>0); + return numValue; + } else { + return -2; + } + } else { + /* reserved */ + return -2; + } + } + + /** + * {@icu} Returns the numeric value for a Unicode code point as defined in the + * Unicode Character Database.

    + *

    A "double" return type is necessary because some numeric values are + * fractions, negative, or too large for int.

    + *

    For characters without any numeric values in the Unicode Character + * Database, this function will return NO_NUMERIC_VALUE.

    + *

    API Change: In release 2.2 and prior, this API has a + * return type int and returns -1 when the argument ch does not have a + * corresponding numeric value. This has been changed to synch with ICU4C + *

    + * This corresponds to the ICU4C function u_getNumericValue. + * @param ch Code point to get the numeric value for. + * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. + * @stable ICU 2.4 + */ + public static double getUnicodeNumericValue(int ch) + { + // equivalent to c version double u_getNumericValue(UChar32 c) + int props = UCharacterProperty.INSTANCE.getProperty(ch); + int ntv = getNumericTypeValue(props); + + if(ntv==NTV_NONE_) { + return NO_NUMERIC_VALUE; + } else if(ntv>4)-12; + int denominator=(ntv&0xf)+1; + return (double)numerator/denominator; + } else if(ntv>5)-14; + int exp=(ntv&0x1f)+2; + numValue=mant; + + /* multiply by 10^exp without math.h */ + while(exp>=4) { + numValue*=10000.; + exp-=4; + } + switch(exp) { + case 3: + numValue*=1000.; + break; + case 2: + numValue*=100.; + break; + case 1: + numValue*=10.; + break; + case 0: + default: + break; + } + + return numValue; + } else { + /* reserved */ + return NO_NUMERIC_VALUE; + } + } + + /** + * Compatibility override of Java deprecated method. This + * method will always remain deprecated. + * Same as java.lang.Character.isSpace(). + * @param ch the code point + * @return true if the code point is a space character as + * defined by java.lang.Character.isSpace. + * @deprecated ICU 3.4 (Java) + */ + public static boolean isSpace(int ch) { + return ch <= 0x20 && + (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); + } + + /** + * Returns a value indicating a code point's Unicode category. + * Up-to-date Unicode implementation of java.lang.Character.getType() + * except for the above mentioned code points that had their category + * changed.
    + * Return results are constants from the interface + * UCharacterCategory
    + * NOTE: the UCharacterCategory values are not compatible with + * those returned by java.lang.Character.getType. UCharacterCategory values + * match the ones used in ICU4C, while java.lang.Character type + * values, though similar, skip the value 17.

    + * @param ch code point whose type is to be determined + * @return category which is a value of UCharacterCategory + * @stable ICU 2.1 + */ + public static int getType(int ch) + { + return getProperty(ch) & UCharacterProperty.TYPE_MASK; + } + + /** + * Determines if a code point has a defined meaning in the up-to-date + * Unicode standard. + * E.g. supplementary code points though allocated space are not defined in + * Unicode yet.
    + * Up-to-date Unicode implementation of java.lang.Character.isDefined() + * @param ch code point to be determined if it is defined in the most + * current version of Unicode + * @return true if this code point is defined in unicode + * @stable ICU 2.1 + */ + public static boolean isDefined(int ch) + { + return getType(ch) != 0; + } + + /** + * Determines if a code point is a Java digit. + *
    This method observes the semantics of + * java.lang.Character.isDigit(). It returns true for decimal + * digits only. + *
    Semantic Change: In release 1.3.1 and prior, this treated + * numeric letters and other numbers as digits. + * This has been changed to conform to the java semantics. + * @param ch code point to query + * @return true if this code point is a digit + * @stable ICU 2.1 + */ + public static boolean isDigit(int ch) + { + return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; + } + + /** + * Determines if the specified code point is an ISO control character. + * A code point is considered to be an ISO control character if it is in + * the range \u0000 through \u001F or in the range \u007F through + * \u009F.
    + * Up-to-date Unicode implementation of java.lang.Character.isISOControl() + * @param ch code point to determine if it is an ISO control character + * @return true if code point is a ISO control character + * @stable ICU 2.1 + */ + public static boolean isISOControl(int ch) + { + return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && + ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); + } + + /** + * Determines if the specified code point is a letter. + * Up-to-date Unicode implementation of java.lang.Character.isLetter() + * @param ch code point to determine if it is a letter + * @return true if code point is a letter + * @stable ICU 2.1 + */ + public static boolean isLetter(int ch) + { + // if props == 0, it will just fall through and return false + return ((1 << getType(ch)) + & ((1 << UCharacterCategory.UPPERCASE_LETTER) + | (1 << UCharacterCategory.LOWERCASE_LETTER) + | (1 << UCharacterCategory.TITLECASE_LETTER) + | (1 << UCharacterCategory.MODIFIER_LETTER) + | (1 << UCharacterCategory.OTHER_LETTER))) != 0; + } + + /** + * Determines if the specified code point is a letter or digit. + * {@icunote} This method, unlike java.lang.Character does not regard the ascii + * characters 'A' - 'Z' and 'a' - 'z' as digits. + * @param ch code point to determine if it is a letter or a digit + * @return true if code point is a letter or a digit + * @stable ICU 2.1 + */ + public static boolean isLetterOrDigit(int ch) + { + return ((1 << getType(ch)) + & ((1 << UCharacterCategory.UPPERCASE_LETTER) + | (1 << UCharacterCategory.LOWERCASE_LETTER) + | (1 << UCharacterCategory.TITLECASE_LETTER) + | (1 << UCharacterCategory.MODIFIER_LETTER) + | (1 << UCharacterCategory.OTHER_LETTER) + | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; + } + + /** + * Compatibility override of Java deprecated method. This + * method will always remain deprecated. Delegates to + * java.lang.Character.isJavaIdentifierStart. 
+ * @param cp the code point + * @return true if the code point can start a java identifier. + * @deprecated ICU 3.4 (Java) + */ + public static boolean isJavaLetter(int cp) { + return isJavaIdentifierStart(cp); + } + + /** + * Compatibility override of Java deprecated method. This + * method will always remain deprecated. Delegates to + * java.lang.Character.isJavaIdentifierPart. + * @param cp the code point + * @return true if the code point can continue a java identifier. + * @deprecated ICU 3.4 (Java) + */ + public static boolean isJavaLetterOrDigit(int cp) { + return isJavaIdentifierPart(cp); + } + + /** + * Compatibility override of Java method, delegates to + * java.lang.Character.isJavaIdentifierStart. + * @param cp the code point + * @return true if the code point can start a java identifier. + * @stable ICU 3.4 + */ + public static boolean isJavaIdentifierStart(int cp) { + // note, downcast to char for jdk 1.4 compatibility + // NOTE(review): the (char) cast keeps only the low 16 bits of cp, so a + // supplementary code point is tested as an unrelated BMP char; consider + // java.lang.Character.isJavaIdentifierStart(int) -- confirm minimum JDK + return java.lang.Character.isJavaIdentifierStart((char)cp); + } + + /** + * Compatibility override of Java method, delegates to + * java.lang.Character.isJavaIdentifierPart. + * @param cp the code point + * @return true if the code point can continue a java identifier. + * @stable ICU 3.4 + */ + public static boolean isJavaIdentifierPart(int cp) { + // note, downcast to char for jdk 1.4 compatibility + // NOTE(review): the (char) cast keeps only the low 16 bits of cp, so a + // supplementary code point is tested as an unrelated BMP char; consider + // java.lang.Character.isJavaIdentifierPart(int) -- confirm minimum JDK + return java.lang.Character.isJavaIdentifierPart((char)cp); + } + + /** + * Determines if the specified code point is a lowercase character. + * UnicodeData only contains case mappings for code points where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
    For more information about Unicode case mapping + * please refer to the + * Technical report + * #21.
    + * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() + * @param ch code point to determine if it is in lowercase + * @return true if code point is a lowercase character + * @stable ICU 2.1 + */ + public static boolean isLowerCase(int ch) + { + // if props == 0, it will just fall through and return false + return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; + } + + /** + * Determines if the specified code point is a white space character. + * A code point is considered to be a whitespace character if and only + * if it satisfies one of the following criteria: + *
      + *
    • It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not + * also a non-breaking space (\u00A0 or \u2007 or \u202F). + *
    • It is \u0009, HORIZONTAL TABULATION. + *
    • It is \u000A, LINE FEED. + *
    • It is \u000B, VERTICAL TABULATION. + *
    • It is \u000C, FORM FEED. + *
    • It is \u000D, CARRIAGE RETURN. + *
    • It is \u001C, FILE SEPARATOR. + *
    • It is \u001D, GROUP SEPARATOR. + *
    • It is \u001E, RECORD SEPARATOR. + *
    • It is \u001F, UNIT SEPARATOR. + *
    + * + * This API tries to sync with the semantics of Java's + * java.lang.Character.isWhitespace(), but it may not return + * the exact same results because of the Unicode version + * difference. + *

    Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) + * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. + * See http://www.unicode.org/versions/Unicode4.0.1/ + * @param ch code point to determine if it is a white space + * @return true if the specified code point is a white space character + * @stable ICU 2.1 + */ + public static boolean isWhitespace(int ch) + { + // exclude no-break spaces + // if props == 0, it will just fall through and return false + return ((1 << getType(ch)) & + ((1 << UCharacterCategory.SPACE_SEPARATOR) + | (1 << UCharacterCategory.LINE_SEPARATOR) + | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 + && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) + // TAB LF VT FF CR (0x9..0xd) and FS GS RS US (0x1c..0x1f) are control + // characters that are white spaces; NL (U+0085) is not in these ranges. + || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); + } + + /** + * Determines if the specified code point is a Unicode specified space + * character, i.e. if code point is in the category Zs, Zl and Zp. + * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). + * @param ch code point to determine if it is a space + * @return true if the specified code point is a space character + * @stable ICU 2.1 + */ + public static boolean isSpaceChar(int ch) + { + // if props == 0, it will just fall through and return false + return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) + | (1 << UCharacterCategory.LINE_SEPARATOR) + | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) + != 0; + } + + /** + * Determines if the specified code point is a titlecase character. + * UnicodeData only contains case mappings for code points where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
    + * For more information about Unicode case mapping please refer to the + * + * Technical report #21.
    + * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). + * @param ch code point to determine if it is in title case + * @return true if the specified code point is a titlecase character + * @stable ICU 2.1 + */ + public static boolean isTitleCase(int ch) + { + // if props == 0, it will just fall through and return false + return getType(ch) == UCharacterCategory.TITLECASE_LETTER; + } + + /** + * Determines if the specified code point may be any part of a Unicode + * identifier other than the starting character. + * A code point may be part of a Unicode identifier if and only if it is + * one of the following: + *

      + *
    • Lu Uppercase letter + *
    • Ll Lowercase letter + *
    • Lt Titlecase letter + *
    • Lm Modifier letter + *
    • Lo Other letter + *
    • Nl Letter number + *
    • Pc Connecting punctuation character + *
    • Nd decimal number + *
    • Mc Spacing combining mark + *
    • Mn Non-spacing mark + *
    • Cf formatting code + *
    + * Up-to-date Unicode implementation of + * java.lang.Character.isUnicodeIdentifierPart().
    + * See UTR #8. + * @param ch code point to determine if is can be part of a Unicode + * identifier + * @return true if code point is any character belonging a unicode + * identifier suffix after the first character + * @stable ICU 2.1 + */ + public static boolean isUnicodeIdentifierPart(int ch) + { + // if props == 0, it will just fall through and return false + // cat == format + return ((1 << getType(ch)) + & ((1 << UCharacterCategory.UPPERCASE_LETTER) + | (1 << UCharacterCategory.LOWERCASE_LETTER) + | (1 << UCharacterCategory.TITLECASE_LETTER) + | (1 << UCharacterCategory.MODIFIER_LETTER) + | (1 << UCharacterCategory.OTHER_LETTER) + | (1 << UCharacterCategory.LETTER_NUMBER) + | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) + | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) + | (1 << UCharacterCategory.COMBINING_SPACING_MARK) + | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 + || isIdentifierIgnorable(ch); + } + + /** + * Determines if the specified code point is permissible as the first + * character in a Unicode identifier. + * A code point may start a Unicode identifier if it is of type either + *
      + *
    • Lu Uppercase letter + *
    • Ll Lowercase letter + *
    • Lt Titlecase letter + *
    • Lm Modifier letter + *
    • Lo Other letter + *
    • Nl Letter number + *
    + * Up-to-date Unicode implementation of + * java.lang.Character.isUnicodeIdentifierStart().
    + * See UTR #8. + * @param ch code point to determine if it can start a Unicode identifier + * @return true if code point is the first character belonging a unicode + * identifier + * @stable ICU 2.1 + */ + public static boolean isUnicodeIdentifierStart(int ch) + { + /*int cat = getType(ch);*/ + // if props == 0, it will just fall through and return false + return ((1 << getType(ch)) + & ((1 << UCharacterCategory.UPPERCASE_LETTER) + | (1 << UCharacterCategory.LOWERCASE_LETTER) + | (1 << UCharacterCategory.TITLECASE_LETTER) + | (1 << UCharacterCategory.MODIFIER_LETTER) + | (1 << UCharacterCategory.OTHER_LETTER) + | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; + } + + /** + * Determines if the specified code point should be regarded as an + * ignorable character in a Java identifier. + * A character is Java-identifier-ignorable if it has the general category + * Cf Formatting Control, or it is a non-Java-whitespace ISO control: + * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.
    + * Up-to-date Unicode implementation of + * java.lang.Character.isIdentifierIgnorable().
    + * See UTR #8. + *

    Note that Unicode just recommends to ignore Cf (format controls). + * @param ch code point to be determined if it can be ignored in a Unicode + * identifier. + * @return true if the code point is ignorable + * @stable ICU 2.1 + */ + public static boolean isIdentifierIgnorable(int ch) + { + // see java.lang.Character.isIdentifierIgnorable() on range of + // ignorable characters. + if (ch <= 0x9f) { + return isISOControl(ch) + && !((ch >= 0x9 && ch <= 0xd) + || (ch >= 0x1c && ch <= 0x1f)); + } + return getType(ch) == UCharacterCategory.FORMAT; + } + + /** + * Determines if the specified code point is an uppercase character. + * UnicodeData only contains case mappings for code point where they are + * one-to-one mappings; it also omits information about context-sensitive + * case mappings.
    + * For language specific case conversion behavior, use + * toUpperCase(locale, str).
    + * For example, the case conversion for dot-less i and dotted I in Turkish, + * or for final sigma in Greek. + * For more information about Unicode case mapping please refer to the + * + * Technical report #21.
    + * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). + * @param ch code point to determine if it is in uppercase + * @return true if the code point is an uppercase character + * @stable ICU 2.1 + */ + public static boolean isUpperCase(int ch) + { + // if props == 0, it will just fall through and return false + return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; + } + + /** + * The given code point is mapped to its lowercase equivalent; if the code + * point has no lowercase equivalent, the code point itself is returned. + * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() + * + *

    This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the case mapping functions + * that take String parameters rather than code points (int). + * See also the User Guide chapter on C/POSIX migration: + * http://www.icu-project.org/userguide/posix.html#case_mappings + * + * @param ch code point whose lowercase equivalent is to be retrieved + * @return the lowercase equivalent code point + * @stable ICU 2.1 + */ + public static int toLowerCase(int ch) { + return UCaseProps.INSTANCE.tolower(ch); + } + + /** + * Converts argument code point and returns a String object representing + * the code point's value in UTF16 format. + * The result is a string whose length is 1 for non-supplementary code + * points, 2 otherwise.
    + * com.ibm.icu.text.UTF16 can be used to parse Strings generated by this + * function.
    + * Up-to-date Unicode implementation of java.lang.Character.toString() + * @param ch code point + * @return string representation of the code point, null if code point is not + * defined in unicode + * @stable ICU 2.1 + */ + public static String toString(int ch) + { + if (ch < MIN_VALUE || ch > MAX_VALUE) { + return null; + } + + if (ch < SUPPLEMENTARY_MIN_VALUE) { + return String.valueOf((char)ch); + } + + StringBuilder result = new StringBuilder(); + result.append(UTF16.getLeadSurrogate(ch)); + result.append(UTF16.getTrailSurrogate(ch)); + return result.toString(); + } + + /** + * Converts the code point argument to titlecase. + * If no titlecase is available, the uppercase is returned. If no uppercase + * is available, the code point itself is returned. + * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() + * + *

    This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the case mapping functions + * that take String parameters rather than code points (int). + * See also the User Guide chapter on C/POSIX migration: + * http://www.icu-project.org/userguide/posix.html#case_mappings + * + * @param ch code point whose title case is to be retrieved + * @return titlecase code point + * @stable ICU 2.1 + */ + public static int toTitleCase(int ch) { + return UCaseProps.INSTANCE.totitle(ch); + } + + /** + * Converts the character argument to uppercase. + * If no uppercase is available, the character itself is returned. + * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() + * + *

    This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the case mapping functions + * that take String parameters rather than code points (int). + * See also the User Guide chapter on C/POSIX migration: + * http://www.icu-project.org/userguide/posix.html#case_mappings + * + * @param ch code point whose uppercase is to be retrieved + * @return uppercase code point + * @stable ICU 2.1 + */ + public static int toUpperCase(int ch) { + return UCaseProps.INSTANCE.toupper(ch); + } + + // extra methods not in java.lang.Character -------------------------- + + /** + * {@icu} Determines if the code point is a supplementary character. + * A code point is a supplementary character if and only if it is greater + * than or equal to SUPPLEMENTARY_MIN_VALUE and less than or equal to + * MAX_VALUE + * @param ch code point to be determined if it is in the supplementary + * plane + * @return true if code point is a supplementary character + * @stable ICU 2.1 + */ + public static boolean isSupplementary(int ch) + { + return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && + ch <= UCharacter.MAX_VALUE; + } + + /** + * {@icu} Determines if the code point is in the BMP plane. + * @param ch code point to be determined if it is not a supplementary + * character + * @return true if code point is not a supplementary character + * @stable ICU 2.1 + */ + public static boolean isBMP(int ch) + { + return (ch >= 0 && ch <= LAST_CHAR_MASK_); + } + + /** + * {@icu} Determines whether the specified code point is a printable character + * according to the Unicode standard.
+ * @param ch code point to be determined if it is printable + * @return true if the code point is a printable character + * @stable ICU 2.1 + */ + public static boolean isPrintable(int ch) + { + int cat = getType(ch); + // if props == 0, it will just fall through and return false + return (cat != UCharacterCategory.UNASSIGNED && + cat != UCharacterCategory.CONTROL && + cat != UCharacterCategory.FORMAT && + cat != UCharacterCategory.PRIVATE_USE && + cat != UCharacterCategory.SURROGATE && + cat != UCharacterCategory.GENERAL_OTHER_TYPES); + } + + /** + * {@icu} Determines whether the specified code point is of base form. + * A code point of base form does not graphically combine with preceding + * characters, and is neither a control nor a format character. + * @param ch code point to be determined if it is of base form + * @return true if the code point is of base form + * @stable ICU 2.1 + */ + public static boolean isBaseForm(int ch) + { + int cat = getType(ch); + // if props == 0, it will just fall through and return false + return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || + cat == UCharacterCategory.OTHER_NUMBER || + cat == UCharacterCategory.LETTER_NUMBER || + cat == UCharacterCategory.UPPERCASE_LETTER || + cat == UCharacterCategory.LOWERCASE_LETTER || + cat == UCharacterCategory.TITLECASE_LETTER || + cat == UCharacterCategory.MODIFIER_LETTER || + cat == UCharacterCategory.OTHER_LETTER || + cat == UCharacterCategory.NON_SPACING_MARK || + cat == UCharacterCategory.ENCLOSING_MARK || + cat == UCharacterCategory.COMBINING_SPACING_MARK; + } + + /** + * {@icu} Returns the Bidirection property of a code point. + * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional + * property.
    + * Result returned belongs to the interface + * UCharacterDirection + * @param ch the code point to be determined its direction + * @return direction constant from UCharacterDirection. + * @stable ICU 2.1 + */ + public static int getDirection(int ch) + { + return UBiDiProps.INSTANCE.getClass(ch); + } + + /** + * Determines whether the code point has the "mirrored" property. + * This property is set for characters that are commonly used in + * Right-To-Left contexts and need to be displayed with a "mirrored" + * glyph. + * @param ch code point whose mirror is to be determined + * @return true if the code point has the "mirrored" property + * @stable ICU 2.1 + */ + public static boolean isMirrored(int ch) + { + return UBiDiProps.INSTANCE.isMirrored(ch); + } + + /** + * {@icu} Maps the specified code point to a "mirror-image" code point. + * For code points with the "mirrored" property, implementations sometimes + * need a "poor man's" mapping to another code point such that the default + * glyph may serve as the mirror-image of the default glyph of the + * specified code point.
    + * This is useful for text conversion to and from codepages with visual + * order, and for displays without glyph selection capabilities. + * @param ch code point whose mirror is to be retrieved + * @return another code point that may serve as a mirror-image substitute, + * or ch itself if there is no such mapping or ch does not have the + * "mirrored" property + * @stable ICU 2.1 + */ + public static int getMirror(int ch) + { + return UBiDiProps.INSTANCE.getMirror(ch); + } + + /** + * {@icu} Returns the combining class of the argument codepoint + * @param ch code point whose combining is to be retrieved + * @return the combining class of the codepoint + * @stable ICU 2.1 + */ + public static int getCombiningClass(int ch) + { + if (ch < MIN_VALUE || ch > MAX_VALUE) { + throw new IllegalArgumentException("Codepoint out of bounds"); + } + Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl; + return impl.getCC(impl.getNorm16(ch)); + } + + /** + * {@icu} A code point is illegal if and only if + *

      + *
    • Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE + *
    • A surrogate value, 0xD800 to 0xDFFF + *
    • Not-a-character, having the form 0x xxFFFF or 0x xxFFFE + *
    + * Note: legal does not mean that it is assigned in this version of Unicode. + * @param ch code point to determine if it is a legal code point by itself + * @return true if and only if legal. + * @stable ICU 2.1 + */ + public static boolean isLegal(int ch) + { + if (ch < MIN_VALUE) { + return false; + } + if (ch < UTF16.SURROGATE_MIN_VALUE) { + return true; + } + if (ch <= UTF16.SURROGATE_MAX_VALUE) { + return false; + } + if (UCharacterUtility.isNonCharacter(ch)) { + return false; + } + return (ch <= MAX_VALUE); + } + + /** + * {@icu} A string is legal iff all its code points are legal. + * A code point is illegal if and only if + *
      + *
    • Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE + *
    • A surrogate value, 0xD800 to 0xDFFF + *
    • Not-a-character, having the form 0x xxFFFF or 0x xxFFFE + *
    + * Note: legal does not mean that it is assigned in this version of Unicode. + * @param str containing code points to examin + * @return true if and only if legal. + * @stable ICU 2.1 + */ + public static boolean isLegal(String str) + { + int size = str.length(); + int codepoint; + for (int i = 0; i < size; i ++) + { + codepoint = UTF16.charAt(str, i); + if (!isLegal(codepoint)) { + return false; + } + if (isSupplementary(codepoint)) { + i ++; + } + } + return true; + } + + /** + * {@icu} Returns the version of Unicode data used. + * @return the unicode version number used + * @stable ICU 2.1 + */ + public static VersionInfo getUnicodeVersion() + { + return UCharacterProperty.INSTANCE.m_unicodeVersion_; + } + + /** + * {@icu} Returns the most current Unicode name of the argument code point, or + * null if the character is unassigned or outside the range + * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. + *
    + * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name + * @return most current Unicode name + * @stable ICU 2.1 + */ + public static String getName(int ch) + { + return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); + } + + /** + * {@icu} Returns the names for each of the characters in a string + * @param s string to format + * @param separator string to go between names + * @return string of names + * @stable ICU 3.8 + */ + public static String getName(String s, String separator) { + if (s.length() == 1) { // handle common case + return getName(s.charAt(0)); + } + int cp; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(s,i); + if (i != 0) sb.append(separator); + sb.append(UCharacter.getName(cp)); + } + return sb.toString(); + } + + /** + * {@icu} Returns the earlier version 1.0 Unicode name of the argument code + * point, or null if the character is unassigned or outside the range + * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. + *
    + * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name + * @return version 1.0 Unicode name + * @stable ICU 2.1 + */ + public static String getName1_0(int ch) + { + return UCharacterName.INSTANCE.getName(ch, + UCharacterNameChoice.UNICODE_10_CHAR_NAME); + } + + /** + * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and + * getName1_0(int), this method will return a name even for codepoints that + * are not assigned a name in UnicodeData.txt. + *

    + * The names are returned in the following order. + *
      + *
    • Most current Unicode name if there is any + *
    • Unicode 1.0 name if there is any + *
    • Extended name in the form of + * "". E.g. + *
    + * Note calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name + * @return a name for the argument codepoint + * @stable ICU 2.6 + */ + public static String getExtendedName(int ch) { + return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); + } + + /** + * {@icu} Returns the corrected name from NameAliases.txt if there is one. + * Returns null if the character is unassigned or outside the range + * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. + *
    + * Note: calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param ch the code point for which to get the name alias + * @return Unicode name alias, or null + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public static String getNameAlias(int ch) + { + return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); + } + + /** + * {@icu} Returns the ISO 10646 comment for a character. + * The ISO 10646 comment is an informative field in the Unicode Character + * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list. + * + * Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings + * returned for all characters. + * + * @param ch The code point for which to get the ISO comment. + * It must be the case that {@code 0 <= ch <= 0x10ffff}. + * @return The ISO comment, or null if there is no comment for this + * character or if ch is outside UCharacter.MIN_VALUE..UCharacter.MAX_VALUE. + * @stable ICU 2.4 + */ + public static String getISOComment(int ch) + { + if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE) { + return null; + } + + String result = UCharacterName.INSTANCE.getGroupName(ch, + UCharacterNameChoice.ISO_COMMENT_); + return result; + } + + /** + * {@icu}

    Finds a Unicode code point by its most current Unicode name and + * returns its code point value. All Unicode names are in uppercase.

    + * Note: calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name most current Unicode character name whose code point is to + * be returned + * @return code point, or -1 if name is not found + * @stable ICU 2.1 + */ + public static int getCharFromName(String name){ + return UCharacterName.INSTANCE.getCharFromName( + UCharacterNameChoice.UNICODE_CHAR_NAME, name); + } + + /** + * {@icu}

    Find a Unicode character by its version 1.0 Unicode name and return + * its code point value. All Unicode names are in uppercase.

    + * Note: calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name Unicode 1.0 code point name whose code point is to be + * returned + * @return code point, or -1 if name is not found + * @stable ICU 2.1 + */ + public static int getCharFromName1_0(String name){ + return UCharacterName.INSTANCE.getCharFromName( + UCharacterNameChoice.UNICODE_10_CHAR_NAME, name); + } + + /** + * {@icu}

    Find a Unicode character by any of its names and return its code + * point value. All Unicode names are in uppercase. + * Extended names are all lowercase except for numbers and are contained + * within angle brackets.

    + * The names are searched in the following order + *
      + *
    • Most current Unicode name if there is any + *
    • Unicode 1.0 name if there is any + *
    • Extended name in the form of + * "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-fffe&gt; + *
    + * Note: calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name codepoint name (most current, version 1.0, or extended form) + * @return code point associated with the name, or -1 if the name is not + * found. + * @stable ICU 2.6 + */ + public static int getCharFromExtendedName(String name){ + return UCharacterName.INSTANCE.getCharFromName( + UCharacterNameChoice.EXTENDED_CHAR_NAME, name); + } + + /** + * {@icu}

    Find a Unicode character by its corrected name alias and return + * its code point value. All Unicode names are in uppercase.

    + * Note: calling any methods related to code point names, e.g. get*Name*() + * incurs a one-time initialisation cost to construct the name tables. + * @param name Unicode name alias whose code point is to be returned + * @return code point, or -1 if name is not found + * @draft ICU 4.4 + * @provisional This API might change or be removed in a future release. + */ + public static int getCharFromNameAlias(String name){ + return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); + } + + /** + * {@icu} Returns the Unicode name for a given property, as given in the + * Unicode database file PropertyAliases.txt. Most properties + * have more than one name. The nameChoice determines which one + * is returned. + * + * In addition, this function maps the property + * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / + * "General_Category_Mask". These names are not in + * PropertyAliases.txt. + * + * @param property UProperty selector. + * + * @param nameChoice UProperty.NameChoice selector for which name + * to get. All properties have a long name. Most have a short + * name, but some do not. Unicode allows for additional names; if + * present these will be returned by UProperty.NameChoice.LONG + i, + * where i=1, 2,... + * + * @return a name, or null if Unicode explicitly defines no name + * ("n/a") for a given property/nameChoice. If a given nameChoice + * throws an exception, then all larger values of nameChoice will + * throw an exception. If null is returned for a given + * nameChoice, then other nameChoice values may return non-null + * results. + * + * @exception IllegalArgumentException thrown if property or + * nameChoice is invalid. 
+ * + * @see UProperty + * @see UProperty.NameChoice + * @stable ICU 2.4 + */ + public static String getPropertyName(int property, + int nameChoice) { + return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); + } + + /** + * {@icu} Returns the UProperty selector for a given property name, as + * specified in the Unicode database file PropertyAliases.txt. + * Short, long, and any other variants are recognized. + * + * In addition, this function maps the synthetic names "gcm" / + * "General_Category_Mask" to the property + * UProperty.GENERAL_CATEGORY_MASK. These names are not in + * PropertyAliases.txt. + * + * @param propertyAlias the property name to be matched. The name + * is compared using "loose matching" as described in + * PropertyAliases.txt. + * + * @return a UProperty enum. + * + * @exception IllegalArgumentException thrown if propertyAlias + * is not recognized. + * + * @see UProperty + * @stable ICU 2.4 + */ + public static int getPropertyEnum(String propertyAlias) { + int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); + if (propEnum == UProperty.UNDEFINED) { + throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); + } + return propEnum; + } + + /** + * {@icu} Returns the Unicode name for a given property value, as given in + * the Unicode database file PropertyValueAliases.txt. Most + * values have more than one name. The nameChoice determines + * which one is returned. + * + * Note: Some of the names in PropertyValueAliases.txt can only be + * retrieved using UProperty.GENERAL_CATEGORY_MASK, not + * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. 
+ * UProperty.INT_START <= property < UProperty.INT_LIMIT or + * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or + * UProperty.MASK_START <= property < UProperty.MASK_LIMIT. + * If out of range, null is returned. + * + * @param value selector for a value for the given property. In + * general, valid values range from 0 up to some maximum. There + * are a few exceptions: (1.) UProperty.BLOCK values begin at the + * non-zero value BASIC_LATIN.getID(). (2.) + * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous + * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values + * are mask values produced by left-shifting 1 by + * UCharacter.getType(). This allows grouped categories such as + * [:L:] to be represented. Mask values are non-contiguous. + * + * @param nameChoice UProperty.NameChoice selector for which name + * to get. All values have a long name. Most have a short name, + * but some do not. Unicode allows for additional names; if + * present these will be returned by UProperty.NameChoice.LONG + i, + * where i=1, 2,... + * + * @return a name, or null if Unicode explicitly defines no name + * ("n/a") for a given property/value/nameChoice. If a given + * nameChoice throws an exception, then all larger values of + * nameChoice will throw an exception. If null is returned for a + * given nameChoice, then other nameChoice values may return + * non-null results. + * + * @exception IllegalArgumentException thrown if property, value, + * or nameChoice is invalid. 
+ * + * @see UProperty + * @see UProperty.NameChoice + * @stable ICU 2.4 + */ + public static String getPropertyValueName(int property, + int value, + int nameChoice) + { + if ((property == UProperty.CANONICAL_COMBINING_CLASS + || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS + || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) + && value >= UCharacter.getIntPropertyMinValue( + UProperty.CANONICAL_COMBINING_CLASS) + && value <= UCharacter.getIntPropertyMaxValue( + UProperty.CANONICAL_COMBINING_CLASS) + && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { + // this is hard coded for the valid cc: an in-range value without an alias yields null instead of an exception, + // because PropertyValueAliases.txt does not contain all of them + try { + return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, + nameChoice); + } + catch (IllegalArgumentException e) { + return null; + } + } + return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); + } + + /** + * {@icu} Returns the property value integer for a given value name, as + * specified in the Unicode database file PropertyValueAliases.txt. + * Short, long, and any other variants are recognized. + * + * Note: Some of the names in PropertyValueAliases.txt will only be + * recognized with UProperty.GENERAL_CATEGORY_MASK, not + * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * UProperty.INT_START <= property < UProperty.INT_LIMIT or + * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or + * UProperty.MASK_START <= property < UProperty.MASK_LIMIT. + * Only these properties can be enumerated. + * + * @param valueAlias the value name to be matched. The name is + * compared using "loose matching" as described in + * PropertyValueAliases.txt. + * + * @return a value integer. 
Note: UProperty.GENERAL_CATEGORY_MASK + * values are mask values produced by left-shifting 1 by + * UCharacter.getType(). This allows grouped categories such as + * [:L:] to be represented. + * + * @see UProperty + * @throws IllegalArgumentException if property is not a valid UProperty + * selector, or if the lookup of valueAlias yields UProperty.UNDEFINED + * @stable ICU 2.4 + */ + public static int getPropertyValueEnum(int property, String valueAlias) { + int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); + if (propEnum == UProperty.UNDEFINED) { + throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); + } + return propEnum; + } + + /** + * {@icu} Returns a code point corresponding to the two UTF16 characters. + * @param lead the lead char + * @param trail the trail char + * @return code point if surrogate characters are valid. + * @exception IllegalArgumentException thrown when argument characters do + * not form a valid codepoint + * @stable ICU 2.1 + */ + public static int getCodePoint(char lead, char trail) + { + if (UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail)) { + return UCharacterProperty.getRawSupplementary(lead, trail); + } + throw new IllegalArgumentException("Illegal surrogate characters"); + } + + /** + * {@icu} Returns the code point corresponding to the UTF16 character. + * @param char16 the UTF16 character + * @return code point if argument is a valid character. + * @exception IllegalArgumentException thrown when char16 is not a valid + * codepoint + * @stable ICU 2.1 + */ + public static int getCodePoint(char char16) + { + if (UCharacter.isLegal(char16)) { + return char16; + } + throw new IllegalArgumentException("Illegal codepoint"); + } + + /** + * Implementation of UCaseProps.ContextIterator, iterates over a String. + * See ustrcase.c/utf16_caseContextIterator(). + */ + private static class StringContextIterator implements UCaseProps.ContextIterator { + /** + * Constructor. + * @param s String to iterate over. 
+ */ + StringContextIterator(String s) { + this.s=s; + limit=s.length(); + cpStart=cpLimit=index=0; + dir=0; + } + + /** + * Set the iteration limit for nextCaseMapCP() to an index within the string. + * If the limit parameter is negative or past the string, then the + * string length is restored as the iteration limit. + * + * This limit does not affect the next() function which always + * iterates to the very end of the string. + * + * @param lim The iteration limit. + */ + public void setLimit(int lim) { + if(0<=lim && lim<=s.length()) { + limit=lim; + } else { + limit=s.length(); + } + } + + /** + * Move to the iteration limit without fetching code points up to there. + */ + public void moveToLimit() { + cpStart=cpLimit=limit; + } + + /** + * Iterate forward through the string to fetch the next code point + * to be case-mapped, and set the context indexes for it. + * Performance optimization, to save on function calls and redundant + * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex(). + * + * When the iteration limit is reached (and -1 is returned), + * getCPStart() will be at the iteration limit. + * + * Iteration with next() does not affect the position for nextCaseMapCP(). + * + * @return The next code point to be case-mapped, or <0 when the iteration is done. + */ + public int nextCaseMapCP() { + cpStart=cpLimit; + if(cpLimit0) { + /* reset for forward iteration */ + dir=1; + index=cpLimit; + } else if(direction<0) { + /* reset for backward iteration */ + dir=-1; + index=cpStart; + } else { + // not a valid direction + dir=0; + index=0; + } + } + + public int next() { + int c; + + if(dir>0 && index0) { + c=UTF16.charAt(s, index-1); + index-=UTF16.getCharCount(c); + return c; + } + return -1; + } + + // variables + protected String s; + protected int index, limit, cpStart, cpLimit; + protected int dir; // 0=initial state >0=forward <0=backward + } + + /** + * Returns the uppercase version of the argument string. 
+ * Casing is dependent on the default locale and context-sensitive. + * @param str source string to be converted + * @return uppercase version of the argument string + * @stable ICU 2.1 + */ + public static String toUpperCase(String str) + { + return toUpperCase(ULocale.getDefault(), str); + } + + /** + * Returns the lowercase version of the argument string. + * Casing is dependent on the default locale and context-sensitive. + * @param str source string to be converted + * @return lowercase version of the argument string + * @stable ICU 2.1 + */ + public static String toLowerCase(String str) + { + return toLowerCase(ULocale.getDefault(), str); + } + + /** + *

    Returns the titlecase version of the argument string.

    + *

    Position for titlecasing is determined by the argument break + * iterator, hence the user can customize his break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *

    + *

    Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.

    + *

    Casing is dependent on the default locale and context-sensitive

    + * @param str source string to be converted + * @param breakiter break iterator to determine the positions in which + * the character should be title cased. + * @return titlecase version of the argument string + * @stable ICU 2.6 + */ + public static String toTitleCase(String str, BreakIterator breakiter) + { + return toTitleCase(ULocale.getDefault(), str, breakiter); + } + + /** + * Returns the uppercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive. + * @param locale which string is to be converted in + * @param str source string to be converted + * @return uppercase version of the argument string + * @stable ICU 2.1 + */ + public static String toUpperCase(Locale locale, String str) + { + return toUpperCase(ULocale.forLocale(locale), str); + } + + /** + * Returns the uppercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive. + * @param locale which string is to be converted in; null selects the default locale + * @param str source string to be converted + * @return uppercase version of the argument string + * @stable ICU 3.2 + */ + public static String toUpperCase(ULocale locale, String str) { + StringContextIterator iter = new StringContextIterator(str); + StringBuffer result = new StringBuffer(str.length()); + int[] locCache = new int[1]; + int c; + + if (locale == null) { + locale = ULocale.getDefault(); + } + locCache[0]=0; + + while((c=iter.nextCaseMapCP())>=0) { + c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache); + + /* decode the result */ + if(c<0) { + /* (not) original code point: a negative result is undone with ~ and appended below */ + c=~c; + } else if(c<=UCaseProps.MAX_STRING_LENGTH) { + /* mapping already appended to result */ + continue; + /* } else { append single-code point mapping */ + } + if(c<=0xffff) { + result.append((char)c); + } else { + UTF16.append(result, c); + } + } + return result.toString(); + } + + /** + * Returns the lowercase version of the argument string. 
+ * Casing is dependent on the argument locale and context-sensitive. + * @param locale which string is to be converted in + * @param str source string to be converted + * @return lowercase version of the argument string + * @stable ICU 2.1 + */ + public static String toLowerCase(Locale locale, String str) + { + return toLowerCase(ULocale.forLocale(locale), str); + } + + /** + * Returns the lowercase version of the argument string. + * Casing is dependent on the argument locale and context-sensitive. + * @param locale which string is to be converted in; null selects the default locale + * @param str source string to be converted + * @return lowercase version of the argument string + * @stable ICU 3.2 + */ + public static String toLowerCase(ULocale locale, String str) { + StringContextIterator iter = new StringContextIterator(str); + StringBuffer result = new StringBuffer(str.length()); + int[] locCache = new int[1]; + int c; + + if (locale == null) { + locale = ULocale.getDefault(); + } + locCache[0]=0; + + while((c=iter.nextCaseMapCP())>=0) { + c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache); + + /* decode the result */ + if(c<0) { + /* (not) original code point: a negative result is undone with ~ and appended below */ + c=~c; + } else if(c<=UCaseProps.MAX_STRING_LENGTH) { + /* mapping already appended to result */ + continue; + /* } else { append single-code point mapping */ + } + if(c<=0xffff) { + result.append((char)c); + } else { + UTF16.append(result, c); + } + } + return result.toString(); + } + + /** + *

    Returns the titlecase version of the argument string.

    + *

    Position for titlecasing is determined by the argument break + * iterator, hence the user can customize his break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *

    + *

    Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.

    + *

    Casing is dependent on the argument locale and context-sensitive

    + * @param locale which string is to be converted in + * @param str source string to be converted + * @param breakiter break iterator to determine the positions in which + * the character should be title cased. + * @return titlecase version of the argument string + * @stable ICU 2.6 + */ + public static String toTitleCase(Locale locale, String str, + BreakIterator breakiter) + { + return toTitleCase(ULocale.forLocale(locale), str, breakiter); + } + + /** + *

    Returns the titlecase version of the argument string.

    + *

    Position for titlecasing is determined by the argument break + * iterator, hence the user can customize his break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *

    + *

    Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.

    + *

    Casing is dependent on the argument locale and context-sensitive

    + * @param locale which string is to be converted in + * @param str source string to be converted + * @param titleIter break iterator to determine the positions in which + * the character should be title cased. + * @return titlecase version of the argument string (same as passing options 0) + * @stable ICU 3.2 + */ + public static String toTitleCase(ULocale locale, String str, + BreakIterator titleIter) { + return toTitleCase(locale, str, titleIter, 0); + } + + /** + *

    Returns the titlecase version of the argument string.

    + *

    Position for titlecasing is determined by the argument break + * iterator, hence the user can customize his break iterator for + * a specialized titlecasing. In this case only the forward iteration + * needs to be implemented. + * If the break iterator passed in is null, the default Unicode algorithm + * will be used to determine the titlecase positions. + *

    + *

    Only positions returned by the break iterator will be title cased, + * character in between the positions will all be in lower case.

    + *

    Casing is dependent on the argument locale and context-sensitive

    + * @param locale which string is to be converted in; null selects the default locale + * @param str source string to be converted + * @param titleIter break iterator to determine the positions in which + * the character should be title cased. + * @param options bit set to modify the titlecasing operation + * @return titlecase version of the argument string + * @stable ICU 3.8 + * @see #TITLECASE_NO_LOWERCASE + * @see #TITLECASE_NO_BREAK_ADJUSTMENT + */ + public static String toTitleCase(ULocale locale, String str, + BreakIterator titleIter, + int options) { + StringContextIterator iter = new StringContextIterator(str); + StringBuffer result = new StringBuffer(str.length()); + int[] locCache = new int[1]; + int c, nc, srcLength = str.length(); + + if (locale == null) { + locale = ULocale.getDefault(); + } + locCache[0]=0; + + if(titleIter == null) { + titleIter = BreakIterator.getWordInstance(locale); + } + titleIter.setText(str); + + int prev, titleStart, index; + boolean isFirstIndex; + boolean isDutch = locale.getLanguage().equals("nl"); + boolean FirstIJ = true; + + /* set up local variables */ + prev=0; + isFirstIndex=true; + + /* titlecasing loop */ + while(prevsrcLength) { + index=srcLength; + } + + /* + * Unicode 4 & 5 section 3.13 Default Case Operations: + * + * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex + * #29, "Text Boundaries." Between each pair of word boundaries, find the first + * cased character F. If F exists, map F to default_title(F); then map each + * subsequent character C to default_lower(C). 
+ * + * In this implementation, segment [prev..index[ into 3 parts: + * a) uncased characters (copy as-is) [prev..titleStart[ + * b) first case letter (titlecase) [titleStart..titleLimit[ + * c) subsequent characters (lowercase) [titleLimit..index[ + */ + if(prev=0 + && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {} + titleStart=iter.getCPStart(); + if(prev=0) { + if (isDutch && (nc == 0x004A || nc == 0x006A) + && (c == 0x0049) && (FirstIJ == true)) { + c = 0x004A; /* J */ + FirstIJ = false; + } else { + /* Normal operation: Lowercase the rest of the word. */ + c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale, + locCache); + } + } else { + break; + } + } + } + } + + prev=index; + } + return result.toString(); + } + + /** + * {@icu} The given character is mapped to its case folding equivalent according + * to UnicodeData.txt and CaseFolding.txt; if the character has no case + * folding equivalent, the character itself is returned. + * + *

    This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They can map to a result string with a different length as appropriate. + * Full case mappings are applied by the case mapping functions + * that take String parameters rather than code points (int). + * See also the User Guide chapter on C/POSIX migration: + * http://www.icu-project.org/userguide/posix.html#case_mappings + * + * @param ch the character to be converted + * @param defaultmapping Indicates if all mappings defined in + * CaseFolding.txt are to be used, otherwise the + * mappings for dotted I and dotless i marked with + * 'I' in CaseFolding.txt will be skipped. + * @return the case folding equivalent of the character, if + * any; otherwise the character itself. + * @see #foldCase(String, boolean) + * @stable ICU 2.1 + */ + public static int foldCase(int ch, boolean defaultmapping) { + return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); + } + + /** + * {@icu} The given string is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; if any character has no case + * folding equivalent, the character itself is returned. + * "Full", multiple-code point case folding mappings are returned here. + * For "simple" single-code point mappings use the API + * foldCase(int ch, boolean defaultmapping). + * @param str the String to be converted + * @param defaultmapping Indicates if all mappings defined in + * CaseFolding.txt are to be used, otherwise the + * mappings for dotted I and dotless i marked with + * 'I' in CaseFolding.txt will be skipped. + * @return the case folding equivalent of the string; characters + * without a case folding equivalent are kept as they are. 
+ * @see #foldCase(int, boolean) + * @stable ICU 2.1 + */ + public static String foldCase(String str, boolean defaultmapping) { + return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); + } + + /** + * {@icu} Option value for case folding: use default mappings defined in + * CaseFolding.txt. + * @stable ICU 2.6 + */ + public static final int FOLD_CASE_DEFAULT = 0x0000; + /** + * {@icu} Option value for case folding: exclude the mappings for dotted I + * and dotless i marked with 'I' in CaseFolding.txt. + * @stable ICU 2.6 + */ + public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; + + /** + * {@icu} The given character is mapped to its case folding equivalent according + * to UnicodeData.txt and CaseFolding.txt; if the character has no case + * folding equivalent, the character itself is returned. + * + *

    This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They can map to a result string with a different length as appropriate. + * Full case mappings are applied by the case mapping functions + * that take String parameters rather than code points (int). + * See also the User Guide chapter on C/POSIX migration: + * http://www.icu-project.org/userguide/posix.html#case_mappings + * + * @param ch the character to be converted + * @param options A bit set for special processing. Currently the recognised options + * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT + * @return the case folding equivalent of the character, if any; otherwise the + * character itself. + * @see #foldCase(String, int) + * @stable ICU 2.6 + */ + public static int foldCase(int ch, int options) { + return UCaseProps.INSTANCE.fold(ch, options); + } + + /** + * {@icu} The given string is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; if any character has no case + * folding equivalent, the character itself is returned. + * "Full", multiple-code point case folding mappings are returned here. + * For "simple" single-code point mappings use the API + * foldCase(int ch, int options). + * @param str the String to be converted + * @param options A bit set for special processing. Currently the recognised options + * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT + * @return the case folding equivalent of the string; characters without a + * case folding equivalent are kept as they are. 
+ * @see #foldCase(int, boolean) + * @stable ICU 2.6 + */ + public static final String foldCase(String str, int options) { + StringBuffer result = new StringBuffer(str.length()); + int c, i, length; + + length = str.length(); + for(i=0; i This returns the value of Han 'numeric' code points, + * including those for zero, ten, hundred, thousand, ten thousand, + * and hundred million. + * This includes both the standard and 'checkwriting' + * characters, the 'big circle' zero character, and the standard + * zero character. + * @param ch code point to query + * @return value if it is a Han 'numeric character,' otherwise return -1. + * @stable ICU 2.4 + */ + public static int getHanNumericValue(int ch) + { + // TODO: Are these all covered by Unicode numeric value data? + switch(ch) + { + case IDEOGRAPHIC_NUMBER_ZERO_ : + case CJK_IDEOGRAPH_COMPLEX_ZERO_ : + return 0; // Han Zero + case CJK_IDEOGRAPH_FIRST_ : + case CJK_IDEOGRAPH_COMPLEX_ONE_ : + return 1; // Han One + case CJK_IDEOGRAPH_SECOND_ : + case CJK_IDEOGRAPH_COMPLEX_TWO_ : + return 2; // Han Two + case CJK_IDEOGRAPH_THIRD_ : + case CJK_IDEOGRAPH_COMPLEX_THREE_ : + return 3; // Han Three + case CJK_IDEOGRAPH_FOURTH_ : + case CJK_IDEOGRAPH_COMPLEX_FOUR_ : + return 4; // Han Four + case CJK_IDEOGRAPH_FIFTH_ : + case CJK_IDEOGRAPH_COMPLEX_FIVE_ : + return 5; // Han Five + case CJK_IDEOGRAPH_SIXTH_ : + case CJK_IDEOGRAPH_COMPLEX_SIX_ : + return 6; // Han Six + case CJK_IDEOGRAPH_SEVENTH_ : + case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : + return 7; // Han Seven + case CJK_IDEOGRAPH_EIGHTH_ : + case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : + return 8; // Han Eight + case CJK_IDEOGRAPH_NINETH_ : + case CJK_IDEOGRAPH_COMPLEX_NINE_ : + return 9; // Han Nine + case CJK_IDEOGRAPH_TEN_ : + case CJK_IDEOGRAPH_COMPLEX_TEN_ : + return 10; + case CJK_IDEOGRAPH_HUNDRED_ : + case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : + return 100; + case CJK_IDEOGRAPH_THOUSAND_ : + case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : + return 1000; + case CJK_IDEOGRAPH_TEN_THOUSAND_ 
: + return 10000; + case CJK_IDEOGRAPH_HUNDRED_MILLION_ : + return 100000000; + } + return -1; // no value + } + + /** + * {@icu}

    Returns an iterator for character types, iterating over codepoints.

    + * Example of use:
    + *
    +     * RangeValueIterator iterator = UCharacter.getTypeIterator();
    +     * RangeValueIterator.Element element = new RangeValueIterator.Element();
    +     * while (iterator.next(element)) {
    +     *     System.out.println("Codepoint \\u" +
    +     *                        Integer.toHexString(element.start) +
    +     *                        " to codepoint \\u" +
    +     *                        Integer.toHexString(element.limit - 1) +
    +     *                        " has the character type " +
    +     *                        element.value);
    +     * }
    +     * 
    + * @return an iterator whose elements give a range of code points (start, limit) and the character type (value) shared by that range + * @stable ICU 2.6 + */ + public static RangeValueIterator getTypeIterator() + { + return new UCharacterTypeIterator(UCharacterProperty.INSTANCE); + } + + /** + * {@icu}

    Returns an iterator for character names, iterating over codepoints.

    + *

    This API only gets the iterator for the modern, most up-to-date + * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or + * for extended names use getExtendedNameIterator().

    + * Example of use:
    + *
    +     * ValueIterator iterator = UCharacter.getNameIterator();
    +     * ValueIterator.Element element = new ValueIterator.Element();
    +     * while (iterator.next(element)) {
    +     *     System.out.println("Codepoint \\u" +
    +     *                        Integer.toHexString(element.codepoint) +
    +     *                        " has the name " + (String)element.value);
    +     * }
    +     * 
    + *

    The maximal range which the name iterator iterates is from + * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.

    + * @return an iterator whose elements give a code point (codepoint) and its modern Unicode name (value) + * @stable ICU 2.6 + */ + public static ValueIterator getNameIterator(){ + return new UCharacterNameIterator(UCharacterName.INSTANCE, + UCharacterNameChoice.UNICODE_CHAR_NAME); + } + + /** + * {@icu}

    Returns an iterator for character names, iterating over codepoints.

    + *

    This API only gets the iterator for the older 1.0 Unicode names. + * For modern, most up-to-date Unicode names use getNameIterator() or + * for extended names use getExtendedNameIterator().

    + * Example of use:
    + *
    +     * ValueIterator iterator = UCharacter.getName1_0Iterator();
    +     * ValueIterator.Element element = new ValueIterator.Element();
    +     * while (iterator.next(element)) {
    +     *     System.out.println("Codepoint \\u" +
    +     *                        Integer.toHexString(element.codepoint) +
    +     *                        " has the name " + (String)element.value);
    +     * }
    +     * 
    + *

    The maximal range which the name iterator iterates is from + * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. + * @return an iterator whose elements give a code point (codepoint) and its Unicode 1.0 name (value) + * @stable ICU 2.6 + */ + public static ValueIterator getName1_0Iterator(){ + return new UCharacterNameIterator(UCharacterName.INSTANCE, + UCharacterNameChoice.UNICODE_10_CHAR_NAME); + } + + /** + * {@icu}

    Returns an iterator for character names, iterating over codepoints.

    + *

    This API only gets the iterator for the extended names. + * For modern, most up-to-date Unicode names use getNameIterator() or + * for older 1.0 Unicode names use getName1_0Iterator().

    + * Example of use:
    + *
    +     * ValueIterator iterator = UCharacter.getExtendedNameIterator();
    +     * ValueIterator.Element element = new ValueIterator.Element();
    +     * while (iterator.next(element)) {
    +     *     System.out.println("Codepoint \\u" +
    +     *                        Integer.toHexString(element.codepoint) +
    +     *                        " has the name " + (String)element.value);
    +     * }
    +     * 
    + *

    The maximal range which the name iterator iterates is from + * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. + * @return an iterator whose elements give a code point (codepoint) and its extended name (value) + * @stable ICU 2.6 + */ + public static ValueIterator getExtendedNameIterator(){ + return new UCharacterNameIterator(UCharacterName.INSTANCE, + UCharacterNameChoice.EXTENDED_CHAR_NAME); + } + + /** + * {@icu} Returns the "age" of the code point.

    + *

    The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) or assigned a + * character. + *

    This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters.

    + *

    The data is from the UCD file DerivedAge.txt.

    + * @param ch The code point. + * @return the Unicode version number + * @stable ICU 2.6 + */ + public static VersionInfo getAge(int ch) + { + if (ch < MIN_VALUE || ch > MAX_VALUE) { + throw new IllegalArgumentException("Codepoint out of bounds"); + } + return UCharacterProperty.INSTANCE.getAge(ch); + } + + /** + * {@icu}

    Check a binary Unicode property for a code point.

    + *

    Unicode, especially in version 3.2, defines many more properties + * than the original set in UnicodeData.txt.

    + *

    This API is intended to reflect Unicode properties as defined in + * the Unicode Character Database (UCD) and Unicode Technical Reports + * (UTR).

    + *

    For details about the properties see + * http://www.unicode.org/.

    + *

    For names of Unicode properties see the UCD file + * PropertyAliases.txt.

    + *

    This API throws an IllegalArgumentException if the codepoint is less than UCharacter.MIN_VALUE or greater than UCharacter.MAX_VALUE.

    + *

    Important: If ICU is built with UCD files from Unicode versions + * below 3.2, then properties marked with "new" are not or + * not fully available.

    + * @param ch code point to test. + * @param property selector constant from com.ibm.icu.lang.UProperty, + * identifies which binary property to check. + * @return true or false according to the binary Unicode property value + * for ch. Also false if property is out of bounds or if the + * Unicode version does not have data for the property at all, or + * not for this code point. + * @see com.ibm.icu.lang.UProperty + * @stable ICU 2.6 + */ + public static boolean hasBinaryProperty(int ch, int property) + { + if (ch < MIN_VALUE || ch > MAX_VALUE) { + throw new IllegalArgumentException("Codepoint out of bounds"); + } + return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); + } + + /** + * {@icu}

    Check if a code point has the Alphabetic Unicode property.

    + *

    Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).

    + *

    Different from UCharacter.isLetter(ch)!

    + * @stable ICU 2.6 + * @param ch codepoint to be tested + * @return the result of hasBinaryProperty(ch, UProperty.ALPHABETIC) + */ + public static boolean isUAlphabetic(int ch) + { + return hasBinaryProperty(ch, UProperty.ALPHABETIC); + } + + /** + * {@icu}

    Check if a code point has the Lowercase Unicode property.

    + *

    Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).

    + *

    This is different from UCharacter.isLowerCase(ch)!

    + * @param ch codepoint to be tested + * @return the result of hasBinaryProperty(ch, UProperty.LOWERCASE) + * @stable ICU 2.6 + */ + public static boolean isULowercase(int ch) + { + return hasBinaryProperty(ch, UProperty.LOWERCASE); + } + + /** + * {@icu}

    Check if a code point has the Uppercase Unicode property.

    + *

    Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).

    + *

    This is different from UCharacter.isUpperCase(ch)!

    + * @param ch codepoint to be tested + * @return the result of hasBinaryProperty(ch, UProperty.UPPERCASE) + * @stable ICU 2.6 + */ + public static boolean isUUppercase(int ch) + { + return hasBinaryProperty(ch, UProperty.UPPERCASE); + } + + /** + * {@icu}

    Check if a code point has the White_Space Unicode property.

    + *

    Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).

    + *

    This is different from both UCharacter.isSpace(ch) and + * UCharacter.isWhitespace(ch)!

    + * @param ch codepoint to be tested + * @return the result of hasBinaryProperty(ch, UProperty.WHITE_SPACE) + * @stable ICU 2.6 + */ + public static boolean isUWhiteSpace(int ch) + { + return hasBinaryProperty(ch, UProperty.WHITE_SPACE); + } + + /* + * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. + * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. + * Each entry is annotated with the Grapheme_Cluster_Break value it stands for. + */ + private static final int /* UHangulSyllableType */ gcbToHst[]={ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */ + HangulSyllableType.LEADING_JAMO, /* U_GCB_L */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */ + HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */ + HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */ + HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */ + HangulSyllableType.VOWEL_JAMO /* U_GCB_V */ + /* + * Omit GCB values beyond what we need for hst. + * The code below checks for the array length. + */ + }; + + /** + * {@icu}

    Returns the property value for a Unicode property type of a code point. + * Also returns binary and mask property values.

    + *

    Unicode, especially in version 3.2, defines many more properties than + * the original set in UnicodeData.txt.

    + *

    The properties APIs are intended to reflect Unicode properties as + * defined in the Unicode Character Database (UCD) and Unicode Technical + * Reports (UTR). For details about the properties see + * http://www.unicode.org/.

    + *

    For names of Unicode properties see the UCD file PropertyAliases.txt. + *

    + *
    +     * Sample usage:
    +     * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
    +     * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
    +     * boolean b = (ideo == 1) ? true : false;
    +     * 
    + * @param ch code point to test. + * @param type UProperty selector constant, identifies which binary + * property to check. Must be + * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or + * UProperty.INT_START <= type < UProperty.INT_LIMIT or + * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. + * @return numeric value that is directly the property value or, + * for enumerated properties, corresponds to the numeric value of + * the enumerated constant of the respective property value + * enumeration type (cast to enum type if necessary). + * Returns 0 or 1 (for false / true) for binary Unicode properties. + * Returns a bit-mask for mask properties. + * Returns 0 if 'type' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code + * point. + * @see UProperty + * @see #hasBinaryProperty + * @see #getIntPropertyMinValue + * @see #getIntPropertyMaxValue + * @see #getUnicodeVersion + * @stable ICU 2.4 + */ + public static int getIntPropertyValue(int ch, int type) + { + if (type < UProperty.BINARY_START) { + return 0; // undefined + } + else if (type < UProperty.BINARY_LIMIT) { + return hasBinaryProperty(ch, type) ? 
1 : 0; + } + else if (type < UProperty.INT_START) { + return 0; // undefined + } + else if (type < UProperty.INT_LIMIT) { + switch (type) { + case UProperty.BIDI_CLASS: + return getDirection(ch); + case UProperty.BLOCK: + return UnicodeBlock.idOf(ch); + case UProperty.CANONICAL_COMBINING_CLASS: + return getCombiningClass(ch); + case UProperty.DECOMPOSITION_TYPE: + return UCharacterProperty.INSTANCE.getAdditional(ch, 2) + & DECOMPOSITION_TYPE_MASK_; + case UProperty.EAST_ASIAN_WIDTH: + return (UCharacterProperty.INSTANCE.getAdditional(ch, 0) + & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_; + case UProperty.GENERAL_CATEGORY: + return getType(ch); + case UProperty.JOINING_GROUP: + return UBiDiProps.INSTANCE.getJoiningGroup(ch); + case UProperty.JOINING_TYPE: + return UBiDiProps.INSTANCE.getJoiningType(ch); + case UProperty.LINE_BREAK: + return (UCharacterProperty.INSTANCE + .getAdditional(ch, LB_VWORD)& LB_MASK)>>LB_SHIFT; + case UProperty.NUMERIC_TYPE: + return ntvGetType(getNumericTypeValue(UCharacterProperty + .INSTANCE.getProperty(ch))); + case UProperty.SCRIPT: + return UScript.getScript(ch); + case UProperty.HANGUL_SYLLABLE_TYPE: { + /* see comments on gcbToHst[] above */ + int gcb=(UCharacterProperty.INSTANCE.getAdditional(ch, 2)&GCB_MASK)>>GCB_SHIFT; + if(gcb>8; + case UProperty.TRAIL_CANONICAL_COMBINING_CLASS: + return Norm2AllModes.getNFCInstance().impl.getFCDTrie().get(ch)&0xff; + case UProperty.GRAPHEME_CLUSTER_BREAK: + return (UCharacterProperty.INSTANCE.getAdditional(ch, 2)& GCB_MASK)>>GCB_SHIFT; + case UProperty.SENTENCE_BREAK: + return (UCharacterProperty.INSTANCE.getAdditional(ch, 2)& SB_MASK)>>SB_SHIFT; + case UProperty.WORD_BREAK: + return (UCharacterProperty.INSTANCE.getAdditional(ch, 2)& WB_MASK)>>WB_SHIFT; + /* Values were tested for variable type from Integer.MIN_VALUE + * to UProperty.INT_LIMIT and none would not reach the default case. 
+ */ + ///CLOVER:OFF + default: return 0; /* undefined */ + ///CLOVER:ON + } + } else if (type == UProperty.GENERAL_CATEGORY_MASK) { + return UCharacterProperty.getMask(getType(ch)); + } + return 0; // undefined + } + /** + * {@icu} Returns a string version of the property value. + * @param propertyEnum The property enum value. + * @param codepoint The codepoint value. + * @param nameChoice The choice of the name. + * @return value as string + * @internal + * @deprecated This API is ICU internal only. + */ + ///CLOVER:OFF + public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { + if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || + (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { + return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), + nameChoice); + } + if (propertyEnum == UProperty.NUMERIC_VALUE) { + return String.valueOf(getUnicodeNumericValue(codepoint)); + } + // otherwise must be string property + switch (propertyEnum) { + case UProperty.AGE: return getAge(codepoint).toString(); + case UProperty.ISO_COMMENT: return getISOComment(codepoint); + case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint)); + case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true); + case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint)); + case UProperty.NAME: return getName(codepoint); + case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true)); + case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint)); + case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint)); + case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint)); + case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null); + case UProperty.UNICODE_1_NAME: return 
getName1_0(codepoint); + case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint)); + } + throw new IllegalArgumentException("Illegal Property Enum"); + } + ///CLOVER:ON + + /** + * {@icu} Returns the minimum value for an integer/binary Unicode property type. + * Can be used together with UCharacter.getIntPropertyMaxValue(int) + * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. + * The current implementation returns 0 for every value of type. + * @param type UProperty selector constant, identifies which binary + * property to check. Must be + * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or + * UProperty.INT_START <= type < UProperty.INT_LIMIT. + * @return Minimum value returned by UCharacter.getIntPropertyValue(int, int) + * for a Unicode property. 0 if the property + * selector 'type' is out of range. + * @see UProperty + * @see #hasBinaryProperty + * @see #getUnicodeVersion + * @see #getIntPropertyMaxValue + * @see #getIntPropertyValue + * @stable ICU 2.4 + */ + public static int getIntPropertyMinValue(int type){ + + return 0; // undefined; and: all other properties have a minimum value of 0 + } + + + /** + * {@icu} Returns the maximum value for an integer/binary Unicode property. + * Can be used together with UCharacter.getIntPropertyMinValue(int) + * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. + * Examples for min/max values (for Unicode 3.2): + *
      + *
    • UProperty.BIDI_CLASS: 0/18 + * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) + *
    • UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) + *
    • UProperty.IDEOGRAPHIC: 0/1 (false/true) + *
    + * For undefined UProperty constant values, min/max values will be 0/-1. + * @param type UProperty selector constant, identifies which binary + * property to check. Must be + * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or + * UProperty.INT_START <= type < UProperty.INT_LIMIT. + * @return Maximum value returned by u_getIntPropertyValue for a Unicode + * property. <= 0 if the property selector 'type' is out of range. + * @see UProperty + * @see #hasBinaryProperty + * @see #getUnicodeVersion + * @see #getIntPropertyMaxValue + * @see #getIntPropertyValue + * @stable ICU 2.4 + */ + public static int getIntPropertyMaxValue(int type) + { + if (type < UProperty.BINARY_START) { + return -1; // undefined + } + else if (type < UProperty.BINARY_LIMIT) { + return 1; // maximum TRUE for all binary properties + } + else if (type < UProperty.INT_START) { + return -1; // undefined + } + else if (type < UProperty.INT_LIMIT) { + switch (type) { + case UProperty.BIDI_CLASS: + case UProperty.JOINING_GROUP: + case UProperty.JOINING_TYPE: + return UBiDiProps.INSTANCE.getMaxValue(type); + case UProperty.BLOCK: + return (UCharacterProperty.INSTANCE.getMaxValues(0) & BLOCK_MASK_) + >> BLOCK_SHIFT_; + case UProperty.CANONICAL_COMBINING_CLASS: + case UProperty.LEAD_CANONICAL_COMBINING_CLASS: + case UProperty.TRAIL_CANONICAL_COMBINING_CLASS: + return 0xff; // TODO do we need to be more precise, + // getting the actual maximum? 
+ case UProperty.DECOMPOSITION_TYPE: + return UCharacterProperty.INSTANCE.getMaxValues(2) & DECOMPOSITION_TYPE_MASK_; + case UProperty.EAST_ASIAN_WIDTH: + return (UCharacterProperty.INSTANCE.getMaxValues(0) & EAST_ASIAN_MASK_) + >> EAST_ASIAN_SHIFT_; + case UProperty.GENERAL_CATEGORY: + return UCharacterCategory.CHAR_CATEGORY_COUNT - 1; + case UProperty.LINE_BREAK: + return (UCharacterProperty.INSTANCE.getMaxValues(LB_VWORD) & LB_MASK) + >> LB_SHIFT; + case UProperty.NUMERIC_TYPE: + return NumericType.COUNT - 1; + case UProperty.SCRIPT: + return UCharacterProperty.INSTANCE.getMaxValues(0) & SCRIPT_MASK_; + case UProperty.HANGUL_SYLLABLE_TYPE: + return HangulSyllableType.COUNT-1; + case UProperty.NFD_QUICK_CHECK: + case UProperty.NFKD_QUICK_CHECK: + return 1; // YES -- these are never "maybe", only "no" or "yes" + case UProperty.NFC_QUICK_CHECK: + case UProperty.NFKC_QUICK_CHECK: + return 2; // MAYBE + case UProperty.GRAPHEME_CLUSTER_BREAK: + return (UCharacterProperty.INSTANCE.getMaxValues(2) & GCB_MASK) >> GCB_SHIFT; + case UProperty.SENTENCE_BREAK: + return (UCharacterProperty.INSTANCE.getMaxValues(2) & SB_MASK) >> SB_SHIFT; + case UProperty.WORD_BREAK: + return (UCharacterProperty.INSTANCE.getMaxValues(2) & WB_MASK) >> WB_SHIFT; + /* Values were tested for variable type from Integer.MIN_VALUE + * to UProperty.INT_LIMIT and none would not reach the default case. + */ + ///CLOVER:OFF + default: return -1; // undefined + ///CLOVER:ON + } + } + return -1; // undefined + } + + /** + * Provide the java.lang.Character forDigit API, for convenience. + * @stable ICU 3.0 + */ + public static char forDigit(int digit, int radix) { + return java.lang.Character.forDigit(digit, radix); + } + + // JDK 1.5 API coverage + + /** + * Cover the JDK 1.5 API, for convenience. + * @see UTF16#LEAD_SURROGATE_MIN_VALUE + * @stable ICU 3.0 + */ + public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. 
+ * @see UTF16#LEAD_SURROGATE_MAX_VALUE + * @stable ICU 3.0 + */ + public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.TRAIL_SURROGATE_MIN_VALUE. + * @see UTF16#TRAIL_SURROGATE_MIN_VALUE + * @stable ICU 3.0 + */ + public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.TRAIL_SURROGATE_MAX_VALUE. + * @see UTF16#TRAIL_SURROGATE_MAX_VALUE + * @stable ICU 3.0 + */ + public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.SURROGATE_MIN_VALUE. + * @see UTF16#SURROGATE_MIN_VALUE + * @stable ICU 3.0 + */ + public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.SURROGATE_MAX_VALUE. + * @see UTF16#SURROGATE_MAX_VALUE + * @stable ICU 3.0 + */ + public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.SUPPLEMENTARY_MIN_VALUE. + * @see UTF16#SUPPLEMENTARY_MIN_VALUE + * @stable ICU 3.0 + */ + public static final int MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.CODEPOINT_MAX_VALUE. + * @see UTF16#CODEPOINT_MAX_VALUE + * @stable ICU 3.0 + */ + public static final int MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * Same value as UTF16.CODEPOINT_MIN_VALUE. + * @see UTF16#CODEPOINT_MIN_VALUE + * @stable ICU 3.0 + */ + public static final int MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE; + + /** + * Cover the JDK 1.5 API, for convenience. + * @param cp the code point to check + * @return true if cp is between 0 and MAX_CODE_POINT, inclusive + * @stable ICU 3.0 + */ + public static final boolean isValidCodePoint(int cp) { + return cp >= 0 && cp <= MAX_CODE_POINT; + } + + /** + * Cover the JDK 1.5 API, for convenience. 
+ * @param cp the code point to check + * @return true if cp is a supplementary code point + * @stable ICU 3.0 + */ + public static final boolean isSupplementaryCodePoint(int cp) { + return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE + && cp <= UTF16.CODEPOINT_MAX_VALUE; + } + + /** + * Cover the JDK 1.5 API, for convenience. + * @param ch the char to check + * @return true if ch is a high (lead) surrogate + * @stable ICU 3.0 + */ + public static boolean isHighSurrogate(char ch) { + return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; + } + + /** + * Cover the JDK 1.5 API, for convenience. + * @param ch the char to check + * @return true if ch is a low (trail) surrogate + * @stable ICU 3.0 + */ + public static boolean isLowSurrogate(char ch) { + return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return true if the chars + * form a valid surrogate pair. + * @param high the high (lead) char + * @param low the low (trail) char + * @return true if high, low form a surrogate pair + * @stable ICU 3.0 + */ + public static final boolean isSurrogatePair(char high, char low) { + return isHighSurrogate(high) && isLowSurrogate(low); + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the number of chars needed + * to represent the code point. This does not check the + * code point for validity. + * @param cp the code point to check + * @return the number of chars needed to represent the code point + * @see UTF16#getCharCount + * @stable ICU 3.0 + */ + public static int charCount(int cp) { + return UTF16.getCharCount(cp); + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point represented by + * the characters. This does not check the surrogate pair for validity. 
+ * @param high the high (lead) surrogate + * @param low the low (trail) surrogate + * @return the code point formed by the surrogate pair, as computed by + * UCharacterProperty.getRawSupplementary(high, low) + * @stable ICU 3.0 + */ + public static final int toCodePoint(char high, char low) { + return UCharacterProperty.getRawSupplementary(high, low); + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point at index. + *
    Note: the semantics of this API is different from the related UTF16 + * API. This examines only the characters at index and index+1. + * @param seq the characters to check + * @param index the index of the first or only char forming the code point + * @return the code point at the index + * @stable ICU 3.0 + */ + public static final int codePointAt(CharSequence seq, int index) { + char c1 = seq.charAt(index++); + if (isHighSurrogate(c1)) { + if (index < seq.length()) { + char c2 = seq.charAt(index); + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); + } + } + } + return c1; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point at index. + *
    Note: the semantics of this API is different from the related UTF16 + * API. This examines only the characters at index and index+1. + * @param text the characters to check + * @param index the index of the first or only char forming the code point + * @return the code point at the index + * @stable ICU 3.0 + */ + public static final int codePointAt(char[] text, int index) { + char c1 = text[index++]; + if (isHighSurrogate(c1)) { + if (index < text.length) { + char c2 = text[index]; + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); + } + } + } + return c1; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point at index. + *
    Note: the semantics of this API is different from the related UTF16 + * API. This examines only the characters at index and index+1. + * @param text the characters to check + * @param index the index of the first or only char forming the code point + * @param limit the limit of the valid text + * @return the code point at the index + * @stable ICU 3.0 + */ + public static final int codePointAt(char[] text, int index, int limit) { + if (index >= limit || limit > text.length) { + throw new IndexOutOfBoundsException(); + } + char c1 = text[index++]; + if (isHighSurrogate(c1)) { + if (index < limit) { + char c2 = text[index]; + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); + } + } + } + return c1; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point before index. + *
    Note: the semantics of this API is different from the related UTF16 + * API. This examines only the characters at index-1 and index-2. + * @param seq the characters to check + * @param index the index after the last or only char forming the code point + * @return the code point before the index + * @stable ICU 3.0 + */ + public static final int codePointBefore(CharSequence seq, int index) { + char c2 = seq.charAt(--index); + if (isLowSurrogate(c2)) { + if (index > 0) { + char c1 = seq.charAt(--index); + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); + } + } + } + return c2; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point before index. + *
    Note: the semantics of this API is different from the related UTF16 + * API. This examines only the characters at index-1 and index-2. + * @param text the characters to check + * @param index the index after the last or only char forming the code point + * @return the code point before the index + * @stable ICU 3.0 + */ + public static final int codePointBefore(char[] text, int index) { + char c2 = text[--index]; + if (isLowSurrogate(c2)) { + if (index > 0) { + char c1 = text[--index]; + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); + } + } + } + return c2; + } + + /** + * Cover the JDK 1.5 API, for convenience. Return the code point before index. + *
    Note: the semantics of this API is different from the related UTF16 + * API. This examines only the characters at index-1 and index-2. + * @param text the characters to check + * @param index the index after the last or only char forming the code point + * @param limit the start of the valid text + * @return the code point before the index + * @stable ICU 3.0 + */ + public static final int codePointBefore(char[] text, int index, int limit) { + if (index <= limit || limit < 0) { + throw new IndexOutOfBoundsException(); + } + char c2 = text[--index]; + if (isLowSurrogate(c2)) { + if (index > limit) { + char c1 = text[--index]; + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); + } + } + } + return c2; + } + + /** + * Cover the JDK 1.5 API, for convenience. Writes the chars representing the + * code point into the destination at the given index. + * @param cp the code point to convert + * @param dst the destination array into which to put the char(s) representing the code point + * @param dstIndex the index at which to put the first (or only) char + * @return the count of the number of chars written (1 or 2) + * @throws IllegalArgumentException if cp is not a valid code point + * @stable ICU 3.0 + */ + public static final int toChars(int cp, char[] dst, int dstIndex) { + if (cp >= 0) { + if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { + dst[dstIndex] = (char)cp; + return 1; + } + if (cp <= MAX_CODE_POINT) { + dst[dstIndex] = UTF16.getLeadSurrogate(cp); + dst[dstIndex+1] = UTF16.getTrailSurrogate(cp); + return 2; + } + } + throw new IllegalArgumentException(); + } + + /** + * Cover the JDK 1.5 API, for convenience. Returns a char array + * representing the code point. 
+ * @param cp the code point to convert + * @return an array containing the char(s) representing the code point + * @throws IllegalArgumentException if cp is not a valid code point + * @stable ICU 3.0 + */ + public static final char[] toChars(int cp) { + if (cp >= 0) { + if (cp < MIN_SUPPLEMENTARY_CODE_POINT) { + return new char[] { (char)cp }; + } + if (cp <= MAX_CODE_POINT) { + return new char[] { + UTF16.getLeadSurrogate(cp), + UTF16.getTrailSurrogate(cp) + }; + } + } + throw new IllegalArgumentException(); + } + + /** + * Cover the JDK API, for convenience. Return a byte representing the directionality of + * the character. + * + * {@icunote} Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined + * or out-of-bounds characters. + * + * {@icunote} The return value must be tested using the constants defined in {@link + * UCharacterEnums.ECharacterDirection} since the values are different from the ones + * defined by java.lang.Character. + * @param cp the code point to check + * @return the directionality of the code point + * @see #getDirection + * @stable ICU 3.0 + */ + public static byte getDirectionality(int cp) + { + return (byte)getDirection(cp); + } + + /** + * Cover the JDK API, for convenience. Count the number of code points in the range of text. 
+ * @param text the characters to check + * @param start the start of the range + * @param limit the limit of the range + * @return the number of code points in the range + * @stable ICU 3.0 + */ + public static int codePointCount(CharSequence text, int start, int limit) { + if (start < 0 || limit < start || limit > text.length()) { + throw new IndexOutOfBoundsException("start (" + start + + ") or limit (" + limit + + ") invalid or out of range 0, " + text.length()); + } + + int len = limit - start; + while (limit > start) { + char ch = text.charAt(--limit); + while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { + ch = text.charAt(--limit); + if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { + --len; + break; + } + } + } + return len; + } + + /** + * Cover the JDK API, for convenience. Count the number of code points in the range of text. + * @param text the characters to check + * @param start the start of the range + * @param limit the limit of the range + * @return the number of code points in the range + * @stable ICU 3.0 + */ + public static int codePointCount(char[] text, int start, int limit) { + if (start < 0 || limit < start || limit > text.length) { + throw new IndexOutOfBoundsException("start (" + start + + ") or limit (" + limit + + ") invalid or out of range 0, " + text.length); + } + + int len = limit - start; + while (limit > start) { + char ch = text[--limit]; + while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { + ch = text[--limit]; + if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { + --len; + break; + } + } + } + return len; + } + + /** + * Cover the JDK API, for convenience. Adjust the char index by a code point offset. 
+ * @param text the characters to check + * @param index the index to adjust + * @param codePointOffset the number of code points by which to offset the index; + * a negative value moves the index backward, any other value forward + * @return the adjusted index + * @throws IndexOutOfBoundsException if index is negative or greater than + * text.length() + * @stable ICU 3.0 + */ + public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { + if (index < 0 || index > text.length()) { + throw new IndexOutOfBoundsException("index ( " + index + + ") out of range 0, " + text.length()); + } + + if (codePointOffset < 0) { + while (++codePointOffset <= 0) { + char ch = text.charAt(--index); + while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { + ch = text.charAt(--index); + if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { + if (++codePointOffset > 0) { + return index+1; + } + } + } + } + } else { + int limit = text.length(); + while (--codePointOffset >= 0) { + char ch = text.charAt(index++); + while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { + ch = text.charAt(index++); + if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { + if (--codePointOffset < 0) { + return index-1; + } + } + } + } + } + + return index; + } + + /** + * Cover the JDK API, for convenience. Adjust the char index by a code point offset. 
+ * @param text the characters to check + * @param start the start of the range to check + * @param count the length of the range to check + * @param index the index to adjust + * @param codePointOffset the number of code points by which to offset the index + * @return the adjusted index + * @stable ICU 3.0 + */ + public static int offsetByCodePoints(char[] text, int start, int count, int index, + int codePointOffset) { + int limit = start + count; + if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { + throw new IndexOutOfBoundsException("index ( " + index + + ") out of range " + start + + ", " + limit + + " in array 0, " + text.length); + } + + if (codePointOffset < 0) { + while (++codePointOffset <= 0) { + char ch = text[--index]; + if (index < start) { + throw new IndexOutOfBoundsException("index ( " + index + + ") < start (" + start + + ")"); + } + while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { + ch = text[--index]; + if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { + if (++codePointOffset > 0) { + return index+1; + } + } + } + } + } else { + while (--codePointOffset >= 0) { + char ch = text[index++]; + if (index > limit) { + throw new IndexOutOfBoundsException("index ( " + index + + ") > limit (" + limit + + ")"); + } + while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { + ch = text[index++]; + if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { + if (--codePointOffset < 0) { + return index-1; + } + } + } + } + } + + return index; + } + + // private variables ------------------------------------------------- + + /** + * To get the last character out from a data type + */ + private static final int LAST_CHAR_MASK_ = 0xFFFF; + +// /** +// * To get the last byte out from a data type +// */ +// private static final int LAST_BYTE_MASK_ = 0xFF; +// +// /** +// * Shift 16 bits +// */ +// private static final int SHIFT_16_ = 16; +// +// /** +// * Shift 24 
bits +// */ +// private static final int SHIFT_24_ = 24; +// +// /** +// * Decimal radix +// */ +// private static final int DECIMAL_RADIX_ = 10; + + /** + * No break space code point + */ + private static final int NO_BREAK_SPACE_ = 0xA0; + + /** + * Figure space code point + */ + private static final int FIGURE_SPACE_ = 0x2007; + + /** + * Narrow no break space code point + */ + private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; + + /** + * Ideographic number zero code point + */ + private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; + + /** + * CJK Ideograph, First code point + */ + private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; + + /** + * CJK Ideograph, Second code point + */ + private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; + + /** + * CJK Ideograph, Third code point + */ + private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; + + /** + * CJK Ideograph, Fourth code point + */ + private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56d8; + + /** + * CJK Ideograph, FIFTH code point + */ + private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; + + /** + * CJK Ideograph, Sixth code point + */ + private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; + + /** + * CJK Ideograph, Seventh code point + */ + private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; + + /** + * CJK Ideograph, Eighth code point + */ + private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; + + /** + * CJK Ideograph, Nineth code point + */ + private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; + + /** + * Application Program command code point + */ + private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; + + /** + * Unit separator code point + */ + private static final int UNIT_SEPARATOR_ = 0x001F; + + /** + * Delete code point + */ + private static final int DELETE_ = 0x007F; + /** + * Numeric types and values in the main properties words. 
+ */ + private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6; + private static final int getNumericTypeValue(int props) { + return props >> NUMERIC_TYPE_VALUE_SHIFT_; + } + /* constants for the storage form of numeric types and values */ + private static final int NTV_NONE_ = 0; + private static final int NTV_DECIMAL_START_ = 1; + private static final int NTV_DIGIT_START_ = 11; + private static final int NTV_NUMERIC_START_ = 21; + private static final int NTV_FRACTION_START_ = 0xb0; + private static final int NTV_LARGE_START_ = 0x1e0; + private static final int NTV_RESERVED_START_ = 0x300; + + private static final int ntvGetType(int ntv) { + return + (ntv==NTV_NONE_) ? NumericType.NONE : + (ntv 0x7a && ch < 0xff21) + || ch < 0x41 || (ch > 0x5a && ch < 0x61) + || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) { + return -1; + } + if (ch <= 0x7a) { + // ch >= 0x41 or ch < 0x61 + return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61); + } + // ch >= 0xff21 + if (ch <= 0xff3a) { + return ch + 10 - 0xff21; + } + // ch >= 0xff41 && ch <= 0xff5a + return ch + 10 - 0xff41; + } + + /** + * Returns the property value at the index. + * This is optimized. + * Note this is alittle different from CharTrie the index m_trieData_ + * is never negative. + * This is a duplicate of UCharacterProperty.getProperty. For optimization + * purposes, this method calls the trie data directly instead of through + * UCharacterProperty.getProperty. 
+ * @param ch code point whose property value is to be retrieved + * @return property value of code point + * @stable ICU 2.6 + */ + private static final int getProperty(int ch) + { + if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE + || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE + && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) { + // BMP codepoint 0000..D7FF or DC00..FFFF + try { // using try for ch < 0 is faster than using an if statement + return UCharacterProperty.INSTANCE.m_trieData_[ + (UCharacterProperty.INSTANCE.m_trieIndex_[ch >> 5] << 2) + + (ch & 0x1f)]; + } catch (ArrayIndexOutOfBoundsException e) { + // TODO: Tested all the values from 0 ... UTF16.LEAD_SURROGATE_MIN_VALUE + // and UTF16.LEAD_SURROGATE_MAX_VALUE ... UTF16.SUPPLEMENTARY_MIN_VALUE + // but it never results into the catch section of the try-catch + ///CLOVER:OFF + return UCharacterProperty.INSTANCE.m_trieInitialValue_; + ///CLOVER:ON + } + } + if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { + // lead surrogate D800..DBFF + return UCharacterProperty.INSTANCE.m_trieData_[ + (UCharacterProperty.INSTANCE.m_trieIndex_[(0x2800 >> 5) + + (ch >> 5)] << 2) + + (ch & 0x1f)]; + } + // for optimization + if (ch <= UTF16.CODEPOINT_MAX_VALUE) { + // supplementary code point 10000..10FFFF + // look at the construction of supplementary characters + // trail forms the ends of it. + return UCharacterProperty.INSTANCE.m_trie_.getSurrogateValue( + UTF16.getLeadSurrogate(ch), + (char)(ch & 0x3ff)); + } + // return m_dataOffset_ if there is an error, in this case we return + // the default value: m_initialValue_ + // we cannot assume that m_initialValue_ is at offset 0 + // this is for optimization. 
+ return UCharacterProperty.INSTANCE.m_trieInitialValue_; + } +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UCharacterCategory.java b/main/classes/core/src/com/ibm/icu/lang/UCharacterCategory.java new file mode 100644 index 00000000000..60c0a649c3c --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UCharacterCategory.java @@ -0,0 +1,112 @@ +/** + ******************************************************************************* + * Copyright (C) 1996-2004, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.lang; + +import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; + +/** + * Enumerated Unicode category types from the UnicodeData.txt file. + * Used as return results from UCharacter + * Equivalent to icu's UCharCategory. + * Refer to + * Unicode Consortium for more information about UnicodeData.txt. + *

    + * NOTE: the UCharacterCategory values are not compatible with + * those returned by java.lang.Character.getType. UCharacterCategory values + * match the ones used in ICU4C, while java.lang.Character type + * values, though similar, skip the value 17.

    + *

    + * This class is not subclassable + *

    + * @author Syn Wee Quek + * @stable ICU 2.1 + */ + +public final class UCharacterCategory implements ECharacterCategory +{ + /** + * Gets the name of the argument category + * @param category to retrieve name + * @return category name + * @stable ICU 2.1 + */ + public static String toString(int category) + { + switch (category) { + case UPPERCASE_LETTER : + return "Letter, Uppercase"; + case LOWERCASE_LETTER : + return "Letter, Lowercase"; + case TITLECASE_LETTER : + return "Letter, Titlecase"; + case MODIFIER_LETTER : + return "Letter, Modifier"; + case OTHER_LETTER : + return "Letter, Other"; + case NON_SPACING_MARK : + return "Mark, Non-Spacing"; + case ENCLOSING_MARK : + return "Mark, Enclosing"; + case COMBINING_SPACING_MARK : + return "Mark, Spacing Combining"; + case DECIMAL_DIGIT_NUMBER : + return "Number, Decimal Digit"; + case LETTER_NUMBER : + return "Number, Letter"; + case OTHER_NUMBER : + return "Number, Other"; + case SPACE_SEPARATOR : + return "Separator, Space"; + case LINE_SEPARATOR : + return "Separator, Line"; + case PARAGRAPH_SEPARATOR : + return "Separator, Paragraph"; + case CONTROL : + return "Other, Control"; + case FORMAT : + return "Other, Format"; + case PRIVATE_USE : + return "Other, Private Use"; + case SURROGATE : + return "Other, Surrogate"; + case DASH_PUNCTUATION : + return "Punctuation, Dash"; + case START_PUNCTUATION : + return "Punctuation, Open"; + case END_PUNCTUATION : + return "Punctuation, Close"; + case CONNECTOR_PUNCTUATION : + return "Punctuation, Connector"; + case OTHER_PUNCTUATION : + return "Punctuation, Other"; + case MATH_SYMBOL : + return "Symbol, Math"; + case CURRENCY_SYMBOL : + return "Symbol, Currency"; + case MODIFIER_SYMBOL : + return "Symbol, Modifier"; + case OTHER_SYMBOL : + return "Symbol, Other"; + case INITIAL_PUNCTUATION : + return "Punctuation, Initial quote"; + case FINAL_PUNCTUATION : + return "Punctuation, Final quote"; + } + return "Unassigned"; + } + + // private constructor 
----------------------------------------------- + ///CLOVER:OFF + /** + * Private constructor to prevent initialisation + */ + private UCharacterCategory() + { + } + ///CLOVER:ON +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UCharacterDirection.java b/main/classes/core/src/com/ibm/icu/lang/UCharacterDirection.java new file mode 100644 index 00000000000..884c6bec404 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UCharacterDirection.java @@ -0,0 +1,84 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2004, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.lang; + +import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; + +/** + * Enumerated Unicode character linguistic direction constants. + * Used as return results from UCharacter + *

    + * This class is not subclassable + *

    + * @author Syn Wee Quek + * @stable ICU 2.1 + */ + +public final class UCharacterDirection implements ECharacterDirection { + + // private constructor ========================================= + ///CLOVER:OFF + /** + * Private constructor to prevent initialisation + */ + private UCharacterDirection() + { + } + ///CLOVER:ON + + /** + * Gets the name of the argument direction + * @param dir direction type to retrieve name + * @return directional name + * @stable ICU 2.1 + */ + public static String toString(int dir) { + switch(dir) + { + case LEFT_TO_RIGHT : + return "Left-to-Right"; + case RIGHT_TO_LEFT : + return "Right-to-Left"; + case EUROPEAN_NUMBER : + return "European Number"; + case EUROPEAN_NUMBER_SEPARATOR : + return "European Number Separator"; + case EUROPEAN_NUMBER_TERMINATOR : + return "European Number Terminator"; + case ARABIC_NUMBER : + return "Arabic Number"; + case COMMON_NUMBER_SEPARATOR : + return "Common Number Separator"; + case BLOCK_SEPARATOR : + return "Paragraph Separator"; + case SEGMENT_SEPARATOR : + return "Segment Separator"; + case WHITE_SPACE_NEUTRAL : + return "Whitespace"; + case OTHER_NEUTRAL : + return "Other Neutrals"; + case LEFT_TO_RIGHT_EMBEDDING : + return "Left-to-Right Embedding"; + case LEFT_TO_RIGHT_OVERRIDE : + return "Left-to-Right Override"; + case RIGHT_TO_LEFT_ARABIC : + return "Right-to-Left Arabic"; + case RIGHT_TO_LEFT_EMBEDDING : + return "Right-to-Left Embedding"; + case RIGHT_TO_LEFT_OVERRIDE : + return "Right-to-Left Override"; + case POP_DIRECTIONAL_FORMAT : + return "Pop Directional Format"; + case DIR_NON_SPACING_MARK : + return "Non-Spacing Mark"; + case BOUNDARY_NEUTRAL : + return "Boundary Neutral"; + } + return "Unassigned"; + } +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UCharacterEnums.java b/main/classes/core/src/com/ibm/icu/lang/UCharacterEnums.java new file mode 100644 index 00000000000..d1e8c267805 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UCharacterEnums.java @@ 
-0,0 +1,491 @@ +/** + ******************************************************************************* + * Copyright (C) 2004-2007, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +package com.ibm.icu.lang; + +/** + * A container for the different 'enumerated types' used by UCharacter. + * @stable ICU 3.0 + */ +public class UCharacterEnums { + + /** This is just a namespace, it is not instantiatable. */ + ///CLOVER:OFF + private UCharacterEnums() {} + + /** + * 'Enum' for the CharacterCategory constants. These constants are + * compatible in name but not in value with those defined in + * java.lang.Character. + * @see UCharacterCategory + * @stable ICU 3.0 + */ + public static interface ECharacterCategory { + /** + * Unassigned character type + * @stable ICU 2.1 + */ + public static final byte UNASSIGNED = 0; + + /** + * Character type Cn + * Not Assigned (no characters in [UnicodeData.txt] have this property) + * @stable ICU 2.6 + */ + public static final byte GENERAL_OTHER_TYPES = 0; + + /** + * Character type Lu + * @stable ICU 2.1 + */ + public static final byte UPPERCASE_LETTER = 1; + + /** + * Character type Ll + * @stable ICU 2.1 + */ + public static final byte LOWERCASE_LETTER = 2; + + /** + * Character type Lt + * @stable ICU 2.1 + */ + + public static final byte TITLECASE_LETTER = 3; + + /** + * Character type Lm + * @stable ICU 2.1 + */ + public static final byte MODIFIER_LETTER = 4; + + /** + * Character type Lo + * @stable ICU 2.1 + */ + public static final byte OTHER_LETTER = 5; + + /** + * Character type Mn + * @stable ICU 2.1 + */ + public static final byte NON_SPACING_MARK = 6; + + /** + * Character type Me + * @stable ICU 2.1 + */ + public static final byte ENCLOSING_MARK = 7; + + /** + * Character type Mc + * @stable ICU 2.1 + */ + public static final byte COMBINING_SPACING_MARK = 8; + + /** + * Character type Nd + * 
@stable ICU 2.1 + */ + public static final byte DECIMAL_DIGIT_NUMBER = 9; + + /** + * Character type Nl + * @stable ICU 2.1 + */ + public static final byte LETTER_NUMBER = 10; + + /** + * Character type No + * @stable ICU 2.1 + */ + public static final byte OTHER_NUMBER = 11; + + /** + * Character type Zs + * @stable ICU 2.1 + */ + public static final byte SPACE_SEPARATOR = 12; + + /** + * Character type Zl + * @stable ICU 2.1 + */ + public static final byte LINE_SEPARATOR = 13; + + /** + * Character type Zp + * @stable ICU 2.1 + */ + public static final byte PARAGRAPH_SEPARATOR = 14; + + /** + * Character type Cc + * @stable ICU 2.1 + */ + public static final byte CONTROL = 15; + + /** + * Character type Cf + * @stable ICU 2.1 + */ + public static final byte FORMAT = 16; + + /** + * Character type Co + * @stable ICU 2.1 + */ + public static final byte PRIVATE_USE = 17; + + /** + * Character type Cs + * @stable ICU 2.1 + */ + public static final byte SURROGATE = 18; + + /** + * Character type Pd + * @stable ICU 2.1 + */ + public static final byte DASH_PUNCTUATION = 19; + + /** + * Character type Ps + * @stable ICU 2.1 + */ + public static final byte START_PUNCTUATION = 20; + + /** + * Character type Pe + * @stable ICU 2.1 + */ + public static final byte END_PUNCTUATION = 21; + + /** + * Character type Pc + * @stable ICU 2.1 + */ + public static final byte CONNECTOR_PUNCTUATION = 22; + + /** + * Character type Po + * @stable ICU 2.1 + */ + public static final byte OTHER_PUNCTUATION = 23; + + /** + * Character type Sm + * @stable ICU 2.1 + */ + public static final byte MATH_SYMBOL = 24; + + /** + * Character type Sc + * @stable ICU 2.1 + */ + public static final byte CURRENCY_SYMBOL = 25; + + /** + * Character type Sk + * @stable ICU 2.1 + */ + public static final byte MODIFIER_SYMBOL = 26; + + /** + * Character type So + * @stable ICU 2.1 + */ + public static final byte OTHER_SYMBOL = 27; + + /** + * Character type Pi + * @see #INITIAL_QUOTE_PUNCTUATION + * @stable 
ICU 2.1 + */ + public static final byte INITIAL_PUNCTUATION = 28; + + /** + * Character type Pi + * This name is compatible with java.lang.Character's name for this type. + * @see #INITIAL_PUNCTUATION + * @stable ICU 2.8 + */ + public static final byte INITIAL_QUOTE_PUNCTUATION = 28; + + /** + * Character type Pf + * @see #FINAL_QUOTE_PUNCTUATION + * @stable ICU 2.1 + */ + public static final byte FINAL_PUNCTUATION = 29; + + /** + * Character type Pf + * This name is compatible with java.lang.Character's name for this type. + * @see #FINAL_PUNCTUATION + * @stable ICU 2.8 + */ + public static final byte FINAL_QUOTE_PUNCTUATION = 29; + + /** + * Character type count + * @stable ICU 2.1 + */ + public static final byte CHAR_CATEGORY_COUNT = 30; + } + + /** + * 'Enum' for the CharacterDirection constants. There are two sets + * of names, those used in ICU, and those used in the JDK. The + * JDK constants are compatible in name but not in value + * with those defined in java.lang.Character. + * @see UCharacterDirection + * @stable ICU 3.0 + */ + public static interface ECharacterDirection { + /** + * Directional type L + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT = 0; + + /** + * JDK-compatible synonym for LEFT_TO_RIGHT. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = (byte)LEFT_TO_RIGHT; + + /** + * Directional type R + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT = 1; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = (byte)RIGHT_TO_LEFT; + + /** + * Directional type EN + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER = 2; + + /** + * JDK-compatible synonym for EUROPEAN_NUMBER. 
+ * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = (byte)EUROPEAN_NUMBER; + + /** + * Directional type ES + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_SEPARATOR = 3; + + /** + * JDK-compatible synonym for EUROPEAN_NUMBER_SEPARATOR. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = (byte)EUROPEAN_NUMBER_SEPARATOR; + + /** + * Directional type ET + * @stable ICU 2.1 + */ + public static final int EUROPEAN_NUMBER_TERMINATOR = 4; + + /** + * JDK-compatible synonym for EUROPEAN_NUMBER_TERMINATOR. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = (byte)EUROPEAN_NUMBER_TERMINATOR; + + /** + * Directional type AN + * @stable ICU 2.1 + */ + public static final int ARABIC_NUMBER = 5; + + /** + * JDK-compatible synonym for ARABIC_NUMBER. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_ARABIC_NUMBER = (byte)ARABIC_NUMBER; + + /** + * Directional type CS + * @stable ICU 2.1 + */ + public static final int COMMON_NUMBER_SEPARATOR = 6; + + /** + * JDK-compatible synonym for COMMON_NUMBER_SEPARATOR. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = (byte)COMMON_NUMBER_SEPARATOR; + + /** + * Directional type B + * @stable ICU 2.1 + */ + public static final int BLOCK_SEPARATOR = 7; + + /** + * JDK-compatible synonym for BLOCK_SEPARATOR. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = (byte)BLOCK_SEPARATOR; + + /** + * Directional type S + * @stable ICU 2.1 + */ + public static final int SEGMENT_SEPARATOR = 8; + + /** + * JDK-compatible synonym for SEGMENT_SEPARATOR. 
+ * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = (byte)SEGMENT_SEPARATOR; + + /** + * Directional type WS + * @stable ICU 2.1 + */ + public static final int WHITE_SPACE_NEUTRAL = 9; + + /** + * JDK-compatible synonym for WHITE_SPACE_NEUTRAL. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_WHITESPACE = (byte)WHITE_SPACE_NEUTRAL; + + /** + * Directional type ON + * @stable ICU 2.1 + */ + public static final int OTHER_NEUTRAL = 10; + + /** + * JDK-compatible synonym for OTHER_NEUTRAL. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_OTHER_NEUTRALS = (byte)OTHER_NEUTRAL; + + /** + * Directional type LRE + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_EMBEDDING = 11; + + /** + * JDK-compatible synonym for LEFT_TO_RIGHT_EMBEDDING. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = (byte)LEFT_TO_RIGHT_EMBEDDING; + + /** + * Directional type LRO + * @stable ICU 2.1 + */ + public static final int LEFT_TO_RIGHT_OVERRIDE = 12; + + /** + * JDK-compatible synonym for LEFT_TO_RIGHT_OVERRIDE. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = (byte)LEFT_TO_RIGHT_OVERRIDE; + + /** + * Directional type AL + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_ARABIC = 13; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT_ARABIC. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = (byte)RIGHT_TO_LEFT_ARABIC; + + /** + * Directional type RLE + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_EMBEDDING = 14; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT_EMBEDDING. 
+ * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = (byte)RIGHT_TO_LEFT_EMBEDDING; + + /** + * Directional type RLO + * @stable ICU 2.1 + */ + public static final int RIGHT_TO_LEFT_OVERRIDE = 15; + + /** + * JDK-compatible synonym for RIGHT_TO_LEFT_OVERRIDE. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = (byte)RIGHT_TO_LEFT_OVERRIDE; + + /** + * Directional type PDF + * @stable ICU 2.1 + */ + public static final int POP_DIRECTIONAL_FORMAT = 16; + + /** + * JDK-compatible synonym for POP_DIRECTIONAL_FORMAT. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = (byte)POP_DIRECTIONAL_FORMAT; + + /** + * Directional type NSM + * @stable ICU 2.1 + */ + public static final int DIR_NON_SPACING_MARK = 17; + + /** + * JDK-compatible synonym for DIR_NON_SPACING_MARK. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_NONSPACING_MARK = (byte)DIR_NON_SPACING_MARK; + + /** + * Directional type BN + * @stable ICU 2.1 + */ + public static final int BOUNDARY_NEUTRAL = 18; + + /** + * JDK-compatible synonym for BOUNDARY_NEUTRAL. + * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = (byte)BOUNDARY_NEUTRAL; + + /** + * Number of directional types + * @stable ICU 2.1 + */ + public static final int CHAR_DIRECTION_COUNT = 19; + + /** + * Undefined bidirectional character type. Undefined char + * values have undefined directionality in the Unicode specification. 
+ * @stable ICU 3.0 + */ + public static final byte DIRECTIONALITY_UNDEFINED = -1; + } +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java b/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java new file mode 100644 index 00000000000..efcb01bce70 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java @@ -0,0 +1,336 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.lang; + +import com.ibm.icu.impl.UCharacterName; +import com.ibm.icu.impl.UCharacterNameChoice; +import com.ibm.icu.util.ValueIterator; + +/** + *

    Class enabling iteration of the codepoints and their names.

    + *

    Result of each iteration contains a valid codepoint that has a valid + * name.

    + *

    See UCharacter.getNameIterator() for an example of use.

    + * @author synwee + * @since release 2.1, March 5 2002 + */ +class UCharacterNameIterator implements ValueIterator +{ + // public methods ---------------------------------------------------- + + /** + *

    Gets the next result for this iteration and returns + * true if we are not at the end of the iteration, false otherwise.

    + *

    If the returned boolean is false, the contents of element will not + * be updated.

    + * @param element for storing the result codepoint and name + * @return true if we are not at the end of the iteration, false otherwise. + * @see ValueIterator.Element + */ + public boolean next(ValueIterator.Element element) + { + if (m_current_ >= m_limit_) { + return false; + } + + if (m_choice_ == UCharacterNameChoice.UNICODE_CHAR_NAME || + m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME + ) { + int length = m_name_.getAlgorithmLength(); + if (m_algorithmIndex_ < length) { + while (m_algorithmIndex_ < length) { + // find the algorithm range that could contain m_current_ + if (m_algorithmIndex_ < 0 || + m_name_.getAlgorithmEnd(m_algorithmIndex_) < + m_current_) { + m_algorithmIndex_ ++; + } + else { + break; + } + } + + if (m_algorithmIndex_ < length) { + // interleave the data-driven ones with the algorithmic ones + // iterate over all algorithmic ranges; assume that they are + // in ascending order + int start = m_name_.getAlgorithmStart(m_algorithmIndex_); + if (m_current_ < start) { + // this should get rid of those codepoints that are not + // in the algorithmic range + int end = start; + if (m_limit_ <= start) { + end = m_limit_; + } + if (!iterateGroup(element, end)) { + m_current_ ++; + return true; + } + } + /* + // "if (m_current_ >= m_limit_)" would not return true + // because it can never be reached due to: + // 1) It has already been checked earlier + // 2) When m_current_ is updated earlier, it returns true + // 3) No updates on m_limit_*/ + if (m_current_ >= m_limit_) { + // after iterateGroup fails, current codepoint may be + // greater than limit + return false; + } + + element.integer = m_current_; + element.value = m_name_.getAlgorithmName(m_algorithmIndex_, + m_current_); + // reset the group index if we are in the algorithmic names + m_groupIndex_ = -1; + m_current_ ++; + return true; + } + } + } + // enumerate the character names after the last algorithmic range + if (!iterateGroup(element, m_limit_)) { + m_current_ ++; + return 
true; + } + else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + if (!iterateExtended(element, m_limit_)) { + m_current_ ++; + return true; + } + } + + return false; + } + + /** + *

    Resets the iterator to start iterating from the integer index + * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously. + *

    + */ + public void reset() + { + m_current_ = m_start_; + m_groupIndex_ = -1; + m_algorithmIndex_ = -1; + } + + /** + *

    Restricts the range of integers to iterate and resets the iteration + * to begin at the index argument start.

    + *

    If setRange(start, end) is not performed before next(element) is + * called, the iteration will start from the integer index + * UCharacter.MIN_VALUE and end at UCharacter.MAX_VALUE.

    + *

    + * If this range is set outside the range of UCharacter.MIN_VALUE and + * UCharacter.MAX_VALUE, next(element) will always return false. + *

    + * @param start first integer in range to iterate + * @param limit 1 integer after the last integer in range + * @exception IllegalArgumentException thrown when attempting to set an + * illegal range. E.g limit <= start + */ + public void setRange(int start, int limit) + { + if (start >= limit) { + throw new IllegalArgumentException( + "start or limit has to be valid Unicode codepoints and start < limit"); + } + if (start < UCharacter.MIN_VALUE) { + m_start_ = UCharacter.MIN_VALUE; + } + else { + m_start_ = start; + } + + if (limit > UCharacter.MAX_VALUE + 1) { + m_limit_ = UCharacter.MAX_VALUE + 1; + } + else { + m_limit_ = limit; + } + m_current_ = m_start_; + } + + // protected constructor --------------------------------------------- + + /** + * Constructor + * @param name name data + * @param choice name choice from the class + * com.ibm.icu.lang.UCharacterNameChoice + */ + protected UCharacterNameIterator(UCharacterName name, int choice) + { + if(name==null){ + throw new IllegalArgumentException("UCharacterName name argument cannot be null. 
Missing unames.icu?"); + } + m_name_ = name; + // no explicit choice in UCharacter so no checks on choice + m_choice_ = choice; + m_start_ = UCharacter.MIN_VALUE; + m_limit_ = UCharacter.MAX_VALUE + 1; + m_current_ = m_start_; + } + + // private data members --------------------------------------------- + + /** + * Name data + */ + private UCharacterName m_name_; + /** + * Name choice + */ + private int m_choice_; + /** + * Start iteration range + */ + private int m_start_; + /** + * End + 1 iteration range + */ + private int m_limit_; + /** + * Current codepoint + */ + private int m_current_; + /** + * Group index + */ + private int m_groupIndex_ = -1; + /** + * Algorithm index + */ + private int m_algorithmIndex_ = -1; + /** + * Group use + */ + private static char GROUP_OFFSETS_[] = + new char[UCharacterName.LINES_PER_GROUP_ + 1]; + private static char GROUP_LENGTHS_[] = + new char[UCharacterName.LINES_PER_GROUP_ + 1]; + + // private methods -------------------------------------------------- + + /** + * Group name iteration, iterate all the names in the current 32-group and + * returns the first codepoint that has a valid name. 
+ * @param result stores the result codepoint and name + * @param limit last codepoint + 1 in range to search + * @return false if a codepoint with a name is found in group and we can + * bail from further iteration, true to continue on with the + * iteration + */ + private boolean iterateSingleGroup(ValueIterator.Element result, int limit) + { + synchronized(GROUP_OFFSETS_) { + synchronized(GROUP_LENGTHS_) { + int index = m_name_.getGroupLengths(m_groupIndex_, GROUP_OFFSETS_, + GROUP_LENGTHS_); + while (m_current_ < limit) { + int offset = UCharacterName.getGroupOffset(m_current_); + String name = m_name_.getGroupName( + index + GROUP_OFFSETS_[offset], + GROUP_LENGTHS_[offset], m_choice_); + if ((name == null || name.length() == 0) && + m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + name = m_name_.getExtendedName(m_current_); + } + if (name != null && name.length() > 0) { + result.integer = m_current_; + result.value = name; + return false; + } + ++ m_current_; + } + } + } + return true; + } + + /** + * Group name iteration, iterate all the names in the current 32-group and + * returns the first codepoint that has a valid name. 
+ * @param result stores the result codepoint and name + * @param limit last codepoint + 1 in range to search + * @return false if a codepoint with a name is found in group and we can + * bail from further iteration, true to continue on with the + * iteration + */ + private boolean iterateGroup(ValueIterator.Element result, int limit) + { + if (m_groupIndex_ < 0) { + m_groupIndex_ = m_name_.getGroup(m_current_); + } + + while (m_groupIndex_ < m_name_.m_groupcount_ && + m_current_ < limit) { + // iterate till the last group or the last codepoint + int startMSB = UCharacterName.getCodepointMSB(m_current_); + int gMSB = m_name_.getGroupMSB(m_groupIndex_); // can be -1 + if (startMSB == gMSB) { + if (startMSB == UCharacterName.getCodepointMSB(limit - 1)) { + // if start and limit - 1 are in the same group, then enumerate + // only in that one + return iterateSingleGroup(result, limit); + } + // enumerate characters in the partial start group + // if (m_name_.getGroupOffset(m_current_) != 0) { + if (!iterateSingleGroup(result, + UCharacterName.getGroupLimit(gMSB))) { + return false; + } + ++ m_groupIndex_; // continue with the next group + } + else if (startMSB > gMSB) { + // make sure that we start enumerating with the first group + // after start + m_groupIndex_ ++; + } + else { + int gMIN = UCharacterName.getGroupMin(gMSB); + if (gMIN > limit) { + gMIN = limit; + } + if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + if (!iterateExtended(result, gMIN)) { + return false; + } + } + m_current_ = gMIN; + } + } + + return true; + } + + /** + * Iterate extended names. 
+ * @param result stores the result codepoint and name + * @param limit last codepoint + 1 in range to search + * @return false if a codepoint with a name is found and we can + * bail from further iteration, true to continue on with the + * iteration (this will always be false for valid codepoints) + */ + private boolean iterateExtended(ValueIterator.Element result, + int limit) + { + while (m_current_ < limit) { + String name = m_name_.getExtendedOr10Name(m_current_); + if (name != null && name.length() > 0) { + result.integer = m_current_; + result.value = name; + return false; /* m_current_ is left on the codepoint just reported */ + } + ++ m_current_; + } + return true; /* reached limit without finding a name */ + } +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UCharacterTypeIterator.java b/main/classes/core/src/com/ibm/icu/lang/UCharacterTypeIterator.java new file mode 100644 index 00000000000..6025a989a42 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UCharacterTypeIterator.java @@ -0,0 +1,62 @@ +/* +****************************************************************************** +* Copyright (C) 1996-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +****************************************************************************** +*/ + +package com.ibm.icu.lang; + +import com.ibm.icu.impl.TrieIterator; +import com.ibm.icu.impl.UCharacterProperty; + +/** + * Class enabling iteration of the codepoints according to their types. + * Result of each iteration contains the interval of codepoints that have + * the same type. + * Example of use:
    + *
    + * RangeValueIterator iterator = UCharacter.getTypeIterator();
    + * RangeValueIterator.Element element = new RangeValueIterator.Element();
    + * while (iterator.next(element)) {
    + *     System.out.println("Codepoint \\u" + 
    + *                        Integer.toHexString(element.start) + 
    + *                        " to codepoint \\u" +
    + *                        Integer.toHexString(element.limit - 1) + 
    + *                        " has the character type " + 
    + *                        element.value);
    + * }
    + * 
    + * @author synwee + * @see com.ibm.icu.impl.TrieIterator + * @since release 2.1, Jan 24 2002 + */ +class UCharacterTypeIterator extends TrieIterator +{ + // protected constructor --------------------------------------------- + + /** + * Creates a type iterator over the trie of the given character properties. + * @param property the Unicode character properties whose trie (m_trie_) is iterated + */ + protected UCharacterTypeIterator(UCharacterProperty property) + { + super(property.m_trie_); + } + + // protected methods ---------------------------------------------- + + /** + * Called by nextElement() to extract a 32 bit value from a trie value + * used for comparison. + * This override keeps only the bits selected by + * UCharacterProperty.TYPE_MASK, so that trie values are compared by + * character type alone; all other bits are discarded. + * @param value a value from the trie + * @return value masked with UCharacterProperty.TYPE_MASK + */ + protected int extract(int value) + { + return value & UCharacterProperty.TYPE_MASK; + } +} \ No newline at end of file diff --git a/main/classes/core/src/com/ibm/icu/lang/UProperty.java b/main/classes/core/src/com/ibm/icu/lang/UProperty.java new file mode 100644 index 00000000000..c6931b0f8e1 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UProperty.java @@ -0,0 +1,878 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2010, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.lang; + +/** + *

    Selection constants for Unicode properties.

    + *

    These constants are used in functions like + * UCharacter.hasBinaryProperty(int) to select one of the Unicode properties. + *

    + *

    The properties APIs are intended to reflect Unicode properties as + * defined in the Unicode Character Database (UCD) and Unicode Technical + * Reports (UTR).

    + *

    For details about the properties see + * http://www.unicode.org.

    + *

    For names of Unicode properties see the UCD file PropertyAliases.txt. + *

    + *

    Important: If ICU is built with UCD files from Unicode versions below + * 3.2, then properties marked with "new" are not or not fully + * available. Check UCharacter.getUnicodeVersion() to be sure.

    + * @author Syn Wee Quek + * @stable ICU 2.6 + * @see com.ibm.icu.lang.UCharacter + */ +public interface UProperty +{ + // public data member -------------------------------------------------- + + /** + * Special value indicating undefined property. + * @internal + * @deprecated This API is ICU internal only. + */ + public static final int UNDEFINED = -1; + + /** + *

    Binary property Alphabetic.

    + *

    Property for UCharacter.isUAlphabetic(), different from the property + * in UCharacter.isalpha().

    + *

    Lu + Ll + Lt + Lm + Lo + Nl + Other_Alphabetic.

    + * @stable ICU 2.6 + */ + public static final int ALPHABETIC = 0; + + /** + * First constant for binary Unicode properties. + * @stable ICU 2.6 + */ + public static final int BINARY_START = ALPHABETIC; + + /** + * Binary property ASCII_Hex_Digit (0-9 A-F a-f). + * @stable ICU 2.6 + */ + public static final int ASCII_HEX_DIGIT = 1; + + /** + *

    Binary property Bidi_Control.

    + *

    Format controls which have specific functions in the Bidi Algorithm. + *

    + * @stable ICU 2.6 + */ + public static final int BIDI_CONTROL = 2; + + /** + *

    Binary property Bidi_Mirrored.

    + *

    Characters that may change display in RTL text.

    + *

    Property for UCharacter.isMirrored().

    + *

    See Bidi Algorithm; UTR 9.

    + * @stable ICU 2.6 + */ + public static final int BIDI_MIRRORED = 3; + + /** + *

    Binary property Dash.

    + *

    Variations of dashes.

    + * @stable ICU 2.6 + */ + public static final int DASH = 4; + + /** + *

    Binary property Default_Ignorable_Code_Point (new). + *

    + *

    Property that indicates codepoint is ignorable in most processing. + *

    + *

    Codepoints (2060..206F, FFF0..FFFB, E0000..E0FFF) + + * Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)

    + * @stable ICU 2.6 + */ + public static final int DEFAULT_IGNORABLE_CODE_POINT = 5; + + /** + *

    Binary property Deprecated (new).

    + *

    The usage of deprecated characters is strongly discouraged.

    + * @stable ICU 2.6 + */ + public static final int DEPRECATED = 6; + + /** + *

    Binary property Diacritic.

    + *

    Characters that linguistically modify the meaning of another + * character to which they apply.

    + * @stable ICU 2.6 + */ + public static final int DIACRITIC = 7; + + /** + *

    Binary property Extender.

    + *

    Extend the value or shape of a preceding alphabetic character, e.g. + * length and iteration marks.

    + * @stable ICU 2.6 + */ + public static final int EXTENDER = 8; + + /** + *

    Binary property Full_Composition_Exclusion.

    + *

    CompositionExclusions.txt + Singleton Decompositions + + * Non-Starter Decompositions.

    + * @stable ICU 2.6 + */ + public static final int FULL_COMPOSITION_EXCLUSION = 9; + + /** + *

    Binary property Grapheme_Base (new).

    + *

    For programmatic determination of grapheme cluster boundaries. + * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ

    + * @stable ICU 2.6 + */ + public static final int GRAPHEME_BASE = 10; + + /** + *

    Binary property Grapheme_Extend (new).

    + *

    For programmatic determination of grapheme cluster boundaries.

    + *

    Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ

    + * @stable ICU 2.6 + */ + public static final int GRAPHEME_EXTEND = 11; + + /** + *

    Binary property Grapheme_Link (new).

    + *

    For programmatic determination of grapheme cluster boundaries.

    + * @stable ICU 2.6 + */ + public static final int GRAPHEME_LINK = 12; + + /** + *

    Binary property Hex_Digit.

    + *

    Characters commonly used for hexadecimal numbers.

    + * @stable ICU 2.6 + */ + public static final int HEX_DIGIT = 13; + + /** + *

    Binary property Hyphen.

    + *

    Dashes used to mark connections between pieces of words, plus the + * Katakana middle dot.

    + * @stable ICU 2.6 + */ + public static final int HYPHEN = 14; + + /** + *

    Binary property ID_Continue.

    + *

    Characters that can continue an identifier.

    + *

    ID_Start+Mn+Mc+Nd+Pc

    + * @stable ICU 2.6 + */ + public static final int ID_CONTINUE = 15; + + /** + *

    Binary property ID_Start.

    + *

    Characters that can start an identifier.

    + *

    Lu+Ll+Lt+Lm+Lo+Nl

    + * @stable ICU 2.6 + */ + public static final int ID_START = 16; + + /** + *

    Binary property Ideographic.

    + *

    CJKV ideographs.

    + * @stable ICU 2.6 + */ + public static final int IDEOGRAPHIC = 17; + + /** + *

    Binary property IDS_Binary_Operator (new).

    + *

    For programmatic determination of Ideographic Description Sequences. + *

    + * @stable ICU 2.6 + */ + public static final int IDS_BINARY_OPERATOR = 18; + + /** + *

    Binary property IDS_Trinary_Operator (new).

    + * + * @stable ICU 2.6 + */ + public static final int IDS_TRINARY_OPERATOR = 19; + + /** + *

    Binary property Join_Control.

    + *

    Format controls for cursive joining and ligation.

    + * @stable ICU 2.6 + */ + public static final int JOIN_CONTROL = 20; + + /** + *

    Binary property Logical_Order_Exception (new).

    + *

    Characters that do not use logical order and require special + * handling in most processing.

    + * @stable ICU 2.6 + */ + public static final int LOGICAL_ORDER_EXCEPTION = 21; + + /** + *

    Binary property Lowercase.

    + *

    Same as UCharacter.isULowercase(), different from + * UCharacter.islower().

    + *

    Ll+Other_Lowercase

    + * @stable ICU 2.6 + */ + public static final int LOWERCASE = 22; + + /**

    Binary property Math.

    + *

    Sm+Other_Math

    + * @stable ICU 2.6 + */ + public static final int MATH = 23; + + /** + *

    Binary property Noncharacter_Code_Point.

    + *

    Code points that are explicitly defined as illegal for the encoding + * of characters.

    + * @stable ICU 2.6 + */ + public static final int NONCHARACTER_CODE_POINT = 24; + + /** + *

    Binary property Quotation_Mark.

    + * @stable ICU 2.6 + */ + public static final int QUOTATION_MARK = 25; + + /** + *

    Binary property Radical (new).

    + *

    For programmatic determination of Ideographic Description + * Sequences.

    + * @stable ICU 2.6 + */ + public static final int RADICAL = 26; + + /** + *

    Binary property Soft_Dotted (new).

    + *

    Characters with a "soft dot", like i or j.

    + *

    An accent placed on these characters causes the dot to disappear.

    + * @stable ICU 2.6 + */ + public static final int SOFT_DOTTED = 27; + + /** + *

    Binary property Terminal_Punctuation.

    + *

    Punctuation characters that generally mark the end of textual + * units.

    + * @stable ICU 2.6 + */ + public static final int TERMINAL_PUNCTUATION = 28; + + /** + *

    Binary property Unified_Ideograph (new).

    + *

    For programmatic determination of Ideographic Description + * Sequences.

    + * @stable ICU 2.6 + */ + public static final int UNIFIED_IDEOGRAPH = 29; + + /** + *

    Binary property Uppercase.

    + *

    Same as UCharacter.isUUppercase(), different from + * UCharacter.isUpperCase().

    + *

    Lu+Other_Uppercase

    + * @stable ICU 2.6 + */ + public static final int UPPERCASE = 30; + + /** + *

    Binary property White_Space.

    + *

    Same as UCharacter.isUWhiteSpace(), different from + * UCharacter.isSpace() and UCharacter.isWhitespace().

    + * Space characters+TAB+CR+LF-ZWSP-ZWNBSP

    + * @stable ICU 2.6 + */ + public static final int WHITE_SPACE = 31; + + /** + *

    Binary property XID_Continue.

    + *

    ID_Continue modified to allow closure under normalization forms + * NFKC and NFKD.

    + * @stable ICU 2.6 + */ + public static final int XID_CONTINUE = 32; + + /** + *

    Binary property XID_Start.

    + *

    ID_Start modified to allow closure under normalization forms NFKC + * and NFKD.

    + * @stable ICU 2.6 + */ + public static final int XID_START = 33; + + /** + *

    Binary property Case_Sensitive.

    + *

    Either the source of a case + * mapping or _in_ the target of a case mapping. Not the same as + * the general category Cased_Letter.

    + * @stable ICU 2.6 + */ + public static final int CASE_SENSITIVE = 34; + + /** + * Binary property STerm (new in Unicode 4.0.1). + * Sentence Terminal. Used in UAX #29: Text Boundaries + * (http://www.unicode.org/reports/tr29/) + * @stable ICU 3.0 + */ + public static final int S_TERM = 35; + + /** + * Binary property Variation_Selector (new in Unicode 4.0.1). + * Indicates all those characters that qualify as Variation Selectors. + * For details on the behavior of these characters, + * see StandardizedVariants.html and 15.6 Variation Selectors. + * @stable ICU 3.0 + */ + public static final int VARIATION_SELECTOR = 36; + + /** + * Binary property NFD_Inert. + * ICU-specific property for characters that are inert under NFD, + * i.e., they do not interact with adjacent characters. + * Used for example in normalizing transforms in incremental mode + * to find the boundary of safely normalizable text despite possible + * text additions. + * + * There is one such property per normalization form. + * These properties are computed as follows - an inert character is: + * a) unassigned, or ALL of the following: + * b) of combining class 0. + * c) not decomposed by this normalization form. + * AND if NFC or NFKC, + * d) can never compose with a previous character. + * e) can never compose with a following character. + * f) can never change if another character is added. + * Example: a-breve might satisfy all but f, but if you + * add an ogonek it changes to a-ogonek + breve + * + * See also com.ibm.text.UCD.NFSkippable in the ICU4J repository, + * and icu/source/common/unormimp.h . + * @stable ICU 3.0 + */ + public static final int NFD_INERT = 37; + + /** + * Binary property NFKD_Inert. + * ICU-specific property for characters that are inert under NFKD, + * i.e., they do not interact with adjacent characters. + * Used for example in normalizing transforms in incremental mode + * to find the boundary of safely normalizable text despite possible + * text additions. 
+ * @see #NFD_INERT + * @stable ICU 3.0 + */ + public static final int NFKD_INERT = 38; + + /** + * Binary property NFC_Inert. + * ICU-specific property for characters that are inert under NFC, + * i.e., they do not interact with adjacent characters. + * Used for example in normalizing transforms in incremental mode + * to find the boundary of safely normalizable text despite possible + * text additions. + * @see #NFD_INERT + * @stable ICU 3.0 + */ + public static final int NFC_INERT = 39; + + /** + * Binary property NFKC_Inert. + * ICU-specific property for characters that are inert under NFKC, + * i.e., they do not interact with adjacent characters. + * Used for example in normalizing transforms in incremental mode + * to find the boundary of safely normalizable text despite possible + * text additions. + * @see #NFD_INERT + * @stable ICU 3.0 + */ + public static final int NFKC_INERT = 40; + + /** + * Binary Property Segment_Starter. + * ICU-specific property for characters that are starters in terms of + * Unicode normalization and combining character sequences. + * They have ccc=0 and do not occur in non-initial position of the + * canonical decomposition of any character + * (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)). + * ICU uses this property for segmenting a string for generating a set of + * canonically equivalent strings, e.g. for canonical closure while + * processing collation tailoring rules. + * @stable ICU 3.0 + */ + public static final int SEGMENT_STARTER = 41; + + /** + * Binary property Pattern_Syntax (new in Unicode 4.1). + * See UAX #31 Identifier and Pattern Syntax + * (http://www.unicode.org/reports/tr31/) + * @stable ICU 3.4 + */ + public static final int PATTERN_SYNTAX = 42; + + /** + * Binary property Pattern_White_Space (new in Unicode 4.1). 
+ * See UAX #31 Identifier and Pattern Syntax + * (http://www.unicode.org/reports/tr31/) + * @stable ICU 3.4 + */ + public static final int PATTERN_WHITE_SPACE = 43; + + /** + * Binary property alnum (a C/POSIX character class). + * Implemented according to the UTS #18 Annex C Standard Recommendation. + * See the UCharacter class documentation. + * @stable ICU 3.4 + */ + public static final int POSIX_ALNUM = 44; + + /** + * Binary property blank (a C/POSIX character class). + * Implemented according to the UTS #18 Annex C Standard Recommendation. + * See the UCharacter class documentation. + * @stable ICU 3.4 + */ + public static final int POSIX_BLANK = 45; + + /** + * Binary property graph (a C/POSIX character class). + * Implemented according to the UTS #18 Annex C Standard Recommendation. + * See the UCharacter class documentation. + * @stable ICU 3.4 + */ + public static final int POSIX_GRAPH = 46; + + /** + * Binary property print (a C/POSIX character class). + * Implemented according to the UTS #18 Annex C Standard Recommendation. + * See the UCharacter class documentation. + * @stable ICU 3.4 + */ + public static final int POSIX_PRINT = 47; + + /** + * Binary property xdigit (a C/POSIX character class). + * Implemented according to the UTS #18 Annex C Standard Recommendation. + * See the UCharacter class documentation. + * @stable ICU 3.4 + */ + public static final int POSIX_XDIGIT = 48; + + /** + * Binary property Cased. + * For Lowercase, Uppercase and Titlecase characters. + * @stable ICU 4.4 + */ + public static final int CASED=49; + /** + * Binary property Case_Ignorable. + * Used in context-sensitive case mappings. + * @stable ICU 4.4 + */ + public static final int CASE_IGNORABLE=50; + /** + * Binary property Changes_When_Lowercased. + * @stable ICU 4.4 + */ + public static final int CHANGES_WHEN_LOWERCASED=51; + /** + * Binary property Changes_When_Uppercased. 
+ * @stable ICU 4.4 + */ + public static final int CHANGES_WHEN_UPPERCASED=52; + /** + * Binary property Changes_When_Titlecased. + * @stable ICU 4.4 + */ + public static final int CHANGES_WHEN_TITLECASED=53; + /** + * Binary property Changes_When_Casefolded. + * @stable ICU 4.4 + */ + public static final int CHANGES_WHEN_CASEFOLDED=54; + /** + * Binary property Changes_When_Casemapped. + * @stable ICU 4.4 + */ + public static final int CHANGES_WHEN_CASEMAPPED=55; + /** + * Binary property Changes_When_NFKC_Casefolded. + * @stable ICU 4.4 + */ + public static final int CHANGES_WHEN_NFKC_CASEFOLDED=56; + + /** + * One more than the last constant for binary Unicode properties. + * @stable ICU 2.6 + */ + public static final int BINARY_LIMIT = 57; + + /** + * Enumerated property Bidi_Class. + * Same as UCharacter.getDirection(int), returns UCharacterDirection values. + * @stable ICU 2.4 + */ + public static final int BIDI_CLASS = 0x1000; + + /** + * First constant for enumerated/integer Unicode properties. + * @stable ICU 2.4 + */ + public static final int INT_START = BIDI_CLASS; + + /** + * Enumerated property Block. + * Same as UCharacter.UnicodeBlock.of(int), returns UCharacter.UnicodeBlock + * values. + * @stable ICU 2.4 + */ + public static final int BLOCK = 0x1001; + + /** + * Enumerated property Canonical_Combining_Class. + * Same as UCharacter.getCombiningClass(int), returns 8-bit numeric values. + * @stable ICU 2.4 + */ + public static final int CANONICAL_COMBINING_CLASS = 0x1002; + + /** + * Enumerated property Decomposition_Type. + * Returns UCharacter.DecompositionType values. + * @stable ICU 2.4 + */ + public static final int DECOMPOSITION_TYPE = 0x1003; + + /** + * Enumerated property East_Asian_Width. + * See http://www.unicode.org/reports/tr11/ + * Returns UCharacter.EastAsianWidth values. + * @stable ICU 2.4 + */ + public static final int EAST_ASIAN_WIDTH = 0x1004; + + /** + * Enumerated property General_Category. 
+ * Same as UCharacter.getType(int), returns UCharacterCategory values. + * @stable ICU 2.4 + */ + public static final int GENERAL_CATEGORY = 0x1005; + + /** + * Enumerated property Joining_Group. + * Returns UCharacter.JoiningGroup values. + * @stable ICU 2.4 + */ + public static final int JOINING_GROUP = 0x1006; + + /** + * Enumerated property Joining_Type. + * Returns UCharacter.JoiningType values. + * @stable ICU 2.4 + */ + public static final int JOINING_TYPE = 0x1007; + + /** + * Enumerated property Line_Break. + * Returns UCharacter.LineBreak values. + * @stable ICU 2.4 + */ + public static final int LINE_BREAK = 0x1008; + + /** + * Enumerated property Numeric_Type. + * Returns UCharacter.NumericType values. + * @stable ICU 2.4 + */ + public static final int NUMERIC_TYPE = 0x1009; + + /** + * Enumerated property Script. + * Same as UScript.getScript(int), returns UScript values. + * @stable ICU 2.4 + */ + public static final int SCRIPT = 0x100A; + + /** + * Enumerated property Hangul_Syllable_Type, new in Unicode 4. + * Returns HangulSyllableType values. + * @stable ICU 2.6 + */ + public static final int HANGUL_SYLLABLE_TYPE = 0x100B; + + /** + * Enumerated property NFD_Quick_Check. + * Returns numeric values compatible with Normalizer.QuickCheckResult. + * @stable ICU 3.0 + */ + public static final int NFD_QUICK_CHECK = 0x100C; + + /** + * Enumerated property NFKD_Quick_Check. + * Returns numeric values compatible with Normalizer.QuickCheckResult. + * @stable ICU 3.0 + */ + public static final int NFKD_QUICK_CHECK = 0x100D; + + /** + * Enumerated property NFC_Quick_Check. + * Returns numeric values compatible with Normalizer.QuickCheckResult. + * @stable ICU 3.0 + */ + public static final int NFC_QUICK_CHECK = 0x100E; + + /** + * Enumerated property NFKC_Quick_Check. + * Returns numeric values compatible with Normalizer.QuickCheckResult. 
+ * @stable ICU 3.0 + */ + public static final int NFKC_QUICK_CHECK = 0x100F; + + /** + * Enumerated property Lead_Canonical_Combining_Class. + * ICU-specific property for the ccc of the first code point + * of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + * Useful for checking for canonically ordered text; + * see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD . + * Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS. + * @stable ICU 3.0 + */ + public static final int LEAD_CANONICAL_COMBINING_CLASS = 0x1010; + + /** + * Enumerated property Trail_Canonical_Combining_Class. + * ICU-specific property for the ccc of the last code point + * of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + * Useful for checking for canonically ordered text; + * see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD . + * Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS. + * @stable ICU 3.0 + */ + public static final int TRAIL_CANONICAL_COMBINING_CLASS = 0x1011; + + /** + * Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). + * Used in UAX #29: Text Boundaries + * (http://www.unicode.org/reports/tr29/) + * Returns UGraphemeClusterBreak values. + * @stable ICU 3.4 + */ + public static final int GRAPHEME_CLUSTER_BREAK = 0x1012; + + /** + * Enumerated property Sentence_Break (new in Unicode 4.1). + * Used in UAX #29: Text Boundaries + * (http://www.unicode.org/reports/tr29/) + * Returns USentenceBreak values. + * @stable ICU 3.4 + */ + public static final int SENTENCE_BREAK = 0x1013; + + /** + * Enumerated property Word_Break (new in Unicode 4.1). + * Used in UAX #29: Text Boundaries + * (http://www.unicode.org/reports/tr29/) + * Returns UWordBreakValues values. + * @stable ICU 3.4 + */ + public static final int WORD_BREAK = 0x1014; + + /** + * One more than the last constant for enumerated/integer Unicode + * properties.
+ * @stable ICU 2.4 + */ + public static final int INT_LIMIT = 0x1015; + + /** + * Bitmask property General_Category_Mask. + * This is the General_Category property returned as a bit mask. + * When used in UCharacter.getIntPropertyValue(c), + * returns bit masks for UCharacterCategory values where exactly one bit is set. + * When used with UCharacter.getPropertyValueName() and UCharacter.getPropertyValueEnum(), + * a multi-bit mask is used for sets of categories like "Letters". + * @stable ICU 2.4 + */ + public static final int GENERAL_CATEGORY_MASK = 0x2000; + + /** + * First constant for bit-mask Unicode properties. + * @stable ICU 2.4 + */ + public static final int MASK_START = GENERAL_CATEGORY_MASK; + + /** + * One more than the last constant for bit-mask Unicode properties. + * @stable ICU 2.4 + */ + public static final int MASK_LIMIT = 0x2001; + + /** + * Double property Numeric_Value. + * Corresponds to UCharacter.getUnicodeNumericValue(int). + * @stable ICU 2.4 + */ + public static final int NUMERIC_VALUE = 0x3000; + + /** + * First constant for double Unicode properties. + * @stable ICU 2.4 + */ + public static final int DOUBLE_START = NUMERIC_VALUE; + + /** + * One more than the last constant for double Unicode properties. + * @stable ICU 2.4 + */ + public static final int DOUBLE_LIMIT = 0x3001; + + /** + * String property Age. + * Corresponds to UCharacter.getAge(int). + * @stable ICU 2.4 + */ + public static final int AGE = 0x4000; + + /** + * First constant for string Unicode properties. + * @stable ICU 2.4 + */ + public static final int STRING_START = AGE; + + /** + * String property Bidi_Mirroring_Glyph. + * Corresponds to UCharacter.getMirror(int). + * @stable ICU 2.4 + */ + public static final int BIDI_MIRRORING_GLYPH = 0x4001; + + /** + * String property Case_Folding. + * Corresponds to UCharacter.foldCase(String, boolean). + * @stable ICU 2.4 + */ + public static final int CASE_FOLDING = 0x4002; + + /** + * String property ISO_Comment. 
+ * Corresponds to UCharacter.getISOComment(int). + * @stable ICU 2.4 + */ + public static final int ISO_COMMENT = 0x4003; + + /** + * String property Lowercase_Mapping. + * Corresponds to UCharacter.toLowerCase(String). + * @stable ICU 2.4 + */ + public static final int LOWERCASE_MAPPING = 0x4004; + + /** + * String property Name. + * Corresponds to UCharacter.getName(int). + * @stable ICU 2.4 + */ + public static final int NAME = 0x4005; + + /** + * String property Simple_Case_Folding. + * Corresponds to UCharacter.foldCase(int, boolean). + * @stable ICU 2.4 + */ + public static final int SIMPLE_CASE_FOLDING = 0x4006; + + /** + * String property Simple_Lowercase_Mapping. + * Corresponds to UCharacter.toLowerCase(int). + * @stable ICU 2.4 + */ + public static final int SIMPLE_LOWERCASE_MAPPING = 0x4007; + + /** + * String property Simple_Titlecase_Mapping. + * Corresponds to UCharacter.toTitleCase(int). + * @stable ICU 2.4 + */ + public static final int SIMPLE_TITLECASE_MAPPING = 0x4008; + + /** + * String property Simple_Uppercase_Mapping. + * Corresponds to UCharacter.toUpperCase(int). + * @stable ICU 2.4 + */ + public static final int SIMPLE_UPPERCASE_MAPPING = 0x4009; + + /** + * String property Titlecase_Mapping. + * Corresponds to UCharacter.toTitleCase(String). + * @stable ICU 2.4 + */ + public static final int TITLECASE_MAPPING = 0x400A; + + /** + * String property Unicode_1_Name. + * Corresponds to UCharacter.getName1_0(int). + * @stable ICU 2.4 + */ + public static final int UNICODE_1_NAME = 0x400B; + + /** + * String property Uppercase_Mapping. + * Corresponds to UCharacter.toUpperCase(String). + * @stable ICU 2.4 + */ + public static final int UPPERCASE_MAPPING = 0x400C; + + /** + * One more than the last constant for string Unicode properties. + * @stable ICU 2.4 + */ + public static final int STRING_LIMIT = 0x400D; + + /** + * Selector constants for UCharacter.getPropertyName() and + * UCharacter.getPropertyValueName(). 
These selectors are used to + * choose which name is returned for a given property or value. + * All properties and values have a long name. Most have a short + * name, but some do not. Unicode allows for additional names, + * beyond the long and short name, which would be indicated by + * LONG + i, where i=1, 2,... + * + * @see UCharacter#getPropertyName + * @see UCharacter#getPropertyValueName + * @stable ICU 2.4 + */ + public interface NameChoice { + /** + * Selector for the abbreviated name of a property or value. + * Most properties and values have a short name; those that do + * not return null. + * @stable ICU 2.4 + */ + static final int SHORT = 0; + + /** + * Selector for the long name of a property or value. All + * properties and values have a long name. + * @stable ICU 2.4 + */ + static final int LONG = 1; + + /** + * The number of predefined property name choices. Individual + * properties or values may have more than COUNT aliases. + * @stable ICU 2.4 + */ + static final int COUNT = 2; + } +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UScript.java b/main/classes/core/src/com/ibm/icu/lang/UScript.java new file mode 100644 index 00000000000..6f74c2a226d --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UScript.java @@ -0,0 +1,910 @@ +/** +******************************************************************************* +* Copyright (C) 2001-2010 International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ + +package com.ibm.icu.lang; + +import java.util.Locale; +import java.util.MissingResourceException; + +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.UCharacterProperty; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +/** + * A class to reflect UTR #24: Script Names + * (based on ISO 15924:2000, "Code for the representation of names of + * scripts"). 
UTR #24 describes the basis for a new Unicode data file, + * Scripts.txt. + * @stable ICU 2.4 + */ +public final class UScript { + /** + * Invalid code + * @stable ICU 2.4 + */ + public static final int INVALID_CODE = -1; + /** + * Common + * @stable ICU 2.4 + */ + public static final int COMMON = 0; /* Zyyy */ + /** + * Inherited + * @stable ICU 2.4 + */ + public static final int INHERITED = 1; /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ + /** + * Arabic + * @stable ICU 2.4 + */ + public static final int ARABIC = 2; /* Arab */ + /** + * Armenian + * @stable ICU 2.4 + */ + public static final int ARMENIAN = 3; /* Armn */ + /** + * Bengali + * @stable ICU 2.4 + */ + public static final int BENGALI = 4; /* Beng */ + /** + * Bopomofo + * @stable ICU 2.4 + */ + public static final int BOPOMOFO = 5; /* Bopo */ + /** + * Cherokee + * @stable ICU 2.4 + */ + public static final int CHEROKEE = 6; /* Cher */ + /** + * Coptic + * @stable ICU 2.4 + */ + public static final int COPTIC = 7; /* Qaac */ + /** + * Cyrillic + * @stable ICU 2.4 + */ + public static final int CYRILLIC = 8; /* Cyrl (Cyrs) */ + /** + * Deseret + * @stable ICU 2.4 + */ + public static final int DESERET = 9; /* Dsrt */ + /** + * Devanagari + * @stable ICU 2.4 + */ + public static final int DEVANAGARI = 10; /* Deva */ + /** + * Ethiopic + * @stable ICU 2.4 + */ + public static final int ETHIOPIC = 11; /* Ethi */ + /** + * Georgian + * @stable ICU 2.4 + */ + public static final int GEORGIAN = 12; /* Geor (Geon; Geoa) */ + /** + * Gothic + * @stable ICU 2.4 + */ + public static final int GOTHIC = 13; /* Goth */ + /** + * Greek + * @stable ICU 2.4 + */ + public static final int GREEK = 14; /* Grek */ + /** + * Gujarati + * @stable ICU 2.4 + */ + public static final int GUJARATI = 15; /* Gujr */ + /** + * Gurmukhi + * @stable ICU 2.4 + */ + public static final int GURMUKHI = 16; /* Guru */ + /** + * Han + * @stable ICU 2.4 + */ + public static final int HAN = 17; /* 
Hani */ + /** + * Hangul + * @stable ICU 2.4 + */ + public static final int HANGUL = 18; /* Hang */ + /** + * Hebrew + * @stable ICU 2.4 + */ + public static final int HEBREW = 19; /* Hebr */ + /** + * Hiragana + * @stable ICU 2.4 + */ + public static final int HIRAGANA = 20; /* Hira */ + /** + * Kannada + * @stable ICU 2.4 + */ + public static final int KANNADA = 21; /* Knda */ + /** + * Katakana + * @stable ICU 2.4 + */ + public static final int KATAKANA = 22; /* Kana */ + /** + * Khmer + * @stable ICU 2.4 + */ + public static final int KHMER = 23; /* Khmr */ + /** + * Lao + * @stable ICU 2.4 + */ + public static final int LAO = 24; /* Laoo */ + /** + * Latin + * @stable ICU 2.4 + */ + public static final int LATIN = 25; /* Latn (Latf; Latg) */ + /** + * Malayalam + * @stable ICU 2.4 + */ + public static final int MALAYALAM = 26; /* Mlym */ + /** + * Mongolian + * @stable ICU 2.4 + */ + public static final int MONGOLIAN = 27; /* Mong */ + /** + * Myanmar + * @stable ICU 2.4 + */ + public static final int MYANMAR = 28; /* Mymr */ + /** + * Ogham + * @stable ICU 2.4 + */ + public static final int OGHAM = 29; /* Ogam */ + /** + * Old Italic + * @stable ICU 2.4 + */ + public static final int OLD_ITALIC = 30; /* Ital */ + /** + * Oriya + * @stable ICU 2.4 + */ + public static final int ORIYA = 31; /* Orya */ + /** + * Runic + * @stable ICU 2.4 + */ + public static final int RUNIC = 32; /* Runr */ + /** + * Sinhala + * @stable ICU 2.4 + */ + public static final int SINHALA = 33; /* Sinh */ + /** + * Syriac + * @stable ICU 2.4 + */ + public static final int SYRIAC = 34; /* Syrc (Syrj; Syrn; Syre) */ + /** + * Tamil + * @stable ICU 2.4 + */ + public static final int TAMIL = 35; /* Taml */ + /** + * Telugu + * @stable ICU 2.4 + */ + public static final int TELUGU = 36; /* Telu */ + /** + * Thaana + * @stable ICU 2.4 + */ + public static final int THAANA = 37; /* Thaa */ + /** + * Thai + * @stable ICU 2.4 + */ + public static final int THAI = 38; /* Thai */ + /** + *
Tibetan + * @stable ICU 2.4 + */ + public static final int TIBETAN = 39; /* Tibt */ + /** + * Unified Canadian Aboriginal Syllabics + * @stable ICU 2.6 + */ + public static final int CANADIAN_ABORIGINAL = 40; /* Cans */ + /** + * Unified Canadian Aboriginal Syllabics (alias) + * @stable ICU 2.4 + */ + public static final int UCAS = CANADIAN_ABORIGINAL; /* Cans */ + /** + * Yi syllables + * @stable ICU 2.4 + */ + public static final int YI = 41; /* Yiii */ + /** + * Tagalog + * @stable ICU 2.4 + */ + public static final int TAGALOG = 42; /* Tglg */ + /** + * Hanunoo + * @stable ICU 2.4 + */ + public static final int HANUNOO = 43; /* Hano */ + /** + * Buhid + * @stable ICU 2.4 + */ + public static final int BUHID = 44; /* Buhd */ + /** + * Tagbanwa + * @stable ICU 2.4 + */ + public static final int TAGBANWA = 45; /* Tagb */ + /** + * Braille + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int BRAILLE = 46; /* Brai */ + /** + * Cypriot + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int CYPRIOT = 47; /* Cprt */ + /** + * Limbu + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int LIMBU = 48; /* Limb */ + /** + * Linear B + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int LINEAR_B = 49; /* Linb */ + /** + * Osmanya + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int OSMANYA = 50; /* Osma */ + /** + * Shavian + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int SHAVIAN = 51; /* Shaw */ + /** + * Tai Le + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int TAI_LE = 52; /* Tale */ + /** + * Ugaritic + * Script in Unicode 4 + * @stable ICU 2.6 + * + */ + public static final int UGARITIC = 53; /* Ugar */ + /** + * Script in Unicode 4.0.1 + * @stable ICU 3.0 + */ + public static final int KATAKANA_OR_HIRAGANA = 54; /*Hrkt */ + + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public
static final int BUGINESE = 55; /* Bugi */ + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public static final int GLAGOLITIC = 56; /* Glag */ + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public static final int KHAROSHTHI = 57; /* Khar */ + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public static final int SYLOTI_NAGRI = 58; /* Sylo */ + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public static final int NEW_TAI_LUE = 59; /* Talu */ + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public static final int TIFINAGH = 60; /* Tfng */ + /** + * Script in Unicode 4.1 + * @stable ICU 3.4 + */ + public static final int OLD_PERSIAN = 61; /* Xpeo */ + + + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int BALINESE = 62; /* Bali */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int BATAK = 63; /* Batk */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int BLISSYMBOLS = 64; /* Blis */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int BRAHMI = 65; /* Brah */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int CHAM = 66; /* Cham */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int CIRTH = 67; /* Cirt */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int OLD_CHURCH_SLAVONIC_CYRILLIC = 68; /* Cyrs */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int DEMOTIC_EGYPTIAN = 69; /* Egyd */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int HIERATIC_EGYPTIAN = 70; /* Egyh */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int EGYPTIAN_HIEROGLYPHS = 71; /* Egyp */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int KHUTSURI = 72; /* Geok */ + /** + * ISO 15924 script code + * @stable 
ICU 3.6 + */ + public static final int SIMPLIFIED_HAN = 73; /* Hans */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int TRADITIONAL_HAN = 74; /* Hant */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int PAHAWH_HMONG = 75; /* Hmng */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int OLD_HUNGARIAN = 76; /* Hung */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int HARAPPAN_INDUS = 77; /* Inds */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int JAVANESE = 78; /* Java */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int KAYAH_LI = 79; /* Kali */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int LATIN_FRAKTUR = 80; /* Latf */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int LATIN_GAELIC = 81; /* Latg */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int LEPCHA = 82; /* Lepc */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int LINEAR_A = 83; /* Lina */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int MANDAEAN = 84; /* Mand */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int MAYAN_HIEROGLYPHS = 85; /* Maya */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int MEROITIC = 86; /* Mero */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int NKO = 87; /* Nkoo */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int ORKHON = 88; /* Orkh */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int OLD_PERMIC = 89; /* Perm */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int PHAGS_PA = 90; /* Phag */ + /** + * ISO 15924 script code + * 
@stable ICU 3.6 + */ + public static final int PHOENICIAN = 91; /* Phnx */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int PHONETIC_POLLARD = 92; /* Plrd */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int RONGORONGO = 93; /* Roro */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int SARATI = 94; /* Sara */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int ESTRANGELO_SYRIAC = 95; /* Syre */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int WESTERN_SYRIAC = 96; /* Syrj */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int EASTERN_SYRIAC = 97; /* Syrn */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int TENGWAR = 98; /* Teng */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int VAI = 99; /* Vaii */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int VISIBLE_SPEECH = 100;/* Visp */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int CUNEIFORM = 101;/* Xsux */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int UNWRITTEN_LANGUAGES = 102;/* Zxxx */ + /** + * ISO 15924 script code + * @stable ICU 3.6 + */ + public static final int UNKNOWN = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ + + /* Private use codes from Qaaa - Qabx are not supported*/ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int CARIAN = 104;/* Cari */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int JAPANESE = 105;/* Jpan */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int LANNA = 106;/* Lana */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int LYCIAN = 107;/* Lyci */ + /** + * ISO 
15924 script code + * @stable ICU 3.8 + */ + public static final int LYDIAN = 108;/* Lydi */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int OL_CHIKI = 109;/* Olck */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int REJANG = 110;/* Rjng */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int SAURASHTRA = 111;/* Saur */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int SIGN_WRITING = 112;/* Sgnw */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int SUNDANESE = 113;/* Sund */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int MOON = 114;/* Moon */ + /** + * ISO 15924 script code + * @stable ICU 3.8 + */ + public static final int MEITEI_MAYEK = 115;/* Mtei */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int IMPERIAL_ARAMAIC = 116;/* Armi */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int AVESTAN = 117;/* Avst */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int CHAKMA = 118;/* Cakm */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int KOREAN = 119;/* Kore */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int KAITHI = 120;/* Kthi */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int MANICHAEAN = 121;/* Mani */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int PSALTER_PAHLAVI = 123;/* Phlp */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int BOOK_PAHLAVI = 124;/* Phlv */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int INSCRIPTIONAL_PARTHIAN = 
125;/* Prti */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int SAMARITAN = 126;/* Samr */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int TAI_VIET = 127;/* Tavt */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */ + + /** + * ISO 15924 script code + * @stable ICU 4.0 + */ + public static final int SYMBOLS = 129;/* Zsym */ + + /** + * ISO 15924 script code + * @stable ICU 4.4 + */ + public static final int BAMUM = 130;/* Bamu */ + /** + * ISO 15924 script code + * @stable ICU 4.4 + */ + public static final int LISU = 131;/* Lisu */ + /** + * ISO 15924 script code + * @stable ICU 4.4 + */ + public static final int NAKHI_GEBA = 132;/* Nkgb */ + /** + * ISO 15924 script code + * @stable ICU 4.4 + */ + public static final int OLD_SOUTH_ARABIAN = 133;/* Sarb */ + + /** + * Limit + * @stable ICU 2.4 + */ + public static final int CODE_LIMIT = 134; + + private static final String kLocaleScript = "LocaleScript"; + + //private static final String INVALID_NAME = "Invalid"; + /** + * Helper function to find the code from locale. + * @param locale The locale. + */ + private static int[] findCodeFromLocale(ULocale locale) { + ICUResourceBundle rb; + + try { + rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, locale); + } catch (MissingResourceException e) { + /* This part seems to never be called since "UResourceBundle.getBundleInstance" + * corrects this by setting to ICUResourceBundle.FROM_DEFAULT + * when such an invalid locale is passed. 
+ */ + ///CLOVER:OFF + return null; + ///CLOVER:ON + } + + /* rb was already assigned by the lookup inside the try block above, so it is not fetched a second time here. */ + + // if rb is not a strict fallback of the requested locale, return null + //if(!LocaleUtility.isFallbackOf(rb.getULocale().toString(), locale.toString())){ + // return null; + //} + //non existent locale check + if(rb.getLoadingStatus()==ICUResourceBundle.FROM_DEFAULT && ! locale.equals(ULocale.getDefault())){ + return null; + } + UResourceBundle sub = rb.get(kLocaleScript); + + /* One result slot per script listed for the locale. Every iteration stores exactly one code, so a separate fill counter and a "found fewer than listed" check are unnecessary. */ + int[] result = new int[sub.getSize()]; + for (int i = 0; i < result.length; ++i) { + result[i] = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, + sub.getString(i)); + } + + return result; + } + + /** + * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. + * Returns MALAYALAM given "Malayalam" OR "Mlym". + * Returns LATIN given "en" OR "en_US" + * @param locale Locale + * @return The script codes array. null if the code cannot be found. + * @stable ICU 2.4 + */ + public static final int[] getCode(Locale locale){ + return findCodeFromLocale(ULocale.forLocale(locale)); + } + /** + * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. + * Returns MALAYALAM given "Malayalam" OR "Mlym". + * Returns LATIN given "en" OR "en_US" + * @param locale ULocale + * @return The script codes array. null if the code cannot be found. + * @stable ICU 3.0 + */ + public static final int[] getCode(ULocale locale){ + return findCodeFromLocale(locale); + } + /** + * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. + * Returns MALAYALAM given "Malayalam" OR "Mlym". + * Returns LATIN given "en" OR "en_US" + * + *

    Note: To search by short or long script alias only, use + * UCharacter.getPropertyValueEnum(UProperty.SCRIPT, alias) + * instead. This does a fast lookup with no access of the locale + * data. + * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale + * @return The script codes array. null if the code cannot be found. + * @stable ICU 2.4 + */ + public static final int[] getCode(String nameOrAbbrOrLocale){ + try { + return new int[] { + UCharacter.getPropertyValueEnum(UProperty.SCRIPT, + nameOrAbbrOrLocale) + }; + } catch (IllegalArgumentException e) { + /* Not a script name or abbreviation: treat the argument as a locale ID instead. */ + return findCodeFromLocale(new ULocale(nameOrAbbrOrLocale)); + } + } + + /** + * Gets the script code associated with the given ISO 15924 abbreviation or name. + * Returns MALAYALAM given "Malayalam" OR "Mlym". + * + * @param nameOrAbbr name of the script or ISO 15924 code + * @return The script code value or INVALID_CODE if the code cannot be found. + * @internal + * @deprecated This API is ICU internal only. + */ + public static final int getCodeFromName(String nameOrAbbr) { + try { + return UCharacter.getPropertyValueEnum(UProperty.SCRIPT, + nameOrAbbr); + } catch (IllegalArgumentException e) { + return INVALID_CODE; + } + } + + /** + * Gets the script code associated with the given codepoint. + * Returns UScript.MALAYALAM given 0x0D02 + * @param codepoint UChar32 codepoint + * @return The script code + * @throws IllegalArgumentException if codepoint is outside the range UCharacter.MIN_VALUE to UCharacter.MAX_VALUE + * @stable ICU 2.4 + */ + public static final int getScript(int codepoint){ + if (codepoint >= UCharacter.MIN_VALUE && codepoint <= UCharacter.MAX_VALUE) { + return (UCharacterProperty.INSTANCE.getAdditional(codepoint,0) & UCharacter.SCRIPT_MASK_); + }else{ + throw new IllegalArgumentException(Integer.toString(codepoint)); + } + } + + /** + * Gets a script name associated with the given script code. 
+ * Returns "Malayalam" given MALAYALAM + * @param scriptCode int script code + * @return script name as a string in full as given in TR#24 + * @stable ICU 2.4 + */ + public static final String getName(int scriptCode){ + return UCharacter.getPropertyValueName(UProperty.SCRIPT, + scriptCode, + UProperty.NameChoice.LONG); + } + + /** + * Gets the abbreviated script name associated with the given script code. + * Returns "Mlym" given MALAYALAM + * @param scriptCode int script code + * @return script abbreviated name as a string as given in TR#24 + * @stable ICU 2.4 + */ + public static final String getShortName(int scriptCode){ + return UCharacter.getPropertyValueName(UProperty.SCRIPT, + scriptCode, + UProperty.NameChoice.SHORT); + } + ///CLOVER:OFF + /** + * Private Constructor. Never default construct + */ + private UScript(){} + ///CLOVER:ON +} diff --git a/main/classes/core/src/com/ibm/icu/lang/UScriptRun.java b/main/classes/core/src/com/ibm/icu/lang/UScriptRun.java new file mode 100644 index 00000000000..46f15a123a4 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/UScriptRun.java @@ -0,0 +1,628 @@ +/* + ******************************************************************************* + * + * Copyright (C) 1999-2009, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + */ + +package com.ibm.icu.lang; + +import com.ibm.icu.text.UTF16; + +/** + * UScriptRun is used to find runs of characters in + * the same script, as defined in the UScript class. + * It implements a simple iterator over an array of characters. + * The iterator will assign COMMON and INHERITED + * characters to the same script as the preceding characters. If the + * COMMON and INHERITED characters are first, they will be assigned to + * the same script as the following characters. + * + * The iterator will try to match paired punctuation. 
If it sees an + * opening punctuation character, it will remember the script that + * was assigned to that character, and assign the same script to the + * matching closing punctuation. + * + * No attempt is made to combine related scripts into a single run. In + * particular, Hiragana, Katakana, and Han characters will appear in separate + * runs. + + * Here is an example of how to iterate over script runs: + *

    + * void printScriptRuns(char[] text)
    + * {
    + *     UScriptRun scriptRun = new UScriptRun(text);
    + *
    + *     while (scriptRun.next()) {
    + *         int start  = scriptRun.getScriptStart();
    + *         int limit  = scriptRun.getScriptLimit();
    + *         int script = scriptRun.getScriptCode();
    + *
    + *         System.out.println("Script \"" + UScript.getName(script) + "\" from " +
    + *                            start + " to " + limit + ".");
    + *     }
    + *  }
    + * 
    + * + * @internal + * @deprecated This API is ICU internal only. + */ +public final class UScriptRun +{ + /** + * Construct an empty UScriptRun object. The next() + * method will return false the first time it is called. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public UScriptRun() + { + char[] nullChars = null; + + reset(nullChars, 0, 0); + } + + /** + * Construct a UScriptRun object which iterates over the + * characters in the given string. + * + * @param text the string of characters over which to iterate. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public UScriptRun(String text) + { + reset (text); + } + + /** + * Construct a UScriptRun object which iterates over a subrange + * of the characters in the given string. + * + * @param text the string of characters over which to iterate. + * @param start the index of the first character over which to iterate + * @param count the number of characters over which to iterate + * + * @internal + * @deprecated This API is ICU internal only. + */ + public UScriptRun(String text, int start, int count) + { + reset(text, start, count); + } + + /** + * Construct a UScriptRun object which iterates over the given + * characters. + * + * @param chars the array of characters over which to iterate. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public UScriptRun(char[] chars) + { + reset(chars); + } + + /** + * Construct a UScriptRun object which iterates over a subrange + * of the given characters. + * + * @param chars the array of characters over which to iterate. + * @param start the index of the first character over which to iterate + * @param count the number of characters over which to iterate + * + * @internal + * @deprecated This API is ICU internal only. + */ + public UScriptRun(char[] chars, int start, int count) + { + reset(chars, start, count); + } + + + /** + * Reset the iterator to the start of the text. 
+ * + * @internal + * @deprecated This API is ICU internal only. + */ + public final void reset() + { + // empty any old parenStack contents. + // NOTE: this is not the most efficient way + // to do this, but it's the easiest to write... + while (stackIsNotEmpty()) { + pop(); + } + + scriptStart = textStart; + scriptLimit = textStart; + scriptCode = UScript.INVALID_CODE; + parenSP = -1; + pushCount = 0; + fixupCount = 0; + + textIndex = textStart; + } + + /** + * Reset the iterator to iterate over the given range of the text. Throws + * IllegalArgumentException if the range is outside of the bounds of the + * character array. + * + * @param start the index of the new first character over which to iterate + * @param count the new number of characters over which to iterate. + * @exception IllegalArgumentException If invalid arguments are passed. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final void reset(int start, int count) + throws IllegalArgumentException + { + int len = 0; + + if (text != null) { + len = text.length; + } + + if (start < 0 || count < 0 || start > len - count) { + throw new IllegalArgumentException(); + } + + textStart = start; + textLimit = start + count; + + reset(); + } + + /** + * Reset the iterator to iterate over count characters + * in chars starting at start. This allows + * clients to reuse an iterator. + * + * @param chars the new array of characters over which to iterate. + * @param start the index of the first character over which to iterate. + * @param count the number of characters over which to iterate. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final void reset(char[] chars, int start, int count) + { + if (chars == null) { + chars = emptyCharArray; + } + + text = chars; + + reset(start, count); + } + + /** + * Reset the iterator to iterate over the characters + * in chars. This allows clients to reuse an iterator. 
+ * + * @param chars the new array of characters over which to iterate. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final void reset(char[] chars) + { + int length = 0; + + if (chars != null) { + length = chars.length; + } + + reset(chars, 0, length); + } + + /** + * Reset the iterator to iterate over count characters + * in str starting at start. This allows + * clients to reuse an iterator. + * + * @param str the new string of characters over which to iterate. + * @param start the index of the first character over which to iterate. + * @param count the number of characters over which to iterate. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final void reset(String str, int start, int count) + { + char[] chars = null; + + if (str != null) { + chars = str.toCharArray(); + } + + reset(chars, start, count); + } + + /** + * Reset the iterator to iterate over the characters + * in str. This allows clients to reuse an iterator. + * + * @param str the new string of characters over which to iterate. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final void reset(String str) + { + int length = 0; + + if (str != null) { + length = str.length(); + } + + reset(str, 0, length); + } + + + + /** + * Get the starting index of the current script run. + * + * @return the index of the first character in the current script run. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final int getScriptStart() + { + return scriptStart; + } + + /** + * Get the index of the first character after the current script run. + * + * @return the index of the first character after the current script run. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final int getScriptLimit() + { + return scriptLimit; + } + + /** + * Get the script code for the script of the current script run. 
+ * + * @return the script code for the script of the current script run. + * @see com.ibm.icu.lang.UScript + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final int getScriptCode() + { + return scriptCode; + } + + /** + * Find the next script run. Returns false if there + * isn't another run, returns true if there is. + * + * @return false if there isn't another run, true if there is. + * + * @internal + * @deprecated This API is ICU internal only. + */ + public final boolean next() + { + // if we've fallen off the end of the text, we're done + if (scriptLimit >= textLimit) { + return false; + } + + scriptCode = UScript.COMMON; + scriptStart = scriptLimit; + + syncFixup(); + + while (textIndex < textLimit) { + int ch = UTF16.charAt(text, textStart, textLimit, textIndex - textStart); + int codePointCount = UTF16.getCharCount(ch); + int sc = UScript.getScript(ch); + int pairIndex = getPairIndex(ch); + + textIndex += codePointCount; + + // Paired character handling: + // + // if it's an open character, push it onto the stack. + // if it's a close character, find the matching open on the + // stack, and use that script code. Any non-matching open + // characters above it on the stack will be popped. + if (pairIndex >= 0) { + if ((pairIndex & 1) == 0) { + push(pairIndex, scriptCode); + } else { + int pi = pairIndex & ~1; + + while (stackIsNotEmpty() && top().pairIndex != pi) { + pop(); + } + + if (stackIsNotEmpty()) { + sc = top().scriptCode; + } + } + } + + if (sameScript(scriptCode, sc)) { + if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) { + scriptCode = sc; + + fixup(scriptCode); + } + + // if this character is a close paired character, + // pop the matching open character from the stack + if (pairIndex >= 0 && (pairIndex & 1) != 0) { + pop(); + } + } else { + // We've just seen the first character of + // the next run. Back over it so we'll see + // it again the next time. 
+ textIndex -= codePointCount; + break; + } + } + + scriptLimit = textIndex; + return true; + } + + /** + * Compare two script codes to see if they are in the same script. If one script is + * a strong script, and the other is INHERITED or COMMON, it will compare equal. + * + * @param scriptOne one of the script codes. + * @param scriptTwo the other script code. + * @return true if either code is at or below UScript.INHERITED, or if the two codes are equal. + * @see com.ibm.icu.lang.UScript + */ + private static boolean sameScript(int scriptOne, int scriptTwo) + { + return scriptOne <= UScript.INHERITED || scriptTwo <= UScript.INHERITED || scriptOne == scriptTwo; + } + + /* + * An internal class which holds entries on the paren stack: a pair index + * together with the script code recorded for that entry. + */ + private static final class ParenStackEntry + { + int pairIndex; + int scriptCode; + + public ParenStackEntry(int thePairIndex, int theScriptCode) + { + pairIndex = thePairIndex; + scriptCode = theScriptCode; + } + } + + /* Reduce a stack index modulo PAREN_STACK_DEPTH. */ + private static final int mod(int sp) + { + return sp % PAREN_STACK_DEPTH; + } + + /* Advance a stack index by count slots, wrapping around the end of the stack. */ + private static final int inc(int sp, int count) + { + return mod(sp + count); + } + + /* Advance a stack index by one slot. */ + private static final int inc(int sp) + { + return inc(sp, 1); + } + + /* Move a stack index back by count slots, wrapping around the start of the stack. */ + private static final int dec(int sp, int count) + { + return mod(sp + PAREN_STACK_DEPTH - count); + } + + /* Move a stack index back by one slot. */ + private static final int dec(int sp) + { + return dec(sp, 1); + } + + /* Return count + 1, but never more than PAREN_STACK_DEPTH. */ + private static final int limitInc(int count) + { + if (count < PAREN_STACK_DEPTH) { + count += 1; + } + + return count; + } + + /* True when no entries are currently pushed. */ + private final boolean stackIsEmpty() + { + return pushCount <= 0; + } + + private final boolean stackIsNotEmpty() + { + return ! 
stackIsEmpty(); + } + + /* Push a new entry; pushCount and fixupCount saturate at PAREN_STACK_DEPTH, and the stack index wraps so the oldest entry is overwritten when the stack is full. */ + private final void push(int pairIndex, int scrptCode) + { + pushCount = limitInc(pushCount); + fixupCount = limitInc(fixupCount); + + parenSP = inc(parenSP); + parenStack[parenSP] = new ParenStackEntry(pairIndex, scrptCode); + } + + /* Discard the top entry, if there is one. */ + private final void pop() + { + + if (stackIsEmpty()) { + return; + } + + parenStack[parenSP] = null; + + if (fixupCount > 0) { + fixupCount -= 1; + } + + pushCount -= 1; + parenSP = dec(parenSP); + + // If the stack is now empty, reset the stack + // pointers to their initial values. + if (stackIsEmpty()) { + parenSP = -1; + } + } + + /* Return the top entry without removing it; callers check stackIsNotEmpty() first. */ + private final ParenStackEntry top() + { + return parenStack[parenSP]; + } + + /* Forget any entries pending a script fix-up. */ + private final void syncFixup() + { + fixupCount = 0; + } + + /* Assign scrptCode to the fixupCount most recently pushed entries, then leave nothing pending. */ + private final void fixup(int scrptCode) + { + int fixupSP = dec(parenSP, fixupCount); + + while (fixupCount-- > 0) { + fixupSP = inc(fixupSP); + parenStack[fixupSP].scriptCode = scrptCode; + } + } + + private char[] emptyCharArray = {}; + + private char[] text; + + private int textIndex; + private int textStart; + private int textLimit; + + private int scriptStart; + private int scriptLimit; + private int scriptCode; + + private static final int PAREN_STACK_DEPTH = 32; + /* The stack is per-instance state, like parenSP, pushCount and fixupCount that index into it. A static array would be shared by every UScriptRun, letting one iterator overwrite or null out entries that another iterator still refers to. */ + private ParenStackEntry parenStack[] = new ParenStackEntry[PAREN_STACK_DEPTH]; + private int parenSP = -1; + private int pushCount = 0; + private int fixupCount = 0; + + /** + * Find the highest bit that's set in a word. Uses a binary search through + * the bits. + * + * @param n the word in which to find the highest bit that's set. + * @return the bit number (counting from the low order bit) of the highest bit. 
+ */ + private static final byte highBit(int n) + { + /* zero and negative values are not searched; -32 is returned as a sentinel */ + if (n <= 0) { + return -32; + } + + byte bit = 0; + + /* each step tests the upper half of the remaining bit range and, if anything is set there, shifts it down and records the offset */ + if (n >= 1 << 16) { + n >>= 16; + bit += 16; + } + + if (n >= 1 << 8) { + n >>= 8; + bit += 8; + } + + if (n >= 1 << 4) { + n >>= 4; + bit += 4; + } + + if (n >= 1 << 2) { + n >>= 2; + bit += 2; + } + + if (n >= 1 << 1) { + n >>= 1; + bit += 1; + } + + return bit; + } + + /** + * Search the pairedChars array for the given character. + * The search is a binary search, so it relies on pairedChars being sorted in ascending order. + * + * @param ch the character for which to search. + * @return the index of the character in the table, or -1 if it's not there. + */ + private static int getPairIndex(int ch) + { + int probe = pairedCharPower; + int index = 0; + + /* skip the first pairedCharExtra entries up front when ch is not below them, so the remaining search runs in power-of-two steps */ + if (ch >= pairedChars[pairedCharExtra]) { + index = pairedCharExtra; + } + + while (probe > (1 << 0)) { + probe >>= 1; + + if (ch >= pairedChars[index + probe]) { + index += probe; + } + } + + /* index now names the last entry that is not greater than ch; it is a hit only on an exact match */ + if (pairedChars[index] != ch) { + index = -1; + } + + return index; + } + + /* Entries are listed in ascending order; each opening character sits at an even index and its closing partner at the following odd index. */ + private static int pairedChars[] = { + 0x0028, 0x0029, // ascii paired punctuation + 0x003c, 0x003e, + 0x005b, 0x005d, + 0x007b, 0x007d, + 0x00ab, 0x00bb, // guillemets + 0x2018, 0x2019, // general punctuation + 0x201c, 0x201d, + 0x2039, 0x203a, + 0x3008, 0x3009, // chinese paired punctuation + 0x300a, 0x300b, + 0x300c, 0x300d, + 0x300e, 0x300f, + 0x3010, 0x3011, + 0x3014, 0x3015, + 0x3016, 0x3017, + 0x3018, 0x3019, + 0x301a, 0x301b + }; + + /* pairedCharPower is 1 shifted left by highBit of the table length; pairedCharExtra is the table length minus that value. */ + private static int pairedCharPower = 1 << highBit(pairedChars.length); + private static int pairedCharExtra = pairedChars.length - pairedCharPower; +} + diff --git a/main/classes/core/src/com/ibm/icu/lang/package.html b/main/classes/core/src/com/ibm/icu/lang/package.html new file mode 100644 index 00000000000..913282d7242 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/lang/package.html @@ -0,0 +1,16 @@ + + + + +C:ICU4J .lang Package Overview + + + + +

    Enhanced character property and surrogate support.

    + +UCharacter supports all characters and properties defined in the latest version of Unicode, including properties of surrogate characters. It provides new API for querying surrogate characters (represented as int) and also supports the java.lang.Character API. UScript and UScriptRun provide information about scripts, which is not available through the Java APIs.

    + + diff --git a/main/classes/core/src/com/ibm/icu/math/BigDecimal.java b/main/classes/core/src/com/ibm/icu/math/BigDecimal.java new file mode 100644 index 00000000000..a108b560820 --- /dev/null +++ b/main/classes/core/src/com/ibm/icu/math/BigDecimal.java @@ -0,0 +1,3882 @@ +/* Generated from 'BigDecimal.nrx' 8 Sep 2000 11:10:50 [v2.00] */ +/* Options: Binary Comments Crossref Format Java Logo Strictargs Strictcase Trace2 Verbose3 */ +package com.ibm.icu.math; + +import java.math.BigInteger; + +import com.ibm.icu.lang.UCharacter; + +/* ------------------------------------------------------------------ */ +/* BigDecimal -- Decimal arithmetic for Java */ +/* ------------------------------------------------------------------ */ +/* Copyright IBM Corporation, 1996-2010. All Rights Reserved. */ +/* */ +/* The BigDecimal class provides immutable arbitrary-precision */ +/* floating point (including integer) decimal numbers. */ +/* */ +/* As the numbers are decimal, there is an exact correspondence */ +/* between an instance of a BigDecimal object and its String */ +/* representation; the BigDecimal class provides direct conversions */ +/* to and from String and character array objects, as well as */ +/* conversions to and from the Java primitive types (which may not */ +/* be exact). */ +/* ------------------------------------------------------------------ */ +/* Notes: */ +/* */ +/* 1. A BigDecimal object is never changed in value once constructed; */ +/* this avoids the need for locking. Note in particular that the */ +/* mantissa array may be shared between many BigDecimal objects, */ +/* so that once exposed it must not be altered. */ +/* */ +/* 2. This class looks at MathContext class fields directly (for */ +/* performance). It must not and does not change them. */ +/* */ +/* 3. Exponent checking is delayed until finish(), as we know */ +/* intermediate calculations cannot cause 31-bit overflow. */ +/* [This assertion depends on MAX_DIGITS in MathContext.] 
*/ +/* */ +/* 4. Comments for the public API now follow the javadoc conventions. */ +/* The NetRexx -comments option is used to pass these comments */ +/* through to the generated Java code (with -format, if desired). */ +/* */ +/* 5. System.arraycopy is faster than explicit loop as follows */ +/* Mean length 4: equal */ +/* Mean length 8: x2 */ +/* Mean length 16: x3 */ +/* Mean length 24: x4 */ +/* From prior experience, we expect mean length a little below 8, */ +/* but arraycopy is still the one to use, in general, until later */ +/* measurements suggest otherwise. */ +/* */ +/* 6. 'DMSRCN' referred to below is the original (1981) IBM S/370 */ +/* assembler code implementation of the algorithms below; it is */ +/* now called IXXRCN and is available with the OS/390 and VM/ESA */ +/* operating systems. */ +/* ------------------------------------------------------------------ */ +/* Change History: */ +/* 1997.09.02 Initial version (derived from netrexx.lang classes) */ +/* 1997.09.12 Add lostDigits checking */ +/* 1997.10.06 Change mantissa to a byte array */ +/* 1997.11.22 Rework power [did not prepare arguments, etc.] */ +/* 1997.12.13 multiply did not prepare arguments */ +/* 1997.12.14 add did not prepare and align arguments correctly */ +/* 1998.05.02 0.07 packaging changes suggested by Sun and Oracle */ +/* 1998.05.21 adjust remainder operator finalization */ +/* 1998.06.04 rework to pass MathContext to finish() and round() */ +/* 1998.06.06 change format to use round(); support rounding modes */ +/* 1998.06.25 rename to BigDecimal and begin merge */ +/* zero can now have trailing zeros (i.e., exp\=0) */ +/* 1998.06.28 new methods: movePointXxxx, scale, toBigInteger */ +/* unscaledValue, valueof */ +/* 1998.07.01 improve byteaddsub to allow array reuse, etc. 
*/ +/* 1998.07.01 make null testing explicit to avoid JIT bug [Win32] */ +/* 1998.07.07 scaled division [divide(BigDecimal, int, int)] */ +/* 1998.07.08 setScale, faster equals */ +/* 1998.07.11 allow 1E6 (no sign) ; new double/float conversion */ +/* 1998.10.12 change package to com.ibm.icu.math */ +/* 1998.12.14 power operator no longer rounds RHS [to match ANSI] */ +/* add toBigDecimal() and BigDecimal(java.math.BigDecimal) */ +/* 1998.12.29 improve byteaddsub by using table lookup */ +/* 1999.02.04 lostdigits=0 behaviour rounds instead of digits+1 guard */ +/* 1999.02.05 cleaner code for BigDecimal(char[]) */ +/* 1999.02.06 add javadoc comments */ +/* 1999.02.11 format() changed from 7 to 2 method form */ +/* 1999.03.05 null pointer checking is no longer explicit */ +/* 1999.03.05 simplify; changes from discussion with J. Bloch: */ +/* null no longer permitted for MathContext; drop boolean, */ +/* byte, char, float, short constructor, deprecate double */ +/* constructor, no blanks in string constructor, add */ +/* offset and length version of char[] constructor; */ +/* add valueOf(double); drop booleanValue, charValue; */ +/* add ...Exact versions of remaining convertors */ +/* 1999.03.13 add toBigIntegerExact */ +/* 1999.03.13 1.00 release to IBM Centre for Java Technology */ +/* 1999.05.27 1.01 correct 0-0.2 bug under scaled arithmetic */ +/* 1999.06.29 1.02 constructors should not allow exponent > 9 digits */ +/* 1999.07.03 1.03 lost digits should not be checked if digits=0 */ +/* 1999.07.06 lost digits Exception message changed */ +/* 1999.07.10 1.04 more work on 0-0.2 (scaled arithmetic) */ +/* 1999.07.17 improve messages from pow method */ +/* 1999.08.08 performance tweaks */ +/* 1999.08.15 fastpath in multiply */ +/* 1999.11.05 1.05 fix problem in intValueExact [e.g., 5555555555] */ +/* 1999.12.22 1.06 remove multiply fastpath, and improve performance */ +/* 2000.01.01 copyright update [Y2K has arrived] */ +/* 2000.06.18 1.08 no longer deprecate 
BigDecimal(double) */ +/* ------------------------------------------------------------------ */ + +/** + * The BigDecimal class implements immutable arbitrary-precision decimal numbers. The methods of the + * BigDecimal class provide operations for fixed and floating point arithmetic, comparison, format + * conversions, and hashing. + *

    + * As the numbers are decimal, there is an exact correspondence between an instance of a BigDecimal object + * and its String representation; the BigDecimal class provides direct conversions to and from + * String and character array (char[]) objects, as well as conversions to and from the Java + * primitive types (which may not be exact) and BigInteger. + *

    + * In the descriptions of constructors and methods in this documentation, the value of a BigDecimal number + * object is shown as the result of invoking the toString() method on the object. The internal + * representation of a decimal number is neither defined nor exposed, and is not permitted to affect the result of any + * operation. + *

    + * The floating point arithmetic provided by this class is defined by the ANSI X3.274-1996 standard, and is also + * documented at http://www2.hursley.ibm.com/decimal
    + * [This URL will change.] + * + *

    Operator methods

    + *

    + * Operations on BigDecimal numbers are controlled by a {@link MathContext} object, which provides the + * context (precision and other information) for the operation. Methods that can take a MathContext + * parameter implement the standard arithmetic operators for BigDecimal objects and are known as + * operator methods. The default settings provided by the constant {@link MathContext#DEFAULT} (digits=9, + * form=SCIENTIFIC, lostDigits=false, roundingMode=ROUND_HALF_UP) perform general-purpose floating point + * arithmetic to nine digits of precision. The MathContext parameter must not be null. + *

    + * Each operator method also has a version provided which does not take a MathContext parameter. For this + * version of each method, the context settings used are digits=0, + * form=PLAIN, lostDigits=false, roundingMode=ROUND_HALF_UP; these settings perform fixed point arithmetic with + * unlimited precision, as defined for the original BigDecimal class in Java 1.1 and Java 1.2. + *

    + * For monadic operators, only the optional MathContext parameter is present; the operation acts upon the + * current object. + *

    + * For dyadic operators, a BigDecimal parameter is always present; it must not be null. The + * operation acts with the current object being the left-hand operand and the BigDecimal parameter being + * the right-hand operand. + *

    + * For example, adding two BigDecimal objects referred to by the names award and + * extra could be written as any of: + *

    + * + * award.add(extra) + *
    award.add(extra, MathContext.DEFAULT) + *
    award.add(extra, acontext) + *
    + *

    + * (where acontext is a MathContext object), which would return a BigDecimal + * object whose value is the result of adding award and extra under the appropriate context + * settings. + *

    + * When a BigDecimal operator method is used, a set of rules defines what the result will be (and, by + * implication, how the result would be represented as a character string). These rules are defined in the BigDecimal + * arithmetic documentation (see the URL above), but in summary: + *