From c1425af28f279f326fd4775f4ea7f41c14b8da0e Mon Sep 17 00:00:00 2001 From: Craig Cornelius Date: Wed, 21 Sep 2016 19:55:37 +0000 Subject: [PATCH] ICU-12748 Add @Overrides and fix whitespace, thanks to Eclipse X-SVN-Rev: 39313 --- .../src/com/ibm/icu/charset/Charset88591.java | 19 +- .../src/com/ibm/icu/charset/CharsetASCII.java | 30 +- .../src/com/ibm/icu/charset/CharsetBOCU1.java | 165 +-- .../src/com/ibm/icu/charset/CharsetCESU8.java | 7 +- .../com/ibm/icu/charset/CharsetCallback.java | 132 +- .../ibm/icu/charset/CharsetCompoundText.java | 167 +-- .../ibm/icu/charset/CharsetDecoderICU.java | 164 +-- .../ibm/icu/charset/CharsetEncoderICU.java | 55 +- .../src/com/ibm/icu/charset/CharsetHZ.java | 15 +- .../src/com/ibm/icu/charset/CharsetICU.java | 69 +- .../com/ibm/icu/charset/CharsetISO2022.java | 572 ++++---- .../src/com/ibm/icu/charset/CharsetLMBCS.java | 219 +-- .../ibm/icu/charset/CharsetProviderICU.java | 52 +- .../src/com/ibm/icu/charset/CharsetSCSU.java | 215 +-- .../src/com/ibm/icu/charset/CharsetUTF16.java | 31 +- .../src/com/ibm/icu/charset/CharsetUTF32.java | 13 +- .../src/com/ibm/icu/charset/CharsetUTF7.java | 139 +- .../src/com/ibm/icu/charset/CharsetUTF8.java | 18 +- .../charset/UConverterAliasDataReader.java | 5 +- .../ibm/icu/charset/UConverterDataReader.java | 13 +- .../ibm/icu/impl/coll/CollationBuilder.java | 5 +- .../icu/impl/coll/CollationDataReader.java | 2 +- .../ibm/icu/impl/coll/CollationWeights.java | 4 +- .../src/com/ibm/icu/text/AlphabeticIndex.java | 72 +- .../com/ibm/icu/text/CollatorServiceShim.java | 11 + .../icu/util/GlobalizationPreferences.java | 135 +- .../com/ibm/icu/impl/CalendarAstronomer.java | 404 +++--- .../core/src/com/ibm/icu/impl/CharTrie.java | 36 +- .../icu/impl/CharacterIteratorWrapper.java | 39 +- .../src/com/ibm/icu/impl/ClassLoaderUtil.java | 13 +- .../com/ibm/icu/impl/DateNumberFormat.java | 18 +- .../core/src/com/ibm/icu/impl/ICUBinary.java | 20 +- .../core/src/com/ibm/icu/impl/ICUConfig.java | 1 + 
.../core/src/com/ibm/icu/impl/ICUData.java | 3 + .../com/ibm/icu/impl/ICULocaleService.java | 50 +- .../src/com/ibm/icu/impl/ICUNotifier.java | 7 +- .../core/src/com/ibm/icu/impl/ICURWLock.java | 11 +- .../core/src/com/ibm/icu/impl/ICUService.java | 13 +- .../icu/impl/IllegalIcuArgumentException.java | 9 +- .../core/src/com/ibm/icu/impl/IntTrie.java | 34 +- .../com/ibm/icu/impl/IterableComparator.java | 1 + .../ibm/icu/impl/LocaleDisplayNamesImpl.java | 3 + .../src/com/ibm/icu/impl/LocaleIDParser.java | 19 +- .../src/com/ibm/icu/impl/Norm2AllModes.java | 1 + .../src/com/ibm/icu/impl/Normalizer2Impl.java | 9 +- .../src/com/ibm/icu/impl/OlsonTimeZone.java | 79 +- .../icu/impl/PVecToTrieCompactHandler.java | 6 +- .../src/com/ibm/icu/impl/PropsVectors.java | 67 +- .../core/src/com/ibm/icu/impl/Relation.java | 17 +- .../impl/ReplaceableUCharacterIterator.java | 81 +- .../ibm/icu/impl/ResourceBundleWrapper.java | 29 +- .../core/src/com/ibm/icu/impl/Row.java | 8 + .../ibm/icu/impl/RuleCharacterIterator.java | 9 +- .../src/com/ibm/icu/impl/SimpleCache.java | 3 + .../ibm/icu/impl/StringPrepDataReader.java | 7 +- .../src/com/ibm/icu/impl/StringRange.java | 21 +- .../com/ibm/icu/impl/TZDBTimeZoneNames.java | 9 +- .../src/com/ibm/icu/impl/TextTrieMap.java | 18 +- .../src/com/ibm/icu/impl/TimeZoneAdapter.java | 19 +- .../ibm/icu/impl/TimeZoneGenericNames.java | 30 +- .../core/src/com/ibm/icu/impl/Trie.java | 83 +- .../core/src/com/ibm/icu/impl/Trie2.java | 282 ++-- .../src/com/ibm/icu/impl/TrieIterator.java | 142 +- .../core/src/com/ibm/icu/impl/UBiDiProps.java | 5 +- .../com/ibm/icu/impl/UCharArrayIterator.java | 16 +- .../icu/impl/UCharacterIteratorWrapper.java | 22 +- .../ibm/icu/impl/UCharacterNameReader.java | 47 +- .../com/ibm/icu/impl/UCharacterProperty.java | 35 +- .../com/ibm/icu/impl/UPropertyAliases.java | 3 +- .../core/src/com/ibm/icu/impl/URLHandler.java | 64 +- .../src/com/ibm/icu/impl/UnicodeRegex.java | 23 +- .../com/ibm/icu/impl/data/HolidayBundle.java | 10 +- 
.../ibm/icu/impl/data/HolidayBundle_da.java | 1 + .../icu/impl/data/HolidayBundle_da_DK.java | 36 +- .../ibm/icu/impl/data/HolidayBundle_de.java | 1 + .../icu/impl/data/HolidayBundle_de_AT.java | 1 + .../icu/impl/data/HolidayBundle_de_DE.java | 1 + .../ibm/icu/impl/data/HolidayBundle_el.java | 1 + .../icu/impl/data/HolidayBundle_el_GR.java | 1 + .../ibm/icu/impl/data/HolidayBundle_en.java | 1 + .../icu/impl/data/HolidayBundle_en_CA.java | 1 + .../icu/impl/data/HolidayBundle_en_GB.java | 1 + .../icu/impl/data/HolidayBundle_en_US.java | 1 + .../ibm/icu/impl/data/HolidayBundle_es.java | 1 + .../icu/impl/data/HolidayBundle_es_MX.java | 1 + .../ibm/icu/impl/data/HolidayBundle_fr.java | 1 + .../icu/impl/data/HolidayBundle_fr_CA.java | 1 + .../icu/impl/data/HolidayBundle_fr_FR.java | 1 + .../ibm/icu/impl/data/HolidayBundle_it.java | 1 + .../icu/impl/data/HolidayBundle_it_IT.java | 1 + .../ibm/icu/impl/data/HolidayBundle_iw.java | 1 + .../icu/impl/data/HolidayBundle_iw_IL.java | 1 + .../icu/impl/data/HolidayBundle_ja_JP.java | 1 + .../com/ibm/icu/impl/data/ResourceReader.java | 11 +- .../impl/duration/BasicDurationFormatter.java | 27 +- .../BasicDurationFormatterFactory.java | 13 +- .../duration/BasicPeriodBuilderFactory.java | 59 +- .../impl/duration/BasicPeriodFormatter.java | 18 +- .../duration/BasicPeriodFormatterFactory.java | 11 +- .../duration/BasicPeriodFormatterService.java | 8 +- .../src/com/ibm/icu/impl/duration/Period.java | 28 +- .../com/ibm/icu/impl/duration/TimeUnit.java | 23 +- ...sourceBasedPeriodFormatterDataService.java | 2 + .../impl/duration/impl/XMLRecordReader.java | 11 + .../impl/duration/impl/XMLRecordWriter.java | 11 + .../com/ibm/icu/impl/locale/AsciiUtil.java | 2 + .../com/ibm/icu/impl/locale/BaseLocale.java | 8 + .../com/ibm/icu/impl/locale/Extension.java | 1 + .../impl/locale/InternalLocaleBuilder.java | 4 + .../com/ibm/icu/impl/locale/KeyTypeData.java | 11 +- .../com/ibm/icu/impl/locale/LanguageTag.java | 25 +- 
.../ibm/icu/impl/locale/LocaleExtensions.java | 3 + .../ibm/icu/lang/UCharacterNameIterator.java | 5 +- .../core/src/com/ibm/icu/math/BigDecimal.java | 262 ++-- .../src/com/ibm/icu/math/MathContext.java | 103 +- .../src/com/ibm/icu/text/ArabicShaping.java | 403 +++--- .../core/src/com/ibm/icu/text/BidiRun.java | 1 + .../src/com/ibm/icu/text/BreakIterator.java | 35 +- .../com/ibm/icu/text/BurmeseBreakEngine.java | 24 +- .../ibm/icu/text/BytesDictionaryMatcher.java | 8 +- .../ibm/icu/text/CharsDictionaryMatcher.java | 6 +- .../src/com/ibm/icu/text/CharsetMatch.java | 37 +- .../com/ibm/icu/text/CharsetRecog_2022.java | 64 +- .../com/ibm/icu/text/CharsetRecog_UTF8.java | 20 +- .../ibm/icu/text/CharsetRecog_Unicode.java | 58 +- .../com/ibm/icu/text/CharsetRecog_mbcs.java | 314 +++-- .../com/ibm/icu/text/CharsetRecog_sbcs.java | 1183 +++++++++-------- .../com/ibm/icu/text/ChineseDateFormat.java | 24 +- .../icu/text/ChineseDateFormatSymbols.java | 5 +- .../src/com/ibm/icu/text/CjkBreakEngine.java | 33 +- .../com/ibm/icu/text/DateFormatSymbols.java | 5 +- .../ibm/icu/text/DictionaryBreakEngine.java | 36 +- .../src/com/ibm/icu/text/DurationFormat.java | 12 +- .../com/ibm/icu/text/LocaleDisplayNames.java | 3 +- .../src/com/ibm/icu/text/MessageFormat.java | 25 +- .../src/com/ibm/icu/text/MessagePattern.java | 7 +- .../core/src/com/ibm/icu/text/Normalizer.java | 333 ++--- .../src/com/ibm/icu/text/PluralFormat.java | 13 +- .../src/com/ibm/icu/text/PluralRanges.java | 19 +- .../icu/text/StringPrepParseException.java | 65 +- .../src/com/ibm/icu/text/StringTransform.java | 1 + .../src/com/ibm/icu/text/TimeZoneFormat.java | 117 +- .../com/ibm/icu/text/UCharacterIterator.java | 423 +++--- .../src/com/ibm/icu/text/UnicodeFilter.java | 1 + .../core/src/com/ibm/icu/text/UnicodeSet.java | 99 +- .../com/ibm/icu/util/AnnualTimeZoneRule.java | 35 +- .../core/src/com/ibm/icu/util/BytesTrie.java | 4 + .../core/src/com/ibm/icu/util/CharsTrie.java | 4 + 
.../core/src/com/ibm/icu/util/Currency.java | 106 +- .../src/com/ibm/icu/util/DateTimeRule.java | 65 +- .../src/com/ibm/icu/util/EasterHoliday.java | 10 +- .../core/src/com/ibm/icu/util/Holiday.java | 6 +- .../com/ibm/icu/util/InitialTimeZoneRule.java | 24 +- .../core/src/com/ibm/icu/util/LocaleData.java | 35 +- .../core/src/com/ibm/icu/util/Measure.java | 9 +- .../core/src/com/ibm/icu/util/Output.java | 3 +- .../core/src/com/ibm/icu/util/OutputInt.java | 1 + .../src/com/ibm/icu/util/SimpleTimeZone.java | 5 + .../ibm/icu/util/TimeArrayTimeZoneRule.java | 19 +- .../core/src/com/ibm/icu/util/TimeZone.java | 88 +- .../src/com/ibm/icu/util/TimeZoneRule.java | 61 +- .../com/ibm/icu/util/TimeZoneTransition.java | 21 +- .../src/com/ibm/icu/util/UResourceBundle.java | 39 +- .../com/ibm/icu/impl/ICUCurrencyMetaInfo.java | 13 +- .../javaspi/util/CurrencyNameProviderICU.java | 2 +- .../javaspi/util/LocaleNameProviderICU.java | 2 +- .../ibm/icu/impl/jdkadapter/CollatorICU.java | 11 + .../com/ibm/icu/text/AnyTransliterator.java | 7 +- .../com/ibm/icu/text/BreakTransliterator.java | 13 + .../ibm/icu/text/CaseFoldTransliterator.java | 11 +- .../ibm/icu/text/CompoundTransliterator.java | 6 +- .../ibm/icu/text/EscapeTransliterator.java | 17 +- .../com/ibm/icu/text/FunctionReplacer.java | 3 + .../icu/text/NameUnicodeTransliterator.java | 10 +- .../icu/text/NormalizationTransliterator.java | 12 +- .../com/ibm/icu/text/NullTransliterator.java | 1 + .../ibm/icu/text/RemoveTransliterator.java | 2 + .../ibm/icu/text/RuleBasedTransliterator.java | 6 +- .../src/com/ibm/icu/text/StringMatcher.java | 9 +- .../src/com/ibm/icu/text/StringReplacer.java | 5 +- .../com/ibm/icu/text/TransliterationRule.java | 3 +- .../src/com/ibm/icu/text/Transliterator.java | 110 +- .../ibm/icu/text/TransliteratorParser.java | 30 +- .../ibm/icu/text/TransliteratorRegistry.java | 6 +- .../ibm/icu/text/UnescapeTransliterator.java | 14 +- .../icu/text/UnicodeNameTransliterator.java | 8 +- 186 files changed, 4949 
insertions(+), 4118 deletions(-) diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java index 1893f31ea3c..1ea9cfc9f21 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java @@ -27,6 +27,7 @@ class Charset88591 extends CharsetASCII { super(cs); } + @Override protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target, byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) { @@ -40,6 +41,7 @@ class Charset88591 extends CharsetASCII { return null; } + @Override protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) { byte ch; /* @@ -54,7 +56,7 @@ class Charset88591 extends CharsetASCII { return CoderResult.OVERFLOW; } } - + return CoderResult.UNDERFLOW; } } @@ -64,6 +66,7 @@ class Charset88591 extends CharsetASCII { super(cs); } + @Override protected final CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target, char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit, boolean flush) { @@ -74,7 +77,7 @@ class Charset88591 extends CharsetASCII { * char in the source is within the correct range */ for (i = oldSource; i < limit; i++) { - ch = (int) sourceArray[i]; + ch = sourceArray[i]; if ((ch & 0xff00) == 0) { targetArray[i + offset] = (byte) ch; } else { @@ -95,6 +98,7 @@ class Charset88591 extends CharsetASCII { return null; } + @Override protected final CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) { int ch; @@ -102,9 +106,9 @@ class Charset88591 extends CharsetASCII { * perform 88591 conversion from the source buffer to the target buffer, making sure * each char in the source is within the correct range */ - + while (source.hasRemaining()) { - ch = (int) source.get(); + ch = source.get(); if ((ch & 0xff00) == 0) 
{ if (target.hasRemaining()) { target.put((byte) ch); @@ -119,20 +123,23 @@ class Charset88591 extends CharsetASCII { return encodeMalformedOrUnmappable(source, ch, flush); } } - + return CoderResult.UNDERFLOW; } } + @Override public CharsetDecoder newDecoder() { return new CharsetDecoder88591(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoder88591(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ setFillIn.add(0,0xff); } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java index deb34a2e4af..5ea22793666 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java @@ -36,6 +36,7 @@ class CharsetASCII extends CharsetICU { super(cs); } + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { if (!source.hasRemaining()) { @@ -62,7 +63,7 @@ class CharsetASCII extends CharsetICU { int sourceOffset = source.arrayOffset(); int sourceIndex = oldSource + sourceOffset; int sourceLength = source.limit() - oldSource; - + char[] targetArray = target.array(); int targetOffset = target.arrayOffset(); int targetIndex = oldTarget + targetOffset; @@ -140,7 +141,7 @@ class CharsetASCII extends CharsetICU { */ while (source.hasRemaining()) { ch = source.get() & 0xff; - + if ((ch & 0x80) == 0) { if (target.hasRemaining()) { target.put((char)ch); @@ -155,7 +156,7 @@ class CharsetASCII extends CharsetICU { return decodeMalformedOrUnmappable(ch); } } - + return CoderResult.UNDERFLOW; } @@ -179,11 +180,13 @@ class CharsetASCII extends CharsetICU { private final static int NEED_TO_WRITE_BOM = 1; + @Override protected void implReset() { super.implReset(); fromUnicodeStatus = NEED_TO_WRITE_BOM; } + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer 
target, IntBuffer offsets, boolean flush) { if (!source.hasRemaining()) { @@ -246,9 +249,9 @@ class CharsetASCII extends CharsetICU { } } else { /* unoptimized loop */ - + cr = encodeLoopCoreUnoptimized(source, target, flush); - + if (cr == CoderResult.OVERFLOW) { source.position(source.position() - 1); /* rewind by 1 */ } @@ -274,7 +277,7 @@ class CharsetASCII extends CharsetICU { * perform ascii conversion from the source array to the target array, making sure each * char in the source is within the correct range */ - for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++) + for (i = oldSource; i < limit && (((ch = sourceArray[i]) & 0xff80) == 0); i++) targetArray[i + offset] = (byte) ch; /* @@ -292,14 +295,14 @@ class CharsetASCII extends CharsetICU { protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) { int ch; - + /* * perform ascii conversion from the source buffer to the target buffer, making sure * each char in the source is within the correct range */ while (source.hasRemaining()) { - ch = (int) source.get(); - + ch = source.get(); + if ((ch & 0xff80) == 0) { if (target.hasRemaining()) { target.put((byte) ch); @@ -314,7 +317,7 @@ class CharsetASCII extends CharsetICU { return encodeMalformedOrUnmappable(source, ch, flush); } } - + return CoderResult.UNDERFLOW; } @@ -331,7 +334,7 @@ class CharsetASCII extends CharsetICU { private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) { /* * ASCII doesn't support characters in the BMP, so if handleSurrogates returns null, - * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable. + * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable. 
*/ CoderResult cr = handleSurrogates(source, lead); if (cr != null) { @@ -344,14 +347,17 @@ class CharsetASCII extends CharsetICU { } + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderASCII(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderASCII(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ setFillIn.add(0,0x7f); } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java index f68a11c5c8f..d4111dfaad2 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java @@ -23,12 +23,12 @@ import com.ibm.icu.text.UnicodeSet; * @author krajwade * */ -class CharsetBOCU1 extends CharsetICU { +class CharsetBOCU1 extends CharsetICU { /* BOCU constants and macros */ - + /* initial value for "prev": middle of the ASCII range */ private static final byte BOCU1_ASCII_PREV = 0x40; - + /* bounding byte values for differences */ private static final int BOCU1_MIN = 0x21; private static final int BOCU1_MIDDLE = 0x90; @@ -45,7 +45,7 @@ class CharsetBOCU1 extends CharsetICU { /* number of trail bytes */ private static final int BOCU1_TRAIL_COUNT =((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT); - + /* * number of positive and negative single-byte codes * (counting 0==BOCU1_MIDDLE among the positive ones) @@ -84,8 +84,8 @@ class CharsetBOCU1 extends CharsetICU { /* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). 
*/ /* private static int BOCU1_LENGTH_FROM_LEAD(int lead) { - return ((BOCU1_START_NEG_2<=(lead) && (lead)>24 : 4); } - + /* * Byte value map for control codes, * from external byte values 0x00..0x20 @@ -123,7 +123,7 @@ class CharsetBOCU1 extends CharsetICU { * from trail byte values 0..19 (0..0x13) as used in the difference calculation * to external byte values 0x00..0x20. */ - private static final int[] + private static final int[] bocu1TrailToByte = { /* 0 1 2 3 4 5 6 7 */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, @@ -134,8 +134,8 @@ class CharsetBOCU1 extends CharsetICU { /* 10 11 12 13 */ 0x1c, 0x1d, 0x1e, 0x1f }; - - + + /* * 12 commonly used C0 control codes (and space) are only used to encode * themselves directly, @@ -166,8 +166,8 @@ class CharsetBOCU1 extends CharsetICU { */ private static int BOCU1_TRAIL_TO_BYTE(int trail) { return ((trail)>=BOCU1_TRAIL_CONTROLS_COUNT ? (trail)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[trail]); - } - + } + /* BOCU-1 implementation functions ------------------------------------------ */ private static int BOCU1_SIMPLE_PREV(int c){ return (((c)&~0x7f)+BOCU1_ASCII_PREV); @@ -201,7 +201,7 @@ class CharsetBOCU1 extends CharsetICU { private static int BOCU1_PREV(int c) { return ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c)); } - + protected byte[] fromUSubstitution = new byte[]{(byte)0x1A}; /* Faster versions of packDiff() for single-byte-encoded diff values. */ @@ -219,35 +219,35 @@ class CharsetBOCU1 extends CharsetICU { /** Is a diff value encodable in two bytes? 
*/ private static boolean DIFF_IS_DOUBLE(int diff){ return (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2); - } - + } + public CharsetBOCU1(String icuCanonicalName, String javaCanonicalName, String[] aliases){ super(icuCanonicalName, javaCanonicalName, aliases); - maxBytesPerChar = 4; + maxBytesPerChar = 4; minBytesPerChar = 1; maxCharsPerByte = 1; } - + class CharsetEncoderBOCU extends CharsetEncoderICU { public CharsetEncoderBOCU(CharsetICU cs) { super(cs,fromUSubstitution); } - + int sourceIndex, nextSourceIndex; int prev, c , diff; boolean checkNegative; boolean LoopAfterTrail; int targetCapacity; - CoderResult cr; - + CoderResult cr; + /* label values for supporting behavior similar to goto in C */ private static final int fastSingle=0; private static final int getTrail=1; private static final int regularLoop=2; - + private boolean LabelLoop; //used to break the while loop private int labelType = fastSingle; //labeType is set to fastSingle to start the code from fastSingle: - + /** * Integer division and modulo with negative numerators * yields negative modulo results and quotients that are one more than @@ -263,15 +263,15 @@ class CharsetBOCU1 extends CharsetICU { */ private int NEGDIVMOD(int n, int d, int m) { diff = n; - (m)=(diff)%(d); - (diff)/=(d); - if((m)<0) { + (m)=(diff)%(d); + (diff)/=(d); + if((m)<0) { --(diff); (m)+=(d); } return m; } - + /** * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes * and return a packed integer with them. 
@@ -385,32 +385,33 @@ class CharsetBOCU1 extends CharsetICU { } return result; } - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){ cr = CoderResult.UNDERFLOW; - + LabelLoop = true; //used to break the while loop checkNegative = false; // its value is set to true to get out of while loop when c = -c LoopAfterTrail = false; // its value is set to true to ignore code before getTrail: - + /*set up the local pointers*/ targetCapacity = target.limit() - target.position(); c = fromUChar32; prev = fromUnicodeStatus; - + if(prev==0){ prev = BOCU1_ASCII_PREV; } - + /*sourceIndex ==-1 if the current characte began in the previous buffer*/ sourceIndex = c == 0 ? 0: -1; nextSourceIndex = 0; - + /*conversion loop*/ if(c!=0 && targetCapacity>0){ labelType = getTrail; } - + while(LabelLoop){ switch(labelType){ case fastSingle: @@ -424,12 +425,12 @@ class CharsetBOCU1 extends CharsetICU { break; } } - + return cr; } - - private int fastSingle(CharBuffer source, ByteBuffer target, IntBuffer offsets){ -//fastSingle: + + private int fastSingle(CharBuffer source, ByteBuffer target, IntBuffer offsets){ +//fastSingle: /*fast loop for single-byte differences*/ /*use only one loop counter variable , targetCapacity, not also source*/ diff = source.limit() - source.position(); @@ -464,7 +465,7 @@ class CharsetBOCU1 extends CharsetICU { } return regularLoop; } - + private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){ if(source.hasRemaining()){ /*test the following code unit*/ @@ -493,11 +494,11 @@ class CharsetBOCU1 extends CharsetICU { /*regular loop for all classes*/ while(LoopAfterTrail || source.hasRemaining()){ if(LoopAfterTrail || targetCapacity>0){ - + if(!LoopAfterTrail){ c = source.get(); ++nextSourceIndex; - + if(c<=0x20){ /* * ISO C0 control & space: @@ -512,11 +513,11 @@ class CharsetBOCU1 extends CharsetICU { offsets.put(sourceIndex++); } --targetCapacity; - + 
sourceIndex=nextSourceIndex; continue; } - + if(UTF16.isLeadSurrogate((char)c)){ getTrail(source, target, offsets); if(checkNegative){ @@ -524,11 +525,11 @@ class CharsetBOCU1 extends CharsetICU { } } } - + if(LoopAfterTrail){ - LoopAfterTrail = false; + LoopAfterTrail = false; } - + /* * all other Unicode code points c==U+0021..U+10ffff * are encoded with the difference c-prev @@ -576,7 +577,7 @@ class CharsetBOCU1 extends CharsetICU { int length; /*will be 2..4*/ diff = packDiff(diff); length = BOCU1_LENGTH_FROM_PACKED(diff); - + /*write the output character bytes from diff and length*/ /*from the first if in the loop we know that targetCapacity>0*/ if(length<=targetCapacity){ @@ -631,7 +632,7 @@ class CharsetBOCU1 extends CharsetICU { break; } errorBufferLength = length; - + /* now output what fits into the regular target */ diff>>=8*length; /* length was reduced by targetCapacity */ switch(targetCapacity) { @@ -667,7 +668,7 @@ class CharsetBOCU1 extends CharsetICU { cr = CoderResult.OVERFLOW; break; } - + } /*set the converter state back into UConverter*/ fromUChar32 = c<0 ? 
-c :0; @@ -676,26 +677,26 @@ class CharsetBOCU1 extends CharsetICU { labelType = fastSingle; return labelType; } - + } - + static class CharsetDecoderBOCU extends CharsetDecoderICU{ public CharsetDecoderBOCU(CharsetICU cs) { super(cs); } - + int byteIndex; int sourceIndex, nextSourceIndex; int prev, c , diff, count; byte[] bytes; CoderResult cr; - + /* label values for supporting behavior similar to goto in C */ private static final int fastSingle=0; private static final int getTrail=1; private static final int regularLoop=2; private static final int endLoop=3; - + private boolean LabelLoop;//used to break the while loop private boolean afterTrail; // its value is set to true to ignore code after getTrail: private int labelType; @@ -711,8 +712,8 @@ class CharsetBOCU1 extends CharsetICU { /* BOCU-1-from-Unicode conversion functions --------------------------------- */ - - + + /** * Function for BOCU-1 decoder; handles multi-byte lead bytes. * @@ -758,7 +759,7 @@ class CharsetBOCU1 extends CharsetICU { /* return the state for decoding the trail byte(s) */ return (diffValue<<2)|countValue; } - + /** * Function for BOCU-1 decoder; handles multi-byte trail bytes. * @@ -788,37 +789,38 @@ class CharsetBOCU1 extends CharsetICU { return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); } } - + + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){ cr = CoderResult.UNDERFLOW; - - LabelLoop = true; - afterTrail = false; + + LabelLoop = true; + afterTrail = false; labelType = fastSingle; // labelType is set to fastSingle so t - + /*get the converter state*/ prev = toUnicodeStatus; - + if(prev==0){ prev = BOCU1_ASCII_PREV; } diff = mode; count = diff&3; diff>>=2; - + byteIndex = toULength; bytes = toUBytesArray; - + /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex=byteIndex==0 ? 
0 : -1; nextSourceIndex=0; - + /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ if(count>0 && byteIndex>0 && target.position() Callback API for CharsetICU API - * - * CharsetCallback class defines some error behaviour functions called + * + * CharsetCallback class defines some error behaviour functions called * by CharsetDecoderICU and CharsetEncoderICU. The class also provides * the facility by which clients can write their own callbacks. * * These functions, although public, should NEVER be called directly. - * They should be used as parameters to the onUmappableCharacter() and + * They should be used as parameters to the onUmappableCharacter() and * onMalformedInput() methods, to set the behaviour of a converter * when it encounters UNMAPPED/INVALID sequences. * Currently the only way to set callbacks is by using CodingErrorAction. @@ -44,7 +44,7 @@ public class CharsetCallback { // private static final String SKIP_STOP_ON_ILLEGAL = "i"; // /* -// * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) +// * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) // */ // private static final String ESCAPE_ICU = null; @@ -92,36 +92,36 @@ public class CharsetCallback { * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= * * This list should be sync with the one in ucnv_err.c - * + * */ private static boolean IS_DEFAULT_IGNORABLE_CODE_POINT(int c) { - return ((c == 0x00AD) || - (c == 0x034F) || - (c == 0x061C) || - (c == 0x115F) || - (c == 0x1160) || - (0x17B4 <= c && c <= 0x17B5) || - (0x180B <= c && c <= 0x180E) || - (0x200B <= c && c <= 0x200F) || - (0x202A <= c && c <= 0x202E) || - (c == 0x2060) || - (0x2066 <= c && c <= 0x2069) || - (0x2061 <= c && c <= 0x2064) || - (0x206A <= c && c <= 0x206F) || - (c == 0x3164) || - (0x0FE00 <= c && c <= 0x0FE0F) || - (c == 0x0FEFF) || - (c == 0x0FFA0) || - 
(0x01BCA0 <= c && c <= 0x01BCA3) || - (0x01D173 <= c && c <= 0x01D17A) || - (c == 0x0E0001) || - (0x0E0020 <= c && c <= 0x0E007F) || - (0x0E0100 <= c && c <= 0x0E01EF) || - (c == 0x2065) || - (0x0FFF0 <= c && c <= 0x0FFF8) || - (c == 0x0E0000) || - (0x0E0002 <= c && c <= 0x0E001F) || - (0x0E0080 <= c && c <= 0x0E00FF) || + return ((c == 0x00AD) || + (c == 0x034F) || + (c == 0x061C) || + (c == 0x115F) || + (c == 0x1160) || + (0x17B4 <= c && c <= 0x17B5) || + (0x180B <= c && c <= 0x180E) || + (0x200B <= c && c <= 0x200F) || + (0x202A <= c && c <= 0x202E) || + (c == 0x2060) || + (0x2066 <= c && c <= 0x2069) || + (0x2061 <= c && c <= 0x2064) || + (0x206A <= c && c <= 0x206F) || + (c == 0x3164) || + (0x0FE00 <= c && c <= 0x0FE0F) || + (c == 0x0FEFF) || + (c == 0x0FFA0) || + (0x01BCA0 <= c && c <= 0x01BCA3) || + (0x01D173 <= c && c <= 0x01D17A) || + (c == 0x0E0001) || + (0x0E0020 <= c && c <= 0x0E007F) || + (0x0E0100 <= c && c <= 0x0E01EF) || + (c == 0x2065) || + (0x0FFF0 <= c && c <= 0x0FFF8) || + (c == 0x0E0000) || + (0x0E0002 <= c && c <= 0x0E001F) || + (0x0E0080 <= c && c <= 0x0E00FF) || (0x0E01F0 <= c && c <= 0x0E0FFF) ); } @@ -133,12 +133,12 @@ public class CharsetCallback { /** * This function is called when the bytes in the source cannot be handled, * and this function is meant to handle or fix the error if possible. - * + * * @return Result of decoding action. This returned object is set to an error * if this function could not handle the conversion. * @stable ICU 3.6 */ - public CoderResult call(CharsetDecoderICU decoder, Object context, + public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr); } @@ -154,17 +154,18 @@ public class CharsetCallback { * if this function could not handle the conversion. 
* @stable ICU 3.6 */ - public CoderResult call(CharsetEncoderICU encoder, Object context, - CharBuffer source, ByteBuffer target, IntBuffer offsets, + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr); - } + } /** * Skip callback * @stable ICU 3.6 */ public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() { - public CoderResult call(CharsetEncoderICU encoder, Object context, - CharBuffer source, ByteBuffer target, IntBuffer offsets, + @Override + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr){ if(context==null){ return CoderResult.UNDERFLOW; @@ -183,7 +184,8 @@ public class CharsetCallback { * @stable ICU 3.6 */ public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() { - public CoderResult call(CharsetDecoderICU decoder, Object context, + @Override + public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr){ if(context==null){ @@ -202,9 +204,10 @@ public class CharsetCallback { * Write substitute callback * @stable ICU 3.6 */ - public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){ - public CoderResult call(CharsetEncoderICU encoder, Object context, - CharBuffer source, ByteBuffer target, IntBuffer offsets, + public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){ + @Override + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr){ if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) { return CoderResult.UNDERFLOW; @@ -227,7 +230,8 @@ public class CharsetCallback { * @stable ICU 3.6 */ public static final Decoder 
TO_U_CALLBACK_SUBSTITUTE = new Decoder() { - public CoderResult call(CharsetDecoderICU decoder, Object context, + @Override + public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr){ @@ -238,7 +242,7 @@ public class CharsetCallback { if (replacementChar.length == 1 && (replacementChar[0] == kSubstituteChar1[0] || replacementChar[0] == kSubstituteChar[0])) { useReplacement = false; } - + /* could optimize this case, just one uchar */ if(decoder.invalidCharLength == 1 && cs.subChar1 != 0) { return CharsetDecoderICU.toUWriteUChars(decoder, useReplacement ? replacementChar : kSubstituteChar1, 0, useReplacement ? replacementChar.length : 1, target, offsets, source.position()); @@ -252,8 +256,9 @@ public class CharsetCallback { * @stable ICU 3.6 */ public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() { - public CoderResult call(CharsetEncoderICU encoder, Object context, - CharBuffer source, ByteBuffer target, IntBuffer offsets, + @Override + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr){ if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) { return CoderResult.UNDERFLOW; @@ -266,12 +271,13 @@ public class CharsetCallback { * @stable ICU 3.6 */ public static final Decoder TO_U_CALLBACK_STOP = new Decoder() { - public CoderResult call(CharsetDecoderICU decoder, Object context, + @Override + public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr){ return cr; } - }; + }; private static final int VALUE_STRING_LENGTH = 32; private static final char UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025; private static final char UNICODE_U_CODEPOINT = 0x0055; @@ -291,17 +297,18 @@ public class CharsetCallback { * @stable ICU 
4.0 */ public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() { - public CoderResult call(CharsetEncoderICU encoder, Object context, - CharBuffer source, ByteBuffer target, IntBuffer offsets, + @Override + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr){ char[] valueString = new char[VALUE_STRING_LENGTH]; int valueStringLength = 0; int i = 0; - + if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) { return CoderResult.UNDERFLOW; } - + if (context == null || !(context instanceof String)) { while (i < length) { valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ @@ -317,7 +324,7 @@ public class CharsetCallback { } } else if (((String)context).equals(ESCAPE_C)) { valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */ - + if (length == 2) { valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */ valueStringLength = itou(valueString, valueStringLength, cp, 16, 8); @@ -376,13 +383,14 @@ public class CharsetCallback { * @stable ICU 4.0 */ public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() { - public CoderResult call(CharsetDecoderICU decoder, Object context, + @Override + public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr){ char[] uniValueString = new char[VALUE_STRING_LENGTH]; int valueStringLength = 0; int i = 0; - + if (context == null || !(context instanceof String)) { while (i < length) { uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ @@ -420,12 +428,12 @@ public class CharsetCallback { } } } - + cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0); - + return cr; } - }; + }; /*** * Java port of uprv_itou() in ICU4C used by 
TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE. * Fills in a char string with the radix-based representation of a number padded with zeroes @@ -436,13 +444,13 @@ public class CharsetCallback { int digit; int j; char temp; - + do { digit = i % radix; buffer[sourceIndex + length++] = (char)(digit <= 9 ? (0x0030+digit) : (0x0030+digit+7)); i = i/radix; } while (i != 0 && (sourceIndex + length) < buffer.length); - + while (length < minwidth) { buffer[sourceIndex + length++] = (char)0x0030; /* zero padding */ } @@ -452,7 +460,7 @@ public class CharsetCallback { buffer[(sourceIndex + length-1) -j] = buffer[sourceIndex + j]; buffer[sourceIndex + j] = temp; } - + return length; } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java index 7acebfe8e1a..2e2b3146850 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java @@ -25,14 +25,14 @@ class CharsetCompoundText extends CharsetICU { private static final byte[] fromUSubstitution = new byte[] { (byte) 0x3F }; private CharsetMBCS myConverterArray[]; private byte state; - + private final static byte INVALID = -2; private final static byte DO_SEARCH = -1; private final static byte COMPOUND_TEXT_SINGLE_0 = 0; private final static byte COMPOUND_TEXT_SINGLE_1 = 1; private final static byte COMPOUND_TEXT_SINGLE_2 = 2; private final static byte COMPOUND_TEXT_SINGLE_3 = 3; - + /*private final static byte COMPOUND_TEXT_DOUBLE_1 = 4; private final static byte COMPOUND_TEXT_DOUBLE_2 = 5; private final static byte COMPOUND_TEXT_DOUBLE_3 = 6; @@ -40,9 +40,9 @@ class CharsetCompoundText extends CharsetICU { private final static byte COMPOUND_TEXT_DOUBLE_5 = 8; private final static byte COMPOUND_TEXT_DOUBLE_6 = 9; private final static byte COMPOUND_TEXT_DOUBLE_7 = 10; - + private final static byte 
COMPOUND_TEXT_TRIPLE_DOUBLE = 11;*/ - + private final static byte IBM_915 = 12; private final static byte IBM_916 = 13; private final static byte IBM_914 = 14; @@ -51,18 +51,18 @@ class CharsetCompoundText extends CharsetICU { private final static byte IBM_913 = 17; private final static byte ISO_8859_14 = 18; private final static byte IBM_923 = 19; - + private final static byte NUM_OF_CONVERTERS = 20; - + private final static byte SEARCH_LENGTH = 12; - + private final static byte[][] escSeqCompoundText = { /* Single */ { 0x1B, 0x2D, 0x41 }, { 0x1B, 0x2D, 0x4D }, { 0x1B, 0x2D, 0x46 }, { 0x1B, 0x2D, 0x47 }, - + /* Double */ { 0x1B, 0x24, 0x29, 0x41 }, { 0x1B, 0x24, 0x29, 0x42 }, @@ -71,10 +71,10 @@ class CharsetCompoundText extends CharsetICU { { 0x1B, 0x24, 0x29, 0x47 }, { 0x1B, 0x24, 0x29, 0x48 }, { 0x1B, 0x24, 0x29, 0x49 }, - + /* Triple/Double */ { 0x1B, 0x25, 0x47 }, - + /*IBM-915*/ { 0x1B, 0x2D, 0x4C }, /*IBM-916*/ @@ -92,9 +92,9 @@ class CharsetCompoundText extends CharsetICU { /* IBM-923 */ { 0x1B, 0x2D, 0x62 }, }; - + private final static byte ESC_START = 0x1B; - + private static boolean isASCIIRange(int codepoint) { if ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) { @@ -102,21 +102,21 @@ class CharsetCompoundText extends CharsetICU { } return false; } - + private static boolean isIBM915(int codepoint) { if ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) { return true; } return false; } - + private static boolean isIBM916(int codepoint) { if ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) { return true; } return false; } - + private static boolean isCompoundS3(int codepoint) { if ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 
0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || @@ -125,14 +125,14 @@ class CharsetCompoundText extends CharsetICU { } return false; } - + private static boolean isCompoundS2(int codepoint) { if ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) { return true; } return false; } - + private static boolean isIBM914(int codepoint) { if ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || @@ -143,14 +143,14 @@ class CharsetCompoundText extends CharsetICU { } return false; } - + private static boolean isIBM874(int codepoint) { if ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) { return true; } return false; } - + private static boolean isIBM912(int codepoint) { return ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || @@ -159,7 +159,7 @@ class CharsetCompoundText extends CharsetICU { (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)); } - + private static boolean isIBM913(int codepoint) { if ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || @@ -169,7 +169,7 @@ class CharsetCompoundText extends CharsetICU { } return false; } - + private static boolean 
isCompoundS1(int codepoint) { if ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) { @@ -177,7 +177,7 @@ class CharsetCompoundText extends CharsetICU { } return false; } - + private static boolean isISO8859_14(int codepoint) { if ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || @@ -189,14 +189,14 @@ class CharsetCompoundText extends CharsetICU { } return false; } - + private static boolean isIBM923(int codepoint) { if ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) { return true; } return false; } - + private static int findNextEsc(ByteBuffer source) { int sourceLimit = source.limit(); for (int i = (source.position() + 1); i < sourceLimit; i++) { @@ -206,10 +206,10 @@ class CharsetCompoundText extends CharsetICU { } return sourceLimit; } - + private static byte getState(int codepoint) { byte state = -1; - + if (isASCIIRange(codepoint)) { state = COMPOUND_TEXT_SINGLE_0; } else if (isIBM912(codepoint)) { @@ -235,10 +235,10 @@ class CharsetCompoundText extends CharsetICU { } else if (isCompoundS1(codepoint)) { state = COMPOUND_TEXT_SINGLE_1; } - + return state; } - + private static byte findStateFromEscSeq(ByteBuffer source, byte[] toUBytes, int toUBytesLength) { byte state = INVALID; int sourceIndex = source.position(); @@ -246,7 +246,7 @@ class CharsetCompoundText extends CharsetICU { byte i, n; int offset = toUBytesLength; int sourceLimit = source.limit(); - + for (i = 0; i < escSeqCompoundText.length; i++) { matchFound = true; for (n = 0; n < escSeqCompoundText[i].length; n++) { @@ -266,30 +266,30 @@ class CharsetCompoundText extends CharsetICU { break; } } - + if (matchFound) { state = i; source.position(sourceIndex + (escSeqCompoundText[i].length - offset)); } - + return state; } - + public CharsetCompoundText(String 
icuCanonicalName, String javaCanonicalName, String[] aliases) { super(icuCanonicalName, javaCanonicalName, aliases); - + LoadConverters(); - + maxBytesPerChar = 6; minBytesPerChar = 1; maxCharsPerByte = 1; } - + private void LoadConverters() { myConverterArray = new CharsetMBCS[NUM_OF_CONVERTERS]; - + myConverterArray[COMPOUND_TEXT_SINGLE_0] = null; - + for (int i = 1; i < SEARCH_LENGTH; i++) { String name = "icu-internal-compound-"; if (i <= 3) { @@ -299,10 +299,10 @@ class CharsetCompoundText extends CharsetICU { } else { name = name + "t"; } - + myConverterArray[i] = (CharsetMBCS)CharsetICU.forNameICU(name); } - + myConverterArray[IBM_915] = (CharsetMBCS)CharsetICU.forNameICU("ibm-915_P100-1995"); myConverterArray[IBM_916] = (CharsetMBCS)CharsetICU.forNameICU("ibm-916_P100-1995"); myConverterArray[IBM_914] = (CharsetMBCS)CharsetICU.forNameICU("ibm-914_P100-1995"); @@ -312,15 +312,15 @@ class CharsetCompoundText extends CharsetICU { myConverterArray[ISO_8859_14] = (CharsetMBCS)CharsetICU.forNameICU("iso-8859_14-1998"); myConverterArray[IBM_923] = (CharsetMBCS)CharsetICU.forNameICU("ibm-923_P100-1998"); } - + class CharsetEncoderCompoundText extends CharsetEncoderICU { CharsetEncoderMBCS gbEncoder[]; - + public CharsetEncoderCompoundText(CharsetICU cs) { super(cs, fromUSubstitution); - + gbEncoder = new CharsetEncoderMBCS[NUM_OF_CONVERTERS]; - + for (int i = 0; i < NUM_OF_CONVERTERS; i++) { if (i == 0) { gbEncoder[i] = null; @@ -329,7 +329,8 @@ class CharsetCompoundText extends CharsetICU { } } } - + + @Override protected void implReset() { super.implReset(); for (int i = 0; i < NUM_OF_CONVERTERS; i++) { @@ -338,7 +339,8 @@ class CharsetCompoundText extends CharsetICU { } } } - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; int sourceChar; @@ -350,35 +352,35 @@ class CharsetCompoundText extends CharsetICU { byte tmpState = 0; int i = 0; boolean 
gotoGetTrail = false; - + if (!source.hasRemaining()) return CoderResult.UNDERFLOW; else if (!target.hasRemaining()) return CoderResult.OVERFLOW; - + /* check if the last codepoint of previous buffer was a lead surrogate */ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { // goto getTrail label - gotoGetTrail = true; + gotoGetTrail = true; } - + while (source.hasRemaining()) { if (target.hasRemaining()) { if (!gotoGetTrail) { sourceChar = source.get(); } - + targetLength = 0; tmpTargetBuffer.position(0); tmpTargetBuffer.limit(3); - + /* check if the char is a First surrogate */ if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) { if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) { // getTrail label /* reset gotoGetTrail flag*/ gotoGetTrail = false; - + /* look ahead to find the trail surrogate */ if (source.hasRemaining()) { /* test the following code unit */ @@ -410,11 +412,11 @@ class CharsetCompoundText extends CharsetICU { break; } } - + tmpState = getState(sourceChar); - + sourceCharArray[0] = (char)sourceChar; - + if (tmpState < 0) { /* Test all available converters */ for (i = 1; i < SEARCH_LENGTH; i++) { @@ -438,22 +440,22 @@ class CharsetCompoundText extends CharsetICU { if (err.isError()) { break; } - + if (currentState != tmpState) { currentState = tmpState; - + /* Write escape sequence if necessary */ for (i = 0; i < escSeqCompoundText[currentState].length; i++) { targetBytes[i] = escSeqCompoundText[currentState][i]; } targetLength = i; } - + for (i = 0; i < tmpTargetBuffer.limit(); i++) { targetBytes[i+targetLength] = tmpTargetBuffer.get(i); } targetLength += i; - + for (i = 0; i < targetLength; i++) { if (target.hasRemaining()) { target.put(targetBytes[i]); @@ -467,7 +469,7 @@ class CharsetCompoundText extends CharsetICU { break; } } - + if (err.isOverflow()) { int m = 0; for (int n = i; n < targetLength; n++) { @@ -476,18 +478,18 @@ class CharsetCompoundText extends CharsetICU { this.errorBufferLength = m; } state = 
currentState; - + return err; } } - + class CharsetDecoderCompoundText extends CharsetDecoderICU { CharsetDecoderMBCS gbDecoder[]; - + public CharsetDecoderCompoundText(CharsetICU cs) { super(cs); gbDecoder = new CharsetDecoderMBCS[NUM_OF_CONVERTERS]; - + for (int i = 0; i < NUM_OF_CONVERTERS; i++) { if (i == 0) { gbDecoder[i] = null; @@ -496,7 +498,8 @@ class CharsetCompoundText extends CharsetICU { } } } - + + @Override protected void implReset() { super.implReset(); for (int i = 0; i < NUM_OF_CONVERTERS; i++) { @@ -505,7 +508,8 @@ class CharsetCompoundText extends CharsetICU { } } } - + + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; byte[] sourceChar = { 0x00 }; @@ -513,12 +517,12 @@ class CharsetCompoundText extends CharsetICU { byte tmpState = currentState; CharsetDecoderMBCS decoder; int sourceLimit = source.limit();; - + if (!source.hasRemaining()) return CoderResult.UNDERFLOW; else if (!target.hasRemaining()) return CoderResult.OVERFLOW; - + while (source.hasRemaining()) { if (target.hasRemaining()) { if (this.toULength > 0) { @@ -526,7 +530,7 @@ class CharsetCompoundText extends CharsetICU { } else { sourceChar[0] = source.get(source.position()); } - + if (sourceChar[0] == ESC_START) { tmpState = findStateFromEscSeq(source, this.toUBytesArray, this.toULength); if (tmpState == DO_SEARCH) { @@ -542,14 +546,14 @@ class CharsetCompoundText extends CharsetICU { } break; } - + this.toULength = 0; } - + if (tmpState != currentState) { currentState = tmpState; } - + if (currentState == COMPOUND_TEXT_SINGLE_0) { while (source.hasRemaining()) { if (!target.hasRemaining()) { @@ -567,29 +571,29 @@ class CharsetCompoundText extends CharsetICU { source.limit(findNextEsc(source)); decoder = gbDecoder[currentState]; - + decoder.toUBytesArray = this.toUBytesArray; decoder.toULength = this.toULength; err = decoder.decodeLoop(source, target, offsets, true); - + 
this.toULength = decoder.toULength; decoder.toULength = 0; - + if (err.isError()) { if (err.isOverflow()) { this.charErrorBufferArray = decoder.charErrorBufferArray; this.charErrorBufferBegin = decoder.charErrorBufferBegin; this.charErrorBufferLength = decoder.charErrorBufferLength; - + decoder.charErrorBufferBegin = 0; decoder.charErrorBufferLength = 0; } } - + source.limit(sourceLimit); } - + if (err.isError()) { break; } @@ -602,15 +606,18 @@ class CharsetCompoundText extends CharsetICU { return err; } } - + + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderCompoundText(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderCompoundText(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ for (int i = 1; i < NUM_OF_CONVERTERS; i++) { myConverterArray[i].MBCSGetFilteredUnicodeSetForUnicode(myConverterArray[i].sharedData, setFillIn, which, CharsetMBCS.UCNV_SET_FILTER_NONE); diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java index 797875714c7..267fa086a81 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java @@ -7,7 +7,7 @@ ******************************************************************************* * ******************************************************************************* -*/ +*/ package com.ibm.icu.charset; @@ -22,11 +22,11 @@ import com.ibm.icu.impl.Assert; /** * An abstract class that provides framework methods of decoding operations for concrete - * subclasses. + * subclasses. * In the future this class will contain API that will implement converter sematics of ICU4C. 
* @stable ICU 3.6 */ -public abstract class CharsetDecoderICU extends CharsetDecoder{ +public abstract class CharsetDecoderICU extends CharsetDecoder{ int toUnicodeStatus; byte[] toUBytesArray = new byte[128]; @@ -37,7 +37,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ int charErrorBufferBegin; char[] invalidCharBuffer = new char[128]; int invalidCharLength; - + /** * Maximum number of indexed bytes * @internal @@ -52,11 +52,12 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ int preToULength; /* negative: replay */ int preToUFirstLength; /* length of first character */ int mode; - + Object toUContext = null; private CharsetCallback.Decoder onUnmappableCharacter = CharsetCallback.TO_U_CALLBACK_STOP; private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP; CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() { + @Override public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) { if (cr.isUnmappable()) { @@ -69,14 +70,14 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ // return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target, offsets, buffer, length, cr); } }; - + // exist to keep implOnMalformedInput and implOnUnmappableInput from being too recursive private boolean malformedInputCalled = false; private boolean unmappableCharacterCalled = false; - + /* * Construct a CharsetDecorderICU based on the information provided from a CharsetICU object. - * + * * @param cs The CharsetICU object containing information about how to charset to decode. */ CharsetDecoderICU(CharsetICU cs) { @@ -93,65 +94,67 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ final boolean isFallbackUsed() { return true; } - + /** * Fallback is currently always used by icu4j decoders. 
*/ static final boolean isToUUseFallback() { return isToUUseFallback(true); } - + /** * Fallback is currently always used by icu4j decoders. */ static final boolean isToUUseFallback(boolean iUseFallback) { return true; } - + /** * Sets the action to be taken if an illegal sequence is encountered - * + * * @param newAction action to be taken * @exception IllegalArgumentException * @stable ICU 3.6 */ + @Override protected final void implOnMalformedInput(CodingErrorAction newAction) { // don't run infinitely if (malformedInputCalled) return; - + // if we get a replace, do not let the nio replace if (newAction == CodingErrorAction.REPLACE) { malformedInputCalled = true; super.onMalformedInput(CodingErrorAction.IGNORE); malformedInputCalled = false; } - + onMalformedInput = getCallback(newAction); } - + /** * Sets the action to be taken if an illegal sequence is encountered - * + * * @param newAction action to be taken * @exception IllegalArgumentException * @stable ICU 3.6 */ + @Override protected final void implOnUnmappableCharacter(CodingErrorAction newAction) { // dont run infinitely if (unmappableCharacterCalled) return; - + // if we get a replace, do not let the nio replace if (newAction == CodingErrorAction.REPLACE) { unmappableCharacterCalled = true; super.onUnmappableCharacter(CodingErrorAction.IGNORE); unmappableCharacterCalled = false; } - + onUnmappableCharacter = getCallback(newAction); } - + /** * Sets the callback encoder method and context to be used if an illegal sequence is encounterd. * You would normally call this twice to set both the malform and unmappable error. In this case, @@ -170,12 +173,12 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ } else { /* Error: Only malformed and unmappable are handled. 
*/ } - + if (toUContext == null || !toUContext.equals(newContext)) { toUContext = newContext; } } - + private static CharsetCallback.Decoder getCallback(CodingErrorAction action){ if(action==CodingErrorAction.REPLACE){ return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE; @@ -190,42 +193,44 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * Flushes any characters saved in the converter's internal buffer and * resets the converter. * @param out action to be taken - * @return result of flushing action and completes the decoding all input. + * @return result of flushing action and completes the decoding all input. * Returns CoderResult.UNDERFLOW if the action succeeds. * @stable ICU 3.6 */ + @Override protected final CoderResult implFlush(CharBuffer out) { return decode(EMPTY, out, null, true); } - + /** * Resets the to Unicode mode of converter * @stable ICU 3.6 */ + @Override protected void implReset() { toUnicodeStatus = 0 ; toULength = 0; charErrorBufferLength = 0; charErrorBufferBegin = 0; - + /* store previous UChars/chars to continue partial matches */ preToUBegin = 0; preToULength = 0; /* negative: replay */ - preToUFirstLength = 0; + preToUFirstLength = 0; mode = 0; } - + /** * Decodes one or more bytes. The default behaviour of the converter - * is stop and report if an error in input stream is encountered. + * is stop and report if an error in input stream is encountered. * To set different behaviour use @see CharsetDecoder.onMalformedInput() - * This method allows a buffer by buffer conversion of a data stream. - * The state of the conversion is saved between calls to convert. - * Among other things, this means multibyte input sequences can be - * split between calls. If a call to convert results in an Error, the - * conversion may be continued by calling convert again with suitably - * modified parameters.All conversions should be finished with a call to + * This method allows a buffer by buffer conversion of a data stream. 
+ * The state of the conversion is saved between calls to convert. + * Among other things, this means multibyte input sequences can be + * split between calls. If a call to convert results in an Error, the + * conversion may be continued by calling convert again with suitably + * modified parameters.All conversions should be finished with a call to * the flush method. * @param in buffer to decode * @param out buffer to populate with decoded result @@ -233,6 +238,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * action succeeds or more input is needed for completing the decoding action. * @stable ICU 3.6 */ + @Override protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){ if(in.remaining() < toUCountPending()){ return CoderResult.UNDERFLOW; @@ -241,16 +247,16 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ // toULength = 0; // return CoderResult.UNDERFLOW; // } - + in.position(in.position() + toUCountPending()); - + /* do the conversion */ CoderResult ret = decode(in, out, null, false); - // ok was there input held in the previous invocation of decodeLoop + // ok was there input held in the previous invocation of decodeLoop // that resulted in output in this invocation? in.position(in.position() - toUCountPending()); - + return ret; } @@ -262,7 +268,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * action succeeds or more input is needed for completing the decoding action. */ abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush); - + /* * Implements the ICU semantic for decode operation * @param source The input byte buffer @@ -274,12 +280,12 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * action succeeds or more input is needed for completing the decoding action. 
*/ final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { - + /* check parameters */ if (target == null || source == null) { throw new IllegalArgumentException(); } - + /* * Make sure that the buffer sizes do not exceed the number range for * int32_t because some functions use the size (in units or bytes) @@ -301,7 +307,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ return; } */ - + /* flush the target overflow buffer */ if (charErrorBufferLength > 0) { int i = 0; @@ -328,12 +334,12 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ /* the overflow buffer is completely copied to the target */ charErrorBufferLength = 0; } - + if (!flush && !source.hasRemaining() && toULength == 0 && preToULength >= 0) { /* the overflow buffer is emptied and there is no new input: we are done */ return CoderResult.UNDERFLOW; } - + /* * Do not simply return with a buffer overflow error if * !flush && t==targetLimit @@ -341,7 +347,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * For example, the skip callback may be called; * it does not output anything. 
*/ - + return toUnicodeWithCallback(source, target, offsets, flush); } @@ -388,7 +394,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ } } */ final CoderResult toUnicodeWithCallback(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){ - + int sourceIndex; int errorInputLength; boolean converterSawEndOfInput, calledCallback; @@ -397,14 +403,14 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ /* variables for m:n conversion */ ByteBuffer replayArray = ByteBuffer.allocate(EXT_MAX_BYTES); int replayArrayIndex = 0; - + ByteBuffer realSource=null; boolean realFlush=false; int realSourceIndex=0; - + CoderResult cr = CoderResult.UNDERFLOW; - + /* get the converter implementation function */ sourceIndex=0; @@ -428,7 +434,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ sourceIndex=-1; preToULength=0; } - + /* * loop for conversion and error handling * @@ -454,13 +460,13 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * s0) { updateOffsets(offsets, length, sourceIndex, errorInputLength); - - + + /* * if a converter handles offsets and updates the offsets * pointer at the end, then pArgs->offset should not change @@ -488,13 +494,13 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ */ //TODO: pArgs->offsets=offsets+=length; /* } - + if(sourceIndex>=0) { sourceIndex+=(source.position()-s); } - + } */ - + if(preToULength<0) { /* * switch the source to new replay units (cannot occur while replaying) @@ -505,7 +511,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ realSource=source; realFlush=flush; realSourceIndex=sourceIndex; - + //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength); replayArray.put(preToUArray,0, -preToULength); // reset position @@ -517,7 +523,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ if((sourceIndex+=preToULength)<0) { sourceIndex=-1; } - + preToULength=0; } 
else { /* see implementation note before _fromUnicodeWithCallback() */ @@ -525,11 +531,11 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ Assert.assrt(realSource==null); } } - + /* update pointers */ s=source.position(); //t=target.position(); - + if(cr.isUnderflow()) { if(s0) { copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength); } - + /* set the converter state to deal with the next character */ toULength=0; - + /* call the callback function */ cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr); /* @@ -634,7 +640,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ /* * Returns the number of chars held in the converter's internal state - * because more input is needed for completing the conversion. This function is + * because more input is needed for completing the conversion. This function is * useful for mapping semantics of ICU's converter interface to those of iconv, * and this information is not needed for normal conversion. * @return The number of chars in the state. -1 if an error is encountered. @@ -650,7 +656,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ return 0; } } - + private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) { for(int i=srcOffset; i0 && target.hasRemaining()) { @@ -685,7 +691,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ } } /* write overflow */ - if(length>0) { + if(length>0) { cnv.charErrorBufferLength= 0; cr = CoderResult.OVERFLOW; do { @@ -707,8 +713,8 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ /* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with * the substitution characters. Will leave in here for the time being. To be removed later. 
(4.0) */ - /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder, - ByteBuffer source, CharBuffer target, + /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder, + ByteBuffer source, CharBuffer target, IntBuffer offsets){ String sub = decoder.replacement(); CharsetICU cs = (CharsetICU) decoder.charset(); @@ -719,10 +725,10 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ } else { return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(), 0, sub.length(), target, offsets, source.position()); - + } }*/ - + /** * Returns the maxBytesPerChar value for the Charset that created this decoder. * @return maxBytesPerChar diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java index 4f680094b5a..8c960ecd293 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java @@ -25,7 +25,7 @@ import com.ibm.icu.text.UTF16; /** * An abstract class that provides framework methods of decoding operations for concrete - * subclasses. + * subclasses. * In the future this class will contain API that will implement converter semantics of ICU4C. 
* @stable ICU 3.6 */ @@ -70,6 +70,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP; CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() { + @Override public CoderResult call(CharsetEncoderICU encoder, Object context, CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr) { @@ -87,7 +88,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /* * Construcs a new encoder for the given charset - * + * * @param cs * for which the decoder is created * @param replacement @@ -144,28 +145,30 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /** * Sets the action to be taken if an illegal sequence is encountered - * + * * @param newAction * action to be taken * @exception IllegalArgumentException * @stable ICU 3.6 */ + @Override protected void implOnMalformedInput(CodingErrorAction newAction) { onMalformedInput = getCallback(newAction); } - + /** * Sets the action to be taken if an illegal sequence is encountered - * + * * @param newAction * action to be taken * @exception IllegalArgumentException * @stable ICU 3.6 */ + @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) { onUnmappableInput = getCallback(newAction); } - + /** * Sets the callback encoder method and context to be used if an illegal sequence is encountered. * You would normally call this twice to set both the malform and unmappable error. In this case, @@ -184,7 +187,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { } else { /* Error: Only malformed and unmappable are handled. */ } - + if (fromUContext == null || !fromUContext.equals(newContext)) { setFromUContext(newContext); } @@ -192,7 +195,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /** * Sets fromUContext used in callbacks. 
- * + * * @param newContext Object * @exception IllegalArgumentException The object is an illegal argument for UContext. * @stable ICU 4.0 @@ -200,7 +203,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { public final void setFromUContext(Object newContext) { fromUContext = newContext; } - + private static CharsetCallback.Encoder getCallback(CodingErrorAction action) { if (action == CodingErrorAction.REPLACE) { return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE; @@ -217,10 +220,11 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * Flushes any characters saved in the converter's internal buffer and * resets the converter. * @param out action to be taken - * @return result of flushing action and completes the decoding all input. + * @return result of flushing action and completes the decoding all input. * Returns CoderResult.UNDERFLOW if the action succeeds. * @stable ICU 3.6 */ + @Override protected CoderResult implFlush(ByteBuffer out) { return encode(EMPTY, out, null, true); } @@ -229,6 +233,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * Resets the from Unicode mode of converter * @stable ICU 3.6 */ + @Override protected void implReset() { errorBufferLength = 0; fromUnicodeStatus = 0; @@ -252,6 +257,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * action succeeds or more input is needed for completing the decoding action. * @stable ICU 3.6 */ + @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty // The Java framework should have already substituted what was left. 
@@ -596,7 +602,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /* callback handling */ { int codePoint; - + /* get and write the code point */ codePoint = fromUChar32; errorInputLength = UTF16.append(invalidUCharBuffer, 0, @@ -671,8 +677,9 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { }*/ /** * Overrides super class method - * @stable ICU 3.6 + * @stable ICU 3.6 */ + @Override public boolean isLegalReplacement(byte[] repl) { return true; } @@ -701,7 +708,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { out.put(bytesArray[bytesBegin]); bytesBegin++; } - // success + // success bytesLength = 0; } catch (BufferOverflowException ex) { cr = CoderResult.OVERFLOW; @@ -713,7 +720,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { --obl; } } - //write overflow + //write overflow cnv.errorBufferLength = bytesLimit - bytesBegin; if (cnv.errorBufferLength > 0) { int index = 0; @@ -727,7 +734,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /* * Returns the number of chars held in the converter's internal state - * because more input is needed for completing the conversion. This function is + * because more input is needed for completing the conversion. This function is * useful for mapping semantics of ICU's converter interface to those of iconv, * and this information is not needed for normal conversion. * @return The number of chars in the state. -1 if an error is encountered. @@ -746,12 +753,12 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { } /** - * + * * @param source */ private final void setSourcePosition(CharBuffer source) { - // ok was there input held in the previous invocation of encodeLoop + // ok was there input held in the previous invocation of encodeLoop // that resulted in output in this invocation? 
source.position(source.position() - fromUCountPending()); } @@ -816,7 +823,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { */ if (cr.isOverflow()) { /* Overflowed target. Now, we'll write into the charErrorBuffer. - * It's a fixed size. If we overflow it...Hm + * It's a fixed size. If we overflow it...Hm */ /* start the new target at the first free slot in the error buffer */ @@ -840,13 +847,13 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * a lead surrogate followed by a trail surrogate. This method can change * the source position and will modify fromUChar32. *

- * + * *

* If null is returned, then there was success in reading a * surrogate pair, the codepoint is stored in fromUChar32 and * fromUChar32 should be reset (to 0) after being read. *

- * + * * @param source * The encoding source. * @param lead @@ -886,8 +893,8 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * requirement, the calling method must also increment the index if this method returns * null. *

- * - * + * + * * @param source * The encoding source. * @param lead @@ -920,7 +927,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { fromUChar32 = UCharacter.getCodePoint(lead, trail); return null; } - + /** * Returns the maxCharsPerByte value for the Charset that created this encoder. * @return maxCharsPerByte @@ -929,7 +936,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { public final float maxCharsPerByte() { return ((CharsetICU)(this.charset())).maxCharsPerByte; } - + /** * Calculates the size of a buffer for conversion from Unicode to a charset. * The calculated size is guaranteed to be sufficient for this conversion. diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java index 5aa6eb37f9b..e2294c03053 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java @@ -42,7 +42,7 @@ class CharsetHZ extends CharsetICU { maxBytesPerChar = 4; minBytesPerChar = 1; maxCharsPerByte = 1; - + isEmptySegment = false; } @@ -55,6 +55,7 @@ class CharsetHZ extends CharsetICU { gbDecoder = (CharsetMBCS.CharsetDecoderMBCS) gbCharset.newDecoder(); } + @Override protected void implReset() { super.implReset(); gbDecoder.implReset(); @@ -63,6 +64,7 @@ class CharsetHZ extends CharsetICU { isEmptySegment = false; } + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; byte[] tempBuf = new byte[2]; @@ -141,7 +143,7 @@ class CharsetHZ extends CharsetICU { * add another bit to distinguish a 0 byte from not having seen a lead byte */ toUnicodeStatus = mySourceChar | 0x100; - isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */ + isEmptySegment = false; /* the segment has something, either valid or 
will produce a different error, so reset this */ } continue; } else { @@ -154,7 +156,7 @@ class CharsetHZ extends CharsetICU { * - We include at least the first byte in the illegal sequence. * - If any of the non-initial bytes could be the start of a character, * we stop the illegal sequence before the first one of those - * + * * In HZ DBCS, if the second byte is in the 21..7e range, * we report ony the first byte as the illegal sequence. * Otherwise we convert of report the pair of bytes. @@ -230,6 +232,7 @@ class CharsetHZ extends CharsetICU { gbEncoder = (CharsetMBCS.CharsetEncoderMBCS) gbCharset.newEncoder(); } + @Override protected void implReset() { super.implReset(); gbEncoder.implReset(); @@ -238,6 +241,7 @@ class CharsetHZ extends CharsetICU { isTargetUCharDBCS = false; } + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { int length = 0; int[] targetUniChar = new int[] { 0 }; @@ -375,14 +379,17 @@ class CharsetHZ extends CharsetICU { } } + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderHZ(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderHZ(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ setFillIn.add(0,0x7f); // CharsetMBCS mbcshz = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java index eff49cca893..8fd7d876da3 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java @@ -5,7 +5,7 @@ * Copyright (C) 2006-2016, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* -*/ +*/ package com.ibm.icu.charset; @@ -69,7 +69,7 @@ public abstract class CharsetICU extends Charset{ int options; float maxCharsPerByte; - + String name; /* +4: 60 internal name of the converter- invariant chars */ int codepage; /* +64: 4 codepage # (now IBM-$codepage) */ @@ -82,20 +82,20 @@ public abstract class CharsetICU extends Charset{ byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */ byte subCharLen; /* +76: 1 */ - + byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */ byte hasFromUnicodeFallback; /* +78: 1 */ short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */ byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ //byte reserved[/*19*/]; /* +81: 19 to round out the structure */ - - + + // typedef enum UConverterUnicodeSet { - /** - * Parameter that select the set of roundtrippable Unicode code points. + /** + * Parameter that select the set of roundtrippable Unicode code points. * @stable ICU 4.0 */ - public static final int ROUNDTRIP_SET=0; + public static final int ROUNDTRIP_SET=0; /** * Select the set of Unicode code points with roundtrip or fallback mappings. * Not supported at this point. @@ -106,9 +106,9 @@ public abstract class CharsetICU extends Charset{ public static final int ROUNDTRIP_AND_FALLBACK_SET =1; //} UConverterUnicodeSet; - + /** - * + * * @param icuCanonicalName * @param canonicalName * @param aliases @@ -121,7 +121,7 @@ public abstract class CharsetICU extends Charset{ } this.icuCanonicalName = icuCanonicalName; } - + /** * Ascertains if a charset is a sub set of this charset * Implements the abstract method of super class. 
@@ -129,6 +129,7 @@ public abstract class CharsetICU extends Charset{ * @return true if the given charset is a subset of this charset * @stable ICU 3.6 */ + @Override public boolean contains(Charset cs){ if (null == cs) { return false; @@ -152,7 +153,7 @@ public abstract class CharsetICU extends Charset{ algorithmicCharsets.put("LMBCS-18", "com.ibm.icu.charset.CharsetLMBCS"); algorithmicCharsets.put("LMBCS-19", "com.ibm.icu.charset.CharsetLMBCS"); algorithmicCharsets.put("BOCU-1", "com.ibm.icu.charset.CharsetBOCU1" ); - algorithmicCharsets.put("SCSU", "com.ibm.icu.charset.CharsetSCSU" ); + algorithmicCharsets.put("SCSU", "com.ibm.icu.charset.CharsetSCSU" ); algorithmicCharsets.put("US-ASCII", "com.ibm.icu.charset.CharsetASCII" ); algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" ); algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" ); @@ -206,7 +207,7 @@ public abstract class CharsetICU extends Charset{ Class[] paramTypes = new Class[]{ String.class, String.class, String[].class}; final Constructor c = cs.getConstructor(paramTypes); Object[] params = new Object[]{ icuCanonicalName, javaCanonicalName, aliases}; - + // Run constructor try { conv = c.newInstance(params); @@ -222,18 +223,18 @@ public abstract class CharsetICU extends Charset{ } }catch(ClassNotFoundException ex){ }catch(NoSuchMethodException ex){ - }catch (IllegalAccessException ex){ - }catch (InstantiationException ex){ + }catch (IllegalAccessException ex){ + }catch (InstantiationException ex){ } - throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className); + throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className); } - + static final boolean isSurrogate(int c){ return (((c)&0xfffff800)==0xd800); } - + /* - * Returns the default charset name + * Returns the default charset name */ // static final String getDefaultCharsetName(){ // String defaultEncoding = new InputStreamReader(new 
ByteArrayInputStream(new byte[0])).getEncoding(); @@ -246,7 +247,7 @@ public abstract class CharsetICU extends Charset{ * available. If the ICU charset provider does not support * the specified charset, then try other charset providers * including the standard Java charset provider. - * + * * @param charsetName The name of the requested charset, * may be either a canonical name or an alias * @return A charset object for the named charset @@ -281,11 +282,11 @@ public abstract class CharsetICU extends Charset{ * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the * start of the stream for example U+FEFF (the Unicode BOM/signature * character) that can be ignored. - * + * * Detects Unicode signature byte sequences at the start of the byte stream * and returns number of bytes of the BOM of the indicated Unicode charset. * 0 is returned when no Unicode signature is recognized. - * + * */ // TODO This should be proposed as CharsetDecoderICU API. // static String detectUnicodeSignature(ByteBuffer source) { @@ -373,17 +374,17 @@ public abstract class CharsetICU extends Charset{ // /* no known Unicode signature byte sequence recognized */ // return null; // } - - + + abstract void getUnicodeSetImpl(UnicodeSet setFillIn, int which); - + /** * Returns the set of Unicode code points that can be converted by an ICU Converter. * *

The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): The set of all Unicode code points that can be - * roundtrip-converted (converted without any data loss) with the converter This set will not include code points that have fallback + * roundtrip-converted (converted without any data loss) with the converter This set will not include code points that have fallback * mappings or are only the result of reverse fallback mappings. See UTR #22 "Character Mapping Markup Language" at http://www.unicode.org/reports/tr22/ - * + * *

In the future, there may be more UConverterUnicodeSet choices to select sets with different properties. * *

This is useful for example for @@ -393,10 +394,10 @@ public abstract class CharsetICU extends Charset{ * by comparing its roundtrip set with the set of ExemplarCharacters from * ICU's locale data or other sources * - * @param setFillIn A valid UnicodeSet. It will be cleared by this function before + * @param setFillIn A valid UnicodeSet. It will be cleared by this function before * the converter's specific set is filled in. * @param which A selector; currently ROUNDTRIP_SET is the only supported value. - * @throws IllegalArgumentException if the parameters does not match. + * @throws IllegalArgumentException if the parameters does not match. * @stable ICU 4.0 */ public void getUnicodeSet(UnicodeSet setFillIn, int which){ @@ -406,7 +407,7 @@ public abstract class CharsetICU extends Charset{ setFillIn.clear(); getUnicodeSetImpl(setFillIn, which); } - + /** * Returns whether or not the charset of the converter has a fixed number of bytes * per charset character. @@ -422,21 +423,21 @@ public abstract class CharsetICU extends Charset{ if (this instanceof CharsetASCII || this instanceof CharsetUTF32) { return true; } - + if (this instanceof CharsetMBCS) { if (((CharsetMBCS)this).sharedData.staticData.maxBytesPerChar == ((CharsetMBCS)this).sharedData.staticData.minBytesPerChar) { return true; } } - + return false; } - + static void getNonSurrogateUnicodeSet(UnicodeSet setFillIn){ setFillIn.add(0, 0xd7ff); setFillIn.add(0xe000, 0x10ffff); } - + static void getCompleteUnicodeSet(UnicodeSet setFillIn){ setFillIn.add(0, 0x10ffff); } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java index fcd6e12f064..fdaa94d38aa 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java @@ -25,7 +25,7 @@ import com.ibm.icu.text.UnicodeSet; class CharsetISO2022 extends CharsetICU { 
private UConverterDataISO2022 myConverterData; private int variant; // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN} - + private static final byte[] SHIFT_IN_STR = { 0x0f }; // private static final byte[] SHIFT_OUT_STR = { 0x0e }; @@ -37,7 +37,7 @@ class CharsetISO2022 extends CharsetICU { */ private static final char HWKANA_START = 0xff61; private static final char HWKANA_END = 0xff9f; - + /* * 94-character sets with native byte values A1..FE are encoded in ISO 2022 * as bytes 21..7E. (Subtract 0x80.) @@ -52,16 +52,16 @@ class CharsetISO2022 extends CharsetICU { */ private static final char GR96_START = 0xa0; private static final char GR96_END = 0xff; - + /* for ISO-2022-JP and -CN implementations */ // typedef enum { /* shared values */ private static final byte INVALID_STATE = -1; private static final byte ASCII = 0; - + private static final byte SS2_STATE = 0x10; private static final byte SS3_STATE = 0x11; - + /* JP */ private static final byte ISO8859_1 = 1; private static final byte ISO8859_7 = 2; @@ -71,13 +71,13 @@ class CharsetISO2022 extends CharsetICU { private static final byte GB2312 = 6; private static final byte KSC5601 = 7; private static final byte HWKANA_7BIT = 8; /* Halfwidth Katakana 7 bit */ - + /* CN */ /* the first few enum constants must keep their values because they corresponds to myConverterArray[] */ private static final byte GB2312_1 = 1; private static final byte ISO_IR_165= 2; private static final byte CNS_11643 = 3; - + /* * these are used in StateEnum and ISO2022State variables, * but CNS_11643 must be used to index into myConverterArray[] @@ -91,18 +91,18 @@ class CharsetISO2022 extends CharsetICU { private static final byte CNS_11643_6 = 0x26; private static final byte CNS_11643_7 = 0x27; // } StateEnum; - + public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) { super(icuCanonicalName, javaCanonicalName, aliases); - + myConverterData = new UConverterDataISO2022(); - + int versionIndex 
= icuCanonicalName.indexOf("version="); int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue(); - + myConverterData.version = version; - + if (icuCanonicalName.indexOf("locale=ja") > 0) { ISO2022InitJP(version); } else if (icuCanonicalName.indexOf("locale=zh") > 0) { @@ -110,18 +110,18 @@ class CharsetISO2022 extends CharsetICU { } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ { ISO2022InitKR(version); } - + myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder(); myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder(); } - + private void ISO2022InitJP(int version) { variant = ISO_2022_JP; - + maxBytesPerChar = 6; minBytesPerChar = 1; maxCharsPerByte = 1; - // open the required converters and cache them + // open the required converters and cache them if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) { myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData; } @@ -136,14 +136,14 @@ class CharsetISO2022 extends CharsetICU { if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) { myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData; } - + // create a generic CharsetMBCS object myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); } - + private void ISO2022InitCN(int version) { variant = ISO_2022_CN; - + maxBytesPerChar = 8; minBytesPerChar = 1; maxCharsPerByte = 1; @@ -151,41 +151,41 @@ class CharsetISO2022 extends CharsetICU { myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData; if (version == 1) { myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData; - } + } myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData; - + // create a 
generic CharsetMBCS object myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); } - + private void ISO2022InitKR(int version) { variant = ISO_2022_KR; - + maxBytesPerChar = 8; minBytesPerChar = 1; maxCharsPerByte = 1; - + if (version == 1) { myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546"); myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0]; } else { myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949"); } - + myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder(); myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder(); } - + /* * ISO 2022 control codes must not be converted from Unicode * because they would mess up the byte stream. * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b * corresponding to SO, SI, and ESC. */ - private static boolean IS_2022_CONTROL(int c) { + private static boolean IS_2022_CONTROL(int c) { return (c<0x20) && (((1<= 0xa1a1) && + if ((value <= 0xfefe && value >= 0xa1a1) && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) { return (value - 0x8080); /* shift down to 21..7e byte range */ } else { return 0; /* not valid for ISO 2022 */ } } - + /* - * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. - * + * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. + * * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point - * unchanged. - * + * unchanged. 
+ * private static int _2022ToGR94DBCS(int value) { int returnValue = value + 0x8080; - - if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) && + + if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) { return returnValue; } else { return value; } }*/ - + /* is the StateEnum charset value for a DBCS charset? */ private static boolean IS_JP_DBCS(byte cs) { return ((JISX208 <= cs) && (cs <= KSC5601)); } - + private static short CSM(short cs) { return (short)(1<> 1; /* Finds median */ - + if (mid == oldmid) { break; } - + if (escSeqStateTable_Key_2022[mid] > togo) { hi = mid; } else if (escSeqStateTable_Key_2022[mid] < togo) { @@ -755,7 +755,7 @@ class CharsetISO2022 extends CharsetICU { } return INVALID_2022; } - + /* * To Unicode Callback helper function */ @@ -769,28 +769,29 @@ class CharsetISO2022 extends CharsetICU { cnv.toUBytesArray[0] = (byte)sourceChar; cnv.toULength = 1; } - + if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) { err = CoderResult.unmappableForLength(1); } else { err = CoderResult.malformedForLength(1); } - + return err; } - + /****************************ISO-2022-JP************************************/ private class CharsetDecoderISO2022JP extends CharsetDecoderICU { public CharsetDecoderISO2022JP(CharsetICU cs) { super(cs); } - + + @Override protected void implReset() { super.implReset(); myConverterData.reset(); } - /* - * Map 00..7F to Unicode according to JIS X 0201. + /* + * Map 00..7F to Unicode according to JIS X 0201. 
* */ private int jisx201ToU(int value) { if (value < 0x5c) { @@ -827,7 +828,7 @@ class CharsetISO2022 extends CharsetICU { c2 = 0; /* invalid */ } } - + c1 >>=1; if (c1 <= 0x2f) { c1 += 0x70; @@ -840,6 +841,7 @@ class CharsetISO2022 extends CharsetICU { bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2); } + @Override @SuppressWarnings("fallthrough") protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { boolean gotoGetTrail = false; @@ -851,7 +853,7 @@ class CharsetISO2022 extends CharsetICU { int mySourceCharTemp = 0x0000; // use for getTrail label call. byte cs; /* StateEnum */ byte csTemp= 0; // use for getTrail label call. - + if (myConverterData.key != 0) { /* continue with a partial escape sequence */ // goto escape; @@ -865,21 +867,21 @@ class CharsetISO2022 extends CharsetICU { mySourceCharTemp = 0x99; gotoGetTrail = true; } - + while (source.hasRemaining() || gotoEscape || gotoGetTrail) { // This code is here for the goto escape label call above. 
if (gotoEscape) { mySourceCharTemp = ESC_2022; } - + targetUniChar = UConverterConstants.missingCharMarker; - + if (gotoEscape || gotoGetTrail || target.hasRemaining()) { if (!gotoEscape && !gotoGetTrail) { mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK; mySourceCharTemp = mySourceChar; } - + switch (mySourceCharTemp) { case UConverterConstants.SI: if (myConverterData.version == 3) { @@ -890,19 +892,19 @@ class CharsetISO2022 extends CharsetICU { myConverterData.isEmptySegment = false; break; } - + case UConverterConstants.SO: if (myConverterData.version == 3) { /* JIS7: switch to G1 half-width Katakana */ myConverterData.toU2022State.cs[1] = HWKANA_7BIT; myConverterData.toU2022State.g = 1; - continue; + continue; } else { /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ myConverterData.isEmptySegment = false; /* reset this, we have a different error */ break; } - + case ESC_2022: if (!gotoEscape) { source.position(source.position() - 1); @@ -913,7 +915,7 @@ class CharsetISO2022 extends CharsetICU { { int mySourceBefore = source.position(); int toULengthBefore = this.toULength; - + err = changeState_2022(this, source, variant); /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */ @@ -956,7 +958,7 @@ class CharsetISO2022 extends CharsetICU { if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) { /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ targetUniChar = mySourceChar + (HWKANA_START - 0xa1); - + /* return from a single-shift state to the previous one */ if (myConverterData.toU2022State.g >= 2) { myConverterData.toU2022State.g = myConverterData.toU2022State.prevG; @@ -1003,14 +1005,14 @@ class CharsetISO2022 extends CharsetICU { gotoGetTrail = false; short trailByte; boolean leadIsOk, trailIsOk; - + trailByte = (short)(source.get(source.position()) & 
UConverterConstants.UNSIGNED_BYTE_MASK); /* * Ticket 5691: consistent illegal sequences: * - We include at least the first byte in the illegal sequence. * - If any of the non-initial bytes could be the start of a character, * we stop the illegal sequence before the first one of those. - * + * * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is * an ESC/SO/SI, we report only the first byte as the illegal sequence. * Otherwise we convert or report the pair of bytes. @@ -1049,7 +1051,7 @@ class CharsetISO2022 extends CharsetICU { } break; } /* end of outer switch */ - + if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) { if (offsets != null) { offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2)); @@ -1072,7 +1074,7 @@ class CharsetISO2022 extends CharsetICU { } target.get(); } else { - charErrorBufferArray[charErrorBufferLength++] = + charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff)); } } else { @@ -1089,18 +1091,20 @@ class CharsetISO2022 extends CharsetICU { return err; } } // end of class CharsetDecoderISO2022JP - + /****************************ISO-2022-CN************************************/ private class CharsetDecoderISO2022CN extends CharsetDecoderICU { public CharsetDecoderISO2022CN(CharsetICU cs) { super(cs); } - + + @Override protected void implReset() { super.implReset(); myConverterData.reset(); } + @Override @SuppressWarnings("fallthrough") protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; @@ -1110,7 +1114,7 @@ class CharsetISO2022 extends CharsetICU { int mySourceCharTemp = 0x0000; boolean gotoEscape = false; boolean gotoGetTrailByte = false; - + if (myConverterData.key != 0) { /* continue with a partial escape sequence */ // goto escape; @@ -1123,10 +1127,10 @@ class CharsetISO2022 extends CharsetICU { // goto getTrailByte gotoGetTrailByte = true; 
} - + while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) { targetUniChar = UConverterConstants.missingCharMarker; - + if (target.hasRemaining() || gotoEscape) { if (gotoEscape) { mySourceChar = ESC_2022; // goto escape label @@ -1137,7 +1141,7 @@ class CharsetISO2022 extends CharsetICU { mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get(); mySourceCharTemp = mySourceChar; } - + switch (mySourceCharTemp) { case UConverterConstants.SI: myConverterData.toU2022State.g = 0; @@ -1149,7 +1153,7 @@ class CharsetISO2022 extends CharsetICU { return err; } continue; - + case UConverterConstants.SO: if (myConverterData.toU2022State.cs[1] != 0) { myConverterData.toU2022State.g = 1; @@ -1160,7 +1164,7 @@ class CharsetISO2022 extends CharsetICU { myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */ break; } - + case ESC_2022: if (!gotoEscape) { source.position(source.position()-1); @@ -1186,7 +1190,7 @@ class CharsetISO2022 extends CharsetICU { return err; } continue; - + /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ case CR: /* falls through */ @@ -1205,14 +1209,14 @@ class CharsetISO2022 extends CharsetICU { short trailByte; // getTrailByte: label gotoGetTrailByte = false; // reset gotoGetTrailByte - + trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK); /* * Ticket 5691: consistent illegal sequences: * - We include at least the first byte in the illegal sequence. * - If any of the non-initial bytes could be the start of a character, * we stop the illegal sequence before the first one of those. - * + * * In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is * an ESC/SO/SI, we report only the first byte as the illegal sequence. * Otherwise we convert or report the pair of bytes. 
@@ -1238,7 +1242,7 @@ class CharsetISO2022 extends CharsetICU { tempBuffer.limit(tempBufLen); targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false); mySourceChar = (mySourceChar << 8) | trailByte; - + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { /* report a pair of illegal bytes if the second byte is not a DBCS starter */ source.get(); @@ -1283,33 +1287,35 @@ class CharsetISO2022 extends CharsetICU { charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff)); } } else { - /* Call the callback function */ + /* Call the callback function */ err = toUnicodeCallback(this, mySourceChar, targetUniChar); break; } - + } else { err = CoderResult.OVERFLOW; break; } } - + return err; } - + } /************************ ISO-2022-KR ********************/ private class CharsetDecoderISO2022KR extends CharsetDecoderICU { public CharsetDecoderISO2022KR(CharsetICU cs) { super(cs); } - + + @Override protected void implReset() { super.implReset(); setInitialStateToUnicodeKR(); myConverterData.reset(); } - + + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; int mySourceChar = 0x0000; @@ -1318,14 +1324,14 @@ class CharsetISO2022 extends CharsetICU { boolean usingFallback; boolean gotoGetTrailByte = false; boolean gotoEscape = false; - + if (myConverterData.version == 1) { return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush); } - + /* initialize state */ usingFallback = isFallbackUsed(); - + if (myConverterData.key != 0) { /* continue with a partial escape sequence */ gotoEscape = true; @@ -1335,13 +1341,13 @@ class CharsetISO2022 extends CharsetICU { toULength = 0; gotoGetTrailByte = true; } - + while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) { if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) { if (!gotoGetTrailByte && !gotoEscape) { mySourceChar = (char)(source.get() 
& UConverterConstants.UNSIGNED_BYTE_MASK); } - + if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) { myConverterData.toU2022State.g = 0; if (myConverterData.isEmptySegment) { @@ -1364,7 +1370,7 @@ class CharsetISO2022 extends CharsetICU { } // escape label gotoEscape = false; // reset gotoEscape flag - myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */ + myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */ err = changeState_2022(this, source, ISO_2022_KR); if (err.isError()) { return err; @@ -1378,7 +1384,7 @@ class CharsetISO2022 extends CharsetICU { short trailByte; // getTrailByte label gotoGetTrailByte = false; // reset gotoGetTrailByte flag - + trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK); targetUniChar = UConverterConstants.missingCharMarker; /* @@ -1386,7 +1392,7 @@ class CharsetISO2022 extends CharsetICU { * - We include at least the first byte in the illegal sequence. * - If any of the non-initial bytes could be the start of a character, * we stop the illegal sequence before the first one of those. - * + * * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is * an ESC/SO/SI, we report only the first byte as the illegal sequence. * Otherwise we convert or report the pair of bytes. 
@@ -1414,7 +1420,7 @@ class CharsetISO2022 extends CharsetICU { int savedSourceLimit = source.limit(); int savedSourcePosition = source.position(); source.limit(source.position()); - source.position(source.position()-1); + source.position(source.position()-1); targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback); source.limit(savedSourceLimit); source.position(savedSourcePosition); @@ -1436,10 +1442,10 @@ class CharsetISO2022 extends CharsetICU { break; } } - + return err; } - + protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; int sourceStart; @@ -1448,15 +1454,15 @@ class CharsetISO2022 extends CharsetICU { int argTarget; boolean gotoEscape = false; int oldSourceLimit; - + /* remember the original start of the input for offsets */ sourceStart = argSource = source.position(); - + if (myConverterData.key != 0) { /* continue with a partial escape sequence */ gotoEscape = true; } - + while (gotoEscape || (!err.isError() && source.hasRemaining())) { if (!gotoEscape) { /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */ @@ -1466,7 +1472,7 @@ class CharsetISO2022 extends CharsetICU { if (source.position() != sourceLimit) { /* * get the current partial byte sequence - * + * * it needs to be moved between the public and the subconverter * so that the conversion frameword, which only sees the public * converter, can handle truncated and illegal input etc. @@ -1475,7 +1481,7 @@ class CharsetISO2022 extends CharsetICU { cnv.toUBytesArray = toUBytesArray.clone(); } cnv.toULength = toULength; - + /* * Convert up to the end of the input, or to before the next escape character. * Does not handle conversion extensions because the preToU[] state etc. 
@@ -1501,13 +1507,13 @@ class CharsetISO2022 extends CharsetICU { } } argSource = source.position(); - + /* copy input/error/overflow buffers */ if (cnv.toULength > 0) { toUBytesArray = cnv.toUBytesArray.clone(); } toULength = cnv.toULength; - + if (err.isOverflow()) { if (cnv.charErrorBufferLength > 0) { charErrorBufferArray = cnv.charErrorBufferArray.clone(); @@ -1516,7 +1522,7 @@ class CharsetISO2022 extends CharsetICU { cnv.charErrorBufferLength = 0; } } - + if (err.isError() || err.isOverflow() || (source.position() == source.limit())) { return err; } @@ -1528,7 +1534,7 @@ class CharsetISO2022 extends CharsetICU { return err; } } - + /******************** from unicode **********************/ /* preference order of JP charsets */ private final static byte []jpCharsetPref = { @@ -1629,14 +1635,15 @@ class CharsetISO2022 extends CharsetICU { 0x212B, 0x212C /* U+FF9F */ }; - + protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} }; /****************************ISO-2022-JP************************************/ private class CharsetEncoderISO2022JP extends CharsetEncoderICU { public CharsetEncoderISO2022JP(CharsetICU cs) { super(cs, fromUSubstitutionChar[0]); } - + + @Override protected void implReset() { super.implReset(); myConverterData.reset(); @@ -1654,7 +1661,7 @@ class CharsetISO2022 extends CharsetICU { } return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe); } - + /* * Take a valid Shift-JIS byte pair, check that it is in the range corresponding * to JIS X 0208, and convert it to a pair of 21..7E bytes. 
@@ -1662,22 +1669,22 @@ class CharsetISO2022 extends CharsetICU { */ private int _2022FromSJIS(int value) { short trail; - + if (value > 0xEFFC) { return 0; /* beyond JIS X 0208 */ } - + trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK); - + value &= 0xff00; /* lead byte */ if (value <= 0x9f00) { value -= 0x7000; } else { /* 0xe000 <= value <= 0xef00 */ value -= 0xb000; } - + value <<= 1; - + if (trail <= 0x9e) { value -= 0x100; if (trail <= 0x7e) { @@ -1688,18 +1695,19 @@ class CharsetISO2022 extends CharsetICU { } else { /* trail <= 0xfc */ value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK); } - + return value; } /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */ - CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, + @Override + CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target, IntBuffer offsets){ CoderResult err = CoderResult.UNDERFLOW; byte[] buffer = new byte[8]; int i = 0; byte[] subchar; subchar = encoder.replacement(); - + byte cs; if (myConverterData.fromU2022State.g == 1) { /* JIS7: switch from G1 to G0 */ @@ -1707,7 +1715,7 @@ class CharsetISO2022 extends CharsetICU { buffer[i++] = UConverterConstants.SI; } cs = myConverterData.fromU2022State.cs[0]; - + if (cs != ASCII && cs != JISX201) { /* not in ASCII or JIS X 0201: switch to ASCII */ myConverterData.fromU2022State.cs[0] = ASCII; @@ -1715,14 +1723,15 @@ class CharsetISO2022 extends CharsetICU { buffer[i++] = 0x28; buffer[i++] = 0x42; } - + buffer[i++] = subchar[0]; - + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1); return err; } - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; int sourceChar; @@ -1735,14 +1744,14 @@ class CharsetISO2022 extends CharsetICU { byte[] buffer = new byte[8]; boolean getTrail = false; // use for getTrail label int 
oldSourcePos; // for proper error handling - + choiceCount = 0; - + /* check if the last codepoint of previous buffer was a lead surrogate */ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { getTrail = true; } - + while (getTrail || source.hasRemaining()) { if (getTrail || target.hasRemaining()) { oldSourcePos = source.position(); @@ -1752,7 +1761,7 @@ class CharsetISO2022 extends CharsetICU { /* check if the char is a First surrogate */ if (getTrail || UTF16.isSurrogate((char)sourceChar)) { if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) { -// getTrail: +// getTrail: if (getTrail) { getTrail = false; } @@ -1788,7 +1797,7 @@ class CharsetISO2022 extends CharsetICU { break; } } - + /* do not convert SO/SI/ESC */ if (IS_2022_CONTROL(sourceChar)) { /* callback(illegal) */ @@ -1796,9 +1805,9 @@ class CharsetISO2022 extends CharsetICU { fromUChar32 = sourceChar; break; } - + /* do the conversion */ - + if (choiceCount == 0) { char csm; /* @@ -1807,18 +1816,18 @@ class CharsetISO2022 extends CharsetICU { */ csm = (char)jpCharsetMasks[myConverterData.version]; choiceCount = 0; - + /* JIS7/8: try single-byte half-width Katakana before JISX208 */ if (myConverterData.version == 3 || myConverterData.version == 4) { choices[choiceCount++] = HWKANA_7BIT; } /* Do not try single-bit half-width Katakana for other versions. */ csm &= ~CSM(HWKANA_7BIT); - + /* try the current G0 charset */ choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0]; csm &= ~CSM(cs); - + /* try the current G2 charset */ if ((cs = myConverterData.fromU2022State.cs[2]) != 0) { choices[choiceCount++] = cs; @@ -1834,9 +1843,9 @@ class CharsetISO2022 extends CharsetICU { } } } - + cs = g = 0; - /* + /* * len==0: no mapping found yet * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks * len>0: found a roundtrip result, done @@ -1849,7 +1858,7 @@ class CharsetISO2022 extends CharsetICU { * an early fallback with a later one. 
*/ usingFallback = useFallback; - + for (int i = 0; i < choiceCount && len <= 0; i++) { int[] value = new int[1]; int len2; @@ -1885,7 +1894,7 @@ class CharsetISO2022 extends CharsetICU { /* Shift U+FF61..U+FF9F to bytes A1..DF. */ targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1))); len = 1; - + cs = myConverterData.fromU2022State.cs[0]; if (IS_JP_DBCS(cs)) { /* switch from a DBCS charset to JISX201 */ @@ -1969,30 +1978,30 @@ class CharsetISO2022 extends CharsetICU { break; } } - + if (len != 0) { if (len < 0) { len = -len; /* fallback */ } outLen = 0; - + /* write SI if necessary (only for JIS7 */ if (myConverterData.fromU2022State.g == 1 && g == 0) { buffer[outLen++] = UConverterConstants.SI; myConverterData.fromU2022State.g = 0; } - + /* write the designation sequence if necessary */ if (cs != myConverterData.fromU2022State.cs[g]) { for (int i = 0; i < escSeqChars[cs].length; i++) { buffer[outLen++] = escSeqChars[cs][i]; } myConverterData.fromU2022State.cs[g] = cs; - + /* invalidate the choices[] */ choiceCount = 0; } - + /* write the shift sequence if necessary */ if (g != myConverterData.fromU2022State.g) { switch (g) { @@ -2008,7 +2017,7 @@ class CharsetISO2022 extends CharsetICU { /* case 3: no SS3 in ISO-2022-JP-x */ } } - + /* write the output bytes */ if (len == 1) { buffer[outLen++] = (byte)targetValue; @@ -2025,13 +2034,13 @@ class CharsetISO2022 extends CharsetICU { fromUChar32 = sourceChar; break; } - + if (sourceChar == CR || sourceChar == LF) { /* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */ myConverterData.fromU2022State.cs[2] = 0; choiceCount = 0; } - + /* output outLen>0 bytes in buffer[] */ if (outLen == 1) { target.put(buffer[0]); @@ -2054,12 +2063,12 @@ class CharsetISO2022 extends CharsetICU { break; } } - + /* * the end of the input stream and detection of truncated input * are handled by the framework, but for ISO-2022-JP conversion * we need to 
be in ASCII mode at the very end - * + * * conditions: * successful * in SO mode or not in ASCII mode @@ -2069,21 +2078,21 @@ class CharsetISO2022 extends CharsetICU { (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) && flush && !source.hasRemaining() && fromUChar32 == 0) { int sourceIndex; - + outLen = 0; - + if (myConverterData.fromU2022State.g != 0) { buffer[outLen++] = UConverterConstants.SI; myConverterData.fromU2022State.g = 0; } - + if (myConverterData.fromU2022State.cs[0] != ASCII) { for (int i = 0; i < escSeqChars[ASCII].length; i++) { buffer[outLen++] = escSeqChars[ASCII][i]; } myConverterData.fromU2022State.cs[0] = ASCII; } - + /* get the source index of the last input character */ sourceIndex = source.position(); if (sourceIndex > 0) { @@ -2095,7 +2104,7 @@ class CharsetISO2022 extends CharsetICU { } else { sourceIndex = -1; } - + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex); } return err; @@ -2118,60 +2127,60 @@ class CharsetISO2022 extends CharsetICU { * SO CNS-11643-1992 Plane 1, GB2312, ISO-IR-165 * SS2 N CNS-11643-1992 Plane 2 * SS3 O CNS-11643-1992 Planes 3-7 - * vi) + * vi) * SOdesignator : ESC "$" ")" finalchar_for_SO * SS2designator : ESC "$" "*" finalchar_for_SS2 * SS3designator : ESC "$" "+" finalchar_for_SS3 - * + * * ESC $ ) A Indicates the bytes following SO are Chinese * characters as defined in GB 2312-80, until * another SOdesignation appears - * + * * ESC $ ) E Indicates the bytes following SO are as defined * in ISO-IR-165 (for details, see section 2.1), * until another SOdesignation appears - * + * * ESC $ ) G Indicates the bytes following SO are as defined * in CNS 11643-plane-1, until another SOdesignation appears - * + * * ESC $ * H Indicates teh two bytes immediately following * SS2 is a Chinese character as defined in CNS * 11643-plane-2, until another SS2designation * appears * (Meaning N must preceed ever 2 byte sequence.) 
- * + * * ESC $ + I Indicates the immediate two bytes following SS3 * is a Chinese character as defined in CNS * 11643-plane-3, until another SS3designation * appears * (Meaning O must preceed every 2 byte sequence.) - * + * * ESC $ + J Indicates the immediate two bytes following SS3 * is a Chinese character as defined in CNS * 11643-plane-4, until another SS3designation * appears * (In English: O must preceed every 2 byte sequence.) - * + * * ESC $ + K Indicates the immediate two bytes following SS3 * is a Chinese character as defined in CNS * 11643-plane-5, until another SS3designation * appears - * + * * ESC $ + L Indicates the immediate two bytes following SS3 * is a Chinese character as defined in CNS * 11643-plane-6, until another SS3designation * appears - * + * * ESC $ + M Indicates the immediate two bytes following SS3 * is a Chinese character as defined in CNS * 11643-plane-7, until another SS3designation * appears - * + * * As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and * has its own designation information before any Chinese chracters * appears */ - + /* The following are defined this way to make strings truely readonly */ private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 }; private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 }; @@ -2182,7 +2191,7 @@ class CharsetISO2022 extends CharsetICU { private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B }; private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C }; private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D }; - + /************************ ISO2022-CN Data *****************************/ private final static byte[][] escSeqCharsCN = { SHIFT_IN_STR, @@ -2196,38 +2205,41 @@ class CharsetISO2022 extends CharsetICU { CNS_11643_1992_Plane_6_STR, CNS_11643_1992_Plane_7_STR, }; - + private class CharsetEncoderISO2022CN extends CharsetEncoderICU { public 
CharsetEncoderISO2022CN(CharsetICU cs) { super(cs, fromUSubstitutionChar[0]); } - + + @Override protected void implReset() { super.implReset(); myConverterData.reset(); } - + /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */ - CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, + @Override + CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target, IntBuffer offsets){ CoderResult err = CoderResult.UNDERFLOW; byte[] buffer = new byte[8]; int i = 0; byte[] subchar; subchar = encoder.replacement(); - + if (myConverterData.fromU2022State.g != 0) { /* not in ASCII mode: switch to ASCII */ myConverterData.fromU2022State.g = 0; buffer[i++] = UConverterConstants.SI; } buffer[i++] = subchar[0]; - + err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1); return err; } - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; int sourceChar; @@ -2239,15 +2251,15 @@ class CharsetISO2022 extends CharsetICU { boolean usingFallback; boolean gotoGetTrail = false; int oldSourcePos; // For proper error handling - + choiceCount = 0; - + /* check if the last codepoint of previous buffer was a lead surrogate */ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { // goto getTrail label - gotoGetTrail = true; + gotoGetTrail = true; } - + while (source.hasRemaining() || gotoGetTrail) { if (target.hasRemaining() || gotoGetTrail) { oldSourcePos = source.position(); @@ -2260,7 +2272,7 @@ class CharsetISO2022 extends CharsetICU { // getTrail label /* reset gotoGetTrail flag*/ gotoGetTrail = false; - + /* look ahead to find the trail surrogate */ if (source.hasRemaining()) { /* test the following code unit */ @@ -2292,7 +2304,7 @@ class CharsetISO2022 extends CharsetICU { break; } } - + /* do the conversion */ if (sourceChar <= 0x007f) { /* do not converter SO/SI/ESC */ @@ 
-2302,7 +2314,7 @@ class CharsetISO2022 extends CharsetICU { fromUChar32 = sourceChar; break; } - + /* US-ASCII */ if (myConverterData.fromU2022State.g == 0) { buffer[0] = (byte)sourceChar; @@ -2314,7 +2326,7 @@ class CharsetISO2022 extends CharsetICU { myConverterData.fromU2022State.g = 0; choiceCount = 0; } - + if (sourceChar == CR || sourceChar == LF) { /* reset the state at the end of a line */ myConverterData.fromU2022State.reset(); @@ -2324,11 +2336,11 @@ class CharsetISO2022 extends CharsetICU { /* convert U+0080..U+10ffff */ int i; byte cs, g; - + if (choiceCount == 0) { /* try the current SO/G1 converter first */ choices[0] = myConverterData.fromU2022State.cs[1]; - + /* default to GB2312_1 if none is designated yet */ if (choices[0] == 0) { choices[0] = GB2312_1; @@ -2341,11 +2353,11 @@ class CharsetISO2022 extends CharsetICU { } else { choices[1] = GB2312_1; } - + choiceCount = 2; } else if (myConverterData.version == 1) { /* ISO-2022-CN-EXT */ - + /* try one of the other converters */ switch (choices[0]) { case GB2312_1: @@ -2361,17 +2373,17 @@ class CharsetISO2022 extends CharsetICU { choices[2] = ISO_IR_165; break; } - + choiceCount = 3; } else { /* ISO-2022-CN-CNS */ choices[0] = CNS_11643_1; choices[1] = GB2312_1; - + choiceCount = 2; } } - + cs = g = 0; /* * len==0: no mapping found yet @@ -2386,7 +2398,7 @@ class CharsetISO2022 extends CharsetICU { * an early fallback with a later one. 
*/ usingFallback = useFallback; - + for (i = 0; i < choiceCount && len <= 0; ++i) { byte cs0 = choices[i]; if (cs0 > 0) { @@ -2435,10 +2447,10 @@ class CharsetISO2022 extends CharsetICU { } } } - + if (len != 0) { len = 0; /* count output bytes; it must have ben abs(len) == 2 */ - + /* write the designation sequence if necessary */ if (cs != myConverterData.fromU2022State.cs[g]) { if (cs < CNS_11643) { @@ -2457,13 +2469,13 @@ class CharsetISO2022 extends CharsetICU { choiceCount = 0; } } - + /* write the shift sequence if necessary */ if (g != myConverterData.fromU2022State.g) { switch (g) { case 1: buffer[len++] = UConverterConstants.SO; - + /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */ myConverterData.fromU2022State.g = 1; break; @@ -2477,7 +2489,7 @@ class CharsetISO2022 extends CharsetICU { break; } } - + /* write the two output bytes */ buffer[len++] = (byte)(targetValue >> 8); buffer[len++] = (byte)targetValue; @@ -2515,12 +2527,12 @@ class CharsetISO2022 extends CharsetICU { break; } } /* end while (source.hasRemaining() */ - + /* * the end of the input stream and detection of truncated input * are handled by the framework, but for ISO-2022-CN conversion * we need to be in ASCII mode at the very end - * + * * condtions: * succesful * not in ASCII mode @@ -2528,25 +2540,25 @@ class CharsetISO2022 extends CharsetICU { */ if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) { int sourceIndex; - + /* we are switching to ASCII */ myConverterData.fromU2022State.g = 0; - + /* get the source index of the last input character */ sourceIndex = source.position(); if (sourceIndex > 0) { --sourceIndex; - if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && + if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) { --sourceIndex; } } else { sourceIndex = -1; } - + err = 
CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex); } - + return err; } } @@ -2563,24 +2575,26 @@ class CharsetISO2022 extends CharsetICU { public CharsetEncoderISO2022KR(CharsetICU cs) { super(cs, fromUSubstitutionChar[myConverterData.version]); } - + + @Override protected void implReset() { super.implReset(); myConverterData.reset(); setInitialStateFromUnicodeKR(this); } - + /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */ - CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, + @Override + CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target, IntBuffer offsets){ CoderResult err = CoderResult.UNDERFLOW; byte[] buffer = new byte[8]; int length, i = 0; byte[] subchar; - + subchar = encoder.replacement(); length = subchar.length; - + if (myConverterData.version == 0) { if (length == 1) { if (encoder.fromUnicodeStatus != 0) { @@ -2599,10 +2613,10 @@ class CharsetISO2022 extends CharsetICU { buffer[i++] = subchar[1]; } err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1); - } else { + } else { /* save the subvonverter's substitution string */ byte[] currentSubChars = myConverterData.currentEncoder.replacement(); - + /* set our substitution string into the subconverter */ myConverterData.currentEncoder.replaceWith(subchar); myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0]; @@ -2610,10 +2624,10 @@ class CharsetISO2022 extends CharsetICU { myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32; err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets); encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32; - + /* restore the subconverter's substitution string */ myConverterData.currentEncoder.replaceWith(currentSubChars); - + if (err.isOverflow()) { if (myConverterData.currentEncoder.errorBufferLength > 0) { encoder.errorBuffer = 
myConverterData.currentEncoder.errorBuffer.clone(); @@ -2622,17 +2636,17 @@ class CharsetISO2022 extends CharsetICU { myConverterData.currentEncoder.errorBufferLength = 0; } } - + return err; } - + private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; myConverterData.currentEncoder.fromUChar32 = fromUChar32; err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush); fromUChar32 = myConverterData.currentEncoder.fromUChar32; - + if (err.isOverflow()) { if (myConverterData.currentEncoder.errorBufferLength > 0) { errorBuffer = myConverterData.currentEncoder.errorBuffer.clone(); @@ -2640,10 +2654,11 @@ class CharsetISO2022 extends CharsetICU { errorBufferLength = myConverterData.currentEncoder.errorBufferLength; myConverterData.currentEncoder.errorBufferLength = 0; } - + return err; } - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; int[] targetByteUnit = { 0x0000 }; @@ -2653,7 +2668,7 @@ class CharsetISO2022 extends CharsetICU { boolean usingFallback; int length = 0; boolean gotoGetTrail = false; // for goto getTrail label call - + /* * if the version is 1 then the user is requesting * conversion with ibm-25546 pass the argument to @@ -2662,20 +2677,20 @@ class CharsetISO2022 extends CharsetICU { if (myConverterData.version == 1) { return encodeLoopIBM(source, target, offsets, flush); } - + usingFallback = useFallback; isTargetByteDBCS = fromUnicodeStatus == 0 ? 
false : true; if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) { gotoGetTrail = true; } - + while (source.hasRemaining() || gotoGetTrail) { targetByteUnit[0] = UConverterConstants.missingCharMarker; - + if (target.hasRemaining() || gotoGetTrail) { if (!gotoGetTrail) { sourceChar = source.get(); - + /* do not convert SO/SI/ESC */ if (IS_2022_CONTROL(sourceChar)) { /* callback(illegal) */ @@ -2685,7 +2700,7 @@ class CharsetISO2022 extends CharsetICU { } myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2; length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback); - //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); + //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2); if (length < 0) { length = -length; /* fallback */ } @@ -2739,7 +2754,7 @@ class CharsetISO2022 extends CharsetICU { errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80)); err = CoderResult.OVERFLOW; } - + } else { errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)); errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)); @@ -2750,14 +2765,14 @@ class CharsetISO2022 extends CharsetICU { /* oops.. 
the code point is unassigned * set the error and reason */ - + /* check if the char is a First surrogate */ if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) { if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) { // getTrail label // reset gotoGetTrail flag gotoGetTrail = false; - + /* look ahead to find the trail surrogate */ if (source.hasRemaining()) { /* test the following code unit */ @@ -2787,7 +2802,7 @@ class CharsetISO2022 extends CharsetICU { /* callback(unassigned) for a BMP code point */ err = CoderResult.unmappableForLength(1); } - + fromUChar32 = sourceChar; break; } @@ -2800,7 +2815,7 @@ class CharsetISO2022 extends CharsetICU { * the end of the input stream and detection of truncated input * are handled by the framework, but for ISO-2022-KR conversion * we need to be inASCII mode at the very end - * + * * conditions: * successful * not in ASCII mode @@ -2808,10 +2823,10 @@ class CharsetISO2022 extends CharsetICU { */ if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) { int sourceIndex; - + /* we are switching to ASCII */ isTargetByteDBCS = false; - + /* get the source index of the last input character */ sourceIndex = source.position(); if (sourceIndex > 0) { @@ -2822,53 +2837,55 @@ class CharsetISO2022 extends CharsetICU { } else { sourceIndex = -1; } - + CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex); } /*save the state and return */ fromUnicodeStatus = isTargetByteDBCS ? 
1 : 0; - + return err; } } - + + @Override public CharsetDecoder newDecoder() { switch (variant) { case ISO_2022_JP: return new CharsetDecoderISO2022JP(this); - + case ISO_2022_CN: return new CharsetDecoderISO2022CN(this); - + case ISO_2022_KR: setInitialStateToUnicodeKR(); return new CharsetDecoderISO2022KR(this); - + default: /* should not happen */ return null; } } - + + @Override public CharsetEncoder newEncoder() { CharsetEncoderICU cnv; - + switch (variant) { case ISO_2022_JP: return new CharsetEncoderISO2022JP(this); - + case ISO_2022_CN: return new CharsetEncoderISO2022CN(this); - + case ISO_2022_KR: cnv = new CharsetEncoderISO2022KR(this); setInitialStateFromUnicodeKR(cnv); return cnv; - + default: /* should not happen */ return null; } } - + private void setInitialStateToUnicodeKR() { if (myConverterData.version == 1) { myConverterData.currentDecoder.toUnicodeStatus = 0; /* offset */ @@ -2893,11 +2910,12 @@ class CharsetISO2022 extends CharsetICU { myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */ } } - + + @Override void getUnicodeSetImpl(UnicodeSet setFillIn, int which) { int i; /*open a set and initialize it with code points that are algorithmically round-tripped */ - + switch(variant){ case ISO_2022_JP: /*include JIS X 0201 which is hardcoded */ @@ -2906,7 +2924,7 @@ class CharsetISO2022 extends CharsetICU { if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){ /*include Latin-1 some variants of JP */ setFillIn.add(0, 0xff); - + } else { /* include ASCII for JP */ @@ -2940,14 +2958,14 @@ class CharsetISO2022 extends CharsetICU { default: break; } - + //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until for(i=0; i 0x80 in an otherwise double-byte * character set. So, for example, the LMBCS sequence x10 x10 xAE is the * same as '0xAE' in the Japanese code page 943. - * + * * Next, you will notice that the list of group bytes has some gaps. * These are used in various ways. 
- * + * * We reserve a few special single byte values for common control * characters. These are in the same place as their ANSI equivalents for speed. */ @@ -163,7 +163,7 @@ class CharsetLMBCS extends CharsetICU { private static final short ULMBCS_GRP_UNICODE = 0x14; /* * The two bytes appearing after a 0x14 are interpreted as UTF-16 BE - * (Big Endian) characters. The exception comes when UTF16 + * (Big Endian) characters. The exception comes when UTF16 * representation would have a zero as the second byte. In that case, * 'F6' is used in its place, and the bytes are swapped. (This prevents * LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: @@ -180,7 +180,7 @@ class CharsetLMBCS extends CharsetICU { * translations even faster, sometimes the optimization group byte can be dropped * from a LMBCS character. This is decided on a process-by-process basis. The * group byte that is dropped is called the 'optimization group.' - * + * * For Notes, the optimization group is always 0x1. */ //private static final short ULMBCS_DEFAULTOPTGROUP = 0x01; @@ -191,19 +191,19 @@ class CharsetLMBCS extends CharsetICU { * etc.). Using plain 'LMBCS' as the name of the converter will give you * LMBCS-1. */ - + /* Implementation strategy */ - /* + /* * Because of the extensive use of other character sets, the LMBCS converter * keeps a mapping between optimization groups and IBM character sets, so that * ICU converters can be created and used as needed. - * + * * As you can see, even though any byte below 0x20 could be an optimization * byte, only those at 0x13 or below can map to an actual converter. 
To limit * some loops and searches, we define a value for that last group converter: */ private static final short ULMBCS_GRP_LAST = 0x13; /* last LMBCS group that has a converter */ - + private static final String[] OptGroupByteToCPName = { /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ /* 0x0001 */ "ibm-850", @@ -227,14 +227,14 @@ class CharsetLMBCS extends CharsetICU { /* 0x0013 */ "windows-936", /* The rest are null, including the 0x0014 Unicode compatibility region * and 0x0019, the 1-2-3 system range control char */ - /* 0x0014 */ null + /* 0x0014 */ null }; - + /* That's approximately all the data that's needed for translating * LMBCS to Unicode. - * + * * However, to translate Unicode to LMBCS, we need some more support. - * + * * That's because there are often more than one possible mappings from a Unicode * code point back into LMBCS. The first thing we do is look up into a table * to figure out if there are more than one possible mapplings. This table, @@ -252,7 +252,7 @@ class CharsetLMBCS extends CharsetICU { LMBCS mbcs native encoding (example: Unihan) */ private static final short ULMBCS_AMBIGUOUS_ALL = 0x82; - + /* And here's a simple way to see if a group falls in an appropriate range */ private boolean ULMBCS_AMBIGUOUS_MATCH(short agroup, short xgroup) { return (((agroup == ULMBCS_AMBIGUOUS_SBCS) && @@ -261,7 +261,7 @@ class CharsetLMBCS extends CharsetICU { (xgroup >= ULMBCS_DOUBLEOPTGROUP_START)) || ((agroup) == ULMBCS_AMBIGUOUS_ALL)); } - + /* The table & some code to use it: */ private static class _UniLMBCSGrpMap { int uniStartRange; @@ -273,7 +273,7 @@ class CharsetLMBCS extends CharsetICU { this.GrpType = GrpType; } } - + private static final _UniLMBCSGrpMap[] UniLMBCSGrpMap = { new _UniLMBCSGrpMap(0x0001, 0x001F, ULMBCS_GRP_CTRL), new _UniLMBCSGrpMap(0x0080, 0x009F, ULMBCS_GRP_CTRL), @@ -413,27 +413,27 @@ class CharsetLMBCS extends CharsetICU { new _UniLMBCSGrpMap(0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS), new 
_UniLMBCSGrpMap(0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE) }; - + static short FindLMBCSUniRange(char uniChar) { int index = 0; - + while (uniChar > UniLMBCSGrpMap[index].uniEndRange) { index++; } - + if (uniChar >= UniLMBCSGrpMap[index].uniStartRange) { return UniLMBCSGrpMap[index].GrpType; } return ULMBCS_GRP_UNICODE; } - + /* * We also ask the creator of a converter to send in a preferred locale * that we can use in resolving ambiguous mappings. They send the locale * in as a string, and we map it, if possible, to one of the * LMBCS groups. We use this table, and the associated code, to * do the lookup: - * + * * This table maps locale ID's to LMBCS opt groups. * The default return is group 0x01. Note that for * performance reasons, the table is sorted in @@ -500,11 +500,11 @@ class CharsetLMBCS extends CharsetICU { }; static short FindLMBCSLocale(String LocaleID) { int index = 0; - + if (LocaleID == null) { return 0; } - + while (LocaleLMBCSGrpMap[index].LocaleID != null) { if (LocaleLMBCSGrpMap[index].LocaleID == LocaleID) { return LocaleLMBCSGrpMap[index].OptGroup; @@ -515,7 +515,7 @@ class CharsetLMBCS extends CharsetICU { } return ULMBCS_GRP_L1; } - + /* * Before we get to the main body of code, here's how we hook up the rest * of ICU. 
ICU converters are required to define a structure that includes @@ -540,67 +540,68 @@ class CharsetLMBCS extends CharsetICU { decoder = (CharsetDecoderMBCS)charset.newDecoder(); } } - + private UConverterDataLMBCS extraInfo; /* extraInfo in ICU4C implementation */ - + public CharsetLMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) { super(icuCanonicalName, javaCanonicalName, aliases); - maxBytesPerChar = ULMBCS_CHARSIZE_MAX; + maxBytesPerChar = ULMBCS_CHARSIZE_MAX; minBytesPerChar = 1; maxCharsPerByte = 1; - + extraInfo = new UConverterDataLMBCS(); - + for (int i = 0; i <= ULMBCS_GRP_LAST; i++) { if (OptGroupByteToCPName[i] != null) { extraInfo.OptGrpConverter[i] = ((CharsetMBCS)CharsetICU.forNameICU(OptGroupByteToCPName[i])).sharedData; } } - + //get the Opt Group number for the LMBCS converter int option = Integer.parseInt(icuCanonicalName.substring(6)); extraInfo.OptGroup = (short)option; extraInfo.localeConverterIndex = FindLMBCSLocale(ULocale.getDefault().getBaseName()); } - + class CharsetDecoderLMBCS extends CharsetDecoderICU { public CharsetDecoderLMBCS(CharsetICU cs) { super(cs); implReset(); } - + + @Override protected void implReset() { super.implReset(); } - + /* A function to call when we are looking at the Unicode group byte in LMBCS */ private char GetUniFromLMBCSUni(ByteBuffer ppLMBCSin) { short HighCh = (short)(ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK); short LowCh = (short)(ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK); - + if (HighCh == ULMBCS_UNICOMPATZERO) { HighCh = LowCh; LowCh = 0; /* zero-byte in LSB special character */ } - + return (char)((HighCh << 8) | LowCh); } - + private int LMBCS_SimpleGetNextUChar(UConverterSharedData cnv, ByteBuffer source, int positionOffset, int length) { int uniChar; int oldSourceLimit; int oldSourcePos; - + extraInfo.charset.sharedData = cnv; - + oldSourceLimit = source.limit(); oldSourcePos = source.position(); - + source.position(oldSourcePos + 
positionOffset); source.limit(source.position() + length); - + uniChar = extraInfo.decoder.simpleGetNextUChar(source, false); - + source.limit(oldSourceLimit); source.position(oldSourcePos); @@ -615,7 +616,7 @@ class CharsetLMBCS extends CharsetICU { private int LMBCSGetNextUCharWorker(ByteBuffer source, CoderResult[] err) { int uniChar = 0; /* an output Unicode char */ short CurByte; /* A byte from the input stream */ - + /* error check */ if (!source.hasRemaining()) { err[0] = CoderResult.malformedForLength(0); @@ -623,12 +624,12 @@ class CharsetLMBCS extends CharsetICU { } /* Grab first byte & save address for error recovery */ CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); - + /* * at entry of each if clause: * 1. 'CurByte' points at the first byte of a LMBCS character * 2. 'source' points to the next byte of the source stream after 'CurByte' - * + * * the job of each if clause is: * 1. set 'source' to the point at the beginning of the next char (not if LMBCS char is only 1 byte) * 2. 
set 'uniChar' up with the right Unicode value, or set 'err' appropriately @@ -637,12 +638,12 @@ class CharsetLMBCS extends CharsetICU { if ((CurByte > ULMBCS_C0END && CurByte < ULMBCS_C1START) /* ascii range */ || CurByte == 0 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE) { - + uniChar = CurByte; } else { short group; UConverterSharedData cnv; - + if (CurByte == ULMBCS_GRP_CTRL) { /* Control character group - no opt group update */ short C0C1byte; /* CHECK_SOURCE_LIMIT(1) */ @@ -660,7 +661,7 @@ class CharsetLMBCS extends CharsetICU { source.position(source.limit()); return 0xFFFF; } - + /* don't check for error indicators fffe/ffff below */ return GetUniFromLMBCSUni(source); } else if (CurByte <= ULMBCS_CTRLOFFSET) { @@ -675,7 +676,7 @@ class CharsetLMBCS extends CharsetICU { source.position(source.limit()); return 0xFFFF; } - + /* check for LMBCS doubled-group-byte case */ if (source.get(source.position()) == group) { /* single byte */ @@ -696,7 +697,7 @@ class CharsetLMBCS extends CharsetICU { return 0xFFFF; } CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); - + if (CurByte >= ULMBCS_C1START) { uniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv.mbcs, CurByte); } else { @@ -705,16 +706,16 @@ class CharsetLMBCS extends CharsetICU { * AND the second byte is not in the upper ascii range */ byte[] bytes = new byte[2]; - + cnv = extraInfo.OptGrpConverter[ULMBCS_GRP_EXCEPT]; - + /* Lookup value must include opt group */ bytes[0] = (byte)group; bytes[1] = (byte)CurByte; uniChar = LMBCS_SimpleGetNextUChar(cnv, ByteBuffer.wrap(bytes), 0, 2); } } - + } else if (CurByte >= ULMBCS_C1START) { /* group byte is implicit */ group = extraInfo.OptGroup; cnv = extraInfo.OptGrpConverter[group]; @@ -726,7 +727,7 @@ class CharsetLMBCS extends CharsetICU { source.position(source.limit()); return 0xFFFF; } - + /* let the MBCS conversion consume CurByte again */ uniChar = 
LMBCS_SimpleGetNextUChar(cnv, source, -1, 1); } else { @@ -736,7 +737,7 @@ class CharsetLMBCS extends CharsetICU { source.position(source.limit()); return 0xFFFF; } - + /* let the MBCS conversion consume CurByte again */ uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 2); source.get(); @@ -746,11 +747,12 @@ class CharsetLMBCS extends CharsetICU { } } } - + return uniChar; } - - protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + + @Override + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult[] err = new CoderResult[1]; err[0] = CoderResult.UNDERFLOW; byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX * 2]; /* Increase the size for proper handling in subsequent calls to MBCS functions */ @@ -758,14 +760,14 @@ class CharsetLMBCS extends CharsetICU { int saveSource; /* beginning of current code point */ int errSource = 0; /* index to actual input in case an error occurs */ byte savebytes = 0; - + /* Process from source to limit, or until error */ while (err[0].isUnderflow() && source.hasRemaining() && target.hasRemaining()) { saveSource = source.position(); /* beginning of current code point */ if (toULength > 0) { /* reassemble char from previous call */ int size_old = toULength; ByteBuffer tmpSourceBuffer; - + /* limit from source is either remainder of temp buffer, or user limit on source */ int size_new_maybe_1 = ULMBCS_CHARSIZE_MAX - size_old; int size_new_maybe_2 = source.remaining(); @@ -783,8 +785,8 @@ class CharsetLMBCS extends CharsetICU { uniChar = (char)LMBCSGetNextUCharWorker(tmpSourceBuffer, err); source.position(saveSource + tmpSourceBuffer.position() - size_old); errSource = saveSource - size_old; - - if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */ + + if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */ /* evil special case: source buffers so small a char spans more than 2 buffers */ toULength = savebytes; 
for (int i = 0; i < savebytes; i++) { @@ -802,7 +804,7 @@ class CharsetLMBCS extends CharsetICU { uniChar = (char)LMBCSGetNextUCharWorker(source, err); savebytes = (byte)(source.position() - saveSource); } - + if (err[0].isUnderflow()) { if (uniChar < 0x0fffe) { target.put(uniChar); @@ -834,13 +836,14 @@ class CharsetLMBCS extends CharsetICU { return err[0]; } } - + class CharsetEncoderLMBCS extends CharsetEncoderICU { public CharsetEncoderLMBCS(CharsetICU cs) { super(cs, fromUSubstitution); implReset(); } - + + @Override protected void implReset() { super.implReset(); } @@ -854,14 +857,14 @@ class CharsetLMBCS extends CharsetICU { private int LMBCSConversionWorker(short group, byte[] LMBCS, char pUniChar, short[] lastConverterIndex, boolean[] groups_tried) { byte pLMBCS = 0; UConverterSharedData xcnv = extraInfo.OptGrpConverter[group]; - + int bytesConverted; int[] value = new int[1]; short firstByte; - + extraInfo.charset.sharedData = xcnv; bytesConverted = extraInfo.encoder.fromUChar32(pUniChar, value, false); - + /* get the first result byte */ if (bytesConverted > 0) { firstByte = (short)((value[0] >> ((bytesConverted - 1) * 8)) & UConverterConstants.UNSIGNED_BYTE_MASK); @@ -870,14 +873,14 @@ class CharsetLMBCS extends CharsetICU { groups_tried[group] = true; return 0; } - + lastConverterIndex[0] = group; - - /* + + /* * All initial byte values in lower ascii range should have been caught by now, * except with the exception group. 
*/ - + /* use converted data: first write 0, 1 or two group bytes */ if (group != ULMBCS_GRP_EXCEPT && extraInfo.OptGroup != group) { LMBCS[pLMBCS++] = (byte)group; @@ -885,12 +888,12 @@ class CharsetLMBCS extends CharsetICU { LMBCS[pLMBCS++] = (byte)group; } } - + /* don't emit control chars */ if (bytesConverted == 1 && firstByte < 0x20) { return 0; } - + /* then move over the converted data */ switch (bytesConverted) { case 4: @@ -905,7 +908,7 @@ class CharsetLMBCS extends CharsetICU { /* will never occur */ break; } - + return pLMBCS; } /* @@ -916,9 +919,9 @@ class CharsetLMBCS extends CharsetICU { int index = 0; short LowCh = (short)(uniChar & UConverterConstants.UNSIGNED_BYTE_MASK); short HighCh = (short)((uniChar >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK); - + LMBCS[index++] = (byte)ULMBCS_GRP_UNICODE; - + if (LowCh == 0) { LMBCS[index++] = (byte)ULMBCS_UNICOMPATZERO; LMBCS[index++] = (byte)HighCh; @@ -929,6 +932,7 @@ class CharsetLMBCS extends CharsetICU { return ULMBCS_UNICODE_SIZE; } /* The main Unicode to LMBCS conversion function */ + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult err = CoderResult.UNDERFLOW; short[] lastConverterIndex = new short[1]; @@ -938,12 +942,12 @@ class CharsetLMBCS extends CharsetICU { int bytes_written; boolean[] groups_tried = new boolean[ULMBCS_GRP_LAST+1]; int sourceIndex = 0; - + /* * Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) * If that succeeds, see if it will all fit into the target & copy it over * if it does. - * + * * We try conversions in the following order: * 1. Single-byte ascii & special fixed control chars (&null) * 2. Look up group in table & try that (could b @@ -959,23 +963,23 @@ class CharsetLMBCS extends CharsetICU { * E) If its single-byte ambiguous, try the exceptions group * 4. 
And as a grand fallback: Unicode */ - + short OldConverterIndex = 0; - + while (source.hasRemaining() && err.isUnderflow()) { OldConverterIndex = extraInfo.localeConverterIndex; - + if (!target.hasRemaining()) { err = CoderResult.OVERFLOW; break; } - + uniChar = source.get(source.position()); bytes_written = 0; pLMBCS = 0; - + /* check cases in rough order of how common they are, for speed */ - + /* single-byte matches: strategy 1 */ if((uniChar>=0x80) && (uniChar<=0xff) && (uniChar!=0xB1) && (uniChar!=0xD7) && (uniChar!=0xF7) && (uniChar!=0xB0) && (uniChar!=0xB4) && (uniChar!=0xB6) && (uniChar!=0xA7) && (uniChar!=0xA8)) { @@ -987,7 +991,7 @@ class CharsetLMBCS extends CharsetICU { LMBCS[pLMBCS++] = (byte)uniChar; bytes_written = 1; } - + if (bytes_written == 0) { /* Check by Unicode rage (Strategy 2) */ short group = FindLMBCSUniRange(uniChar); @@ -1009,12 +1013,12 @@ class CharsetLMBCS extends CharsetICU { } if (bytes_written == 0) { /* the ambiguous group cases (Strategy 3) */ groups_tried = new boolean[ULMBCS_GRP_LAST+1]; - + /* check for non-default optimization group (Strategy 3A) */ if (extraInfo.OptGroup != 1 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.OptGroup)) { if(extraInfo.localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) { bytes_written = LMBCSConversionWorker (ULMBCS_GRP_L1, LMBCS, uniChar, lastConverterIndex, groups_tried); - + if(bytes_written == 0) { bytes_written = LMBCSConversionWorker (ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried); } @@ -1027,7 +1031,7 @@ class CharsetLMBCS extends CharsetICU { } /* check for locale optimization group (Strategy 3B) */ if (bytes_written == 0 && extraInfo.localeConverterIndex > 0 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.localeConverterIndex)) { - + bytes_written = LMBCSConversionWorker(extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried); } /* check for last optimization group used for this string (Strategy 3C) */ @@ -1039,23 +1043,23 @@ class CharsetLMBCS 
extends CharsetICU { short grp_start; short grp_end; short grp_ix; - + grp_start = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_DOUBLEOPTGROUP_START : ULMBCS_GRP_L1; grp_end = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_GRP_LAST : ULMBCS_GRP_TH; - + if(group == ULMBCS_AMBIGUOUS_ALL) { grp_start = ULMBCS_GRP_L1; grp_end = ULMBCS_GRP_LAST; } - + for (grp_ix = grp_start; grp_ix <= grp_end && bytes_written == 0; grp_ix++) { if (extraInfo.OptGrpConverter[grp_ix] != null && !groups_tried[grp_ix]) { bytes_written = LMBCSConversionWorker(grp_ix, LMBCS, uniChar, lastConverterIndex, groups_tried); } } - /* + /* * a final conversion fallback to the exceptions group if its likely - * to be single byte (Strategy 3E) + * to be single byte (Strategy 3E) */ if (bytes_written == 0 && grp_start == ULMBCS_GRP_L1) { bytes_written = LMBCSConversionWorker(ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried); @@ -1092,18 +1096,21 @@ class CharsetLMBCS extends CharsetICU { } extraInfo.localeConverterIndex = OldConverterIndex; } - + return err; } } + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderLMBCS(this); } - + + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderLMBCS(this); } - + + @Override void getUnicodeSetImpl(UnicodeSet setFillIn, int which){ getCompleteUnicodeSet(setFillIn); } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java index 92bb3ff85ca..8e11a1eb036 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java @@ -35,19 +35,20 @@ public final class CharsetProviderICU extends CharsetProvider{ private static List icuCharsets = Collections.emptyList(); /** - * Default constructor + * Default constructor * @stable ICU 3.6 */ public CharsetProviderICU() { } /** - * Constructs a Charset for the given 
charset name. + * Constructs a Charset for the given charset name. * Implements the abstract method of super class. * @param charsetName charset name * @return Charset object for the given charset name, null if unsupported * @stable ICU 3.6 */ + @Override public final Charset charsetForName(String charsetName){ try{ // extract the options from the charset name @@ -58,11 +59,11 @@ public final class CharsetProviderICU extends CharsetProvider{ charsetName = charsetName.substring(0, charsetName.length() - optionsString.length()); } // get the canonical name - String icuCanonicalName = getICUCanonicalName(charsetName); + String icuCanonicalName = getICUCanonicalName(charsetName); // create the converter object and return it if(icuCanonicalName==null || icuCanonicalName.length()==0){ - // Try the original name, may be something added and not in the alias table. + // Try the original name, may be something added and not in the alias table. // Will get an unsupported encoding exception if it doesn't work. icuCanonicalName = charsetName; } @@ -72,7 +73,7 @@ public final class CharsetProviderICU extends CharsetProvider{ } return null; } - + /** * Constructs a charset for the given ICU conversion table from the specified class path. * Example use: cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");. @@ -88,7 +89,7 @@ public final class CharsetProviderICU extends CharsetProvider{ public final Charset charsetForName(String charsetName, String classPath) { return charsetForName(charsetName, classPath, null); } - + /** * Constructs a charset for the given ICU conversion table from the specified class path. * This function is similar to {@link #charsetForName(String, String)}. 
@@ -107,7 +108,7 @@ public final class CharsetProviderICU extends CharsetProvider{ } return cs; } - + /** * Gets the canonical name of the converter as defined by Java * @param enc converter name @@ -143,7 +144,7 @@ public final class CharsetProviderICU extends CharsetProvider{ } else { ret = ""; } - + }else{ /* unsupported encoding */ ret = ""; @@ -152,7 +153,7 @@ public final class CharsetProviderICU extends CharsetProvider{ return ret; }catch(IOException ex){ throw new UnsupportedCharsetException(enc); - } + } } private static final Charset getCharset(String icuCanonicalName, String optionsString) throws IOException { @@ -174,17 +175,17 @@ public final class CharsetProviderICU extends CharsetProvider{ @Deprecated public static String getJavaCanonicalName(String charsetName){ /* - If a charset listed in the IANA Charset Registry is supported by an implementation - of the Java platform then its canonical name must be the name listed in the registry. - Many charsets are given more than one name in the registry, in which case the registry - identifies one of the names as MIME-preferred. If a charset has more than one registry - name then its canonical name must be the MIME-preferred name and the other names in - the registry must be valid aliases. If a supported charset is not listed in the IANA + If a charset listed in the IANA Charset Registry is supported by an implementation + of the Java platform then its canonical name must be the name listed in the registry. + Many charsets are given more than one name in the registry, in which case the registry + identifies one of the names as MIME-preferred. If a charset has more than one registry + name then its canonical name must be the MIME-preferred name and the other names in + the registry must be valid aliases. If a supported charset is not listed in the IANA registry then its canonical name must begin with one of the strings "X-" or "x-". 
*/ if(charsetName==null ){ return null; - } + } try{ String cName = null; /* find out the alias with MIME tag */ @@ -192,8 +193,8 @@ public final class CharsetProviderICU extends CharsetProvider{ /* find out the alias with IANA tag */ }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){ }else { - /* - check to see if an alias already exists with x- prefix, if yes then + /* + check to see if an alias already exists with x- prefix, if yes then make that the canonical name */ int aliasNum = UConverterAlias.countAliases(charsetName); @@ -205,7 +206,7 @@ public final class CharsetProviderICU extends CharsetProvider{ break; } } - /* last resort just append x- to any of the alias and + /* last resort just append x- to any of the alias and make it the canonical name */ if((cName==null || cName.length()==0)){ name = UConverterAlias.getStandardName(charsetName, "UTR22"); @@ -221,12 +222,12 @@ public final class CharsetProviderICU extends CharsetProvider{ } return cName; }catch (IOException ex){ - + } return null; } - /** + /** * Gets the aliases associated with the converter name * @param encName converter name * @return converter names as elements in an object array @@ -240,7 +241,7 @@ public final class CharsetProviderICU extends CharsetProvider{ int i=0; int j=0; String aliasArray[/*50*/] = new String[50]; - + if(encName != null){ aliasNum = UConverterAlias.countAliases(encName); for(i=0,j=0;i=0;) { ret[j] = aliasArray[j]; } - + } return (ret); - + } /** @@ -290,13 +291,14 @@ public final class CharsetProviderICU extends CharsetProvider{ * @return the Charset iterator * @stable ICU 3.6 */ + @Override public final Iterator charsets() { loadAvailableICUCharsets(); return icuCharsets.iterator(); } /** - * Gets the canonical names of available ICU converters + * Gets the canonical names of available ICU converters * @return array of available converter names * @internal * @deprecated This API is ICU internal only. 
diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java index 8c757892005..d16215646b0 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java @@ -38,7 +38,7 @@ class CharsetSCSU extends CharsetICU{ private static final short SC7=0x17; /* Select window 7 */ private static final short SD0=0x18; /* Define and select window 0 */ //private static final short SD7=0x1F; /* Define and select window 7 */ - + private static final short UC0=0xE0; /* Select window 0 */ private static final short UC7=0xE7; /* Select window 7 */ private static final short UD0=0xE8; /* Define and select window 0 */ @@ -47,7 +47,7 @@ class CharsetSCSU extends CharsetICU{ private static final short UDX=0xF1; /* Define a Window as extended */ private static final short Urs=0xF2; /* reserved */ // }; - + // enum { /* * Unicode code points from 3400 to E000 are not adressible by @@ -61,9 +61,9 @@ class CharsetSCSU extends CharsetICU{ /* use table of predefined fixed offsets for values from fixedThreshold */ private static final int fixedThreshold=0xF9; //}; - + protected byte[] fromUSubstitution = new byte[]{(byte)0x0E,(byte)0xFF, (byte)0xFD}; - + /* constant offsets for the 8 static windows */ private static final int staticOffsets[]={ 0x0000, /* ASCII for quoted tags */ @@ -109,11 +109,11 @@ class CharsetSCSU extends CharsetICU{ private static final int definePairTwo=5; private static final int defineOne=6; // }; - + private final static class SCSUData { /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ int toUDynamicOffsets[] = new int[8] ; - int fromUDynamicOffsets[] = new int[8] ; + int fromUDynamicOffsets[] = new int[8] ; /* state machine state - toUnicode */ boolean toUIsSingleByteMode; @@ -135,11 +135,11 @@ class CharsetSCSU extends CharsetICU{ byte locale; byte 
nextWindowUseIndex; byte windowUse[] = new byte[8]; - + SCSUData(){ initialize(); } - + void initialize(){ for(int i=0;i<8;i++){ this.toUDynamicOffsets[i] = initialDynamicOffsets[i]; @@ -154,7 +154,7 @@ class CharsetSCSU extends CharsetICU{ for(int i=0;i<8;i++){ this.fromUDynamicOffsets[i] = initialDynamicOffsets[i]; } - this.nextWindowUseIndex = 0; + this.nextWindowUseIndex = 0; switch(this.locale){ /* Note being used right now because "SCSU,locale=ja" does not work in ICU4J. */ /* case l_ja: @@ -166,11 +166,11 @@ class CharsetSCSU extends CharsetICU{ for(int i=0;i<8;i++){ this.windowUse[i] = initialWindowUse[i]; } - + } } } - + static final byte initialWindowUse[]={ 7, 0, 3, 2, 4, 5, 6, 1 }; /* Note being used right now because "SCSU,locale=ja" does not work in ICU4J. */ // static final byte initialWindowUse_ja[]={ 3, 2, 4, 1, 0, 7, 5, 6 }; @@ -180,74 +180,76 @@ class CharsetSCSU extends CharsetICU{ /* Note being used right now because "SCSU,locale=ja" does not work in ICU4J. */ // private static final int l_ja = 1; //}; - - private SCSUData extraInfo = null; - + + private SCSUData extraInfo = null; + public CharsetSCSU(String icuCanonicalName, String javaCanonicalName, String[] aliases){ super(icuCanonicalName, javaCanonicalName, aliases); - maxBytesPerChar = 3; + maxBytesPerChar = 3; minBytesPerChar = 1; maxCharsPerByte = 1; extraInfo = new SCSUData(); } - - class CharsetDecoderSCSU extends CharsetDecoderICU { + + class CharsetDecoderSCSU extends CharsetDecoderICU { /* label values for supporting behavior similar to goto in C */ private static final int FastSingle=0; private static final int SingleByteMode=1; private static final int EndLoop=2; - + /* Mode Type */ private static final int ByteMode = 0; - private static final int UnicodeMode =1; - + private static final int UnicodeMode =1; + public CharsetDecoderSCSU(CharsetICU cs) { super(cs); implReset(); } - + //private SCSUData data ; + @Override protected void implReset(){ super.implReset(); toULength = 0; 
extraInfo.initialize(); } - + short b; - - //Get the state machine state + + //Get the state machine state private boolean isSingleByteMode ; private short state ; private byte quoteWindow ; private byte dynamicWindow ; private short byteOne; - - + + //sourceIndex=-1 if the current character began in the previous buffer private int sourceIndex ; private int nextSourceIndex ; - + CoderResult cr; SCSUData data ; private boolean LabelLoop;// used to break the while loop - + + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){ data = extraInfo; - - //Get the state machine state + + //Get the state machine state isSingleByteMode = data.toUIsSingleByteMode; state = data.toUState; quoteWindow = data.toUQuoteWindow; dynamicWindow = data.toUDynamicWindow; byteOne = data.toUByteOne; - + LabelLoop = true; - + //sourceIndex=-1 if the current character began in the previous buffer sourceIndex = data.toUState == readCommand ? 0: -1 ; nextSourceIndex = 0; - + cr = CoderResult.UNDERFLOW; int labelType = 0; while(LabelLoop){ @@ -284,11 +286,11 @@ class CharsetSCSU extends CharsetICU{ } return cr; } - + private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){ int label = 0; if(modeType==ByteMode){ - + if(state==readCommand){ while(source.hasRemaining() && target.hasRemaining() && (b=(short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK)) >= 0x20){ source.position(source.position()+1); @@ -350,7 +352,7 @@ class CharsetSCSU extends CharsetICU{ label = SingleByteMode; return label; } - + private int singleByteMode(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){ int label = SingleByteMode; if(modeType == ByteMode){ @@ -403,7 +405,7 @@ class CharsetSCSU extends CharsetICU{ label = EndLoop; return label; } - + /* Store the first byte of a multibyte sequence in toUByte[] */ toUBytesArray[0] = (byte)b; toULength = 1; @@ -504,7 +506,7 @@ 
class CharsetSCSU extends CharsetICU{ return label; } } - + }else if(modeType==UnicodeMode){ while(source.hasRemaining()){ if(!target.hasRemaining()){ @@ -576,7 +578,7 @@ class CharsetSCSU extends CharsetICU{ label = EndLoop; return label; } - + private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){ if(cr==CoderResult.OVERFLOW){ state = readCommand; @@ -591,78 +593,80 @@ class CharsetSCSU extends CharsetICU{ LabelLoop = false; } } - + class CharsetEncoderSCSU extends CharsetEncoderICU{ public CharsetEncoderSCSU(CharsetICU cs) { super(cs, fromUSubstitution); implReset(); } - + //private SCSUData data; + @Override protected void implReset() { super.implReset(); extraInfo.initialize(); } - + /* label values for supporting behavior similar to goto in C */ - private static final int Loop=0; + private static final int Loop=0; private static final int GetTrailUnicode=1; private static final int OutputBytes=2; private static final int EndLoop =3; - + private int delta; private int length; - + ///variables of compression heuristics private int offset; private char lead, trail; private int code; private byte window; - - //Get the state machine state + + //Get the state machine state private boolean isSingleByteMode; private byte dynamicWindow ; private int currentOffset; int c; - + SCSUData data ; - + //sourceIndex=-1 if the current character began in the previous buffer private int sourceIndex ; private int nextSourceIndex; private int targetCapacity; - + private boolean LabelLoop;//used to break the while loop private boolean AfterGetTrail;// its value is set to true in order to ignore the code before getTrailSingle: private boolean AfterGetTrailUnicode;// is value is set to true in order to ignore the code before getTrailUnicode: - + CoderResult cr; - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { data = extraInfo; cr = CoderResult.UNDERFLOW; - - //Get the state machine state 
+ + //Get the state machine state isSingleByteMode = data.fromUIsSingleByteMode; dynamicWindow = data.fromUDynamicWindow; currentOffset = data.fromUDynamicOffsets[dynamicWindow]; c = fromUChar32; - + sourceIndex = c== 0 ? 0: -1 ; nextSourceIndex = 0; - - + + targetCapacity = target.limit()-target.position(); - + //sourceIndex=-1 if the current character began in the previous buffer sourceIndex = c== 0 ? 0: -1 ; nextSourceIndex = 0; - + int labelType = Loop; // set to Loop so that the code starts from loop: - LabelLoop = true; - AfterGetTrail = false; - AfterGetTrailUnicode = false; - + LabelLoop = true; + AfterGetTrail = false; + AfterGetTrailUnicode = false; + while(LabelLoop){ switch(labelType){ case Loop: @@ -681,7 +685,7 @@ class CharsetSCSU extends CharsetICU{ } return cr; } - + private byte getWindow(int[] offsets){ int i; for (i=0;i<8;i++){ @@ -691,14 +695,14 @@ class CharsetSCSU extends CharsetICU{ } return -1; } - + private boolean isInOffsetWindowOrDirect(int offsetValue, int a){ - return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f & - ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) || - ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20 + return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f & + ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) || + ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20 || ((1L<<(a & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0))); } - + private byte getNextDynamicWindow(){ byte windowValue = data.windowUse[data.nextWindowUseIndex]; if(++data.nextWindowUseIndex==8){ @@ -706,7 +710,7 @@ class CharsetSCSU extends CharsetICU{ } return windowValue; } - + private void useDynamicWindow(byte windowValue){ 
/*first find the index of the window*/ int i,j; @@ -716,7 +720,7 @@ class CharsetSCSU extends CharsetICU{ i=7; } }while(data.windowUse[i]!=windowValue); - + /*now copy each window[i+1] to [i]*/ j= i+1; if(j==8){ @@ -729,12 +733,12 @@ class CharsetSCSU extends CharsetICU{ j=0; } } - + /*finally, set the window into the most recently used index*/ data.windowUse[i]= windowValue; } - - + + private int getDynamicOffset(){ int i; for(i=0;i<7;++i){ @@ -746,7 +750,7 @@ class CharsetSCSU extends CharsetICU{ if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x80){ /*No dynamic window for US-ASCII*/ return -1; - }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) || + }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) || ((c-0x1d000)&UConverterConstants.UNSIGNED_INT_MASK)<=(0x1ffff-0x1d000)){ /*This character is in the code range for a "small", i.e, reasonably windowable, script*/ offset = c&0x7fffff80; @@ -759,7 +763,7 @@ class CharsetSCSU extends CharsetICU{ return -1; } } - + private int loop(CharBuffer source, ByteBuffer target, IntBuffer offsets){ int label = 0; if(isSingleByteMode){ @@ -778,7 +782,7 @@ class CharsetSCSU extends CharsetICU{ if(!AfterGetTrail){ c = source.get(); ++nextSourceIndex; - + } if(((c -0x20)&UConverterConstants.UNSIGNED_INT_MASK)<=0x5f && !AfterGetTrail){ /*pass US-ASCII graphic character through*/ @@ -824,12 +828,12 @@ class CharsetSCSU extends CharsetICU{ return label; } } - - + + if(AfterGetTrail){ AfterGetTrail = false; } - + /*Compress supplementary character U+10000...U+10ffff */ if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){ /*use the current dynamic window*/ @@ -923,9 +927,9 @@ class CharsetSCSU extends CharsetICU{ length = 3; label = OutputBytes; return label; - } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && 
(source.position()>=source.limit() || + } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() || ((int)((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))< (0xd800 - 0x3400))){ - + /* * this character is not compressible (a BMP ideograph of similar) * switch to Unicode mode if this is the last character in the block @@ -953,7 +957,7 @@ class CharsetSCSU extends CharsetICU{ label = GetTrailUnicode; return label; } - + /*state machine for Unicode*/ /*unicodeByteMode*/ while(AfterGetTrailUnicode || source.hasRemaining()){ @@ -967,7 +971,7 @@ class CharsetSCSU extends CharsetICU{ c = source.get(); ++nextSourceIndex; } - + if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && !AfterGetTrailUnicode){ /*not compressible, write character directly */ if(targetCapacity>=2){ @@ -986,7 +990,7 @@ class CharsetSCSU extends CharsetICU{ } else if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300*/&& !AfterGetTrailUnicode){ /*compress BMP character if the following one is not an uncompressible ideograph*/ if(!(source.hasRemaining() && (((source.get(source.position())-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400))){ - if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26 + if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26 || (((c-0x41)&UConverterConstants.UNSIGNED_INT_MASK))<26)){ /*ASCII digit or letter*/ isSingleByteMode = true; @@ -1010,14 +1014,14 @@ class CharsetSCSU extends CharsetICU{ dynamicWindow = getNextDynamicWindow(); currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset; useDynamicWindow(dynamicWindow); - c = ((UD0+dynamicWindow)<<16) | (code<<8) + c = ((UD0+dynamicWindow)<<16) | (code<<8) |(c - currentOffset) | 0x80; length = 3; label = OutputBytes; return 
label; } } - + /*don't know how to compress these character, just write it directly*/ length = 2; label = OutputBytes; @@ -1032,7 +1036,7 @@ class CharsetSCSU extends CharsetICU{ label = OutputBytes; return label; } - + if(AfterGetTrailUnicode){ AfterGetTrailUnicode = false; } @@ -1044,7 +1048,7 @@ class CharsetSCSU extends CharsetICU{ label = EndLoop; return label; } - + private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){ lead = (char)c; int label = Loop; @@ -1069,13 +1073,13 @@ class CharsetSCSU extends CharsetICU{ AfterGetTrail = true; return label; } - + private int getTrailUnicode(CharBuffer source, ByteBuffer target, IntBuffer offsets){ int label = EndLoop; AfterGetTrailUnicode = true; /*c is surrogate*/ if(UTF16.isLeadSurrogate((char)c)){ - // getTrailUnicode: + // getTrailUnicode: lead = (char)c; if(source.hasRemaining()){ /*test the following code unit*/ @@ -1105,10 +1109,10 @@ class CharsetSCSU extends CharsetICU{ label = EndLoop; return label; } - + /*compress supplementary character*/ - if((window=getWindow(data.fromUDynamicOffsets))>=0 && - !(source.hasRemaining() && ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) < + if((window=getWindow(data.fromUDynamicOffsets))>=0 && + !(source.hasRemaining() && ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) < (0xd800 - 0x3400))){ /* * this is the dynamic window that contains this character and the following @@ -1140,9 +1144,9 @@ class CharsetSCSU extends CharsetICU{ label = OutputBytes; return label; } - + } - + private void endLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){ /*set the converter state back to UConverter*/ data.fromUIsSingleByteMode = isSingleByteMode; @@ -1185,7 +1189,7 @@ class CharsetSCSU extends CharsetICU{ break; } targetCapacity-=length; - + /*normal end of conversion: prepare for a new character*/ c = 0; sourceIndex = nextSourceIndex; @@ -1196,7 +1200,7 @@ class CharsetSCSU extends 
CharsetICU{ /* * We actually do this backwards here: * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the + * first to the overflow buffer what does not fit into the * regular target */ /* we know that 0<=targetCapacity>=8*length; //length was reduced by targetCapacity switch(targetCapacity){ @@ -1240,7 +1244,7 @@ class CharsetSCSU extends CharsetICU{ default: break; } - + /*target overflow*/ targetCapacity = 0; cr = CoderResult.OVERFLOW; @@ -1249,19 +1253,22 @@ class CharsetSCSU extends CharsetICU{ return label; } } - + } - + + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderSCSU(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderSCSU(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ CharsetICU.getCompleteUnicodeSet(setFillIn); } - + } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java index 7632821b8fc..a401441b2c5 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java @@ -38,7 +38,7 @@ class CharsetUTF16 extends CharsetICU { private int endianXOR; private byte[] bom; private byte[] fromUSubstitution; - + private int version; public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases) { @@ -51,7 +51,7 @@ class CharsetUTF16 extends CharsetICU { } else { version = 0; } - + this.isEndianSpecified = (this instanceof CharsetUTF16BE || this instanceof CharsetUTF16LE); this.isBigEndian = !(this instanceof CharsetUTF16LE); @@ -64,21 +64,21 @@ class CharsetUTF16 extends CharsetICU { this.fromUSubstitution = fromUSubstitution_LE; this.endianXOR = ENDIAN_XOR_LE; } - + /* UnicodeBig and UnicodeLittle requires maxBytesPerChar set to 4 in Java 5 or less */ if ((VersionInfo.javaVersion().getMajor() == 
1 && VersionInfo.javaVersion().getMinor() <= 5) - && (isEndianSpecified && version == 1)) { - maxBytesPerChar = 4; - } else { - maxBytesPerChar = 2; - } + && (isEndianSpecified && version == 1)) { + maxBytesPerChar = 4; + } else { + maxBytesPerChar = 2; + } minBytesPerChar = 2; maxCharsPerByte = 1; } class CharsetDecoderUTF16 extends CharsetDecoderICU { - + private boolean isBOMReadYet; private int actualEndianXOR; private byte[] actualBOM; @@ -87,12 +87,14 @@ class CharsetUTF16 extends CharsetICU { super(cs); } + @Override protected void implReset() { super.implReset(); isBOMReadYet = false; actualBOM = null; } + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { /* * If we detect a BOM in this buffer, then we must add the BOM size to the offsets because the actual @@ -167,7 +169,7 @@ class CharsetUTF16 extends CharsetICU { return CoderResult.UNDERFLOW; toUBytesArray[toULength++] = source.get(); } - + if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) { return CoderResult.malformedForLength(2); } else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) { @@ -246,11 +248,13 @@ class CharsetUTF16 extends CharsetICU { fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM; } + @Override protected void implReset() { super.implReset(); fromUnicodeStatus = (isEndianSpecified && version != 1) ? 
0 : NEED_TO_WRITE_BOM; } + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult cr; @@ -315,15 +319,18 @@ class CharsetUTF16 extends CharsetICU { } } + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderUTF16(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderUTF16(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ - getNonSurrogateUnicodeSet(setFillIn); + getNonSurrogateUnicodeSet(setFillIn); } } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java index ce739361307..c8e45445e57 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java @@ -69,12 +69,14 @@ class CharsetUTF32 extends CharsetICU { super(cs); } + @Override protected void implReset() { super.implReset(); isBOMReadYet = false; actualBOM = null; } + @Override protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { /* * If we detect a BOM in this buffer, then we must add the BOM size to the offsets because the actual @@ -171,11 +173,13 @@ class CharsetUTF32 extends CharsetICU { fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM; } + @Override protected void implReset() { super.implReset(); fromUnicodeStatus = isEndianSpecified ? 
0 : NEED_TO_WRITE_BOM; } + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult cr; @@ -238,16 +242,19 @@ class CharsetUTF32 extends CharsetICU { } } + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderUTF32(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderUTF32(this); } - - + + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ - getNonSurrogateUnicodeSet(setFillIn); + getNonSurrogateUnicodeSet(setFillIn); } } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java index 5941e68054a..3f2cb4972a3 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java @@ -25,20 +25,20 @@ class CharsetUTF7 extends CharsetICU { private final static String IMAP_NAME="IMAP-mailbox-name"; private boolean useIMAP; protected byte[] fromUSubstitution=new byte[]{0x3F}; - + public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) { super(icuCanonicalName, javaCanonicalName, aliases); maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64) */ minBytesPerChar=1; maxCharsPerByte=1; - + useIMAP=false; - + if (icuCanonicalName.equals(IMAP_NAME)) { useIMAP=true; } } - + //private static boolean inSetD(char c) { // return ( // (char)(c - 97) < 26 || (char)(c - 65) < 26 || /* letters */ @@ -48,7 +48,7 @@ class CharsetUTF7 extends CharsetICU { // (c==58) || (c==63) /* :? 
*/ // ); //} - + //private static boolean inSetO(char c) { // return ( // (char)(c - 33) < 6 || /* !"#$%& */ @@ -58,19 +58,19 @@ class CharsetUTF7 extends CharsetICU { // (c==58) || (c==63) /* *@[ */ // ); //} - + private static boolean isCRLFTAB(char c) { return ( (c==13) || (c==10) || (c==9) ); } - + //private static boolean isCRLFSPTAB(char c) { // return ( // (c==32) || (c==13) || (c==10) || (c==9) // ); //} - + private static final byte PLUS=43; private static final byte MINUS=45; private static final byte BACKSLASH=92; @@ -78,7 +78,7 @@ class CharsetUTF7 extends CharsetICU { private static final byte AMPERSAND=0x26; private static final byte COMMA=0x2c; private static final byte SLASH=0x2f; - + // legal byte values: all US-ASCII graphic characters 0x20..0x7e private static boolean isLegal(char c, boolean useIMAP) { if (useIMAP) { @@ -91,56 +91,56 @@ class CharsetUTF7 extends CharsetICU { ); } } - + // directly encode all of printable ASCII 0x20..0x7e except '&' 0x26 private static boolean inSetDIMAP(char c) { return ( (isLegal(c, true) && c != AMPERSAND) ); } - + private static byte TO_BASE64_IMAP(int n) { return (n < 63 ? TO_BASE_64[n] : COMMA); } - + private static byte FROM_BASE64_IMAP(char c) { return (c==COMMA ? 63 : c==SLASH ? 
-1 : FROM_BASE_64[c]); } - + /* encode directly sets D and O and CR LF SP TAB */ private static final byte ENCODE_DIRECTLY_MAXIMUM[] = { /*0 1 2 3 4 5 6 7 8 9 a b c d e f*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 }; - + /* encode directly set D and CR LF SP TAB but not set O */ private static final byte ENCODE_DIRECTLY_RESTRICTED[] = { /*0 1 2 3 4 5 6 7 8 9 a b c d e f*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - + 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; - + private static final byte TO_BASE_64[] = { /* A-Z */ @@ -154,7 +154,7 @@ class CharsetUTF7 extends CharsetICU { /* +/ */ 43, 47 }; - + private static final byte FROM_BASE_64[] = { /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ @@ -166,24 +166,26 @@ class CharsetUTF7 extends CharsetICU { 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, /* A-Z */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, /* a-z*/ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 }; - + class CharsetDecoderUTF7 extends CharsetDecoderICU { public CharsetDecoderUTF7(CharsetICU cs) { 
super(cs); implReset(); } - + + @Override protected void implReset() { super.implReset(); toUnicodeStatus=(toUnicodeStatus & 0xf0000000) | 0x1000000; } - - protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { + + @Override + protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { CoderResult cr=CoderResult.UNDERFLOW; byte base64Value; byte base64Counter; @@ -191,14 +193,14 @@ class CharsetUTF7 extends CharsetICU { char bits; int byteIndex; int sourceIndex, nextSourceIndex; - + int length; - + char b; char c; - + int sourceArrayIndex=source.position(); - + //get the state of the machine state { int status=toUnicodeStatus; @@ -209,23 +211,23 @@ class CharsetUTF7 extends CharsetICU { byteIndex=toULength; /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - + nextSourceIndex=0; + directMode: while (true) { if (inDirectMode==1) { - /* + /* * In Direct Mode, most US-ASCII characters are encoded directly, i.e., * with their US-ASCII byte values. * Backslash and Tilde and most control characters are not alled in UTF-7. * A plus sign starts Unicode (or "escape") Mode. * An ampersand starts Unicode Mode for IMAP. - * + * * In Direct Mode, only the sourceIndex is used. */ byteIndex=0; length=source.remaining(); //targetCapacity=target.remaining(); - //Commented out because length of source may be larger than target when it comes to bytes + //Commented out because length of source may be larger than target when it comes to bytes /*if (useIMAP && length > targetCapacity) { length=targetCapacity; }*/ @@ -266,11 +268,11 @@ class CharsetUTF7 extends CharsetICU { } break directMode; } else { /* Unicode Mode*/ - /* + /* * In Unicode Mode, UTF-16BE is base64-encoded. * The base64 sequence ends with any character that is not in the base64 alphabet. * A terminating minus sign is consumed. 
- * + * * In Unicode Mode, the sourceIndex has the index to the start of the current * base64 bytes, while nextSourceIndex is precisely parallel to source, * keeping the index to the following byte. @@ -296,7 +298,7 @@ class CharsetUTF7 extends CharsetICU { * 2.2.2. Else if the current char is illegal, we might as well deal with it here. */ inDirectMode=1; - + if(base64Counter==-1) { /* illegal: + immediately followed by something other than base64 or minus sign */ /* include the plus sign in the reported sequence, but not the subsequent char */ @@ -396,9 +398,9 @@ class CharsetUTF7 extends CharsetICU { bits=0; base64Counter=0; break; - //default: + //default: /* will never occur */ - //break; + //break; }//end of switch } else if (!useIMAP || (useIMAP && base64Value==-2)) { /* minus sign terminates the base64 sequence */ @@ -419,7 +421,7 @@ class CharsetUTF7 extends CharsetICU { } sourceIndex=nextSourceIndex; continue directMode; - } else if (useIMAP) { + } else if (useIMAP) { if (base64Counter==-1) { // illegal: & immediately followed by something other than base64 or minus sign // include the ampersand in the reported sequence @@ -455,13 +457,13 @@ class CharsetUTF7 extends CharsetICU { inDirectMode=1; cr=CoderResult.malformedForLength(sourceIndex); } - + } else { if (!cr.isError() && flush && !source.hasRemaining() && bits ==0) { /* * if we are in Unicode Mode, then the byteIndex might not be 0, * but that is ok if bits -- 0 - * -> we set byteIndex=0 at the end of the stream to avoid a truncated error + * -> we set byteIndex=0 at the end of the stream to avoid a truncated error * (not true for IMAP-mailbox-name where we must end in direct mode) */ if (!cr.isOverflow()) { @@ -470,32 +472,34 @@ class CharsetUTF7 extends CharsetICU { } } /* set the converter state */ - toUnicodeStatus=(inDirectMode<<24 | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (int)bits); + toUnicodeStatus=(inDirectMode<<24 | ((base64Counter & 
UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | bits); toULength=byteIndex; - + return cr; } } - + class CharsetEncoderUTF7 extends CharsetEncoderICU { public CharsetEncoderUTF7(CharsetICU cs) { super(cs, fromUSubstitution); implReset(); } - + + @Override protected void implReset() { super.implReset(); fromUnicodeStatus=(fromUnicodeStatus & 0xf0000000) | 0x1000000; } - + + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { CoderResult cr=CoderResult.UNDERFLOW; byte inDirectMode; byte encodeDirectly[]; int status; - + int length, targetCapacity, sourceIndex; - + byte base64Counter; char bits; char c; @@ -510,7 +514,7 @@ class CharsetUTF7 extends CharsetICU { } /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ sourceIndex=0; - + directMode: while(true) { if(inDirectMode==1) { length=source.remaining(); @@ -567,7 +571,7 @@ class CharsetUTF7 extends CharsetICU { cr=CoderResult.OVERFLOW; } break directMode; - } else { + } else { /* Unicode Mode */ while (source.hasRemaining()) { if (target.hasRemaining()) { @@ -575,10 +579,10 @@ class CharsetUTF7 extends CharsetICU { if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && isLegal(c, useIMAP))) { /* encode directly */ inDirectMode=1; - + /* trick: back out this character to make this easier */ source.position(source.position() - 1); - + /* terminate the base64 sequence */ if (base64Counter!=0) { /* write remaining bits for the previous character */ @@ -607,7 +611,7 @@ class CharsetUTF7 extends CharsetICU { * base64 this character: * Output 2 or 3 base64 bytres for the remaining bits of the previous character * and the bits of this character, each implicitly in UTF-16BE. - * + * * Here, bits is an 8-bit variable because only 6 bits need to be kept from one * character to the next. 
The actual 2 or 4 bits are shifted to the left edge * of the 6-bits filed 5..0 to make the termination of the base64 sequence easier. @@ -714,8 +718,8 @@ class CharsetUTF7 extends CharsetICU { //default: /* will never occur */ //break; - } //end of switch - } + } //end of switch + } } else { /* target is full */ cr=CoderResult.OVERFLOW; @@ -725,7 +729,7 @@ class CharsetUTF7 extends CharsetICU { break directMode; } } //end of directMode label - + if (flush && !source.hasRemaining()) { /* flush remaining bits to the target */ if (inDirectMode==0) { @@ -740,7 +744,7 @@ class CharsetUTF7 extends CharsetICU { cr=CoderResult.OVERFLOW; } } - + /* need to terminate with a minus */ if (target.hasRemaining()) { target.put(MINUS); @@ -756,21 +760,24 @@ class CharsetUTF7 extends CharsetICU { fromUnicodeStatus=((status&0xf0000000) | 0x1000000); /* keep version, inDirectMode=TRUE */ } else { /* set the converter state back */ - fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | ((int)bits)); + fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | ((base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (bits)); } - + return cr; } } - + + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderUTF7(this); } - + + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderUTF7(this); } - + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ getCompleteUnicodeSet(setFillIn); } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java index 0796a8b6865..b1d0472536a 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java @@ -65,6 +65,7 @@ class CharsetUTF8 extends CharsetICU { super(cs); } + @Override protected CoderResult decodeLoop(ByteBuffer source, 
CharBuffer target, IntBuffer offsets, boolean flush) { if (!source.hasRemaining()) { @@ -346,10 +347,12 @@ class CharsetUTF8 extends CharsetICU { implReset(); } + @Override protected void implReset() { super.implReset(); } + @Override protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { if (!source.hasRemaining()) { @@ -548,7 +551,7 @@ class CharsetUTF8 extends CharsetICU { CoderResult cr = handleSurrogates(sourceArray, sourceIndex, sourceLimit, (char)char32); if (cr != null) return cr; - + sourceIndex++; char32 = fromUChar32; fromUChar32 = 0; @@ -588,10 +591,10 @@ class CharsetUTF8 extends CharsetICU { CoderResult cr = handleSurrogates(source, (char)char32); if (cr != null) return cr; - + char32 = fromUChar32; fromUChar32 = 0; - + /* the rest is routine -- encode four bytes, stopping on overflow */ target.put(encodeHeadOf4(char32)); @@ -672,7 +675,7 @@ class CharsetUTF8 extends CharsetICU { // UConverterConstants.UNSIGNED_BYTE_MASK)<0x3e);} /* * Is this code unit (byte) a UTF-8 trail byte? 
- * + * * @param c * 8-bit code unit (byte) * @return TRUE or FALSE @@ -681,15 +684,18 @@ class CharsetUTF8 extends CharsetICU { return (((c) & 0xc0) == 0x80); }*/ + @Override public CharsetDecoder newDecoder() { return new CharsetDecoderUTF8(this); } + @Override public CharsetEncoder newEncoder() { return new CharsetEncoderUTF8(this); } - - + + + @Override void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ getNonSurrogateUnicodeSet(setFillIn); } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java index 92d6f6b302e..4393f01e83a 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java @@ -150,6 +150,7 @@ final class UConverterAliasDataReader implements ICUBinary.Authenticate { return ICUBinary.getInts(byteBuffer, n, 0); } + @Override public boolean isDataVersionAcceptable(byte version[]) { return version.length >= DATA_FORMAT_VERSION.length @@ -157,9 +158,9 @@ final class UConverterAliasDataReader implements ICUBinary.Authenticate { && version[1] == DATA_FORMAT_VERSION[1] && version[2] == DATA_FORMAT_VERSION[2]; } - + /*byte[] getUnicodeVersion(){ - return ICUBinary.getVersionByteArrayFromCompactInt(unicodeVersion); + return ICUBinary.getVersionByteArrayFromCompactInt(unicodeVersion); }*/ // private data members ------------------------------------------------- diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java index e6f696bc6f7..842cb328e77 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java @@ -81,15 +81,15 @@ import com.ibm.icu.impl.InvalidFormatException; * -- normal base 
table with optional extension * * int32_t stateTable[countStates][256]; - * + * * struct _MBCSToUFallback { (fallbacks are sorted by offset) * uint32_t offset; * UChar32 codePoint; * } toUFallbacks[countToUFallbacks]; - * + * * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2]; * (padded to an even number of units) - * + * * -- stage 1 tables * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) { * -- stage 1 table for all of Unicode @@ -98,7 +98,7 @@ import com.ibm.icu.impl.InvalidFormatException; * -- BMP-only tables have a smaller stage 1 table * uint16_t fromUTable[0x40]; (32-bit-aligned) * } - * + * * -- stage 2 tables * length determined by top of stage 1 and bottom of stage 3 tables * if(outputType==MBCS_OUTPUT_1) { @@ -108,7 +108,7 @@ import com.ibm.icu.impl.InvalidFormatException; * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes * uint32_t stage 2 flags and indexes[?]; * } - * + * * -- stage 3 tables with byte results * if(outputType==MBCS_OUTPUT_1) { * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c @@ -342,7 +342,7 @@ import com.ibm.icu.impl.InvalidFormatException; * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0]) * } * } - * + * * The first pair in a section contains the number of following pairs in the * UChar position (16 bits, number=1..0xffff). 
* The value of the initial pair is used when the current UChar is not found @@ -408,6 +408,7 @@ final class UConverterDataReader { private static final class IsAcceptable implements ICUBinary.Authenticate { // @Override when we switch to Java 6 + @Override public boolean isDataVersionAcceptable(byte formatVersion[]) { return formatVersion[0] == 6; } diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java index 173117b283b..abc710985f7 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java @@ -30,6 +30,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { private static final boolean DEBUG = false; private static final class BundleImporter implements CollationRuleParser.Importer { BundleImporter() {} + @Override public String getRules(String localeID, String collationType) { return CollationLoader.loadRules(new ULocale(localeID), collationType); } @@ -433,7 +434,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { } /** Implements CollationRuleParser.Sink. 
*/ - // Java 6: @Override + @Override void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) { String nfdPrefix; if(prefix.length() == 0) { @@ -1322,6 +1323,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { CEFinalizer(long[] ces) { finalCEs = ces; } + @Override public long modifyCE32(int ce32) { assert(!Collation.isSpecialCE32(ce32)); if(CollationBuilder.isTempCE32(ce32)) { @@ -1331,6 +1333,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { return Collation.NO_CE; } } + @Override public long modifyCE(long ce) { if(CollationBuilder.isTempCE(ce)) { // retain case bits diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java index 24ae66f1ca0..d4c41d761cf 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java @@ -491,7 +491,7 @@ final class CollationDataReader /* all static */ { } private static final class IsAcceptable implements ICUBinary.Authenticate { - // @Override when we switch to Java 6 + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0] == 5; } diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationWeights.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationWeights.java index d0ce4670816..80a1d238210 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationWeights.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationWeights.java @@ -1,6 +1,6 @@ // © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html#License -/* +/* ******************************************************************************* * * Copyright (C) 1999-2015, International Business Machines @@ -160,7 +160,7 @@ public final class CollationWeights { long start, end; int length, count; - // Java 6: @Override + @Override public int compareTo(WeightRange other) { long l=start; long r=other.start; diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java index 0503b8e0ae5..6bbf99916de 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java @@ -57,18 +57,18 @@ import com.ibm.icu.util.ULocale; *

Direct Use

*

The following shows an example of building an index directly. * The "show..." methods below are just to illustrate usage. - * + * *

  * // Create a simple index where the values for the strings are Integers, and add the strings
- * 
+ *
  * AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale);
  * int counter = 0;
  * for (String item : test) {
- *     index.addRecord(item, counter++); 
+ *     index.addRecord(item, counter++);
  * }
  * ...
  * // Show index at top. We could skip or gray out empty buckets
- * 
+ *
  * for (AlphabeticIndex.Bucket<Integer> bucket : index) {
  *     if (showAll || bucket.size() != 0) {
  *         showLabelAtTop(UI, bucket.getLabel());
@@ -76,7 +76,7 @@ import com.ibm.icu.util.ULocale;
  * }
  *  ...
  * // Show the buckets with their contents, skipping empty buckets
- * 
+ *
  * for (AlphabeticIndex.Bucket<Integer> bucket : index) {
  *     if (bucket.size() != 0) {
  *         showLabelInList(UI, bucket.getLabel());
@@ -140,6 +140,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
 
     // Comparator for records, so that the Record class can be static.
     private final Comparator<Record<V>> recordComparator = new Comparator<Record<V>>() {
+        @Override
         public int compare(Record<V> o1, Record<V> o2) {
             return collatorOriginal.compare(o1.name, o2.name);
         }
@@ -218,6 +219,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
          * {@inheritDoc}
          * @stable ICU 51
          */
+        @Override
         public Iterator<Bucket<V>> iterator() {
             return buckets.iterator();
         }
@@ -225,7 +227,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
 
     /**
      * Create the index object.
-     * 
+     *
      * @param locale
      *            The locale for the index.
      * @stable ICU 4.8
@@ -236,7 +238,7 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
 
     /**
      * Create the index object.
-     * 
+     *
      * @param locale
      *            The locale for the index.
      * @stable ICU 4.8
@@ -245,16 +247,16 @@ public final class AlphabeticIndex<V> implements Iterable<Bucket<V>> {
         this(ULocale.forLocale(locale), null);
     }
 
-    /** 
+    /**
      * Create an AlphabeticIndex that uses a specific collator.
-     * 
+     *
      * 

The index will be created with no labels; the addLabels() function must be called * after creation to add the desired labels to the index. - * - *

The index will work directly with the supplied collator. If the caller will need to + * + *

The index will work directly with the supplied collator. If the caller will need to * continue working with the collator it should be cloned first, so that the * collator provided to the AlphabeticIndex remains unchanged after creation of the index. - * + * * @param collator The collator to use to order the contents of this index. * @stable ICU 51 */ @@ -353,7 +355,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the default label used in the IndexCharacters' locale for underflow, eg the last item in: X Y Z ... - * + * * @return underflow label * @stable ICU 4.8 */ @@ -376,7 +378,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the default label used in the IndexCharacters' locale for overflow, eg the first item in: ... A B C - * + * * @return overflow label * @stable ICU 4.8 */ @@ -400,7 +402,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the default label used for abbreviated buckets between other labels. For example, consider the labels * for Latin and Greek are used: X Y Z ... Α Β Γ. - * + * * @return inflow label * @stable ICU 4.8 */ @@ -411,7 +413,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the limit on the number of labels in the index. The number of buckets can be slightly larger: see getBucketCount(). - * + * * @return maxLabelCount maximum number of labels. * @stable ICU 4.8 */ @@ -632,7 +634,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the labels. - * + * * @return The list of bucket labels, after processing. * @stable ICU 4.8 */ @@ -651,7 +653,7 @@ public final class AlphabeticIndex implements Iterable> { *

* Don't use this method across threads if you are changing the settings on the collator, at least not without * synchronizing. - * + * * @return a clone of the collator used internally * @stable ICU 4.8 */ @@ -671,7 +673,7 @@ public final class AlphabeticIndex implements Iterable> { * Add a record (name and data) to the index. The name will be used to sort the items into buckets, and to sort * within the bucket. Two records may have the same name. When they do, the sort order is according to the order added: * the first added comes first. - * + * * @param name * Name, such as a name * @param data @@ -698,7 +700,7 @@ public final class AlphabeticIndex implements Iterable> { *

* Note that the bucket number (and sort key) are only valid for the settings of the current AlphabeticIndex; if * those are changed, then the bucket number and sort key must be regenerated. - * + * * @param name * Name, such as a name * @return the bucket index for the name @@ -711,7 +713,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Clear the index. - * + * * @return this, for chaining * @stable ICU 4.8 */ @@ -725,7 +727,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Return the number of buckets in the index. This will be the same as the number of labels, plus buckets for the underflow, overflow, and inflow(s). - * + * * @return number of buckets * @stable ICU 4.8 */ @@ -736,7 +738,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Return the number of records in the index: that is, the total number of distinct <name,data> pairs added with addRecord(...), over all the buckets. - * + * * @return total number of records in buckets * @stable ICU 4.8 */ @@ -746,10 +748,11 @@ public final class AlphabeticIndex implements Iterable> { /** * Return an iterator over the buckets. - * + * * @return iterator over buckets. * @stable ICU 4.8 */ + @Override public Iterator> iterator() { initBuckets(); return buckets.iterator(); @@ -839,7 +842,7 @@ public final class AlphabeticIndex implements Iterable> { /** * A (name, data) pair, to be sorted by name into one of the index buckets. * The user data is not used by the index implementation. 
- * + * * @stable ICU 4.8 */ public static class Record { @@ -853,7 +856,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the name - * + * * @return the name * @stable ICU 4.8 */ @@ -863,7 +866,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the data - * + * * @return the data * @stable ICU 4.8 */ @@ -875,6 +878,7 @@ public final class AlphabeticIndex implements Iterable> { * Standard toString() * @stable ICU 4.8 */ + @Override public String toString() { return name + "=" + data; } @@ -887,7 +891,7 @@ public final class AlphabeticIndex implements Iterable> { * returned by {@link AlphabeticIndex.ImmutableIndex#getBucket(int)}, * and {@link AlphabeticIndex#addRecord(CharSequence, Object)} adds a record * into a bucket according to the record's name. - * + * * @param * Data type * @stable ICU 4.8 @@ -902,7 +906,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Type of the label - * + * * @stable ICU 4.8 */ public enum LabelType { @@ -930,7 +934,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Set up the bucket. - * + * * @param label * label for the bucket * @param labelType @@ -945,7 +949,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the label - * + * * @return label for the bucket * @stable ICU 4.8 */ @@ -955,7 +959,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Is a normal, underflow, overflow, or inflow bucket - * + * * @return is an underflow, overflow, or inflow bucket * @stable ICU 4.8 */ @@ -965,7 +969,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Get the number of records in the bucket. 
- * + * * @return number of records in bucket * @stable ICU 4.8 */ @@ -977,6 +981,7 @@ public final class AlphabeticIndex implements Iterable> { * Iterator over the records in the bucket * @stable ICU 4.8 */ + @Override public Iterator> iterator() { if (records == null) { return Collections.>emptyList().iterator(); @@ -1188,6 +1193,7 @@ public final class AlphabeticIndex implements Iterable> { /** * Iterator over just the visible buckets. */ + @Override public Iterator> iterator() { return immutableVisibleList.iterator(); // use immutable list to prevent remove(). } diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java index c006bbfd72a..53403a34f57 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java @@ -28,6 +28,7 @@ import com.ibm.icu.util.ULocale; final class CollatorServiceShim extends Collator.ServiceShim { + @Override Collator getInstance(ULocale locale) { // use service cache, it's faster than instantiation // if (service.isDefault()) { @@ -51,6 +52,7 @@ final class CollatorServiceShim extends Collator.ServiceShim { } } + @Override Object registerInstance(Collator collator, ULocale locale) { // Set the collator locales while registering so that getInstance() // need not guess whether the collator's locales are already set properly @@ -59,6 +61,7 @@ final class CollatorServiceShim extends Collator.ServiceShim { return service.registerObject(collator, locale); } + @Override Object registerFactory(CollatorFactory f) { class CFactory extends LocaleKeyFactory { CollatorFactory delegate; @@ -68,16 +71,19 @@ final class CollatorServiceShim extends Collator.ServiceShim { this.delegate = fctry; } + @Override public Object handleCreate(ULocale loc, int kind, ICUService srvc) { Object coll = delegate.createCollator(loc); return coll; } + @Override 
public String getDisplayName(String id, ULocale displayLocale) { ULocale objectLocale = new ULocale(id); return delegate.getDisplayName(objectLocale, displayLocale); } + @Override public Set getSupportedIDs() { return delegate.getSupportedLocaleIDs(); } @@ -86,10 +92,12 @@ final class CollatorServiceShim extends Collator.ServiceShim { return service.registerFactory(new CFactory(f)); } + @Override boolean unregister(Object registryKey) { return service.unregisterFactory((Factory)registryKey); } + @Override Locale[] getAvailableLocales() { // TODO rewrite this to just wrap getAvailableULocales later Locale[] result; @@ -102,6 +110,7 @@ final class CollatorServiceShim extends Collator.ServiceShim { return result; } + @Override ULocale[] getAvailableULocales() { ULocale[] result; if (service.isDefault()) { @@ -113,6 +122,7 @@ final class CollatorServiceShim extends Collator.ServiceShim { return result; } + @Override String getDisplayName(ULocale objectLocale, ULocale displayLocale) { String id = objectLocale.getName(); return service.getDisplayName(id, displayLocale); @@ -152,6 +162,7 @@ final class CollatorServiceShim extends Collator.ServiceShim { ///CLOVER:OFF // The following method can not be reached by testing + @Override protected Object handleDefault(Key key, String[] actualIDReturn) { if (actualIDReturn != null) { actualIDReturn[0] = "root"; diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java b/icu4j/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java index f351154d2d2..cab3bdfba89 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java @@ -86,7 +86,7 @@ import com.ibm.icu.text.SimpleDateFormat; // - Other utilities? 
public class GlobalizationPreferences implements Freezable { - + /** * Default constructor * @draft ICU 3.6 @@ -98,7 +98,7 @@ public class GlobalizationPreferences implements Freezable(); + result = new ArrayList(); result.addAll(locales); } return result; @@ -214,7 +214,7 @@ public class GlobalizationPreferences implements Freezable * After: en_US, en_GB, en, fr_FR, fr - *
+ *
*
* The final locale list is used to produce a default value for the appropriate territory, * currency, timezone, etc. The list also represents the lookup order used in * getResourceBundle for this object. A subclass may override this method * to customize the algorithm used for populating the locale list. - * + * * @param inputLocales The list of input locales * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. @@ -853,7 +853,7 @@ public class GlobalizationPreferences implements Freezable 0 && c.length() == 0) { // We want to see zh_HK before zh_Hant result.add(j, uloc); @@ -907,7 +907,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @param dateStyle * @param timeStyle * @draft ICU 3.6 @@ -964,7 +964,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @param style * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. @@ -1000,7 +1000,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. */ @@ -1064,7 +1064,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. */ @@ -1080,7 +1080,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @param type * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. 
@@ -1117,7 +1117,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. */ @@ -1126,7 +1126,7 @@ public class GlobalizationPreferences implements FreezableIt MUST return a 'safe' value, * one whose modification will not affect this object. - * + * * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. */ @@ -1162,9 +1162,9 @@ public class GlobalizationPreferences implements Freezable locales; private String territory; private Currency currency; @@ -1175,7 +1175,7 @@ public class GlobalizationPreferences implements Freezable implicitLocales; - + { reset(); } @@ -1199,9 +1199,9 @@ public class GlobalizationPreferences implements Freezable language_territory_hack_map = new HashMap(); private static final String[][] language_territory_hack = { @@ -1467,7 +1467,7 @@ public class GlobalizationPreferences implements Freezablenoon. To get the jdn for * the corresponding midnight, subtract 0.5. @@ -299,7 +299,7 @@ public class CalendarAstronomer { clearCache(); julianDay = jdn; } - + /** * Get the current time of this CalendarAstronomer object, * represented as the number of milliseconds since @@ -312,7 +312,7 @@ public class CalendarAstronomer { public long getTime() { return time; } - + /** * Get the current time of this CalendarAstronomer object, * represented as a Date object. 
@@ -324,7 +324,7 @@ public class CalendarAstronomer { public Date getDate() { return new Date(time); } - + /** * Get the current time of this CalendarAstronomer object, * expressed as a "julian day number", which is the number of elapsed @@ -340,7 +340,7 @@ public class CalendarAstronomer { } return julianDay; } - + /** * Return this object's time expressed in julian centuries: * the number of centuries after 1/1/1900 AD, 12:00 GMT @@ -363,14 +363,14 @@ public class CalendarAstronomer { if (siderealTime == INVALID) { // See page 86 of "Practial Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. - + double UT = normalize((double)time/HOUR_MS, 24); - + siderealTime = normalize(getSiderealOffset() + UT*1.002737909, 24); } return siderealTime; } - + private double getSiderealOffset() { if (siderealT0 == INVALID) { double JD = Math.floor(getJulianDay() - 0.5) + 0.5; @@ -380,7 +380,7 @@ public class CalendarAstronomer { } return siderealT0; } - + /** * Returns the current local sidereal time, measured in hours * @internal @@ -388,7 +388,7 @@ public class CalendarAstronomer { public double getLocalSidereal() { return normalize(getGreenwichSidereal() + (double)fGmtOffset/HOUR_MS, 24); } - + /** * Converts local sidereal time to Universal Time. * @@ -396,22 +396,22 @@ public class CalendarAstronomer { * on this object's current date. * * @return The corresponding Universal Time, in milliseconds since - * 1 Jan 1970, GMT. + * 1 Jan 1970, GMT. 
*/ private long lstToUT(double lst) { // Convert to local mean time double lt = normalize((lst - getSiderealOffset()) * 0.9972695663, 24); - + // Then find local midnight on this day long base = DAY_MS * ((time + fGmtOffset)/DAY_MS) - fGmtOffset; - + //out(" lt =" + lt + " hours"); //out(" base=" + new Date(base)); - + return base + (long)(lt * HOUR_MS); } - - + + //------------------------------------------------------------------------- // Coordinate transformations, all based on the current time of this object //------------------------------------------------------------------------- @@ -445,14 +445,14 @@ public class CalendarAstronomer { double obliq = eclipticObliquity(); double sinE = Math.sin(obliq); double cosE = Math.cos(obliq); - + double sinL = Math.sin(eclipLong); double cosL = Math.cos(eclipLong); - + double sinB = Math.sin(eclipLat); double cosB = Math.cos(eclipLat); double tanB = Math.tan(eclipLat); - + return new Equatorial(Math.atan2(sinL*cosE - tanB*sinE, cosL), Math.asin(sinB*cosE + cosB*sinE*sinL) ); } @@ -476,23 +476,23 @@ public class CalendarAstronomer { public Horizon eclipticToHorizon(double eclipLong) { Equatorial equatorial = eclipticToEquatorial(eclipLong); - + double H = getLocalSidereal()*PI/12 - equatorial.ascension; // Hour-angle - + double sinH = Math.sin(H); double cosH = Math.cos(H); double sinD = Math.sin(equatorial.declination); double cosD = Math.cos(equatorial.declination); double sinL = Math.sin(fLatitude); double cosL = Math.cos(fLatitude); - + double altitude = Math.asin(sinD*sinL + cosD*cosL*cosH); double azimuth = Math.atan2(-cosD*cosL*sinH, sinD - sinL * Math.sin(altitude)); return new Horizon(azimuth, altitude); } - + //------------------------------------------------------------------------- // The Sun //------------------------------------------------------------------------- @@ -569,7 +569,7 @@ public class CalendarAstronomer { { // See page 86 of "Practial Astronomy with your Calculator", // by Peter Duffet-Smith, 
for details on the algorithm. - + if (sunLongitude == INVALID) { double[] result = getSunLongitude(getJulianDay()); sunLongitude = result[0]; @@ -577,7 +577,7 @@ public class CalendarAstronomer { } return sunLongitude; } - + /** * TODO Make this public when the entire class is package-private. */ @@ -585,17 +585,17 @@ public class CalendarAstronomer { { // See page 86 of "Practial Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. - + double day = julian - JD_EPOCH; // Days since epoch - + // Find the angular distance the sun in a fictitious // circular orbit has travelled since the epoch. double epochAngle = norm2PI(PI2/TROPICAL_YEAR*day); - + // The epoch wasn't at the sun's perigee; find the angular distance // since perigee, which is called the "mean anomaly" double meanAnomaly = norm2PI(epochAngle + SUN_ETA_G - SUN_OMEGA_G); - + // Now find the "true anomaly", e.g. the real solar longitude // by solving Kepler's equation for an elliptical orbit // NOTE: The 3rd ed. of the book lists omega_g and eta_g in different @@ -614,20 +614,20 @@ public class CalendarAstronomer { public Equatorial getSunPosition() { return eclipticToEquatorial(getSunLongitude(), 0); } - + private static class SolarLongitude { double value; SolarLongitude(double val) { value = val; } } - + /** * Constant representing the vernal equinox. - * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. + * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. * Note: In this case, "vernal" refers to the northern hemisphere's seasons. * @internal */ public static final SolarLongitude VERNAL_EQUINOX = new SolarLongitude(0); - + /** * Constant representing the summer solstice. * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. 
@@ -635,7 +635,7 @@ public class CalendarAstronomer { * @internal */ public static final SolarLongitude SUMMER_SOLSTICE = new SolarLongitude(PI/2); - + /** * Constant representing the autumnal equinox. * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. @@ -643,7 +643,7 @@ public class CalendarAstronomer { * @internal */ public static final SolarLongitude AUTUMN_EQUINOX = new SolarLongitude(PI); - + /** * Constant representing the winter solstice. * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}. @@ -651,30 +651,31 @@ public class CalendarAstronomer { * @internal */ public static final SolarLongitude WINTER_SOLSTICE = new SolarLongitude((PI*3)/2); - + /** * Find the next time at which the sun's ecliptic longitude will have - * the desired value. + * the desired value. * @internal */ public long getSunTime(double desired, boolean next) { - return timeOfAngle( new AngleFunc() { public double eval() { return getSunLongitude(); } }, + return timeOfAngle( new AngleFunc() { @Override + public double eval() { return getSunLongitude(); } }, desired, TROPICAL_YEAR, MINUTE_MS, next); } - + /** * Find the next time at which the sun's ecliptic longitude will have - * the desired value. + * the desired value. * @internal */ public long getSunTime(SolarLongitude desired, boolean next) { return getSunTime(desired.value, next); } - + /** * Returns the time (GMT) of sunrise or sunset on the local date to which * this calendar is currently set. @@ -684,29 +685,29 @@ public class CalendarAstronomer { * official time zone and the geographic longitude, the * computation can flop over into an adjacent day if this object * is set to a time near local midnight. 
- * + * * @internal */ - public long getSunRiseSet(boolean rise) - { + public long getSunRiseSet(boolean rise) { long t0 = time; // Make a rough guess: 6am or 6pm local time on the current day long noon = ((time + fGmtOffset)/DAY_MS)*DAY_MS - fGmtOffset + 12*HOUR_MS; - - setTime(noon + (rise ? -6L : 6L) * HOUR_MS); - - long t = riseOrSet(new CoordFunc() { - public Equatorial eval() { return getSunPosition(); } - }, - rise, - .533 * DEG_RAD, // Angular Diameter - 34 /60.0 * DEG_RAD, // Refraction correction - MINUTE_MS / 12); // Desired accuracy - setTime(t0); - return t; - } + setTime(noon + (rise ? -6L : 6L) * HOUR_MS); + + long t = riseOrSet(new CoordFunc() { + @Override + public Equatorial eval() { return getSunPosition(); } + }, + rise, + .533 * DEG_RAD, // Angular Diameter + 34 /60.0 * DEG_RAD, // Refraction correction + MINUTE_MS / 12); // Desired accuracy + + setTime(t0); + return t; + } // Commented out - currently unused. ICU 2.6, Alan // //------------------------------------------------------------------------- @@ -741,7 +742,7 @@ public class CalendarAstronomer { // if (lst1s > 24) lst1s -= 24; // if (lst2r > 24) lst2r -= 24; // if (lst2s > 24) lst2s -= 24; -// +// // // 4. Convert LSTs to GSTs. If GST1 > GST2, add 24 to GST2. // double gst1r = lstToGst(lst1r); // double gst1s = lstToGst(lst1s); @@ -752,12 +753,12 @@ public class CalendarAstronomer { // // // 5. Calculate GST at 0h UT of this date // double t00 = utToGst(0); -// +// // // 6. Calculate GST at 0h on the observer's longitude // double offset = Math.round(fLongitude*12/PI); // p.95 step 6; he _rounds_ to nearest 15 deg. // double t00p = t00 - offset*1.002737909; // if (t00p < 0) t00p += 24; // do NOT normalize -// +// // // 7. Adjust // if (gst1r < t00p) { // gst1r += 24; @@ -801,7 +802,7 @@ public class CalendarAstronomer { // double delta = fLongitude * 24 / PI2; // return normalize(lst - delta, 24); // } - + // Commented out - currently unused. 
ICU 2.6, Alan // /** // * Convert UT to GST on this date. @@ -845,7 +846,7 @@ public class CalendarAstronomer { // //------------------------------------------------------------------------- // // // Note: This method appears to produce inferior accuracy as -// // compared to getSunRiseSet(). +// // compared to getSunRiseSet(). // // /** // * TODO Make this public when the entire class is package-private. @@ -856,62 +857,62 @@ public class CalendarAstronomer { // double d = (double)(time - EPOCH_2000_MS) / DAY_MS; // // // Now compute the Local Sidereal Time, LST: -// // +// // // double LST = 98.9818 + 0.985647352 * d + /*UT*15 + long*/ // fLongitude*RAD_DEG; -// // +// // // // (east long. positive). Note that LST is here expressed in degrees, // // where 15 degrees corresponds to one hour. Since LST really is an angle, // // it's convenient to use one unit---degrees---throughout. // // // COMPUTING THE SUN'S POSITION // // ---------------------------- -// // +// // // // To be able to compute the Sun's rise/set times, you need to be able to // // compute the Sun's position at any time. First compute the "day // // number" d as outlined above, for the desired moment. Next compute: -// // +// // // double oblecl = 23.4393 - 3.563E-7 * d; -// // +// // // double w = 282.9404 + 4.70935E-5 * d; // double M = 356.0470 + 0.9856002585 * d; // double e = 0.016709 - 1.151E-9 * d; -// // +// // // // This is the obliquity of the ecliptic, plus some of the elements of // // the Sun's apparent orbit (i.e., really the Earth's orbit): w = // // argument of perihelion, M = mean anomaly, e = eccentricity. // // Semi-major axis is here assumed to be exactly 1.0 (while not strictly // // true, this is still an accurate approximation). Next compute E, the // // eccentric anomaly: -// // +// // // double E = M + e*(180/PI) * Math.sin(M*DEG_RAD) * ( 1.0 + e*Math.cos(M*DEG_RAD) ); -// // +// // // // where E and M are in degrees. 
This is it---no further iterations are // // needed because we know e has a sufficiently small value. Next compute // // the true anomaly, v, and the distance, r: -// // +// // // /* r * cos(v) = */ double A = Math.cos(E*DEG_RAD) - e; // /* r * sin(v) = */ double B = Math.sqrt(1 - e*e) * Math.sin(E*DEG_RAD); -// // +// // // // and -// // +// // // // r = sqrt( A*A + B*B ) // double v = Math.atan2( B, A )*RAD_DEG; -// // +// // // // The Sun's true longitude, slon, can now be computed: -// // +// // // double slon = v + w; -// // +// // // // Since the Sun is always at the ecliptic (or at least very very close to // // it), we can use simplified formulae to convert slon (the Sun's ecliptic // // longitude) to sRA and sDec (the Sun's RA and Dec): -// // +// // // // sin(slon) * cos(oblecl) // // tan(sRA) = ------------------------- // // cos(slon) -// // +// // // // sin(sDec) = sin(oblecl) * sin(slon) -// // +// // // // As was the case when computing az, the Azimuth, if possible use an // // atan2() function to compute sRA. // @@ -922,38 +923,38 @@ public class CalendarAstronomer { // // // COMPUTING RISE AND SET TIMES // // ---------------------------- -// // +// // // // To compute when an object rises or sets, you must compute when it // // passes the meridian and the HA of rise/set. Then the rise time is // // the meridian time minus HA for rise/set, and the set time is the // // meridian time plus the HA for rise/set. -// // +// // // // To find the meridian time, compute the Local Sidereal Time at 0h local // // time (or 0h UT if you prefer to work in UT) as outlined above---name // // that quantity LST0. The Meridian Time, MT, will now be: -// // +// // // // MT = RA - LST0 // double MT = normalize(sRA - LST, 360); -// // +// // // // where "RA" is the object's Right Ascension (in degrees!). If negative, // // add 360 deg to MT. 
If the object is the Sun, leave the time as it is, // // but if it's stellar, multiply MT by 365.2422/366.2422, to convert from // // sidereal to solar time. Now, compute HA for rise/set, name that // // quantity HA0: -// // +// // // // sin(h0) - sin(lat) * sin(Dec) // // cos(HA0) = --------------------------------- // // cos(lat) * cos(Dec) -// // +// // // // where h0 is the altitude selected to represent rise/set. For a purely // // mathematical horizon, set h0 = 0 and simplify to: -// // +// // // // cos(HA0) = - tan(lat) * tan(Dec) -// // +// // // // If you want to account for refraction on the atmosphere, set h0 = -35/60 // // degrees (-35 arc minutes), and if you want to compute the rise/set times // // for the Sun's upper limb, set h0 = -50/60 (-50 arc minutes). -// // +// // // double h0 = -50/60 * DEG_RAD; // // double HA0 = Math.acos( @@ -963,12 +964,12 @@ public class CalendarAstronomer { // // When HA0 has been computed, leave it as it is for the Sun but multiply // // by 365.2422/366.2422 for stellar objects, to convert from sidereal to // // solar time. Finally compute: -// // +// // // // Rise time = MT - HA0 // // Set time = MT + HA0 -// // +// // // // convert the times from degrees to hours by dividing by 15. -// // +// // // // If you'd like to check that your calculations are accurate or just // // need a quick result, check the USNO's Sun or Moon Rise/Set Table, // // . @@ -984,18 +985,18 @@ public class CalendarAstronomer { //------------------------------------------------------------------------- // The Moon //------------------------------------------------------------------------- - + static final double moonL0 = 318.351648 * PI/180; // Mean long. at epoch static final double moonP0 = 36.340410 * PI/180; // Mean long. of perigee static final double moonN0 = 318.510107 * PI/180; // Mean long. 
of node static final double moonI = 5.145366 * PI/180; // Inclination of orbit static final double moonE = 0.054900; // Eccentricity of orbit - + // These aren't used right now static final double moonA = 3.84401e5; // semi-major axis (km) static final double moonT0 = 0.5181 * PI/180; // Angular size at distance A static final double moonPi = 0.9507 * PI/180; // Parallax at distance A - + /** * The position of the moon at the time set on this * object, in equatorial coordinates. @@ -1011,18 +1012,18 @@ public class CalendarAstronomer { // Calculate the solar longitude. Has the side effect of // filling in "meanAnomalySun" as well. double sunLong = getSunLongitude(); - + // // Find the # of days since the epoch of our orbital parameters. // TODO: Convert the time of day portion into ephemeris time // double day = getJulianDay() - JD_EPOCH; // Days since epoch - + // Calculate the mean longitude and anomaly of the moon, based on // a circular orbit. Similar to the corresponding solar calculation. 
double meanLongitude = norm2PI(13.1763966*PI/180*day + moonL0); double meanAnomalyMoon = norm2PI(meanLongitude - 0.1114041*PI/180 * day - moonP0); - + // // Calculate the following corrections: // Evection: the sun's gravity affects the moon's eccentricity @@ -1035,7 +1036,7 @@ public class CalendarAstronomer { double a3 = 0.3700*PI/180 * Math.sin(meanAnomalySun); meanAnomalyMoon += evection - annual - a3; - + // // More correction factors: // center equation of the center correction @@ -1045,7 +1046,7 @@ public class CalendarAstronomer { // double center = 6.2886*PI/180 * Math.sin(meanAnomalyMoon); double a4 = 0.2140*PI/180 * Math.sin(2 * meanAnomalyMoon); - + // Now find the moon's corrected longitude moonLongitude = meanLongitude + evection + center - annual + a4; @@ -1055,9 +1056,9 @@ public class CalendarAstronomer { // the earth the moon is on // double variation = 0.6583*PI/180 * Math.sin(2*(moonLongitude - sunLong)); - + moonLongitude += variation; - + // // What we've calculated so far is the moon's longitude in the plane // of its own orbit. Now map to the ecliptic to get the latitude @@ -1071,7 +1072,7 @@ public class CalendarAstronomer { double y = Math.sin(moonLongitude - nodeLongitude); double x = Math.cos(moonLongitude - nodeLongitude); - + moonEclipLong = Math.atan2(y*Math.cos(moonI), x) + nodeLongitude; double moonEclipLat = Math.asin(y * Math.sin(moonI)); @@ -1079,7 +1080,7 @@ public class CalendarAstronomer { } return moonPosition; } - + /** * The "age" of the moon at the time specified in this object. * This is really the angle between the @@ -1097,10 +1098,10 @@ public class CalendarAstronomer { // some the intermediate results cached during that calculation. // getMoonPosition(); - + return norm2PI(moonEclipLong - sunLongitude); } - + /** * Calculate the phase of the moon at the time set in this object. 
* The returned phase is a double in the range @@ -1120,12 +1121,12 @@ public class CalendarAstronomer { // by Peter Duffet-Smith, for details on the algorithm. return 0.5 * (1 - Math.cos(getMoonAge())); } - + private static class MoonAge { double value; MoonAge(double val) { value = val; } } - + /** * Constant representing a new moon. * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} @@ -1139,53 +1140,54 @@ public class CalendarAstronomer { * @internal */ public static final MoonAge FIRST_QUARTER = new MoonAge(PI/2); - + /** * Constant representing a full moon. * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} * @internal */ public static final MoonAge FULL_MOON = new MoonAge(PI); - + /** * Constant representing the moon's last quarter. * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime} * @internal */ public static final MoonAge LAST_QUARTER = new MoonAge((PI*3)/2); - + /** * Find the next or previous time at which the Moon's ecliptic - * longitude will have the desired value. + * longitude will have the desired value. *

* @param desired The desired longitude. * @param next true if the next occurrance of the phase - * is desired, false for the previous occurrance. + * is desired, false for the previous occurrance. * @internal */ public long getMoonTime(double desired, boolean next) { return timeOfAngle( new AngleFunc() { + @Override public double eval() { return getMoonAge(); } }, desired, SYNODIC_MONTH, MINUTE_MS, next); } - + /** * Find the next or previous time at which the moon will be in the * desired phase. *

* @param desired The desired phase of the moon. * @param next true if the next occurrance of the phase - * is desired, false for the previous occurrance. + * is desired, false for the previous occurrance. * @internal */ public long getMoonTime(MoonAge desired, boolean next) { return getMoonTime(desired.value, next); } - + /** * Returns the time (GMT) of sunrise or sunset on the local date to which * this calendar is currently set. @@ -1194,6 +1196,7 @@ public class CalendarAstronomer { public long getMoonRiseSet(boolean rise) { return riseOrSet(new CoordFunc() { + @Override public Equatorial eval() { return getMoonPosition(); } }, rise, @@ -1205,27 +1208,27 @@ public class CalendarAstronomer { //------------------------------------------------------------------------- // Interpolation methods for finding the time at which a given event occurs //------------------------------------------------------------------------- - + private interface AngleFunc { public double eval(); } - + private long timeOfAngle(AngleFunc func, double desired, double periodDays, long epsilon, boolean next) { // Find the value of the function at the current time double lastAngle = func.eval(); - + // Find out how far we are from the desired angle double deltaAngle = norm2PI(desired - lastAngle) ; - + // Using the average period, estimate the next (or previous) time at // which the desired angle occurs. double deltaT = (deltaAngle + (next ? 0 : -PI2)) * (periodDays*DAY_MS) / PI2; - + double lastDeltaT = deltaT; // Liu long startTime = time; // Liu - + setTime(time + (long)deltaT); // Now iterate until we get the error below epsilon. Throughout @@ -1240,17 +1243,17 @@ public class CalendarAstronomer { // Correct the time estimate based on how far off the angle is deltaT = normPI(desired - angle) * factor; - + // HACK: - // + // // If abs(deltaT) begins to diverge we need to quit this loop. 
// This only appears to happen when attempting to locate, for // example, a new moon on the day of the new moon. E.g.: - // + // // This result is correct: // newMoon(7508(Mon Jul 23 00:00:00 CST 1990,false))= // Sun Jul 22 10:57:41 CST 1990 - // + // // But attempting to make the same call a day earlier causes deltaT // to diverge: // CalendarAstronomer.timeOfAngle() diverging: 1.348508727575625E9 -> @@ -1274,23 +1277,23 @@ public class CalendarAstronomer { setTime(time + (long)deltaT); } while (Math.abs(deltaT) > epsilon); - + return time; } - + private interface CoordFunc { public Equatorial eval(); } - + private long riseOrSet(CoordFunc func, boolean rise, - double diameter, double refraction, + double diameter, double refraction, long epsilon) - { + { Equatorial pos = null; double tanL = Math.tan(fLatitude); long deltaT = Long.MAX_VALUE; int count = 0; - + // // Calculate the object's position at the current time, then use that // position to calculate the time of rising or setting. The position @@ -1301,10 +1304,10 @@ public class CalendarAstronomer { pos = func.eval(); double angle = Math.acos(-tanL * Math.tan(pos.declination)); double lst = ((rise ? PI2-angle : angle) + pos.ascension ) * 24 / PI2; - + // Convert from LST to Universal Time. long newTime = lstToUT( lst ); - + deltaT = newTime - time; setTime(newTime); } @@ -1316,10 +1319,10 @@ public class CalendarAstronomer { double x = diameter / 2 + refraction; double y = Math.asin(Math.sin(x) / Math.sin(psi)); long delta = (long)((240 * y * RAD_DEG / cosD)*SECOND_MS); - + return time + (rise ? 
-delta : delta); } - + //------------------------------------------------------------------------- // Other utility methods //------------------------------------------------------------------------- @@ -1331,7 +1334,7 @@ public class CalendarAstronomer { private static final double normalize(double value, double range) { return value - range * Math.floor(value / range); } - + /** * Normalize an angle so that it's in the range 0 - 2pi. * For positive angles this is just (angle % 2pi), but the Java @@ -1340,14 +1343,14 @@ public class CalendarAstronomer { private static final double norm2PI(double angle) { return normalize(angle, PI2); } - + /** * Normalize an angle into the range -PI - PI */ private static final double normPI(double angle) { return normalize(angle + PI, PI2) - PI; } - + /** * Find the "true anomaly" (longitude) of an object from * its mean anomaly and the eccentricity of its orbit. This uses @@ -1355,7 +1358,7 @@ public class CalendarAstronomer { * * @param meanAnomaly The object's longitude calculated as if it were in * a regular, circular orbit, measured in radians - * from the point of perigee. + * from the point of perigee. * * @param eccentricity The eccentricity of the orbit * @@ -1370,13 +1373,13 @@ public class CalendarAstronomer { do { delta = E - eccentricity * Math.sin(E) - meanAnomaly; E = E - delta / (1 - eccentricity * Math.cos(E)); - } + } while (Math.abs(delta) > 1e-5); // epsilon = 1e-5 rad return 2.0 * Math.atan( Math.tan(E/2) * Math.sqrt( (1+eccentricity) /(1-eccentricity) ) ); } - + /** * Return the obliquity of the ecliptic (the angle between the ecliptic * and the earth's equator) at the current time. 
This varies due to @@ -1390,42 +1393,42 @@ public class CalendarAstronomer { final double epoch = 2451545.0; // 2000 AD, January 1.5 double T = (getJulianDay() - epoch) / 36525; - + eclipObliquity = 23.439292 - 46.815/3600 * T - 0.0006/3600 * T*T + 0.00181/3600 * T*T*T; - + eclipObliquity *= DEG_RAD; } return eclipObliquity; } - - + + //------------------------------------------------------------------------- // Private data //------------------------------------------------------------------------- - + /** * Current time in milliseconds since 1/1/1970 AD * @see java.util.Date#getTime */ private long time; - + /* These aren't used yet, but they'll be needed for sunset calculations * and equatorial to horizon coordinate conversions */ private double fLongitude = 0.0; private double fLatitude = 0.0; private long fGmtOffset = 0; - + // // The following fields are used to cache calculated results for improved // performance. These values all depend on the current time setting // of this object, so the clearCache method is provided. 
// static final private double INVALID = Double.MIN_VALUE; - + private transient double julianDay = INVALID; private transient double julianCentury = INVALID; private transient double sunLongitude = INVALID; @@ -1436,7 +1439,7 @@ public class CalendarAstronomer { private transient double eclipObliquity = INVALID; private transient double siderealT0 = INVALID; private transient double siderealTime = INVALID; - + private transient Equatorial moonPosition = null; private void clearCache() { @@ -1452,15 +1455,15 @@ public class CalendarAstronomer { siderealT0 = INVALID; moonPosition = null; } - + //private static void out(String s) { // System.out.println(s); //} - + //private static String deg(double rad) { // return Double.toString(rad * RAD_DEG); //} - + //private static String hours(long ms) { // return Double.toString((double)ms / HOUR_MS) + " hours"; //} @@ -1471,11 +1474,11 @@ public class CalendarAstronomer { public String local(long localMillis) { return new Date(localMillis - TimeZone.getDefault().getRawOffset()).toString(); } - - + + /** * Represents the position of an object in the sky relative to the ecliptic, - * the plane of the earth's orbit around the Sun. + * the plane of the earth's orbit around the Sun. * This is a spherical coordinate system in which the latitude * specifies the position north or south of the plane of the ecliptic. * The longitude specifies the position along the ecliptic plane @@ -1507,10 +1510,11 @@ public class CalendarAstronomer { * Return a string representation of this object * @internal */ + @Override public String toString() { return Double.toString(longitude*RAD_DEG) + "," + (latitude*RAD_DEG); } - + /** * The ecliptic latitude, in radians. This specifies an object's * position north or south of the plane of the ecliptic, @@ -1518,7 +1522,7 @@ public class CalendarAstronomer { * @internal */ public final double latitude; - + /** * The ecliptic longitude, in radians. 
* This specifies an object's position along the ecliptic plane @@ -1534,8 +1538,8 @@ public class CalendarAstronomer { } /** - * Represents the position of an - * object in the sky relative to the plane of the earth's equator. + * Represents the position of an + * object in the sky relative to the plane of the earth's equator. * The Right Ascension specifies the position east or west * along the equator, relative to the sun's position at the vernal * equinox. The Declination is the position north or south @@ -1567,10 +1571,11 @@ public class CalendarAstronomer { * angles measured in degrees. * @internal */ + @Override public String toString() { return Double.toString(ascension*RAD_DEG) + "," + (declination*RAD_DEG); } - + /** * Return a string representation of this object with the right ascension * measured in hours, minutes, and seconds. @@ -1579,16 +1584,16 @@ public class CalendarAstronomer { public String toHmsString() { return radToHms(ascension) + "," + radToDms(declination); } - + /** - * The right ascension, in radians. + * The right ascension, in radians. * This is the position east or west along the equator * relative to the sun's position at the vernal equinox, * with positive angles representing East. * @internal */ public final double ascension; - + /** * The declination, in radians. * This is the position north or south of the equatorial plane, @@ -1599,7 +1604,7 @@ public class CalendarAstronomer { } /** - * Represents the position of an object in the sky relative to + * Represents the position of an object in the sky relative to * the local horizon. * The Altitude represents the object's elevation above the horizon, * with objects below the horizon having a negative altitude. @@ -1633,18 +1638,19 @@ public class CalendarAstronomer { * angles measured in degrees. * @internal */ + @Override public String toString() { return Double.toString(altitude*RAD_DEG) + "," + (azimuth*RAD_DEG); } - - /** - * The object's altitude above the horizon, in radians. 
+ + /** + * The object's altitude above the horizon, in radians. * @internal */ public final double altitude; - - /** - * The object's direction, in radians clockwise from north. + + /** + * The object's direction, in radians clockwise from north. * @internal */ public final double azimuth; @@ -1654,15 +1660,15 @@ public class CalendarAstronomer { int hrs = (int) (angle*RAD_HOUR); int min = (int)((angle*RAD_HOUR - hrs) * 60); int sec = (int)((angle*RAD_HOUR - hrs - min/60.0) * 3600); - + return Integer.toString(hrs) + "h" + min + "m" + sec + "s"; } - + static private String radToDms(double angle) { int deg = (int) (angle*RAD_DEG); int min = (int)((angle*RAD_DEG - deg) * 60); int sec = (int)((angle*RAD_DEG - deg - min/60.0) * 3600); - + return Integer.toString(deg) + "\u00b0" + min + "'" + sec + "\""; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java index 293ffe9f0c2..688b6207dc5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java @@ -103,7 +103,7 @@ public class CharTrie extends Trie } // public methods -------------------------------------------------- - + /** * Gets the value associated with the codepoint. * If no value is associated with the codepoint, a default value will be @@ -118,14 +118,14 @@ public class CharTrie extends Trie // fastpath for U+0000..U+D7FF if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // copy of getRawOffset() - offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } // handle U+D800..U+10FFFF offset = getCodePointOffset(ch); - + // return -1 if there is an error, in this case we return the default // value: m_initialValue_ return (offset >= 0) ? 
m_data_[offset] : m_initialValue_; @@ -174,7 +174,7 @@ public class CharTrie extends Trie /** *

Get a value from a folding offset (from the value of a lead surrogate) * and a trail surrogate.

- *

If the + *

If the * @param leadvalue value associated with the lead surrogate which contains * the folding offset * @param trail surrogate @@ -188,24 +188,24 @@ public class CharTrie extends Trie } int offset = m_dataManipulate_.getFoldingOffset(leadvalue); if (offset > 0) { - return m_data_[getRawOffset(offset, + return m_data_[getRawOffset(offset, (char)(trail & SURROGATE_MASK_))]; } return m_initialValue_; } - + /** *

Gets the latin 1 fast path value.

- *

Note this only works if latin 1 characters have their own linear + *

Note this only works if latin 1 characters have their own linear * array.

* @param ch latin 1 characters * @return value associated with latin character */ - public final char getLatin1LinearValue(char ch) + public final char getLatin1LinearValue(char ch) { return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch]; } - + /** * Checks if the argument Trie has the same data as this Trie * @param other Trie to check @@ -213,7 +213,8 @@ public class CharTrie extends Trie * otherwise */ ///CLOVER:OFF - public boolean equals(Object other) + @Override + public boolean equals(Object other) { boolean result = super.equals(other); if (result && other instanceof CharTrie) { @@ -222,7 +223,8 @@ public class CharTrie extends Trie } return false; } - + + @Override public int hashCode() { assert false : "hashCode not designed"; return 42; @@ -236,6 +238,7 @@ public class CharTrie extends Trie * data array

* @param bytes buffer containing trie data */ + @Override protected final void unserialize(ByteBuffer bytes) { int indexDataLength = m_dataOffset_ + m_dataLength_; @@ -250,13 +253,14 @@ public class CharTrie extends Trie * @param trail trailing surrogate * @return offset to data */ + @Override protected final int getSurrogateOffset(char lead, char trail) { if (m_dataManipulate_ == null) { throw new NullPointerException( "The field DataManipulate in this Trie is null"); } - + // get fold position for the next trail surrogate int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); @@ -269,7 +273,7 @@ public class CharTrie extends Trie // value: m_initialValue_ return -1; } - + /** * Gets the value at the argument index. * For use internally in TrieIterator. @@ -277,6 +281,7 @@ public class CharTrie extends Trie * @return 32 bit value * @see com.ibm.icu.impl.TrieIterator */ + @Override protected final int getValue(int index) { return m_data_[index]; @@ -284,13 +289,14 @@ public class CharTrie extends Trie /** * Gets the default initial value - * @return 32 bit value + * @return 32 bit value */ + @Override protected final int getInitialValue() { return m_initialValue_; } - + // private data members -------------------------------------------- /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java index d271c435ab2..d657f771c22 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java @@ -13,26 +13,27 @@ import java.text.CharacterIterator; import com.ibm.icu.text.UCharacterIterator; /** - * This class is a wrapper around CharacterIterator and implements the + * This class is a wrapper around CharacterIterator and implements the * UCharacterIterator protocol * @author ram */ public class CharacterIteratorWrapper extends 
UCharacterIterator { - + private CharacterIterator iterator; - - + + public CharacterIteratorWrapper(CharacterIterator iter){ if(iter==null){ throw new IllegalArgumentException(); } - iterator = iter; + iterator = iter; } /** * @see UCharacterIterator#current() */ + @Override public int current() { int c = iterator.current(); if(c==CharacterIterator.DONE){ @@ -44,6 +45,7 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#getLength() */ + @Override public int getLength() { return (iterator.getEndIndex() - iterator.getBeginIndex()); } @@ -51,6 +53,7 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#getIndex() */ + @Override public int getIndex() { return iterator.getIndex(); } @@ -58,10 +61,11 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#next() */ + @Override public int next() { int i = iterator.current(); iterator.next(); - if(i==CharacterIterator.DONE){ + if(i==CharacterIterator.DONE){ return DONE; } return i; @@ -70,6 +74,7 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#previous() */ + @Override public int previous() { int i = iterator.previous(); if(i==CharacterIterator.DONE){ @@ -81,6 +86,7 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#setIndex(int) */ + @Override public void setIndex(int index) { try{ iterator.setIndex(index); @@ -92,6 +98,7 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#setToLimit() */ + @Override public void setToLimit() { iterator.setIndex(iterator.getEndIndex()); } @@ -99,13 +106,14 @@ public class CharacterIteratorWrapper extends UCharacterIterator { /** * @see UCharacterIterator#getText(char[]) */ + @Override public int getText(char[] fillIn, int offset){ - int length =iterator.getEndIndex() - iterator.getBeginIndex(); + int 
length =iterator.getEndIndex() - iterator.getBeginIndex(); int currentIndex = iterator.getIndex(); if(offset < 0 || offset + length > fillIn.length){ throw new IndexOutOfBoundsException(Integer.toString(length)); } - + for (char ch = iterator.first(); ch != CharacterIterator.DONE; ch = iterator.next()) { fillIn[offset++] = ch; } @@ -118,21 +126,23 @@ public class CharacterIteratorWrapper extends UCharacterIterator { * Creates a clone of this iterator. Clones the underlying character iterator. * @see UCharacterIterator#clone() */ + @Override public Object clone(){ try { CharacterIteratorWrapper result = (CharacterIteratorWrapper) super.clone(); result.iterator = (CharacterIterator)this.iterator.clone(); return result; - } catch (CloneNotSupportedException e) { + } catch (CloneNotSupportedException e) { return null; // only invoked if bad underlying character iterator } } - + + @Override public int moveIndex(int delta){ - int length = iterator.getEndIndex() - iterator.getBeginIndex(); + int length = iterator.getEndIndex() - iterator.getBeginIndex(); int idx = iterator.getIndex()+delta; - + if(idx < 0) { idx = 0; } else if(idx > length) { @@ -140,11 +150,12 @@ public class CharacterIteratorWrapper extends UCharacterIterator { } return iterator.setIndex(idx); } - + /** * @see UCharacterIterator#getCharacterIterator() */ + @Override public CharacterIterator getCharacterIterator(){ return (CharacterIterator)iterator.clone(); - } + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ClassLoaderUtil.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ClassLoaderUtil.java index 7d6c4438e0e..e7389a1f884 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ClassLoaderUtil.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ClassLoaderUtil.java @@ -29,7 +29,7 @@ public class ClassLoaderUtil { // this constructor on Android, because ClassLoaderUtil.getClassLoader() // should get non-null ClassLoader before calling // ClassLoaderUtil.getBootstrapClassLoader(). 
- // + // // On other common JREs (such as Oracle, OpenJDK), // Object.class.getClassLoader() returns null, but // super(null) is commonly used for accessing the bootstrap @@ -44,7 +44,7 @@ public class ClassLoaderUtil { * Lazily create a singleton BootstrapClassLoader. * This class loader might be necessary when ICU4J classes are * initialized by bootstrap class loader. - * + * * @return The BootStrapClassLoader singleton instance */ private static ClassLoader getBootstrapClassLoader() { @@ -54,10 +54,11 @@ public class ClassLoaderUtil { ClassLoader cl = null; if (System.getSecurityManager() != null) { cl = AccessController.doPrivileged(new PrivilegedAction() { - public BootstrapClassLoader run() { - return new BootstrapClassLoader(); - } - }); + @Override + public BootstrapClassLoader run() { + return new BootstrapClassLoader(); + } + }); } else { cl = new BootstrapClassLoader(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java index 6c4d3a9c332..af077d5e899 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java @@ -67,7 +67,7 @@ public final class DateNumberFormat extends NumberFormat { } catch (MissingResourceException ex) { if ( !nsName.equals("latn") ) { try { - minusString = rb.getStringWithFallback("NumberElements/latn/symbols/minusSign"); + minusString = rb.getStringWithFallback("NumberElements/latn/symbols/minusSign"); } catch (MissingResourceException ex1) { minusString = "-"; } @@ -90,18 +90,22 @@ public final class DateNumberFormat extends NumberFormat { minusSign = elems[10]; } + @Override public void setMaximumIntegerDigits(int newValue) { maxIntDigits = newValue; } + @Override public int getMaximumIntegerDigits() { return maxIntDigits; } + @Override public void setMinimumIntegerDigits(int newValue) { minIntDigits = newValue; } + @Override public int 
getMinimumIntegerDigits() { return minIntDigits; } @@ -130,11 +134,13 @@ public final class DateNumberFormat extends NumberFormat { return digits.clone(); } + @Override public StringBuffer format(double number, StringBuffer toAppendTo, FieldPosition pos) { throw new UnsupportedOperationException("StringBuffer format(double, StringBuffer, FieldPostion) is not implemented"); } + @Override public StringBuffer format(long numberL, StringBuffer toAppendTo, FieldPosition pos) { @@ -173,17 +179,20 @@ public final class DateNumberFormat extends NumberFormat { } return toAppendTo; } - + + @Override public StringBuffer format(BigInteger number, StringBuffer toAppendTo, FieldPosition pos) { throw new UnsupportedOperationException("StringBuffer format(BigInteger, StringBuffer, FieldPostion) is not implemented"); } + @Override public StringBuffer format(java.math.BigDecimal number, StringBuffer toAppendTo, FieldPosition pos) { throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented"); } + @Override public StringBuffer format(BigDecimal number, StringBuffer toAppendTo, FieldPosition pos) { throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented"); @@ -194,6 +203,7 @@ public final class DateNumberFormat extends NumberFormat { */ private static final long PARSE_THRESHOLD = 922337203685477579L; // (Long.MAX_VALUE / 10) - 1 + @Override public Number parse(String text, ParsePosition parsePosition) { long num = 0; boolean sawNumber = false; @@ -236,6 +246,7 @@ public final class DateNumberFormat extends NumberFormat { return result; } + @Override public boolean equals(Object obj) { if (obj == null || !super.equals(obj) || !(obj instanceof DateNumberFormat)) { return false; @@ -247,7 +258,8 @@ public final class DateNumberFormat extends NumberFormat { && this.positiveOnly == other.positiveOnly && Arrays.equals(this.digits, other.digits)); } - + + @Override 
public int hashCode() { return super.hashCode(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index 0b4009062bd..98a1d0bb4e6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -39,7 +39,7 @@ public final class ICUBinary { private static final int DATA_FORMAT = 0x436d6e44; private static final class IsAcceptable implements Authenticate { - // @Override when we switch to Java 6 + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0] == 1; } @@ -367,7 +367,7 @@ public final class ICUBinary { } else if (i == key.length()) { return -1; // key < table key because key is shorter. } - int diff = (int)key.charAt(i) - c2; + int diff = key.charAt(i) - c2; if (diff != 0) { return diff; } @@ -386,7 +386,7 @@ public final class ICUBinary { } else if (i == key.length()) { return -1; // key < table key because key is shorter. } - int diff = (int)key.charAt(i) - c2; + int diff = key.charAt(i) - c2; if (diff != 0) { return diff; } @@ -402,13 +402,13 @@ public final class ICUBinary { { /** * Method used in ICUBinary.readHeader() to provide data format - * authentication. + * authentication. 
* @param version version of the current data * @return true if dataformat is an acceptable version, false otherwise */ public boolean isDataVersionAcceptable(byte version[]); } - + // public methods -------------------------------------------------------- /** @@ -610,7 +610,7 @@ public final class ICUBinary { bytes.position(headerSize); return // dataVersion - ((int)bytes.get(20) << 24) | + (bytes.get(20) << 24) | ((bytes.get(21) & 0xff) << 16) | ((bytes.get(22) & 0xff) << 8) | (bytes.get(23) & 0xff); @@ -767,23 +767,23 @@ public final class ICUBinary { } // private variables ------------------------------------------------- - + /** * Magic numbers to authenticate the data file */ private static final byte MAGIC1 = (byte)0xda; private static final byte MAGIC2 = (byte)0x27; - + /** * File format authentication values */ private static final byte CHAR_SET_ = 0; private static final byte CHAR_SIZE_ = 2; - + /** * Error messages */ - private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ = + private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ = "ICU data file error: Not an ICU data file"; private static final String HEADER_AUTHENTICATION_FAILED_ = "ICU data file error: Header authentication failed, please check if you have a valid ICU data file"; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java index c4fe481e94b..2989e8501c7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java @@ -63,6 +63,7 @@ public class ICUConfig { if (System.getSecurityManager() != null) { try { val = AccessController.doPrivileged(new PrivilegedAction() { + @Override public String run() { return System.getProperty(fname); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java index fdc7df45de6..cf20dbf497f 100644 --- 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java @@ -93,6 +93,7 @@ public final class ICUData { URL i = null; if (System.getSecurityManager() != null) { i = AccessController.doPrivileged(new PrivilegedAction() { + @Override public URL run() { return ICUData.class.getResource(resourceName); } @@ -107,6 +108,7 @@ public final class ICUData { InputStream i = null; if (System.getSecurityManager() != null) { i = AccessController.doPrivileged(new PrivilegedAction() { + @Override public InputStream run() { return root.getResourceAsStream(resourceName); } @@ -129,6 +131,7 @@ public final class ICUData { InputStream i = null; if (System.getSecurityManager() != null) { i = AccessController.doPrivileged(new PrivilegedAction() { + @Override public InputStream run() { return loader.getResourceAsStream(resourceName); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java index 9158dda0aa8..9f9ba536e9a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java @@ -61,7 +61,7 @@ public class ICULocaleService extends ICUService { * Convenience override for callers using locales. This uses * createKey(ULocale.toString(), kind) to create a key, calls getKey, and then * if actualReturn is not null, returns the actualResult from - * getKey (stripping any prefix) into a ULocale. + * getKey (stripping any prefix) into a ULocale. */ public Object get(ULocale locale, int kind, ULocale[] actualReturn) { Key key = createKey(locale, kind); @@ -146,7 +146,7 @@ public class ICULocaleService extends ICUService { } return locales; } - + /** * A subclass of Key that implements a locale fallback mechanism. 
* The first locale to search for is the locale provided by the @@ -157,7 +157,7 @@ public class ICULocaleService extends ICUService { * *

Canonicalization adjusts the locale string so that the * section before the first understore is in lower case, and the rest - * is in upper case, with no trailing underscores.

+ * is in upper case, with no trailing underscores.

*/ public static class LocaleKey extends ICUService.Key { private int kind; @@ -174,7 +174,7 @@ public class ICULocaleService extends ICUService { public static LocaleKey createWithCanonicalFallback(String primaryID, String canonicalFallbackID) { return createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY); } - + /** * Create a LocaleKey with canonical primary and fallback IDs. */ @@ -185,7 +185,7 @@ public class ICULocaleService extends ICUService { String canonicalPrimaryID = ULocale.getName(primaryID); return new LocaleKey(primaryID, canonicalPrimaryID, canonicalFallbackID, kind); } - + /** * Create a LocaleKey with canonical primary and fallback IDs. */ @@ -196,7 +196,7 @@ public class ICULocaleService extends ICUService { String canonicalPrimaryID = locale.getName(); return new LocaleKey(canonicalPrimaryID, canonicalPrimaryID, canonicalFallbackID, kind); } - + /** * PrimaryID is the user's requested locale string, * canonicalPrimaryID is this string in canonical form, @@ -248,6 +248,7 @@ public class ICULocaleService extends ICUService { /** * Return the (canonical) original ID. */ + @Override public String canonicalID() { return primaryID; } @@ -255,6 +256,7 @@ public class ICULocaleService extends ICUService { /** * Return the (canonical) current ID, or null if no current id. */ + @Override public String currentID() { return currentID; } @@ -263,6 +265,7 @@ public class ICULocaleService extends ICUService { * Return the (canonical) current descriptor, or null if no current id. * Includes the keywords, whereas the ID does not include keywords. */ + @Override public String currentDescriptor() { String result = currentID(); if (result != null) { @@ -305,8 +308,9 @@ public class ICULocaleService extends ICUService { *

First falls back through the primary ID, then through * the fallbackID. The final fallback is "" (root) * unless the primary id was "" (root), in which case - * there is no fallback. + * there is no fallback. */ + @Override public boolean fallback() { int x = currentID.lastIndexOf('_'); if (x != -1) { @@ -329,9 +333,10 @@ public class ICULocaleService extends ICUService { } /** - * If a key created from id would eventually fallback to match the + * If a key created from id would eventually fallback to match the * canonical ID of this key, return true. */ + @Override public boolean isFallbackOf(String id) { return LocaleUtility.isFallbackOf(canonicalID(), id); } @@ -369,11 +374,12 @@ public class ICULocaleService extends ICUService { * the key against the supported IDs, and passes the canonicalLocale and * kind off to handleCreate (which subclasses must implement). */ + @Override public Object create(Key key, ICUService service) { if (handlesKey(key)) { LocaleKey lkey = (LocaleKey)key; int kind = lkey.kind(); - + ULocale uloc = lkey.currentLocale(); return handleCreate(uloc, kind, service); } else { @@ -395,6 +401,7 @@ public class ICULocaleService extends ICUService { /** * Override of superclass method. */ + @Override public void updateVisibleIDs(Map result) { Set cache = getSupportedIDs(); for (String id : cache) { @@ -409,6 +416,7 @@ public class ICULocaleService extends ICUService { /** * Return a localized name for the locale represented by id. */ + @Override public String getDisplayName(String id, ULocale locale) { // assume if the user called this on us, we must have handled some fallback of this id // if (isSupportedID(id)) { @@ -432,15 +440,15 @@ public class ICULocaleService extends ICUService { ///CLOVER:ON /** - * Return true if this id is one the factory supports (visible or + * Return true if this id is one the factory supports (visible or * otherwise). 
*/ protected boolean isSupportedID(String id) { return getSupportedIDs().contains(id); } - + /** - * Return the set of ids that this factory supports (visible or + * Return the set of ids that this factory supports (visible or * otherwise). This can be called often and might need to be * cached if it is expensive to create. */ @@ -451,6 +459,7 @@ public class ICULocaleService extends ICUService { /** * For debugging. */ + @Override public String toString() { StringBuilder buf = new StringBuilder(super.toString()); if (name != null) { @@ -478,7 +487,7 @@ public class ICULocaleService extends ICUService { public SimpleLocaleKeyFactory(Object obj, ULocale locale, int kind, boolean visible, String name) { super(visible, name); - + this.obj = obj; this.id = locale.getBaseName(); this.kind = kind; @@ -487,11 +496,12 @@ public class ICULocaleService extends ICUService { /** * Returns the service object if kind/locale match. Service is not used. */ + @Override public Object create(Key key, ICUService service) { if (!(key instanceof LocaleKey)) { return null; } - + LocaleKey lkey = (LocaleKey)key; if (kind != LocaleKey.KIND_ANY && kind != lkey.kind()) { return null; @@ -499,14 +509,16 @@ public class ICULocaleService extends ICUService { if (!id.equals(lkey.currentID())) { return null; } - + return obj; } + @Override protected boolean isSupportedID(String idToCheck) { return this.id.equals(idToCheck); } + @Override public void updateVisibleIDs(Map result) { if (visible) { result.put(id, this); @@ -515,6 +527,7 @@ public class ICULocaleService extends ICUService { } } + @Override public String toString() { StringBuilder buf = new StringBuilder(super.toString()); buf.append(", id: "); @@ -555,13 +568,15 @@ public class ICULocaleService extends ICUService { /** * Return the supported IDs. This is the set of all locale names for the bundleName. 
*/ + @Override protected Set getSupportedIDs() { - return ICUResourceBundle.getFullLocaleNameSet(bundleName, loader()); + return ICUResourceBundle.getFullLocaleNameSet(bundleName, loader()); } /** * Override of superclass method. */ + @Override public void updateVisibleIDs(Map result) { Set visibleIDs = ICUResourceBundle.getAvailableLocaleNameSet(bundleName, loader()); // only visible ids for (String id : visibleIDs) { @@ -573,6 +588,7 @@ public class ICULocaleService extends ICUService { * Create the service. The default implementation returns the resource bundle * for the locale, ignoring kind, and service. */ + @Override protected Object handleCreate(ULocale loc, int kind, ICUService service) { return ICUResourceBundle.getBundleInstance(bundleName, loc, loader()); } @@ -581,6 +597,7 @@ public class ICULocaleService extends ICUService { return ClassLoaderUtil.getClassLoader(getClass()); } + @Override public String toString() { return super.toString() + ", bundle: " + bundleName; } @@ -604,6 +621,7 @@ public class ICULocaleService extends ICUService { return fallbackLocaleName; } + @Override public Key createKey(String id) { return LocaleKey.createWithCanonicalFallback(id, validateFallbackLocale()); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java index 72453218d8b..9e833723462 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java @@ -21,12 +21,12 @@ import java.util.List; * eventually dequeues the list and calls notifyListener on each * listener in the list.

* - *

Subclasses override acceptsListener and notifyListener + *

Subclasses override acceptsListener and notifyListener * to add type-safe notification. AcceptsListener should return * true if the listener is of the appropriate type; ICUNotifier * itself will ensure the listener is non-null and that the * identical listener is not already registered with the Notifier. - * NotifyListener should cast the listener to the appropriate + * NotifyListener should cast the listener to the appropriate * type and call the appropriate method on the listener. */ public abstract class ICUNotifier { @@ -39,7 +39,7 @@ public abstract class ICUNotifier { * The listener must not be null. AcceptsListener must return * true for the listener. Attempts to concurrently * register the identical listener more than once will be - * silently ignored. + * silently ignored. */ public void addListener(EventListener l) { if (l == null) { @@ -137,6 +137,7 @@ public abstract class ICUNotifier { * Wait for a notification to be queued, then notify all * listeners listed in the notification. */ + @Override public void run() { EventListener[] list; while (true) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java index 86e6fcae1bb..6cc93a5cfe9 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java @@ -88,9 +88,10 @@ public class ICURWLock { /** * Return a string listing all the stats. */ + @Override public String toString() { return " rc: " + _rc + - " mrc: " + _mrc + + " mrc: " + _mrc + " wrc: " + _wrc + " wc: " + _wc + " wwc: " + _wwc; @@ -114,7 +115,7 @@ public class ICURWLock { stats = null; return result; } - + /** * Return a snapshot of the current stats. This does not reset the stats. */ @@ -130,7 +131,7 @@ public class ICURWLock { *

If there's a writer, or a waiting writer, increment the * waiting reader count and block on this. Otherwise * increment the active reader count and return. Caller must call - * releaseRead when done (for example, in a finally block).

+ * releaseRead when done (for example, in a finally block).

*/ public void acquireRead() { if (stats != null) { // stats is null by default @@ -168,7 +169,7 @@ public class ICURWLock { * having an active writer and return. Otherwise, add a lock to the * end of the waiting writer list, and block on it. Caller * must call releaseWrite when done (for example, in a finally - * block).

+ * block).

*/ public void acquireWrite() { if (stats != null) { // stats is null by default @@ -189,7 +190,7 @@ public class ICURWLock { *

If there are waiting readers, make them all active and * notify all of them. Otherwise, notify the oldest waiting * writer, if any. Call when finished with work controlled by - * acquireWrite.

+ * acquireWrite.

*/ public void releaseWrite() { rwl.writeLock().unlock(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUService.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUService.java index ae5e96dd949..4db16b382b8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUService.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUService.java @@ -297,6 +297,7 @@ public class ICUService extends ICUNotifier { * Return the service instance if the factory's id is equal to * the key's currentID. Service is ignored. */ + @Override public Object create(Key key, ICUService service) { if (id.equals(key.currentID())) { return instance; @@ -308,6 +309,7 @@ public class ICUService extends ICUNotifier { * If visible, adds a mapping from id -> this to the result, * otherwise removes id from result. */ + @Override public void updateVisibleIDs(Map result) { if (visible) { result.put(id, this); @@ -321,6 +323,7 @@ public class ICUService extends ICUNotifier { * otherwise returns null. (This default implementation has * no localized id information.) */ + @Override public String getDisplayName(String identifier, ULocale locale) { return (visible && id.equals(identifier)) ? identifier : null; } @@ -328,6 +331,7 @@ public class ICUService extends ICUNotifier { /** * For debugging. */ + @Override public String toString() { StringBuilder buf = new StringBuilder(super.toString()); buf.append(", id: "); @@ -625,12 +629,12 @@ public class ICUService extends ICUNotifier { return f.getDisplayName(id, locale); } } - + return null; } /** - * Convenience override of getDisplayNames(ULocale, Comparator, String) that + * Convenience override of getDisplayNames(ULocale, Comparator, String) that * uses the current default Locale as the locale, null as * the comparator, and null for the matchID. 
*/ @@ -685,7 +689,7 @@ public class ICUService extends ICUNotifier { synchronized (this) { if (ref == dnref || dnref == null) { dncache = new TreeMap(com); // sorted - + Map m = getVisibleIDMap(); Iterator> ei = m.entrySet().iterator(); while (ei.hasNext()) { @@ -923,6 +927,7 @@ public class ICUService extends ICUNotifier { * requires a ServiceListener. Subclasses can override to accept * different listeners. */ + @Override protected boolean acceptsListener(EventListener l) { return l instanceof ServiceListener; } @@ -931,6 +936,7 @@ public class ICUService extends ICUNotifier { * Notify the listener, which by default is a ServiceListener. * Subclasses can override to use a different listener. */ + @Override protected void notifyListener(EventListener l) { ((ServiceListener)l).serviceChanged(this); } @@ -959,6 +965,7 @@ public class ICUService extends ICUNotifier { /** * Returns the result of super.toString, appending the name in curly braces. */ + @Override public String toString() { return super.toString() + "{" + name + "}"; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java index 257306f0c61..b9902a6b00f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java @@ -18,17 +18,18 @@ public class IllegalIcuArgumentException extends IllegalArgumentException { public IllegalIcuArgumentException(String errorMessage) { super(errorMessage); } - + public IllegalIcuArgumentException(Throwable cause) { super(cause); } - + public IllegalIcuArgumentException(String errorMessage, Throwable cause) { super(errorMessage, cause); } - + + @Override public synchronized IllegalIcuArgumentException initCause(Throwable cause) { return (IllegalIcuArgumentException) super.initCause(cause); } - + } diff --git 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java index a08fe684815..ca45dedf7be 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java @@ -30,10 +30,10 @@ public class IntTrie extends Trie /** *

Creates a new Trie with the settings for the trie data.

- *

Unserialize the 32-bit-aligned input stream and use the data for the + *

Unserialize the 32-bit-aligned input stream and use the data for the * trie.

* @param bytes file buffer to a ICU data file, containing the trie - * @param dataManipulate object which provides methods to parse the char + * @param dataManipulate object which provides methods to parse the char * data * @throws IOException thrown when data reading fails */ @@ -122,7 +122,7 @@ public class IntTrie extends Trie // fastpath for U+0000..U+D7FF if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // copy of getRawOffset() - offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } @@ -202,15 +202,15 @@ public class IntTrie extends Trie } return m_initialValue_; } - + /** *

Gets the latin 1 fast path value.

- *

Note this only works if latin 1 characters have their own linear + *

Note this only works if latin 1 characters have their own linear * array.

* @param ch latin 1 characters * @return value associated with latin character */ - public final int getLatin1LinearValue(char ch) + public final int getLatin1LinearValue(char ch) { return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch]; } @@ -222,7 +222,8 @@ public class IntTrie extends Trie * otherwise */ ///CLOVER:OFF - public boolean equals(Object other) + @Override + public boolean equals(Object other) { boolean result = super.equals(other); if (result && other instanceof IntTrie) { @@ -235,13 +236,14 @@ public class IntTrie extends Trie } return false; } - + + @Override public int hashCode() { assert false : "hashCode not designed"; return 42; } ///CLOVER:ON - + // protected methods ----------------------------------------------- /** @@ -249,6 +251,7 @@ public class IntTrie extends Trie * data array

* @param bytes data buffer containing trie data */ + @Override protected final void unserialize(ByteBuffer bytes) { super.unserialize(bytes); @@ -263,6 +266,7 @@ public class IntTrie extends Trie * @param trail trailing surrogate * @return offset to data */ + @Override protected final int getSurrogateOffset(char lead, char trail) { if (m_dataManipulate_ == null) { @@ -281,7 +285,7 @@ public class IntTrie extends Trie // value: m_initialValue_ return -1; } - + /** * Gets the value at the argument index. * For use internally in TrieIterator @@ -289,22 +293,24 @@ public class IntTrie extends Trie * @return 32 bit value * @see com.ibm.icu.impl.TrieIterator */ + @Override protected final int getValue(int index) { return m_data_[index]; } - + /** * Gets the default initial value - * @return 32 bit value + * @return 32 bit value */ + @Override protected final int getInitialValue() { return m_initialValue_; } // package private methods ----------------------------------------- - + /** * Internal constructor for builder use * @param index the index array to be slotted into this trie @@ -321,7 +327,7 @@ public class IntTrie extends Trie m_dataLength_ = m_data_.length; m_initialValue_ = initialvalue; } - + // private data members -------------------------------------------- /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java index 3e71d938c07..38c1983f20b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java @@ -32,6 +32,7 @@ public class IterableComparator implements Comparator> { this.shorterFirst = shorterFirst ? 1 : -1; } + @Override public int compare(Iterable a, Iterable b) { if (a == null) { return b == null ? 
0 : -shorterFirst; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java index 7d05ffe30e2..03ff19efddd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java @@ -637,10 +637,12 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { path, locale.getBaseName()); } + @Override public ULocale getLocale() { return bundle.getULocale(); } + @Override public String get(String tableName, String subTableName, String code) { return ICUResourceTableAccess.getTableString(bundle, tableName, subTableName, code, nullIfNotFound ? null : code); @@ -654,6 +656,7 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { return (DataTables) Class.forName(className).newInstance(); } catch (Throwable t) { return new DataTables() { + @Override public DataTable get(ULocale locale, boolean nullIfNotFound) { return new DataTable(nullIfNotFound); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java index 1685656bce7..3d1144fce43 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java @@ -21,22 +21,22 @@ import com.ibm.icu.impl.locale.AsciiUtil; * Utility class to parse and normalize locale ids (including POSIX style) */ public final class LocaleIDParser { - + /** * Char array representing the locale ID. */ private char[] id; - + /** * Current position in {@link #id} (while parsing). */ private int index; - + /** * Temporary buffer for parsed sections of data. */ private StringBuilder buffer; - + // um, don't handle POSIX ids unless we request it. why not? well... because. 
private boolean canonicalize; private boolean hadCountry; @@ -73,14 +73,14 @@ public final class LocaleIDParser { } // utilities for working on text in the buffer - + /** * Append c to the buffer. */ private void append(char c) { buffer.append(c); } - + private void addSeparator() { append(UNDERSCORE); } @@ -194,7 +194,7 @@ public final class LocaleIDParser { */ private int parseLanguage() { int startLength = buffer.length(); - + if (haveExperimentalLanguagePrefix()) { append(AsciiUtil.toLower(id[0])); append(HYPHEN); @@ -280,7 +280,7 @@ public final class LocaleIDParser { if (!atTerminator()) { int oldIndex = index; ++index; - + char c; while (!isTerminatorOrIDSeparator(c = next()) && AsciiUtil.isAlpha(c)); --index; @@ -397,7 +397,7 @@ public final class LocaleIDParser { boolean skipping = false; char c; boolean firstPass = true; - + while ((c = next()) != DONE) { if (c == DOT) { start = false; @@ -591,6 +591,7 @@ public final class LocaleIDParser { private Comparator getKeyComparator() { final Comparator comp = new Comparator() { + @Override public int compare(String lhs, String rhs) { return lhs.compareTo(rhs); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java index 80114791bda..a3ac7a3d513 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java @@ -342,6 +342,7 @@ public final class Norm2AllModes { } private static CacheBase cache = new SoftCache() { + @Override protected Norm2AllModes createInstance(String key, ByteBuffer bytes) { Normalizer2Impl impl; if(bytes==null) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java index 67e06f7ff38..86a02479737 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java +++ 
b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java @@ -206,6 +206,7 @@ public final class Normalizer2Impl { // They assume that the cc or trailCC of their input is 0. // Most of them implement Appendable interface methods. // @Override when we switch to Java 6 + @Override public ReorderingBuffer append(char c) { str.append(c); lastCC=0; @@ -218,6 +219,7 @@ public final class Normalizer2Impl { reorderStart=str.length(); } // @Override when we switch to Java 6 + @Override public ReorderingBuffer append(CharSequence s) { if(s.length()!=0) { str.append(s); @@ -227,6 +229,7 @@ public final class Normalizer2Impl { return this; } // @Override when we switch to Java 6 + @Override public ReorderingBuffer append(CharSequence s, int start, int limit) { if(start!=limit) { str.append(s, start, limit); @@ -413,6 +416,7 @@ public final class Normalizer2Impl { private static final class IsAcceptable implements ICUBinary.Authenticate { // @Override when we switch to Java 6 + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0]==2; } @@ -560,6 +564,7 @@ public final class Normalizer2Impl { } } private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() { + @Override public int map(int in) { return in&CANON_NOT_SEGMENT_STARTER; } @@ -1829,7 +1834,7 @@ public final class Normalizer2Impl { } if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { if((firstUnit&COMP_1_TRIPLE)!=0) { - return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2); + return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2); } else { return compositions.charAt(list+1); } @@ -1874,7 +1879,7 @@ public final class Normalizer2Impl { compositeAndFwd=maybeYesCompositions.charAt(list+1); list+=2; } else { - compositeAndFwd=(((int)maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)| + compositeAndFwd=((maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)| maybeYesCompositions.charAt(list+2); list+=3; } diff --git 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java index 550681c2f0c..44e97872fe3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/OlsonTimeZone.java @@ -45,52 +45,52 @@ import com.ibm.icu.util.UResourceBundle; * * a. Zone (table). A zone is a table resource contains several * type of resources below: - * + * * - typeOffsets:intvector (Required) - * + * * Sets of UTC raw/dst offset pairs in seconds. Entries at * 2n represents raw offset and 2n+1 represents dst offset * paired with the raw offset at 2n. The very first pair represents * the initial zone offset (before the first transition) always. * - * - trans:intvector (Optional) - * + * - trans:intvector (Optional) + * * List of transition times represented by 32bit seconds from the * epoch (1970-01-01T00:00Z) in ascending order. - * + * * - transPre32/transPost32:intvector (Optional) - * + * * List of transition times before/after 32bit minimum seconds. * Each time is represented by a pair of 32bit integer. - * + * * - typeMap:bin (Optional) - * + * * Array of bytes representing the mapping between each transition * time (transPre32/trans/transPost32) and its corresponding offset * data (typeOffsets). - * + * * - finalRule:string (Optional) - * + * * If a recurrent transition rule is applicable to a zone forever * after the final transition time, finalRule represents the rule * in Rules data. - * + * * - finalRaw:int (Optional) - * + * * When finalRule is available, finalRaw is required and specifies * the raw (base) offset of the rule. - * + * * - finalYear:int (Optional) - * + * * When finalRule is available, finalYear is required and specifies * the start year of the rule. - * + * * - links:intvector (Optional) - * + * * When this zone data is shared with other zones, links specifies * all zones including the zone itself. 
Each zone is referenced by * integer index. - * + * * b. Link (int, length 1). A link zone is an int resource. The * integer is the zone number of the target zone. The key of this * resource is an alternate name for the target zone. This data @@ -317,8 +317,8 @@ public class OlsonTimeZone extends BasicTimeZone { int[] fields = Grego.timeToFields(current, null); // Find start of this year, and start of next year - long start = Grego.fieldsToDay(fields[0], 0, 1) * SECONDS_PER_DAY; - long limit = Grego.fieldsToDay(fields[0] + 1, 0, 1) * SECONDS_PER_DAY; + long start = Grego.fieldsToDay(fields[0], 0, 1) * SECONDS_PER_DAY; + long limit = Grego.fieldsToDay(fields[0] + 1, 0, 1) * SECONDS_PER_DAY; // Return TRUE if DST is observed at any time during the current // year. @@ -485,7 +485,7 @@ public class OlsonTimeZone extends BasicTimeZone { } private void construct(UResourceBundle top, UResourceBundle res){ - + if ((top == null || res == null)) { throw new IllegalArgumentException(); } @@ -537,21 +537,21 @@ public class OlsonTimeZone extends BasicTimeZone { int idx = 0; if (transPre32 != null) { for (int i = 0; i < transPre32.length / 2; i++, idx++) { - transitionTimes64[idx] = - (((long)transPre32[i * 2]) & 0x00000000FFFFFFFFL) << 32 - | (((long)transPre32[i * 2 + 1]) & 0x00000000FFFFFFFFL); + transitionTimes64[idx] = + ((transPre32[i * 2]) & 0x00000000FFFFFFFFL) << 32 + | ((transPre32[i * 2 + 1]) & 0x00000000FFFFFFFFL); } } if (trans32 != null) { for (int i = 0; i < trans32.length; i++, idx++) { - transitionTimes64[idx] = (long)trans32[i]; + transitionTimes64[idx] = trans32[i]; } } if (transPost32 != null) { for (int i = 0; i < transPost32.length / 2; i++, idx++) { - transitionTimes64[idx] = - (((long)transPost32[i * 2]) & 0x00000000FFFFFFFFL) << 32 - | (((long)transPost32[i * 2 + 1]) & 0x00000000FFFFFFFFL); + transitionTimes64[idx] = + ((transPost32[i * 2]) & 0x00000000FFFFFFFFL) << 32 + | ((transPost32[i * 2 + 1]) & 0x00000000FFFFFFFFL); } } } else { @@ -607,7 +607,7 @@ 
public class OlsonTimeZone extends BasicTimeZone { finalStartYear = r.getInt(); // Note: Setting finalStartYear to the finalZone is problematic. When a date is around - // year boundary, SimpleTimeZone may return false result when DST is observed at the + // year boundary, SimpleTimeZone may return false result when DST is observed at the // beginning of year. We could apply safe margin (day or two), but when one of recurrent // rules falls around year boundary, it could return false result. Without setting the // start year, finalZone works fine around the year boundary of the start year. @@ -747,7 +747,7 @@ public class OlsonTimeZone extends BasicTimeZone { } private int getInt(byte val){ - return val & 0xFF; + return val & 0xFF; } /* @@ -827,7 +827,7 @@ public class OlsonTimeZone extends BasicTimeZone { buf.append(",finalStartMillis=" + finalStartMillis); buf.append(",finalZone=" + finalZone); buf.append(']'); - + return buf.toString(); } @@ -874,7 +874,7 @@ public class OlsonTimeZone extends BasicTimeZone { * If and only if finalYear == INT32_MAX then finalZone == 0. */ private SimpleTimeZone finalZone = null; // owned, may be NULL - + /** * The canonical ID of this zone. Initialized when {@link #getCanonicalID()} * is invoked first time, or {@link #setID(String)} is called. @@ -885,7 +885,7 @@ public class OlsonTimeZone extends BasicTimeZone { private static final boolean DEBUG = ICUDebug.enabled("olson"); private static final int SECONDS_PER_DAY = 24*60*60; - + private static UResourceBundle loadRule(UResourceBundle top, String ruleid) { UResourceBundle r = top.get("Rules"); r = r.get(ruleid); @@ -919,9 +919,9 @@ public class OlsonTimeZone extends BasicTimeZone { public int hashCode(){ int ret = (int) (finalStartYear ^ (finalStartYear>>>4) + transitionCount ^ (transitionCount>>>6) + - typeCount ^ (typeCount>>>8) + + typeCount ^ (typeCount>>>8) + Double.doubleToLongBits(finalStartMillis)+ - (finalZone == null ? 
0 : finalZone.hashCode()) + + (finalZone == null ? 0 : finalZone.hashCode()) + super.hashCode()); if (transitionTimes64 != null) { for(int i=0; i 0xffff) { // too many rows for a 16-bit trie @@ -37,6 +41,6 @@ public class PVecToTrieCompactHandler implements CompactHandler { } else { builder = new IntTrieBuilder(null, 100000, initialValue, initialValue, false); - } + } } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java index 0db096ef6af..0df6063ddea 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/PropsVectors.java @@ -9,9 +9,9 @@ /** * Store bits (Unicode character properties) in bit set vectors. - * + * * This is a port of the C++ class UPropsVectors from ICU4C - * + * * @author Shaopeng Jia * @internal */ @@ -23,15 +23,15 @@ import java.util.Comparator; /** * Unicode Properties Vectors associated with code point ranges. - * + * * Rows of primitive integers in a contiguous array store the range limits and * the properties vectors. - * + * * In each row, row[0] contains the start code point and row[1] contains the * limit code point, which is the start of the next range. - * + * * Initially, there is only one range [0..0x110000] with values 0. - * + * * It would be possible to store only one range boundary per row, but * self-contained rows allow to later sort them by contents. */ @@ -45,10 +45,10 @@ public class PropsVectors { private boolean isCompacted; // internal function to compare elements in v and target. 
Return true iff - // elements in v starting from index1 to index1 + length - 1 + // elements in v starting from index1 to index1 + length - 1 // are exactly the same as elements in target // starting from index2 to index2 + length - 1 - private boolean areElementsSame(int index1, int[] target, int index2, + private boolean areElementsSame(int index1, int[] target, int index2, int length) { for (int i = 0; i < length; ++i) { if (v[index1 + i] != target[index2 + i]) { @@ -57,7 +57,7 @@ public class PropsVectors { } return true; } - + // internal function which given rangeStart, returns // index where v[index]<=rangeStart() { + @Override public int compare(Integer o1, Integer o2) { int indexOfRow1 = o1.intValue(); int indexOfRow2 = o2.intValue(); @@ -436,10 +437,10 @@ public class PropsVectors { compactor.startRealValues(count); /* - * Move vector contents up to a contiguous array with only unique + * Move vector contents up to a contiguous array with only unique * vector values, and call the handler function for each vector. - * - * This destroys the Properties Vector structure and replaces it + * + * This destroys the Properties Vector structure and replaces it * with an array of just vector values. */ int[] temp = new int[count]; @@ -450,7 +451,7 @@ public class PropsVectors { // count a new values vector if it is different // from the current one - if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, + if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2, temp, count, valueColumns)) { count += valueColumns; System.arraycopy(v, indexArray[i].intValue() + 2, temp, count, @@ -462,7 +463,7 @@ public class PropsVectors { } } v = temp; - + // count is at the beginning of the last vector, // add one to include that last vector rows = count / valueColumns + 1; @@ -470,7 +471,7 @@ public class PropsVectors { /* * Get the vectors array after calling compact(). 
- * + * * @throws IllegalStateException */ public int[] getCompactedArray() { @@ -483,7 +484,7 @@ public class PropsVectors { /* * Get the number of rows for the compacted array. - * + * * @throws IllegalStateException */ public int getCompactedRows() { @@ -496,7 +497,7 @@ public class PropsVectors { /* * Get the number of columns for the compacted array. - * + * * @throws IllegalStateException */ public int getCompactedColumns() { @@ -520,6 +521,7 @@ public class PropsVectors { // inner class implementation of Trie.DataManipulate private static class DefaultGetFoldingOffset implements Trie.DataManipulate { + @Override public int getFoldingOffset(int value) { return value; } @@ -534,8 +536,9 @@ public class PropsVectors { builder = inBuilder; } + @Override public int getFoldedValue(int start, int offset) { - int initialValue = builder.m_initialValue_; + int initialValue = builder.m_initialValue_; int limit = start + 0x400; while (start < limit) { boolean[] inBlockZero = new boolean[1]; @@ -551,7 +554,7 @@ public class PropsVectors { return 0; } } - + public static interface CompactHandler { public void setRowIndexForRange(int start, int end, int rowIndex); public void setRowIndexForInitialValue(int rowIndex); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Relation.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Relation.java index 01531d3e7a1..2f2f184eb3d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Relation.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Relation.java @@ -57,7 +57,7 @@ public class Relation implements Freezable> { // TODO: add , this.setCreator.newInstance(setComparatorParam); // check to make sure compiles } else { this.setCreator = ((Class>)setCreator).getConstructor(Comparator.class); - this.setCreator.newInstance(setComparatorParam); // check to make sure compiles + this.setCreator.newInstance(setComparatorParam); // check to make sure compiles } data = map == null ? 
new HashMap>() : map; } catch (Exception e) { @@ -85,11 +85,11 @@ public class Relation implements Freezable> { // TODO: add , public final Set> entrySet() { return keyValueSet(); } - + public Set>> keyValuesSet() { return data.entrySet(); } - + public Set> keyValueSet() { Set> result = new LinkedHashSet>(); for (K key : data.keySet()) { @@ -100,6 +100,7 @@ public class Relation implements Freezable> { // TODO: add , return result; } + @Override public boolean equals(Object o) { if (o == null) return false; @@ -123,6 +124,7 @@ public class Relation implements Freezable> { // TODO: add , return data.get(key); } + @Override public int hashCode() { return data.hashCode(); } @@ -163,7 +165,7 @@ public class Relation implements Freezable> { // TODO: add , private Set newSet() { try { - return (Set) setCreator.newInstance(setComparatorParam); + return setCreator.newInstance(setComparatorParam); } catch (Exception e) { throw (RuntimeException) new IllegalArgumentException("Can't create new set").initCause(e); } @@ -222,6 +224,7 @@ public class Relation implements Freezable> { // TODO: add , return result; } + @Override public String toString() { return data.toString(); } @@ -241,14 +244,17 @@ public class Relation implements Freezable> { // TODO: add , this.value = e.getValue(); } + @Override public K getKey() { return key; } + @Override public V getValue() { return value; } + @Override public V setValue(V value) { V oldValue = this.value; this.value = value; @@ -274,10 +280,12 @@ public class Relation implements Freezable> { // TODO: add , volatile boolean frozen = false; + @Override public boolean isFrozen() { return frozen; } + @Override public Relation freeze() { if (!frozen) { // does not handle one level down, so we do that on a case-by-case basis @@ -291,6 +299,7 @@ public class Relation implements Freezable> { // TODO: add , return this; } + @Override public Relation cloneAsThawed() { // TODO do later throw new UnsupportedOperationException(); diff --git 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java index 695eb8a587b..aa914e0db7f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java @@ -26,7 +26,7 @@ import com.ibm.icu.text.UTF16; public class ReplaceableUCharacterIterator extends UCharacterIterator { // public constructor ------------------------------------------------------ - + /** * Public constructor * @param replaceable text which the iterator will be based on @@ -38,7 +38,7 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { this.replaceable = replaceable; this.currentIndex = 0; } - + /** * Public constructor * @param str text which the iterator will be based on @@ -50,7 +50,7 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { this.replaceable = new ReplaceableString(str); this.currentIndex = 0; } - + /** * Public constructor * @param buf buffer of text on which the iterator will be based @@ -62,14 +62,15 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { this.replaceable = new ReplaceableString(buf); this.currentIndex = 0; } - + // public methods ---------------------------------------------------------- - + /** - * Creates a copy of this iterator, does not clone the underlying + * Creates a copy of this iterator, does not clone the underlying * Replaceableobject * @return copy of this iterator */ + @Override public Object clone(){ try { return super.clone(); @@ -77,37 +78,39 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { return null; // never invoked } } - + /** * Returns the current UTF16 character. 
* @return current UTF16 character */ + @Override public int current(){ if (currentIndex < replaceable.length()) { return replaceable.charAt(currentIndex); } return DONE; } - + /** * Returns the current codepoint * @return current codepoint */ + @Override public int currentCodePoint(){ - // cannot use charAt due to it different + // cannot use charAt due to it different // behaviour when index is pointing at a // trail surrogate, check for surrogates - + int ch = current(); if(UTF16.isLeadSurrogate((char)ch)){ // advance the index to get the next code point next(); - // due to post increment semantics current() after next() + // due to post increment semantics current() after next() // actually returns the next char which is what we want int ch2 = current(); // current should never change the current index so back off previous(); - + if(UTF16.isTrailSurrogate((char)ch2)){ // we found a surrogate pair return Character.toCodePoint((char)ch, (char)ch2); @@ -115,47 +118,51 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { } return ch; } - + /** * Returns the length of the text * @return length of the text */ + @Override public int getLength(){ return replaceable.length(); } - + /** * Gets the current currentIndex in text. * @return current currentIndex in text. */ + @Override public int getIndex(){ return currentIndex; } - + /** - * Returns next UTF16 character and increments the iterator's currentIndex by 1. - * If the resulting currentIndex is greater or equal to the text length, the - * currentIndex is reset to the text length and a value of DONECODEPOINT is - * returned. - * @return next UTF16 character in text or DONE if the new currentIndex is off the + * Returns next UTF16 character and increments the iterator's currentIndex by 1. + * If the resulting currentIndex is greater or equal to the text length, the + * currentIndex is reset to the text length and a value of DONECODEPOINT is + * returned. 
+ * @return next UTF16 character in text or DONE if the new currentIndex is off the * end of the text range. */ + @Override public int next(){ if (currentIndex < replaceable.length()) { return replaceable.charAt(currentIndex++); } return DONE; } - - + + /** - * Returns previous UTF16 character and decrements the iterator's currentIndex by - * 1. - * If the resulting currentIndex is less than 0, the currentIndex is reset to 0 and a - * value of DONECODEPOINT is returned. - * @return next UTF16 character in text or DONE if the new currentIndex is off the + * Returns previous UTF16 character and decrements the iterator's currentIndex by + * 1. + * If the resulting currentIndex is less than 0, the currentIndex is reset to 0 and a + * value of DONECODEPOINT is returned. + * @return next UTF16 character in text or DONE if the new currentIndex is off the * start of the text range. */ + @Override public int previous(){ if (currentIndex > 0) { return replaceable.charAt(--currentIndex); @@ -164,22 +171,24 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { } /** - *

Sets the currentIndex to the specified currentIndex in the text and returns that - * single UTF16 character at currentIndex. + *

Sets the currentIndex to the specified currentIndex in the text and returns that + * single UTF16 character at currentIndex. * This assumes the text is stored as 16-bit code units.

- * @param currentIndex the currentIndex within the text. - * @exception IllegalArgumentException is thrown if an invalid currentIndex is + * @param currentIndex the currentIndex within the text. + * @exception IllegalArgumentException is thrown if an invalid currentIndex is * supplied. i.e. currentIndex is out of bounds. - * @returns the character at the specified currentIndex or DONE if the specified + * @returns the character at the specified currentIndex or DONE if the specified * currentIndex is equal to the end of the text. */ + @Override public void setIndex(int currentIndex) throws IndexOutOfBoundsException{ if (currentIndex < 0 || currentIndex > replaceable.length()) { throw new IndexOutOfBoundsException(); } this.currentIndex = currentIndex; } - + + @Override public int getText(char[] fillIn, int offset){ int length = replaceable.length(); if(offset < 0 || offset + length > fillIn.length){ @@ -187,10 +196,10 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator { } replaceable.getChars(0,length,fillIn,offset); return length; - } - + } + // private data members ---------------------------------------------------- - + /** * Replacable object */ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java index 8b290f9a084..50869e78033 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ResourceBundleWrapper.java @@ -22,7 +22,7 @@ import com.ibm.icu.util.ULocale; import com.ibm.icu.util.UResourceBundle; /** - * just a wrapper for Java ListResourceBundles and + * just a wrapper for Java ListResourceBundles and * @author ram * */ @@ -49,6 +49,7 @@ public final class ResourceBundleWrapper extends UResourceBundle { this.bundle=bundle; } + @Override protected Object handleGetObject(String aKey){ ResourceBundleWrapper current = this; Object obj = null; @@ -69,11 
+70,12 @@ public final class ResourceBundleWrapper extends UResourceBundle { } return obj; } - + + @Override public Enumeration getKeys(){ return Collections.enumeration(keys); } - + private void initKeysVector(){ ResourceBundleWrapper current = this; keys = new ArrayList(); @@ -88,25 +90,29 @@ public final class ResourceBundleWrapper extends UResourceBundle { current = (ResourceBundleWrapper)current.getParent(); } } + @Override protected String getLocaleID(){ - return localeID; + return localeID; } - + + @Override protected String getBaseName(){ - return bundle.getClass().getName().replace('.','/'); + return bundle.getClass().getName().replace('.','/'); } - + + @Override public ULocale getULocale(){ - return new ULocale(localeID); + return new ULocale(localeID); } - + + @Override public UResourceBundle getParent(){ - return (UResourceBundle)parent; + return (UResourceBundle)parent; } // Flag for enabling/disabling debugging code private static final boolean DEBUG = ICUDebug.enabled("resourceBundleWrapper"); - + // This method is for super class's instantiateBundle method public static ResourceBundleWrapper getBundleInstance(String baseName, String localeID, ClassLoader root, boolean disableFallback) { @@ -182,6 +188,7 @@ public final class ResourceBundleWrapper extends UResourceBundle { final String resName = name.replace('.', '/') + ".properties"; InputStream stream = java.security.AccessController.doPrivileged( new java.security.PrivilegedAction() { + @Override public InputStream run() { return root.getResourceAsStream(resName); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Row.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Row.java index fcec0f56037..55667a8bfbe 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Row.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Row.java @@ -95,6 +95,7 @@ public class Row implements java.lang.Comparable, Cloneable, return this; } + @Override public int hashCode() { int sum = items.length; for 
(Object item : items) { @@ -103,6 +104,7 @@ public class Row implements java.lang.Comparable, Cloneable, return sum; } + @Override public boolean equals(Object other) { if (other == null) { return false; @@ -127,6 +129,7 @@ public class Row implements java.lang.Comparable, Cloneable, } } + @Override public int compareTo(Object other) { int result; Row that = (Row)other; @@ -144,6 +147,7 @@ public class Row implements java.lang.Comparable, Cloneable, return 0; } + @Override public String toString() { StringBuilder result = new StringBuilder("["); boolean first = true; @@ -158,15 +162,18 @@ public class Row implements java.lang.Comparable, Cloneable, return result.append("]").toString(); } + @Override public boolean isFrozen() { return frozen; } + @Override public Row freeze() { frozen = true; return this; } + @Override public Object clone() { if (frozen) return this; try { @@ -178,6 +185,7 @@ public class Row implements java.lang.Comparable, Cloneable, } } + @Override public Row cloneAsThawed() { try { Row result = (Row) super.clone(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java index 8e673e44146..a72261c158c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/RuleCharacterIterator.java @@ -36,7 +36,7 @@ public class RuleCharacterIterator { /** * Text being iterated. - */ + */ private String text; /** @@ -81,7 +81,7 @@ public class RuleCharacterIterator { * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded * to its value. Escapes are parsed using Utility.unescapeAt(). */ - public static final int PARSE_ESCAPES = 2; + public static final int PARSE_ESCAPES = 2; /** * Bitmask option to enable skipping of whitespace. 
If (options & @@ -111,7 +111,7 @@ public class RuleCharacterIterator { this.pos = pos; buf = null; } - + /** * Returns true if this iterator has no more characters to return. */ @@ -308,6 +308,7 @@ public class RuleCharacterIterator { * Position within an expanded variable is not indicated. * @return a string representation of this object */ + @Override public String toString() { int b = pos.getIndex(); return text.substring(0, b) + '|' + text.substring(b); @@ -326,7 +327,7 @@ public class RuleCharacterIterator { return (i < text.length()) ? UTF16.charAt(text, i) : DONE; } } - + /** * Advances the position by the given amount. * @param count the number of 16-bit code units to advance past diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java index 36d0cdce49a..d0a1264757e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/SimpleCache.java @@ -39,6 +39,7 @@ public class SimpleCache implements ICUCache { } } + @Override public V get(Object key) { Reference> ref = cacheRef; if (ref != null) { @@ -50,6 +51,7 @@ public class SimpleCache implements ICUCache { return null; } + @Override public void put(K key, V value) { Reference> ref = cacheRef; Map map = null; @@ -68,6 +70,7 @@ public class SimpleCache implements ICUCache { map.put(key, value); } + @Override public void clear() { cacheRef = null; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java index c36cea669c2..667522f3353 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringPrepDataReader.java @@ -47,9 +47,10 @@ public final class StringPrepDataReader implements ICUBinary.Authenticate { return ICUBinary.getChars(byteBuffer, length, 0); } + @Override public boolean 
isDataVersionAcceptable(byte version[]){ - return version[0] == DATA_FORMAT_VERSION[0] - && version[2] == DATA_FORMAT_VERSION[2] + return version[0] == DATA_FORMAT_VERSION[0] + && version[2] == DATA_FORMAT_VERSION[2] && version[3] == DATA_FORMAT_VERSION[3]; } public int[] readIndexes(int length)throws IOException{ @@ -59,7 +60,7 @@ public final class StringPrepDataReader implements ICUBinary.Authenticate { indexes[i] = byteBuffer.getInt(); } return indexes; - } + } public byte[] getUnicodeVersion(){ return ICUBinary.getVersionByteArrayFromCompactInt(unicodeVersion); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java index 1e7c53c6a2d..fae69eb2514 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringRange.java @@ -33,6 +33,7 @@ public class StringRange { } public static final Comparator COMPARE_INT_ARRAYS = new Comparator() { + @Override public int compare(int[] o1, int[] o2) { int minIndex = Math.min(o1.length, o2.length); for (int i = 0; i < minIndex; ++i) { @@ -69,8 +70,8 @@ public class StringRange { } } // We failed to find continuation. Add what we have and restart - adder.add(start, end == null ? null - : !shorterPairs ? end + adder.add(start, end == null ? null + : !shorterPairs ? end : end.substring(prefixLen, end.length())); } // new possible range @@ -79,8 +80,8 @@ public class StringRange { lastCp = s.codePointBefore(s.length()); prefixLen = s.length() - Character.charCount(lastCp); } - adder.add(start, end == null ? null - : !shorterPairs ? end + adder.add(start, end == null ? null + : !shorterPairs ? 
end : end.substring(prefixLen, end.length())); } else { // not a fast algorithm, but ok for now @@ -88,19 +89,19 @@ public class StringRange { // first sort by lengths Relation lengthToArrays = Relation.of(new TreeMap>(), TreeSet.class); for (String s : source) { - Ranges item = new Ranges(s); + Ranges item = new Ranges(s); lengthToArrays.put(item.size(), item); } // then compact items of each length and emit compacted sets for (Entry> entry : lengthToArrays.keyValuesSet()) { LinkedList compacted = compact(entry.getKey(), entry.getValue()); - for (Ranges ranges : compacted) { + for (Ranges ranges : compacted) { adder.add(ranges.start(), ranges.end(shorterPairs)); } } } } - + /** * Faster but not as good compaction. Only looks at final codepoint. * @param source set of strings @@ -140,6 +141,7 @@ public class StringRange { public boolean equals(Object obj) { return this == obj || (obj != null && obj instanceof Range && compareTo((Range)obj) == 0); } + @Override public int compareTo(Range that) { int diff = min - that.min; if (diff != 0) { @@ -185,7 +187,7 @@ public class StringRange { if (DEBUG) System.out.println(" => " + this); return true; } - + public String start() { StringBuilder result = new StringBuilder(); for (int i = 0; i < ranges.length; ++i) { @@ -215,6 +217,7 @@ public class StringRange { public Integer size() { return ranges.length; } + @Override public int compareTo(Ranges other) { int diff = ranges.length - other.ranges.length; if (diff != 0) { @@ -259,7 +262,7 @@ public class StringRange { add(0, startOffset, startCps, endCps, builder, output); return output; } - + private static void add(int endIndex, int startOffset, int[] starts, int[] ends, StringBuilder builder, Collection output) { int start = starts[endIndex+startOffset]; int end = ends[endIndex]; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TZDBTimeZoneNames.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TZDBTimeZoneNames.java index 2792ddcb299..9676af357d9 100644 --- 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/TZDBTimeZoneNames.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/TZDBTimeZoneNames.java @@ -26,10 +26,10 @@ import com.ibm.icu.util.UResourceBundle; * Yet another TimeZoneNames implementation based on the tz database. * This implementation contains only tz abbreviations (short standard * and daylight names) for each metazone. - * + * * The data file $ICU4C_ROOT/source/data/zone/tzdbNames.txt contains * the metazone - abbreviations mapping data (manually edited). - * + * * Note: The abbreviations in the tz database are not necessarily * unique. For example, parsing abbreviation "IST" is ambiguous * (can be parsed as India Standard Time or Israel Standard Time). @@ -40,7 +40,7 @@ import com.ibm.icu.util.UResourceBundle; public class TZDBTimeZoneNames extends TimeZoneNames { private static final long serialVersionUID = 1L; - private static final ConcurrentHashMap TZDB_NAMES_MAP = + private static final ConcurrentHashMap TZDB_NAMES_MAP = new ConcurrentHashMap(); private static volatile TextTrieMap TZDB_NAMES_TRIE = null; @@ -97,7 +97,7 @@ public class TZDBTimeZoneNames extends TimeZoneNames { */ @Override public String getMetaZoneDisplayName(String mzID, NameType type) { - if (mzID == null || mzID.length() == 0 || + if (mzID == null || mzID.length() == 0 || (type != NameType.SHORT_STANDARD && type != NameType.SHORT_DAYLIGHT)) { return null; } @@ -242,6 +242,7 @@ public class TZDBTimeZoneNames extends TimeZoneNames { * @see com.ibm.icu.impl.TextTrieMap.ResultHandler#handlePrefixMatch(int, * java.util.Iterator) */ + @Override public boolean handlePrefixMatch(int matchLength, Iterator values) { TZDBNameInfo match = null; TZDBNameInfo defaultRegionMatch = null; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java index 631ec50de87..6688d4719f1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java +++ 
b/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java @@ -26,7 +26,7 @@ public class TextTrieMap { /** * Constructs a TextTrieMap object. - * + * * @param ignoreCase true to use simple case insensitive match */ public TextTrieMap(boolean ignoreCase) { @@ -35,7 +35,7 @@ public class TextTrieMap { /** * Adds the text key and its associated object in this object. - * + * * @param text The text. * @param val The value object associated with the text. */ @@ -48,7 +48,7 @@ public class TextTrieMap { /** * Gets an iterator of the objects associated with the * longest prefix matching string key. - * + * * @param text The text to be matched with prefixes. * @return An iterator of the objects associated with * the longest prefix matching matching key, or null @@ -60,13 +60,13 @@ public class TextTrieMap { /** * Gets an iterator of the objects associated with the - * longest prefix matching string key starting at the + * longest prefix matching string key starting at the * specified position. - * + * * @param text The text to be matched with prefixes. * @param start The start index of of the text * @return An iterator of the objects associated with the - * longest prefix matching matching key, or null if no + * longest prefix matching matching key, or null if no * matching entry is found. 
*/ public Iterator get(CharSequence text, int start) { @@ -122,6 +122,7 @@ public class TextTrieMap { /* (non-Javadoc) * @see java.util.Iterator#hasNext() */ + @Override public boolean hasNext() { if (_nextIdx == _text.length() && _remainingChar == null) { return false; @@ -132,6 +133,7 @@ public class TextTrieMap { /* (non-Javadoc) * @see java.util.Iterator#next() */ + @Override public Character next() { if (_nextIdx == _text.length() && _remainingChar == null) { return null; @@ -161,6 +163,7 @@ public class TextTrieMap { /* (non-Javadoc) * @see java.util.Iterator#remove() */ + @Override public void remove() { throw new UnsupportedOperationException("remove() not supproted"); } @@ -184,7 +187,7 @@ public class TextTrieMap { public interface ResultHandler { /** * Handles a prefix key match - * + * * @param matchLength Matched key's length * @param values An iterator of the objects associated with the matched key * @return Return true to continue the search in the trie, false to quit. @@ -196,6 +199,7 @@ public class TextTrieMap { private Iterator matches = null; private int length = 0; + @Override public boolean handlePrefixMatch(int matchLength, Iterator values) { if (matchLength > length) { length = matchLength; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java index 51ee91686d5..7e0c18f3be4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneAdapter.java @@ -30,16 +30,16 @@ import com.ibm.icu.util.TimeZone; * @since ICU 2.8 */ public class TimeZoneAdapter extends java.util.TimeZone { - + // Generated by serialver from JDK 1.4.1_01 static final long serialVersionUID = -2040072218820018557L; - + /** * The contained com.ibm.icu.util.TimeZone object. Must not be null. * We delegate all methods to this object. 
*/ private TimeZone zone; - + /** * Given a java.util.TimeZone, wrap it in the appropriate adapter * subclass of com.ibm.icu.util.TimeZone and return the adapter. @@ -66,14 +66,16 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * TimeZone API; calls through to wrapped time zone. */ + @Override public void setID(String ID) { super.setID(ID); zone.setID(ID); - } + } /** * TimeZone API; calls through to wrapped time zone. */ + @Override public boolean hasSameRules(java.util.TimeZone other) { return other instanceof TimeZoneAdapter && zone.hasSameRules(((TimeZoneAdapter)other).zone); @@ -82,6 +84,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * TimeZone API; calls through to wrapped time zone. */ + @Override public int getOffset(int era, int year, int month, int day, int dayOfWeek, int millis) { return zone.getOffset(era, year, month, day, dayOfWeek, millis); @@ -90,6 +93,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * TimeZone API; calls through to wrapped time zone. */ + @Override public int getRawOffset() { return zone.getRawOffset(); } @@ -97,6 +101,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * TimeZone API; calls through to wrapped time zone. */ + @Override public void setRawOffset(int offsetMillis) { zone.setRawOffset(offsetMillis); } @@ -104,6 +109,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * TimeZone API; calls through to wrapped time zone. */ + @Override public boolean useDaylightTime() { return zone.useDaylightTime(); } @@ -111,6 +117,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * TimeZone API; calls through to wrapped time zone. */ + @Override public boolean inDaylightTime(Date date) { return zone.inDaylightTime(date); } @@ -118,6 +125,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * Boilerplate API; calls through to wrapped object. 
*/ + @Override public Object clone() { return new TimeZoneAdapter((TimeZone)zone.clone()); } @@ -125,6 +133,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * Boilerplate API; calls through to wrapped object. */ + @Override public synchronized int hashCode() { return zone.hashCode(); } @@ -132,6 +141,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { /** * Boilerplate API; calls through to wrapped object. */ + @Override public boolean equals(Object obj) { if (obj instanceof TimeZoneAdapter) { obj = ((TimeZoneAdapter) obj).zone; @@ -143,6 +153,7 @@ public class TimeZoneAdapter extends java.util.TimeZone { * Returns a string representation of this object. * @return a string representation of this object. */ + @Override public String toString() { return "TimeZoneAdapter: " + zone.toString(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneGenericNames.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneGenericNames.java index a00ff477b2e..689905b6d7c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneGenericNames.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/TimeZoneGenericNames.java @@ -79,7 +79,7 @@ public class TimeZoneGenericNames implements Serializable, Freezable mzIDs = _tznames.getAvailableMetaZoneIDs(tzCanonicalID); @@ -510,7 +510,7 @@ public class TimeZoneGenericNames implements Serializable, Freezable values) { while (values.hasNext()) { NameInfo info = values.next(); @@ -847,7 +848,7 @@ public class TimeZoneGenericNames implements Serializable, FreezableA trie is a kind of compressed, serializable table of values + *

A trie is a kind of compressed, serializable table of values * associated with Unicode code points (0..0x10ffff).

- *

This class defines the basic structure of a trie and provides methods + *

This class defines the basic structure of a trie and provides methods * to retrieve the offsets to the actual data.

*

Data will be the form of an array of basic types, char or int.

*

The actual data format will have to be specified by the user in the @@ -35,9 +35,9 @@ import com.ibm.icu.text.UTF16; * to the fromOffsetTrail() methods. * To handle such supplementary codepoints, some offset information are kept * in the data.

- *

Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve + *

Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve * that offset from the folded value for the lead surrogate unit.

- *

For examples of use, see com.ibm.icu.impl.CharTrie or + *

For examples of use, see com.ibm.icu.impl.CharTrie or * com.ibm.icu.impl.IntTrie.

* @author synwee * @see com.ibm.icu.impl.CharTrie @@ -47,36 +47,37 @@ import com.ibm.icu.text.UTF16; public abstract class Trie { // public class declaration ---------------------------------------- - + /** * Character data in com.ibm.impl.Trie have different user-specified format * for different purposes. * This interface specifies methods to be implemented in order for - * com.ibm.impl.Trie, to surrogate offset information encapsulated within + * com.ibm.impl.Trie, to surrogate offset information encapsulated within * the data. */ public static interface DataManipulate { /** - * Called by com.ibm.icu.impl.Trie to extract from a lead surrogate's + * Called by com.ibm.icu.impl.Trie to extract from a lead surrogate's * data * the index array offset of the indexes for that lead surrogate. * @param value data value for a surrogate from the trie, including the * folding offset * @return data offset or 0 if there is no data for the lead surrogate */ - public int getFoldingOffset(int value); + public int getFoldingOffset(int value); } // default implementation private static class DefaultGetFoldingOffset implements DataManipulate { + @Override public int getFoldingOffset(int value) { - return value; + return value; } } // public methods -------------------------------------------------- - + /** * Determines if this trie has a linear latin 1 array * @return true if this trie has a linear latin 1 array, false otherwise @@ -85,7 +86,7 @@ public abstract class Trie { return m_isLatin1Linear_; } - + /** * Checks if the argument Trie has the same data as this Trie. * Attributes are checked but not the index data. 
@@ -94,7 +95,8 @@ public abstract class Trie * otherwise */ ///CLOVER:OFF - public boolean equals(Object other) + @Override + public boolean equals(Object other) { if (other == this) { return true; @@ -108,16 +110,17 @@ public abstract class Trie && m_dataLength_ == othertrie.m_dataLength_ && Arrays.equals(m_index_, othertrie.m_index_); } - + + @Override public int hashCode() { assert false : "hashCode not designed"; return 42; } ///CLOVER:ON - + /** - * Gets the serialized data file size of the Trie. This is used during - * trie data reading for size checking purposes. + * Gets the serialized data file size of the Trie. This is used during + * trie data reading for size checking purposes. * @return size size of serialized trie data file in terms of the number * of bytes */ @@ -169,7 +172,7 @@ public abstract class Trie * Trie constructor * @param index array to be used for index * @param options used by the trie - * @param dataManipulate object containing the information to parse the + * @param dataManipulate object containing the information to parse the * trie data */ protected Trie(char index[], int options, DataManipulate dataManipulate) @@ -231,7 +234,7 @@ public abstract class Trie * Surrogate mask to use when shifting offset to retrieve supplementary * values */ - protected static final int SURROGATE_MASK_ = 0x3FF; + protected static final int SURROGATE_MASK_ = 0x3FF; /** * Index or UTF16 characters */ @@ -242,17 +245,17 @@ public abstract class Trie */ protected DataManipulate m_dataManipulate_; /** - * Start index of the data portion of the trie. CharTrie combines - * index and data into a char array, so this is used to indicate the + * Start index of the data portion of the trie. CharTrie combines + * index and data into a char array, so this is used to indicate the * initial offset to the data portion. * Note this index always points to the initial value. 
*/ protected int m_dataOffset_; /** - * Length of the data array + * Length of the data array */ protected int m_dataLength_; - + // protected methods ----------------------------------------------- /** @@ -262,20 +265,20 @@ public abstract class Trie * @return offset to data */ protected abstract int getSurrogateOffset(char lead, char trail); - + /** * Gets the value at the argument index * @param index value at index will be retrieved - * @return 32 bit value + * @return 32 bit value */ protected abstract int getValue(int index); /** * Gets the default initial value - * @return 32 bit value + * @return 32 bit value */ protected abstract int getInitialValue(); - + /** * Gets the offset to the data which the index ch after variable offset * points to. @@ -292,11 +295,11 @@ public abstract class Trie */ protected final int getRawOffset(int offset, char ch) { - return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)] - << INDEX_STAGE_2_SHIFT_) + return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)] + << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); } - + /** * Gets the offset to data which the BMP character points to * Treats a lead surrogate as a normal code point. @@ -305,10 +308,10 @@ public abstract class Trie */ protected final int getBMPOffset(char ch) { - return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE - && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) + return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE + && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) ? getRawOffset(LEAD_INDEX_OFFSET_, ch) - : getRawOffset(0, ch); + : getRawOffset(0, ch); // using a getRawOffset(ch) makes no diff } @@ -343,14 +346,14 @@ public abstract class Trie return getRawOffset(0, (char)ch); } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { // BMP codepoint - return getBMPOffset((char)ch); + return getBMPOffset((char)ch); } else if (ch <= UCharacter.MAX_VALUE) { // look at the construction of supplementary characters // trail forms the ends of it. 
- return getSurrogateOffset(UTF16.getLeadSurrogate(ch), + return getSurrogateOffset(UTF16.getLeadSurrogate(ch), (char)(ch & SURROGATE_MASK_)); } else { - // return -1 if there is an error, in this case we return + // return -1 if there is an error, in this case we return return -1; } } @@ -410,12 +413,12 @@ public abstract class Trie private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF; protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4; protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100; - + /** * Flag indicator for Latin quick access data block */ private boolean m_isLatin1Linear_; - + /** *

Trie options field.

*

options bit field:
@@ -425,9 +428,9 @@ public abstract class Trie * 3..0 INDEX_STAGE_2_SHIFT // 1..9
*/ private int m_options_; - + // private methods --------------------------------------------------- - + /** * Authenticates raw data header. * Checking the header information, signature and options. @@ -443,7 +446,7 @@ public abstract class Trie return false; } - if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) != + if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) != INDEX_STAGE_1_SHIFT_ || ((m_options_ >> HEADER_OPTIONS_INDEX_SHIFT_) & HEADER_OPTIONS_SHIFT_MASK_) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java index 1141c4e4965..d8924095f1e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java @@ -26,7 +26,7 @@ import java.util.NoSuchElementException; * character properties. * * This is the second common version of a Unicode trie (hence the name Trie2). - * + * */ public abstract class Trie2 implements Iterable { @@ -208,11 +208,11 @@ public abstract class Trie2 implements Iterable { byte sig[] = new byte[4]; int read = is.read(sig); is.reset(); - + if (read != sig.length) { return 0; } - + if (sig[0]=='T' && sig[1]=='r' && sig[2]=='i' && sig[3]=='e') { return 1; } @@ -238,7 +238,7 @@ public abstract class Trie2 implements Iterable { */ abstract public int get(int codePoint); - + /** * Get the trie value for a UTF-16 code unit. * @@ -246,15 +246,15 @@ public abstract class Trie2 implements Iterable { * range, one for lead surrogates, which is the value that will be * returned by this function, and a second value that is returned * by Trie2.get(). - * + * * For code units outside of the lead surrogate range, this function * returns the same result as Trie2.get(). - * + * * This function, together with the alternate value for lead surrogates, * makes possible very efficient processing of UTF-16 strings without * first converting surrogate pairs to their corresponding 32 bit code point * values. 
- * + * * At build-time, enumerate the contents of the Trie2 to see if there * is non-trivial (non-initialValue) data for any of the supplementary * code points associated with a lead surrogate. @@ -263,33 +263,34 @@ public abstract class Trie2 implements Iterable { * * At runtime, use Trie2.getFromU16SingleLead(). If there is non-trivial * data and the code unit is a lead surrogate, then check if a trail surrogate - * follows. If so, assemble the supplementary code point and look up its value + * follows. If so, assemble the supplementary code point and look up its value * with Trie2.get(); otherwise reset the lead * surrogate's value or do a code point lookup for it. * * If there is only trivial data for lead and trail surrogates, then processing * can often skip them. For example, in normalization or case mapping * all characters that do not have any mappings are simply copied as is. - * + * * @param c the code point or lead surrogate value. * @return the value */ abstract public int getFromU16SingleLead(char c); - + /** * Equals function. Two Tries are equal if their contents are equal. - * The type need not be the same, so a Trie2Writable will be equal to + * The type need not be the same, so a Trie2Writable will be equal to * (read-only) Trie2_16 or Trie2_32 so long as they are storing the same values. 
- * + * */ + @Override public final boolean equals(Object other) { if(!(other instanceof Trie2)) { return false; } Trie2 OtherTrie = (Trie2)other; Range rangeFromOther; - + Iterator otherIter = OtherTrie.iterator(); for (Trie2.Range rangeFromThis: this) { if (otherIter.hasNext() == false) { @@ -303,16 +304,17 @@ public abstract class Trie2 implements Iterable { if (otherIter.hasNext()) { return false; } - + if (errorValue != OtherTrie.errorValue || initialValue != OtherTrie.initialValue) { return false; } - + return true; } - - + + + @Override public int hashCode() { if (fHash == 0) { int hash = initHash(); @@ -326,11 +328,11 @@ public abstract class Trie2 implements Iterable { } return fHash; } - + /** * When iterating over the contents of a Trie2, Elements of this type are produced. - * The iterator will return one item for each contiguous range of codepoints having the same value. - * + * The iterator will return one item for each contiguous range of codepoints having the same value. + * * When iterating, the same Trie2EnumRange object will be reused and returned for each range. * If you need to retain complete iteration results, clone each returned Trie2EnumRange, * or save the range in some other way, before advancing to the next iteration step. @@ -340,7 +342,8 @@ public abstract class Trie2 implements Iterable { public int endCodePoint; // Inclusive. 
public int value; public boolean leadSurrogate; - + + @Override public boolean equals(Object other) { if (other == null || !(other.getClass().equals(getClass()))) { return false; @@ -348,11 +351,12 @@ public abstract class Trie2 implements Iterable { Range tother = (Range)other; return this.startCodePoint == tother.startCodePoint && this.endCodePoint == tother.endCodePoint && - this.value == tother.value && + this.value == tother.value && this.leadSurrogate == tother.leadSurrogate; } - - + + + @Override public int hashCode() { int h = initHash(); h = hashUChar32(h, startCodePoint); @@ -362,32 +366,34 @@ public abstract class Trie2 implements Iterable { return h; } } - - + + /** * Create an iterator over the value ranges in this Trie2. * Values from the Trie2 are not remapped or filtered, but are returned as they * are stored in the Trie2. - * + * * @return an Iterator */ + @Override public Iterator iterator() { return iterator(defaultValueMapper); } - + private static ValueMapper defaultValueMapper = new ValueMapper() { - public int map(int in) { + @Override + public int map(int in) { return in; } }; - + /** * Create an iterator over the value ranges from this Trie2. * Values from the Trie2 are passed through a caller-supplied remapping function, * and it is the remapped values that determine the ranges that * will be produced by the iterator. - * - * + * + * * @param mapper provides a function to remap values obtained from the Trie2. * @return an Iterator */ @@ -395,7 +401,7 @@ public abstract class Trie2 implements Iterable { return new Trie2Iterator(mapper); } - + /** * Create an iterator over the Trie2 values for the 1024=0x400 code points * corresponding to a given lead surrogate. @@ -435,10 +441,10 @@ public abstract class Trie2 implements Iterable { * be used to remap the values from the Trie2. The remapped values will be used * both in determining the ranges of codepoints and as the value to be returned * for each range. 
- * + * * Example of use, with an anonymous subclass of TrieValueMapper: - * - * + * + * * ValueMapper m = new ValueMapper() { * int map(int in) {return in & 0x1f;}; * } @@ -446,12 +452,12 @@ public abstract class Trie2 implements Iterable { * Trie2EnumRange r = i.next(); * ... // Do something with the range r. * } - * + * */ public interface ValueMapper { public int map(int originalVal); } - + /** * Serialize a trie2 Header and Index onto an OutputStream. This is @@ -459,12 +465,12 @@ public abstract class Trie2 implements Iterable { * @param dos the stream to which the serialized Trie2 data will be written. * @return the number of bytes written. */ - protected int serializeHeader(DataOutputStream dos) throws IOException { + protected int serializeHeader(DataOutputStream dos) throws IOException { // Write the header. It is already set and ready to use, having been // created when the Trie2 was unserialized or when it was frozen. int bytesWritten = 0; - - dos.writeInt(header.signature); + + dos.writeInt(header.signature); dos.writeShort(header.options); dos.writeShort(header.indexLength); dos.writeShort(header.shiftedDataLength); @@ -472,36 +478,36 @@ public abstract class Trie2 implements Iterable { dos.writeShort(header.dataNullOffset); dos.writeShort(header.shiftedHighStart); bytesWritten += 16; - + // Write the index int i; for (i=0; i< header.indexLength; i++) { dos.writeChar(index[i]); } - bytesWritten += header.indexLength; - return bytesWritten; + bytesWritten += header.indexLength; + return bytesWritten; } - - + + /** * Struct-like class for holding the results returned by a UTrie2 CharSequence iterator. * The iteration walks over a CharSequence, and for each Unicode code point therein * returns the character and its associated Trie2 value. */ - public static class CharSequenceValues { + public static class CharSequenceValues { /** string index of the current code point. */ - public int index; + public int index; /** The code point at index. 
*/ - public int codePoint; + public int codePoint; /** The Trie2 value for the current code point */ - public int value; + public int value; } - + /** * Create an iterator that will produce the values from the Trie2 for * the sequence of code points in an input text. - * + * * @param text A text string to be iterated over. * @param index The starting iteration position within the input text. * @return the CharSequenceIterator @@ -509,17 +515,17 @@ public abstract class Trie2 implements Iterable { public CharSequenceIterator charSequenceIterator(CharSequence text, int index) { return new CharSequenceIterator(text, index); } - + // TODO: Survey usage of the equivalent of CharSequenceIterator in ICU4C // and if there is none, remove it from here. // Don't waste time testing and maintaining unused code. - + /** * An iterator that operates over an input CharSequence, and for each Unicode code point * in the input returns the associated value from the Trie2. - * + * * The iterator can move forwards or backwards, and can be reset to an arbitrary index. - * + * * Note that Trie2_16 and Trie2_32 subclass Trie2.CharSequenceIterator. This is done * only for performance reasons. It does require that any changes made here be propagated * into the corresponding code in the subclasses. @@ -528,36 +534,38 @@ public abstract class Trie2 implements Iterable { /** * Internal constructor. 
*/ - CharSequenceIterator(CharSequence t, int index) { + CharSequenceIterator(CharSequence t, int index) { text = t; textLength = text.length(); set(index); } - + private CharSequence text; private int textLength; private int index; private Trie2.CharSequenceValues fResults = new Trie2.CharSequenceValues(); - - + + public void set(int i) { if (i < 0 || i > textLength) { throw new IndexOutOfBoundsException(); } index = i; } - - + + + @Override public final boolean hasNext() { return index0; } - + + @Override public Trie2.CharSequenceValues next() { int c = Character.codePointAt(text, index); int val = get(c); @@ -568,11 +576,11 @@ public abstract class Trie2 implements Iterable { index++; if (c >= 0x10000) { index++; - } + } return fResults; } - + public Trie2.CharSequenceValues previous() { int c = Character.codePointBefore(text, index); int val = get(c); @@ -585,49 +593,50 @@ public abstract class Trie2 implements Iterable { fResults.value = val; return fResults; } - - /** + + /** * Iterator.remove() is not supported by Trie2.CharSequenceIterator. * @throws UnsupportedOperationException Always thrown because this operation is not supported * @see java.util.Iterator#remove() */ + @Override public void remove() { - throw new UnsupportedOperationException("Trie2.CharSequenceIterator does not support remove()."); + throw new UnsupportedOperationException("Trie2.CharSequenceIterator does not support remove()."); } } - - + + //-------------------------------------------------------------------------------- // // Below this point are internal implementation items. No further public API. // //-------------------------------------------------------------------------------- - - + + /** * Selectors for the width of a UTrie2 data value. - */ + */ enum ValueWidth { BITS_16, BITS_32 } - + /** * Trie2 data structure in serialized form: * * UTrie2Header header; * uint16_t index[header.index2Length]; * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] 
- * + * * For Java, this is read from the stream into an instance of UTrie2Header. * (The C version just places a struct over the raw serialized data.) - * + * * @internal */ static class UTrie2Header { /** "Tri2" in big-endian US-ASCII (0x54726932) */ int signature; - + /** * options bit field (uint16_t): * 15.. 4 reserved (0) @@ -637,7 +646,7 @@ public abstract class Trie2 implements Iterable { /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH (uint16_t) */ int indexLength; - + /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT (uint16_t) */ int shiftedDataLength; @@ -650,7 +659,7 @@ public abstract class Trie2 implements Iterable { */ int shiftedHighStart; } - + // // Data members of UTrie2. // @@ -658,7 +667,7 @@ public abstract class Trie2 implements Iterable { char index[]; // Index array. Includes data for 16 bit Tries. int data16; // Offset to data portion of the index array, if 16 bit data. // zero if 32 bit data. - int data32[]; // NULL if 16b data is used via index + int data32[]; // NULL if 16b data is used via index int indexLength; int dataLength; @@ -671,25 +680,25 @@ public abstract class Trie2 implements Iterable { /* Start of the last range which ends at U+10ffff, and its value. */ int highStart; int highValueIndex; - + int dataNullOffset; - + int fHash; // Zero if not yet computed. // Shared by Trie2Writable, Trie2_16, Trie2_32. // Thread safety: if two racing threads compute // the same hash on a frozen Trie2, no damage is done. - + /** * Trie2 constants, defining shift widths, index array lengths, etc. * * These are needed for the runtime macros but users can treat these as * implementation details and skip to the actual public API further below. */ - + static final int UTRIE2_OPTIONS_VALUE_BITS_MASK=0x000f; - - + + /** Shift size for getting the index-1 table offset. 
*/ static final int UTRIE2_SHIFT_1=6+5; @@ -710,19 +719,19 @@ public abstract class Trie2 implements Iterable { /** Number of code points per index-1 table entry. 2048=0x800 */ static final int UTRIE2_CP_PER_INDEX_1_ENTRY=1< { * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY. */ static final int UTRIE2_INDEX_SHIFT=2; - + /** The alignment size of a data block. Also the granularity for compaction. */ static final int UTRIE2_DATA_GRANULARITY=1<>UTRIE2_SHIFT_2. */ static final int UTRIE2_INDEX_2_OFFSET=0; - + /** * The part of the index-2 table for U+D800..U+DBFF stores values for * lead surrogate code _units_ not code _points_. @@ -750,17 +759,17 @@ public abstract class Trie2 implements Iterable { */ static final int UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2; static final int UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2; - + /** Count the lengths of both BMP pieces. 2080=0x820 */ static final int UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH; - + /** * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2. */ static final int UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH; static final int UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6; /* U+0800 is the first code point after 2-byte UTF-8 */ - + /** * The index-1 table, only used for supplementary code points, at offset 2112=0x840. * Variable length, for code points up to highStart, where the last single-value range starts. @@ -775,22 +784,22 @@ public abstract class Trie2 implements Iterable { */ static final int UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH; static final int UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1; - + /* * Fixed layout of the first part of the data array. ----------------------- * Starts with 4 blocks (128=0x80 entries) for ASCII. 
*/ - + /** * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. * Used with linear access for single bytes 0..0xbf for simple error handling. * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. */ static final int UTRIE2_BAD_UTF8_DATA_OFFSET=0x80; - + /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */ static final int UTRIE2_DATA_START_OFFSET=0xc0; - + /* Building a Trie2 ---------------------------------------------------------- */ /* @@ -831,14 +840,14 @@ public abstract class Trie2 implements Iterable { */ static final int UNEWTRIE2_MAX_DATA_LENGTH = (0x110000+0x40+0x40+0x400); - - - /** + + + /** * Implementation class for an iterator over a Trie2. - * + * * Iteration over a Trie2 first returns all of the ranges that are indexed by code points, * then returns the special alternate values for the lead surrogates - * + * * @internal */ class Trie2Iterator implements Iterator { @@ -850,7 +859,7 @@ public abstract class Trie2 implements Iterable { limitCP = 0x110000; doLeadSurrogates = true; } - + // An alternate constructor that configures the iterator to cover only the // code points corresponding to a particular Lead Surrogate value. Trie2Iterator(char leadSurrogate, ValueMapper vm) { @@ -863,11 +872,12 @@ public abstract class Trie2 implements Iterable { doLeadSurrogates = false; // Do not iterate over lead the special lead surrogate // values after completing iteration over code points. } - + /** * The main next() function for Trie2 iterators - * + * */ + @Override public Range next() { if (!hasNext()) { throw new NoSuchElementException(); @@ -881,7 +891,7 @@ public abstract class Trie2 implements Iterable { int endOfRange = 0; int val = 0; int mappedVal = 0; - + if (doingCodePoints) { // Iteration over code point values. val = get(nextStart); @@ -901,7 +911,7 @@ public abstract class Trie2 implements Iterable { } } else { // Iteration over the alternate lead surrogate values. 
- val = getFromU16SingleLead((char)nextStart); + val = getFromU16SingleLead((char)nextStart); mappedVal = mapper.map(val); endOfRange = rangeEndLS((char)nextStart); // Loop once for each range in the Trie2 with the same raw (unmapped) value. @@ -921,34 +931,36 @@ public abstract class Trie2 implements Iterable { returnValue.endCodePoint = endOfRange; returnValue.value = mappedVal; returnValue.leadSurrogate = !doingCodePoints; - nextStart = endOfRange+1; + nextStart = endOfRange+1; return returnValue; } - + /** - * + * */ + @Override public boolean hasNext() { return doingCodePoints && (doLeadSurrogates || nextStart < limitCP) || nextStart < 0xdc00; } - + + @Override public void remove() { throw new UnsupportedOperationException(); } - - + + /** * Find the last lead surrogate in a contiguous range with the * same Trie2 value as the input character. - * + * * Use the alternate Lead Surrogate values from the Trie2, * not the code-point values. - * + * * Note: Trie2_16 and Trie2_32 override this implementation with optimized versions, * meaning that the implementation here is only being used with * Trie2Writable. The code here is logically correct with any type * of Trie2, however. - * + * * @param c The character to begin with. * @return The last contiguous character with the same value. */ @@ -956,7 +968,7 @@ public abstract class Trie2 implements Iterable { if (startingLS >= 0xdbff) { return 0xdbff; } - + int c; int val = getFromU16SingleLead(startingLS); for (c = startingLS+1; c <= 0x0dbff; c++) { @@ -966,7 +978,7 @@ public abstract class Trie2 implements Iterable { } return c-1; } - + // // Iteration State Variables // @@ -977,27 +989,27 @@ public abstract class Trie2 implements Iterable { // The upper limit for the last normal range to be returned. Normally 0x110000, but // may be lower when iterating over the code points for a single lead surrogate. private int limitCP; - + // True while iterating over the the Trie2 values for code points. 
// False while iterating over the alternate values for lead surrogates. private boolean doingCodePoints = true; - + // True if the iterator should iterate the special values for lead surrogates in // addition to the normal values for code points. private boolean doLeadSurrogates = true; } - + /** * Find the last character in a contiguous range of characters with the * same Trie2 value as the input character. - * + * * @param c The character to begin with. * @return The last contiguous character with the same value. */ int rangeEnd(int start, int limitp, int val) { int c; int limit = Math.min(highStart, limitp); - + for (c = start+1; c < limit; c++) { if (get(c) != val) { break; @@ -1008,28 +1020,28 @@ public abstract class Trie2 implements Iterable { } return c - 1; } - - + + // // Hashing implementation functions. FNV hash. Respected public domain algorithm. // private static int initHash() { return 0x811c9DC5; // unsigned 2166136261 } - + private static int hashByte(int h, int b) { h = h * 16777619; h = h ^ b; return h; } - + private static int hashUChar32(int h, int c) { h = Trie2.hashByte(h, c & 255); h = Trie2.hashByte(h, (c>>8) & 255); h = Trie2.hashByte(h, c>>16); return h; } - + private static int hashInt(int h, int i) { h = Trie2.hashByte(h, i & 255); h = Trie2.hashByte(h, (i>>8) & 255); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java index 92ba2045fbb..5b4bd4c38bd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java @@ -23,38 +23,38 @@ import com.ibm.icu.util.RangeValueIterator; *

Result of each iteration contains the interval of codepoints that have * the same value type and the value type itself.

*

The comparison of each codepoint value is done via extract(), which the - * default implementation is to return the value as it is.

- *

Method extract() can be overwritten to perform manipulations on + * default implementation is to return the value as it is.

+ *

Method extract() can be overwritten to perform manipulations on * codepoint values in order to perform specialized comparison.

*

TrieIterator is designed to be a generic iterator for the CharTrie - * and the IntTrie, hence to accommodate both types of data, the return + * and the IntTrie, hence to accommodate both types of data, the return * result will be in terms of int (32 bit) values.

*

See com.ibm.icu.text.UCharacterTypeIterator for examples of use.

*

Notes for porting utrie_enum from icu4c to icu4j:
* Internally, icu4c's utrie_enum performs all iterations in its body. In Java - * sense, the caller will have to pass a object with a callback function - * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, - * uint32_t value) into utrie_enum. utrie_enum will then find ranges of - * codepoints with the same value as determined by - * UTrieEnumValue(const void *context, uint32_t value). for each range, + * sense, the caller will have to pass a object with a callback function + * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, + * uint32_t value) into utrie_enum. utrie_enum will then find ranges of + * codepoints with the same value as determined by + * UTrieEnumValue(const void *context, uint32_t value). for each range, * utrie_enum calls the callback function to perform a task. In this way, * icu4c performs the iteration within utrie_enum. * To follow the JDK model, icu4j is slightly different from icu4c. * Instead of requesting the caller to implement an object for a callback. * The caller will have to implement a subclass of TrieIterator, fleshing out - * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, - * the caller will have to code his own iteration and flesh out the task + * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, + * the caller will have to code his own iteration and flesh out the task * (equivalent to UTrieEnumRange) to be performed in the iteration loop. *

*

There are basically 3 usage scenarios for porting:

- *

1) UTrieEnumValue is the only implemented callback then just implement a - * subclass of TrieIterator and override the extract(int) method. The + *

1) UTrieEnumValue is the only implemented callback then just implement a + * subclass of TrieIterator and override the extract(int) method. The * extract(int) method is analogus to UTrieEnumValue callback. *

- *

2) UTrieEnumValue and UTrieEnumRange both are implemented then implement + *

2) UTrieEnumValue and UTrieEnumRange both are implemented then implement * a subclass of TrieIterator, override the extract method and iterate, e.g *

- *

utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, + *

utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, * set);
* In Java :
*

@@ -66,14 +66,14 @@ import com.ibm.icu.util.RangeValueIterator;
  *         // port the implementation of _enumPropertyStartsValue here
  *     }
  * }
- * .... 
+ * ....
  * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
  * while(fcdIter.next(result)) {
  *     // port the implementation of _enumPropertyStartsRange
  * }
  * 
*

- *

3) UTrieEnumRange is the only implemented callback then just implement + *

3) UTrieEnumRange is the only implemented callback then just implement * the while loop, when utrie_enum is called *

  * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
@@ -90,7 +90,7 @@ public class TrieIterator implements RangeValueIterator
 
 {
     // public constructor ---------------------------------------------
-    
+
     /**
     * TrieEnumeration constructor
     * @param trie to be used
@@ -107,19 +107,20 @@ public class TrieIterator implements RangeValueIterator
         m_initialValue_     = extract(m_trie_.getInitialValue());
         reset();
     }
-    
+
     // public methods -------------------------------------------------
-    
+
     /**
-    * 

Returns true if we are not at the end of the iteration, false + *

Returns true if we are not at the end of the iteration, false * otherwise.

- *

The next set of codepoints with the same value type will be + *

The next set of codepoints with the same value type will be * calculated during this call and returned in the arguement element.

- * @param element return result + * @param element return result * @return true if we are not at the end of the iteration, false otherwise. * @exception NoSuchElementException - if no more elements exist. * @see com.ibm.icu.util.RangeValueIterator.Element */ + @Override public final boolean next(Element element) { if (m_nextCodepoint_ > UCharacter.MAX_VALUE) { @@ -128,14 +129,15 @@ public class TrieIterator implements RangeValueIterator if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE && calculateNextBMPElement(element)) { return true; - } + } calculateNextSupplementaryElement(element); return true; } - + /** * Resets the iterator to the beginning of the iteration */ + @Override public final void reset() { m_currentCodepoint_ = 0; @@ -151,9 +153,9 @@ public class TrieIterator implements RangeValueIterator m_nextBlockIndex_ = 0; m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_; } - + // protected methods ---------------------------------------------- - + /** * Called by next() to extracts a 32 bit value from a trie value * used for comparison. @@ -167,30 +169,30 @@ public class TrieIterator implements RangeValueIterator { return value; } - + // private methods ------------------------------------------------ - + /** * Set the result values * @param element return result object - * @param start codepoint of range + * @param start codepoint of range * @param limit (end + 1) codepoint of range * @param value common value of range */ - private final void setResult(Element element, int start, int limit, + private final void setResult(Element element, int start, int limit, int value) { element.start = start; element.limit = limit; element.value = value; } - + /** * Finding the next element. - * This method is called just before returning the result of + * This method is called just before returning the result of * next(). * We always store the next element before it is requested. 
- * In the case that we have to continue calculations into the + * In the case that we have to continue calculations into the * supplementary planes, a false will be returned. * @param element return result object * @return true if the next range is found, false if we have to proceed to @@ -203,11 +205,11 @@ public class TrieIterator implements RangeValueIterator m_nextCodepoint_ ++; m_nextBlockIndex_ ++; if (!checkBlockDetail(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); return true; } - // synwee check that next block index == 0 here + // synwee check that next block index == 0 here // enumerate BMP - the main loop enumerates data blocks while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) { // because of the way the character is split to form the index @@ -224,10 +226,10 @@ public class TrieIterator implements RangeValueIterator } else { m_nextIndex_ ++; } - + m_nextBlockIndex_ = 0; if (!checkBlock(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); return true; } @@ -248,9 +250,9 @@ public class TrieIterator implements RangeValueIterator * lower bound of the next element, in calculateNextBMP() it gets set * at the start of any loop, where-else, in calculateNextSupplementary() * since m_currentCodepoint_ already contains the lower bound of the - * next element (passed down from calculateNextBMP()), we keep it till + * next element (passed down from calculateNextBMP()), we keep it till * the end before resetting it to the new value. - * Note, if there are no more iterations, it will never get to here. + * Note, if there are no more iterations, it will never get to here. * Blocked out by next(). 
* @param element return result object */ @@ -259,13 +261,13 @@ public class TrieIterator implements RangeValueIterator int currentValue = m_nextValue_; m_nextCodepoint_ ++; m_nextBlockIndex_ ++; - - if (UTF16.getTrailSurrogate(m_nextCodepoint_) - != UTF16.TRAIL_SURROGATE_MIN_VALUE) { + + if (UTF16.getTrailSurrogate(m_nextCodepoint_) + != UTF16.TRAIL_SURROGATE_MIN_VALUE) { // this piece is only called when we are in the middle of a lead // surrogate block if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); m_currentCodepoint_ = m_nextCodepoint_; return; @@ -274,7 +276,7 @@ public class TrieIterator implements RangeValueIterator m_nextIndex_ ++; m_nextTrailIndexOffset_ ++; if (!checkTrailBlock(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); m_currentCodepoint_ = m_nextCodepoint_; return; @@ -284,8 +286,8 @@ public class TrieIterator implements RangeValueIterator // enumerate supplementary code points while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) { // lead surrogate access - final int leadBlock = - m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << + final int leadBlock = + m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << Trie.INDEX_STAGE_2_SHIFT_; if (leadBlock == m_trie_.m_dataOffset_) { // no entries for a whole block of lead surrogates @@ -293,7 +295,7 @@ public class TrieIterator implements RangeValueIterator m_nextValue_ = m_initialValue_; m_nextBlock_ = leadBlock; // == m_trie_.m_dataOffset_ m_nextBlockIndex_ = 0; - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); m_currentCodepoint_ = m_nextCodepoint_; return; @@ -302,7 +304,7 @@ public class TrieIterator implements RangeValueIterator nextLead += 
DATA_BLOCK_LENGTH_; // number of total affected supplementary codepoints in one // block - // this is not a simple addition of + // this is not a simple addition of // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider // that we might have moved some of the codepoints m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE); @@ -314,7 +316,7 @@ public class TrieIterator implements RangeValueIterator } // enumerate trail surrogates for this lead surrogate m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( - m_trie_.getValue(leadBlock + + m_trie_.getValue(leadBlock + (nextLead & Trie.INDEX_STAGE_3_MASK_))); if (m_nextIndex_ <= 0) { // no data for this lead surrogate @@ -322,7 +324,7 @@ public class TrieIterator implements RangeValueIterator m_nextValue_ = m_initialValue_; m_nextBlock_ = m_trie_.m_dataOffset_; m_nextBlockIndex_ = 0; - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); m_currentCodepoint_ = m_nextCodepoint_; return; @@ -331,20 +333,20 @@ public class TrieIterator implements RangeValueIterator } else { m_nextTrailIndexOffset_ = 0; if (!checkTrailBlock(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, + setResult(element, m_currentCodepoint_, m_nextCodepoint_, currentValue); m_currentCodepoint_ = m_nextCodepoint_; return; } - } + } nextLead ++; } // deliver last range - setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, + setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, currentValue); - } - + } + /** * Internal block value calculations * Performs calculations on a data block to find codepoints in m_nextBlock_ @@ -360,7 +362,7 @@ public class TrieIterator implements RangeValueIterator private final boolean checkBlockDetail(int currentValue) { while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) { - m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ + + m_nextValue_ = 
extract(m_trie_.getValue(m_nextBlock_ + m_nextBlockIndex_)); if (m_nextValue_ != currentValue) { return false; @@ -370,11 +372,11 @@ public class TrieIterator implements RangeValueIterator } return true; } - + /** * Internal block value calculations * Performs calculations on a data block to find codepoints in m_nextBlock_ - * that has the same value. + * that has the same value. * Will call checkBlockDetail() if highlevel check fails. * Note m_*_ variables at this point is the next codepoint whose value * has not been calculated. @@ -383,14 +385,14 @@ public class TrieIterator implements RangeValueIterator * @return true if the whole block has the same value as currentValue or if * the whole block has been calculated, false otherwise. */ - private final boolean checkBlock(int currentValue) + private final boolean checkBlock(int currentValue) { int currentBlock = m_nextBlock_; - m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << + m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << Trie.INDEX_STAGE_2_SHIFT_; if (m_nextBlock_ == currentBlock && (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) { - // the block is the same as the previous one, filled with + // the block is the same as the previous one, filled with // currentValue m_nextCodepoint_ += DATA_BLOCK_LENGTH_; } @@ -410,11 +412,11 @@ public class TrieIterator implements RangeValueIterator } return true; } - + /** * Internal block value calculations - * Performs calculations on multiple data blocks for a set of trail - * surrogates to find codepoints in m_nextBlock_ that has the same value. + * Performs calculations on multiple data blocks for a set of trail + * surrogates to find codepoints in m_nextBlock_ that has the same value. * Will call checkBlock() for internal block checks. * Note m_*_ variables at this point is the next codepoint whose value * has not been calculated. 
@@ -425,7 +427,7 @@ public class TrieIterator implements RangeValueIterator private final boolean checkTrailBlock(int currentValue) { // enumerate code points for this lead surrogate - while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) + while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) { // if we ever reach here, we are at the start of a new block m_nextBlockIndex_ = 0; @@ -438,7 +440,7 @@ public class TrieIterator implements RangeValueIterator } return true; } - + /** * Checks if we are beginning at the start of a initial block. * If we are then the rest of the codepoints in this initial block @@ -453,15 +455,15 @@ public class TrieIterator implements RangeValueIterator if (m_nextIndex_ <= 0) { m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1; int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); - int leadBlock = - m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << + int leadBlock = + m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << Trie.INDEX_STAGE_2_SHIFT_; if (m_trie_.m_dataManipulate_ == null) { throw new NullPointerException( "The field DataManipulate in this Trie is null"); } m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( - m_trie_.getValue(leadBlock + + m_trie_.getValue(leadBlock + (nextLead & Trie.INDEX_STAGE_3_MASK_))); m_nextIndex_ --; m_nextBlockIndex_ = DATA_BLOCK_LENGTH_; @@ -505,7 +507,7 @@ public class TrieIterator implements RangeValueIterator /** * Number of data values in a stage 2 (data array) block. 
*/ - private static final int DATA_BLOCK_LENGTH_ = + private static final int DATA_BLOCK_LENGTH_ = 1 << Trie.INDEX_STAGE_1_SHIFT_; // /** // * Number of codepoints in a stage 2 block diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java index 8239ec69e2d..4c5963d4948 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java @@ -85,6 +85,7 @@ public final class UBiDiProps { // implement ICUBinary.Authenticate private final static class IsAcceptable implements ICUBinary.Authenticate { + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0]==2; } @@ -226,12 +227,12 @@ public final class UBiDiProps { start=indexes[IX_JG_START]; limit=indexes[IX_JG_LIMIT]; if(start<=c && c text.length || start > limit) { throw new IllegalArgumentException("start: " + start + " or limit: " - + limit + " out of range [0, " + + limit + " out of range [0, " + text.length + ")"); } this.text = text; @@ -36,35 +36,42 @@ public final class UCharArrayIterator extends UCharacterIterator { this.pos = start; } + @Override public int current() { return pos < limit ? text[pos] : DONE; } + @Override public int getLength() { return limit - start; } + @Override public int getIndex() { return pos - start; } + @Override public int next() { return pos < limit ? text[pos++] : DONE; } + @Override public int previous() { return pos > start ? 
text[--pos] : DONE; } + @Override public void setIndex(int index) { if (index < 0 || index > limit - start) { - throw new IndexOutOfBoundsException("index: " + index + - " out of range [0, " + throw new IndexOutOfBoundsException("index: " + index + + " out of range [0, " + (limit - start) + ")"); } pos = start + index; } + @Override public int getText(char[] fillIn, int offset) { int len = limit - start; System.arraycopy(text, start, fillIn, offset, len); @@ -72,10 +79,11 @@ public final class UCharArrayIterator extends UCharacterIterator { } /** - * Creates a copy of this iterator, does not clone the underlying + * Creates a copy of this iterator, does not clone the underlying * Replaceableobject * @return copy of this iterator */ + @Override public Object clone(){ try { return super.clone(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java index 088eab846eb..315560ba2bd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterIteratorWrapper.java @@ -6,7 +6,7 @@ * others. All Rights Reserved. 
* ******************************************************************************* */ - + package com.ibm.icu.impl; import java.text.CharacterIterator; @@ -14,16 +14,16 @@ import java.text.CharacterIterator; import com.ibm.icu.text.UCharacterIterator; /** - * This class is a wrapper around UCharacterIterator and implements the + * This class is a wrapper around UCharacterIterator and implements the * CharacterIterator protocol * @author ram */ public class UCharacterIteratorWrapper implements CharacterIterator{ - + public UCharacterIteratorWrapper(UCharacterIterator iter){ this.iterator = iter; } - + private UCharacterIterator iterator; @@ -33,6 +33,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * @return the first character in the text, or DONE if the text is empty * @see #getBeginIndex() */ + @Override public char first(){ //UCharacterIterator always iterates from 0 to length iterator.setToStart(); @@ -45,6 +46,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * @return the last character in the text, or DONE if the text is empty * @see #getEndIndex() */ + @Override public char last(){ iterator.setToLimit(); return (char)iterator.previous(); @@ -56,6 +58,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * position is off the end of the text. * @see #getIndex() */ + @Override public char current(){ return (char) iterator.current(); } @@ -68,6 +71,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * @return the character at the new position or DONE if the new * position is off the end of the text range. */ + @Override public char next(){ //pre-increment iterator.next(); @@ -81,6 +85,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * @return the character at the new position or DONE if the current * position is equal to getBeginIndex(). 
*/ + @Override public char previous(){ //pre-decrement return (char) iterator.previous(); @@ -94,6 +99,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * if an invalid value is supplied. * @return the character at the specified position or DONE if the specified position is equal to getEndIndex() */ + @Override public char setIndex(int position){ iterator.setIndex(position); return (char) iterator.current(); @@ -103,6 +109,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * Returns the start index of the text. * @return the index at which the text begins. */ + @Override public int getBeginIndex(){ //UCharacterIterator always starts from 0 return 0; @@ -113,6 +120,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * character following the end of the text. * @return the index after the last character in the text */ + @Override public int getEndIndex(){ return iterator.getLength(); } @@ -121,6 +129,7 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * Returns the current index. * @return the current index. 
*/ + @Override public int getIndex(){ return iterator.getIndex(); } @@ -129,15 +138,16 @@ public class UCharacterIteratorWrapper implements CharacterIterator{ * Create a copy of this iterator * @return A copy of this */ + @Override public Object clone(){ try { UCharacterIteratorWrapper result = (UCharacterIteratorWrapper) super.clone(); result.iterator = (UCharacterIterator)this.iterator.clone(); return result; - } catch (CloneNotSupportedException e) { + } catch (CloneNotSupportedException e) { return null; // only invoked if bad underlying character iterator } - } + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java index ec63e093265..a44d1d68494 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterNameReader.java @@ -14,22 +14,23 @@ import java.nio.ByteBuffer; import java.util.Arrays; /** -*

Internal reader class for ICU data file uname.dat containing -* Unicode codepoint name data.

+*

Internal reader class for ICU data file uname.dat containing +* Unicode codepoint name data.

*

This class simply reads unames.icu, authenticates that it is a valid * ICU data file and split its contents up into blocks of data for use in * com.ibm.icu.impl.UCharacterName. -*

-*

unames.icu which is in big-endian format is jared together with this +*

+*

unames.icu which is in big-endian format is jared together with this * package.

* @author Syn Wee Quek * @since release 2.1, February 1st 2002 */ final class UCharacterNameReader implements ICUBinary.Authenticate -{ +{ // public methods ---------------------------------------------------- - + + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0] == 1; @@ -64,7 +65,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate m_groupindex_ = m_byteBuffer_.getInt(); m_groupstringindex_ = m_byteBuffer_.getInt(); m_algnamesindex_ = m_byteBuffer_.getInt(); - + // reading tokens int count = m_byteBuffer_.getChar(); char token[] = ICUBinary.getChars(m_byteBuffer_, count, 0); @@ -72,7 +73,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate byte tokenstr[] = new byte[size]; m_byteBuffer_.get(tokenstr); data.setToken(token, tokenstr); - + // reading the group information records count = m_byteBuffer_.getChar(); data.setGroupCountSize(count, GROUP_INFO_SIZE_); @@ -82,13 +83,13 @@ final class UCharacterNameReader implements ICUBinary.Authenticate size = m_algnamesindex_ - m_groupstringindex_; byte groupstring[] = new byte[size]; m_byteBuffer_.get(groupstring); - + data.setGroup(group, groupstring); - + count = m_byteBuffer_.getInt(); - UCharacterName.AlgorithmName alg[] = + UCharacterName.AlgorithmName alg[] = new UCharacterName.AlgorithmName[count]; - + for (int i = 0; i < count; i ++) { UCharacterName.AlgorithmName an = readAlg(); @@ -99,7 +100,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate } data.setAlgorithm(alg); } - + /** *

Checking the file for the correct format.

* @param dataformatid @@ -116,7 +117,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate isDataVersionAcceptable(dataformatversion); } ///CLOVER:ON - + // private variables ------------------------------------------------- /** @@ -135,10 +136,10 @@ final class UCharacterNameReader implements ICUBinary.Authenticate private int m_groupindex_; private int m_groupstringindex_; private int m_algnamesindex_; - + /** * Size of an algorithmic name information group - * start code point size + end code point size + type size + variant size + + * start code point size + end code point size + type size + variant size + * size of data size */ private static final int ALG_INFO_SIZE_ = 12; @@ -149,7 +150,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate private static final int DATA_FORMAT_ID_ = 0x756E616D; // private methods --------------------------------------------------- - + /** * Reads an individual record of AlgorithmNames * @return an instance of AlgorithNames if read is successful otherwise null @@ -157,7 +158,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate */ private UCharacterName.AlgorithmName readAlg() throws IOException { - UCharacterName.AlgorithmName result = + UCharacterName.AlgorithmName result = new UCharacterName.AlgorithmName(); int rangestart = m_byteBuffer_.getInt(); int rangeend = m_byteBuffer_.getInt(); @@ -166,7 +167,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate if (!result.setInfo(rangestart, rangeend, type, variant)) { return null; } - + int size = m_byteBuffer_.getChar(); if (type == UCharacterName.AlgorithmName.TYPE_1_) { @@ -175,7 +176,7 @@ final class UCharacterNameReader implements ICUBinary.Authenticate result.setFactor(factor); size -= (variant << 1); } - + StringBuilder prefix = new StringBuilder(); char c = (char)(m_byteBuffer_.get() & 0x00FF); while (c != 0) @@ -183,11 +184,11 @@ final class UCharacterNameReader implements ICUBinary.Authenticate 
prefix.append(c); c = (char)(m_byteBuffer_.get() & 0x00FF); } - + result.setPrefix(prefix.toString()); - + size -= (ALG_INFO_SIZE_ + prefix.length() + 1); - + if (size > 0) { byte string[] = new byte[size]; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java index 9f0459a12e2..6eae0511a44 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java @@ -212,6 +212,7 @@ public final class UCharacterProperty super(SRC_CASE); this.which=which; } + @Override boolean contains(int c) { return UCaseProps.INSTANCE.hasBinaryProperty(c, which); } @@ -223,6 +224,7 @@ public final class UCharacterProperty super(source); this.which=which; } + @Override boolean contains(int c) { return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c); } @@ -236,11 +238,13 @@ public final class UCharacterProperty new BinaryProperty(1, (1<>>GCB_SHIFT; @@ -543,6 +571,7 @@ public final class UCharacterProperty return HangulSyllableType.NOT_APPLICABLE; } } + @Override int getMaxValue(int which) { return HangulSyllableType.COUNT-1; } @@ -554,11 +583,13 @@ public final class UCharacterProperty new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2), new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2), new CombiningClassIntProperty(SRC_NFC) { // LEAD_CANONICAL_COMBINING_CLASS + @Override int getValue(int c) { return Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8; } }, new CombiningClassIntProperty(SRC_NFC) { // TRAIL_CANONICAL_COMBINING_CLASS + @Override int getValue(int c) { return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff; } @@ -567,6 +598,7 @@ public final class UCharacterProperty new IntProperty(2, SB_MASK, SB_SHIFT), // SENTENCE_BREAK new IntProperty(2, WB_MASK, WB_SHIFT), // WORD_BREAK new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE + 
@Override int getValue(int c) { return UBiDiProps.INSTANCE.getPairedBracketType(c); } @@ -1240,6 +1272,7 @@ public final class UCharacterProperty private static final class IsAcceptable implements ICUBinary.Authenticate { // @Override when we switch to Java 6 + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0] == 7; } @@ -1332,7 +1365,7 @@ public final class UCharacterProperty /* add for u_charDigitValue() */ // TODO remove when UCharacter.getHanNumericValue() is changed to just return - // Unicode numeric values + // Unicode numeric values set.add(0x3007); set.add(0x3008); set.add(0x4e00); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java index 2e9e90b40d9..16e930c3d27 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java @@ -69,6 +69,7 @@ public final class UPropertyAliases { private static final class IsAcceptable implements ICUBinary.Authenticate { // @Override when we switch to Java 6 + @Override public boolean isDataVersionAcceptable(byte version[]) { return version[0]==2; } @@ -186,7 +187,7 @@ public final class UPropertyAliases { // Find the end of this name. 
int nameStart=nameGroupsIndex; while(0!=nameGroups.charAt(nameGroupsIndex)) { - ++nameGroupsIndex; + ++nameGroupsIndex; } if(nameStart==nameGroupsIndex) { return null; // no name (Property[Value]Aliases.txt has "n/a") diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/URLHandler.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/URLHandler.java index 328eaace26f..7e0387415e6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/URLHandler.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/URLHandler.java @@ -27,14 +27,14 @@ import java.util.jar.JarFile; public abstract class URLHandler { public static final String PROPNAME = "urlhandler.props"; - + private static final Map handlers; - + private static final boolean DEBUG = ICUDebug.enabled("URLHandler"); - + static { Map h = null; - + BufferedReader br = null; try { @SuppressWarnings("resource") // Closed by BufferedReader. @@ -44,32 +44,32 @@ public abstract class URLHandler { if (is != null) { Class[] params = { URL.class }; br = new BufferedReader(new InputStreamReader(is)); - + for (String line = br.readLine(); line != null; line = br.readLine()) { line = line.trim(); - + if (line.length() == 0 || line.charAt(0) == '#') { continue; } - + int ix = line.indexOf('='); - + if (ix == -1) { if (DEBUG) System.err.println("bad urlhandler line: '" + line + "'"); break; } - + String key = line.substring(0, ix).trim(); String value = line.substring(ix+1).trim(); - + try { Class cl = Class.forName(value); Method m = cl.getDeclaredMethod("get", params); - + if (h == null) { h = new HashMap(); } - + h.put(key, m); } catch (ClassNotFoundException e) { @@ -102,16 +102,16 @@ public abstract class URLHandler { if (url == null) { return null; } - + String protocol = url.getProtocol(); - + if (handlers != null) { Method m = handlers.get(protocol); - + if (m != null) { try { URLHandler handler = (URLHandler)m.invoke(null, new Object[] { url }); - + if (handler != null) { return handler; } @@ -127,10 +127,10 @@ 
public abstract class URLHandler { } } } - + return getDefault(url); } - + protected static URLHandler getDefault(URL url) { URLHandler handler = null; @@ -146,7 +146,7 @@ public abstract class URLHandler { } return handler; } - + private static class FileURLHandler extends URLHandler { File file; @@ -161,7 +161,8 @@ public abstract class URLHandler { throw new IllegalArgumentException(); } } - + + @Override public void guide(URLVisitor v, boolean recurse, boolean strip) { if (file.isDirectory()) { process(v, recurse, strip, "/", file.listFiles()); @@ -169,12 +170,12 @@ public abstract class URLHandler { v.visit(file.getName()); } } - + private void process(URLVisitor v, boolean recurse, boolean strip, String path, File[] files) { if (files != null) { for (int i = 0; i < files.length; i++) { File f = files[i]; - + if (f.isDirectory()) { if (recurse) { process(v, recurse, strip, path + f.getName()+ '/', f.listFiles()); @@ -186,7 +187,7 @@ public abstract class URLHandler { } } } - + private static class JarURLHandler extends URLHandler { JarFile jarFile; String prefix; @@ -194,9 +195,9 @@ public abstract class URLHandler { JarURLHandler(URL url) { try { prefix = url.getPath(); - + int ix = prefix.lastIndexOf("!/"); - + if (ix >= 0) { prefix = prefix.substring(ix + 2); // truncate after "!/" } @@ -220,17 +221,18 @@ public abstract class URLHandler { throw new IllegalArgumentException("jar error: " + e.getMessage()); } } - + + @Override public void guide(URLVisitor v, boolean recurse, boolean strip) { try { Enumeration entries = jarFile.entries(); - + while (entries.hasMoreElements()) { JarEntry entry = entries.nextElement(); - + if (!entry.isDirectory()) { // skip just directory paths String name = entry.getName(); - + if (name.startsWith(prefix)) { name = name.substring(prefix.length()); int ix = name.lastIndexOf('/'); @@ -255,9 +257,9 @@ public abstract class URLHandler { { guide(visitor, recurse, true); } - + public abstract void guide(URLVisitor visitor, boolean 
recurse, boolean strip); - + public interface URLVisitor { void visit(String str); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java index 9ccf381df0e..fb7a0c0df30 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeRegex.java @@ -74,7 +74,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT * consistent with Java regex, so be careful of the differences. *

Not thread-safe; create a separate copy for different threads. *

In the future, we may extend this to support other regex packages. - * + * * @regex A modified Java regex pattern, as in the input to * Pattern.compile(), except that all "character classes" are * processed as if they were UnicodeSet patterns. Example: @@ -82,6 +82,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT * @return A processed Java regex pattern, suitable for input to * Pattern.compile(). */ + @Override public String transform(String regex) { StringBuilder result = new StringBuilder(); UnicodeSet temp = new UnicodeSet(); @@ -150,7 +151,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT /** * Compile a regex string, after processing by fix(...). - * + * * @param regex Raw regex pattern, as in fix(...). * @return Pattern */ @@ -160,7 +161,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT /** * Compile a regex string, after processing by fix(...). - * + * * @param regex Raw regex pattern, as in fix(...). * @return Pattern */ @@ -170,7 +171,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT /** * Compile a composed string from a set of BNF lines; see the List version for more information. - * + * * @param bnfLines Series of BNF lines. * @return Pattern */ @@ -194,10 +195,10 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT *

*

* Caveats: at this point the parsing is simple; for example, # cannot be - * quoted (use \\u0023); you can set it to null to disable. + * quoted (use \\u0023); you can set it to null to disable. * The equality sign and a few others can be reset with * setBnfX(). - * + * * @param lines Series of lines that represent a BNF expression. The lines contain * a series of statements that of the form x=y;. A statement can take * multiple lines, but there can't be multiple statements on a line. @@ -213,7 +214,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT for (Entry entry : variables.entrySet()) { String variable = entry.getKey(), definition = entry.getValue(); - + for (Entry entry2 : variables.entrySet()) { String variable2 = entry2.getKey(), definition2 = entry2.getValue(); @@ -300,12 +301,13 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT } return result; } - - + + /* (non-Javadoc) * @see com.ibm.icu.util.Freezable#cloneAsThawed() */ + @Override public UnicodeRegex cloneAsThawed() { // TODO Auto-generated method stub try { @@ -318,6 +320,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT /* (non-Javadoc) * @see com.ibm.icu.util.Freezable#freeze() */ + @Override public UnicodeRegex freeze() { // no action needed now. 
return this; @@ -326,6 +329,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT /* (non-Javadoc) * @see com.ibm.icu.util.Freezable#isFrozen() */ + @Override public boolean isFrozen() { // at this point, always true return true; @@ -353,6 +357,7 @@ public class UnicodeRegex implements Cloneable, Freezable, StringT // private Appendable log = null; private Comparator LongestFirst = new Comparator() { + @Override public int compare(Object obj0, Object obj1) { String arg0 = obj0.toString(); String arg1 = obj1.toString(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java index 3a133b5c149..10fdfea85c7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle.java @@ -14,17 +14,19 @@ import java.util.ListResourceBundle; public class HolidayBundle extends ListResourceBundle { // Normally, each HolidayBundle uses the holiday's US English name - // as the string key for looking up the localized name. This means + // as the string key for looking up the localized name. This means // that the key itself can be used if no name is found for the requested // locale. // // For holidays where the key is _not_ the English name, e.g. in the // case of conflicts, the English name must be given here. // - static private final Object[][] fContents = { - { "", "" }, // Can't be empty! + static private final Object[][] fContents = { { "", "" }, // Can't be empty! 
}; - public synchronized Object[][] getContents() { return fContents; } + @Override + public synchronized Object[][] getContents() { + return fContents; + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java index d35f39b76f6..61f7896dc46 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da.java @@ -28,5 +28,6 @@ public class HolidayBundle_da extends ListResourceBundle { "Pentecost", "pinse" }, { "Shrove Tuesday", "hvidetirsdag" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java index 4f067378ec0..7b151def5a0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_da_DK.java @@ -16,28 +16,20 @@ import com.ibm.icu.util.EasterHoliday; import com.ibm.icu.util.Holiday; import com.ibm.icu.util.SimpleHoliday; -public class HolidayBundle_da_DK extends ListResourceBundle -{ - static private final Holiday[] fHolidays = { - SimpleHoliday.NEW_YEARS_DAY, - new SimpleHoliday(Calendar.APRIL, 30, -Calendar.FRIDAY, "General Prayer Day"), - new SimpleHoliday(Calendar.JUNE, 5, "Constitution Day"), - SimpleHoliday.CHRISTMAS_EVE, - SimpleHoliday.CHRISTMAS, - SimpleHoliday.BOXING_DAY, - SimpleHoliday.NEW_YEARS_EVE, +public class HolidayBundle_da_DK extends ListResourceBundle { + static private final Holiday[] fHolidays = { SimpleHoliday.NEW_YEARS_DAY, + new SimpleHoliday(Calendar.APRIL, 30, -Calendar.FRIDAY, "General Prayer Day"), + new SimpleHoliday(Calendar.JUNE, 5, "Constitution Day"), SimpleHoliday.CHRISTMAS_EVE, + SimpleHoliday.CHRISTMAS, SimpleHoliday.BOXING_DAY, 
SimpleHoliday.NEW_YEARS_EVE, - // Easter and related holidays - EasterHoliday.MAUNDY_THURSDAY, - EasterHoliday.GOOD_FRIDAY, - EasterHoliday.EASTER_SUNDAY, - EasterHoliday.EASTER_MONDAY, - EasterHoliday.ASCENSION, - EasterHoliday.WHIT_MONDAY, - }; + // Easter and related holidays + EasterHoliday.MAUNDY_THURSDAY, EasterHoliday.GOOD_FRIDAY, EasterHoliday.EASTER_SUNDAY, + EasterHoliday.EASTER_MONDAY, EasterHoliday.ASCENSION, EasterHoliday.WHIT_MONDAY, }; - static private final Object[][] fContents = { - { "holidays", fHolidays }, - }; - public synchronized Object[][] getContents() { return fContents; } + static private final Object[][] fContents = { { "holidays", fHolidays }, }; + + @Override + public synchronized Object[][] getContents() { + return fContents; + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java index 240c097200c..bef264c74c6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de.java @@ -65,5 +65,6 @@ public class HolidayBundle_de extends ListResourceBundle { { "Whit Sunday", "Pfingstsonntag" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java index 7354c37ef1d..2728e893330 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_AT.java @@ -44,5 +44,6 @@ public class HolidayBundle_de_AT extends ListResourceBundle { { "Christmas", "Christtag" }, { "New Year's Day", "Neujahrstag" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java index 7cef891f39c..d92a5e86138 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_de_DE.java @@ -39,5 +39,6 @@ public class HolidayBundle_de_DE extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java index 59b05ed021d..9a9e05ba4b8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el.java @@ -27,5 +27,6 @@ public class HolidayBundle_el extends ListResourceBundle { { "Whit Monday", "\u0394\u03b5\u03cd\u03c4\u03b5\u03c1\u03b7 \u03bc\u03ad\u03c1\u03b1 \u03c4\u03bf\u03cd \u03a0\u03b5\u03bd\u03c4\u03b7\u03ba\u03bf\u03c3\u03c4\u03ae" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java index 41ef5f88cb4..4f5225a0611 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_el_GR.java @@ -40,5 +40,6 @@ public class HolidayBundle_el_GR extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java 
b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java index 4cd1fba31de..004ec2b531c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en.java @@ -25,6 +25,7 @@ public class HolidayBundle_en extends ListResourceBundle { { "", "" }, // Can't be empty! }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java index e5931b52dd5..3b44c8115a5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_CA.java @@ -40,5 +40,6 @@ public class HolidayBundle_en_CA extends ListResourceBundle { { "Labor Day", "Labour Day" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java index 85122bceb07..92f7270b4c1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_GB.java @@ -37,5 +37,6 @@ public class HolidayBundle_en_GB extends ListResourceBundle { "Labor Day", "Labour Day" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java index e153be35c45..5a672ca38a6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_en_US.java @@ -47,5 +47,6 @@ public class HolidayBundle_en_US 
extends ListResourceBundle static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java index dc488eff874..df9293ca723 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es.java @@ -48,5 +48,6 @@ public class HolidayBundle_es extends ListResourceBundle { { "Whit Sunday", "Pentecost\u00e9s" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java index 86b77722f63..c2a02d0ba9c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_es_MX.java @@ -34,5 +34,6 @@ public class HolidayBundle_es_MX extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java index 0929e4debb9..ee3d0eada55 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr.java @@ -41,5 +41,6 @@ public class HolidayBundle_fr extends ListResourceBundle { { "Victory Day", "F\u00EAte de la Victoire" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java 
b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java index e64c0e19271..edd28a1fa42 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_CA.java @@ -38,5 +38,6 @@ public class HolidayBundle_fr_CA extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java index d44fd002298..d3212ffa7b4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_fr_FR.java @@ -37,5 +37,6 @@ public class HolidayBundle_fr_FR extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java index ff9ec91075f..34462018f7c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it.java @@ -33,5 +33,6 @@ public class HolidayBundle_it extends ListResourceBundle { { "Thanksgiving", "Giorno del Ringraziamento" }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java index 63fe93e0efc..b0c2511f1db 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java +++ 
b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_it_IT.java @@ -36,5 +36,6 @@ public class HolidayBundle_it_IT extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java index 08ee96f62e8..3fc72e0db49 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw.java @@ -17,6 +17,7 @@ public class HolidayBundle_iw extends ListResourceBundle { { "", "" }, // Can't be empty! }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java index 54741f2fa75..5245619da3b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_iw_IL.java @@ -28,5 +28,6 @@ public class HolidayBundle_iw_IL extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java index 8f475a598eb..779b469a064 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/HolidayBundle_ja_JP.java @@ -22,5 +22,6 @@ public class HolidayBundle_ja_JP extends ListResourceBundle { static private final Object[][] fContents = { { "holidays", fHolidays }, }; + @Override 
public synchronized Object[][] getContents() { return fContents; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java index 11d691b99f6..d2b63fba25c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/data/ResourceReader.java @@ -38,7 +38,7 @@ public class ResourceReader implements Closeable { private String resourceName; private String encoding; // null for default encoding private Class root; - + /** * The one-based line number. Has the special value -1 before the * object is initialized. Has the special value 0 after initialization @@ -103,7 +103,7 @@ public class ResourceReader implements Closeable { this.lineNo = -1; try { - InputStreamReader isr = (encoding == null) + InputStreamReader isr = (encoding == null) ? new InputStreamReader(is) : new InputStreamReader(is, encoding); @@ -200,7 +200,7 @@ public class ResourceReader implements Closeable { public int getLineNumber() { return lineNo; } - + /** * Return a string description of the position of the last line * returned by readLine() or readLineSkippingComments(). @@ -208,7 +208,7 @@ public class ResourceReader implements Closeable { public String describePosition() { return resourceName + ':' + lineNo; } - + /** * Reset this reader so that the next call to * readLine() returns the first line of the file @@ -244,7 +244,7 @@ public class ResourceReader implements Closeable { if (is == null) { throw new IllegalArgumentException("Can't open " + resourceName); } - + InputStreamReader isr = (encoding == null) ? new InputStreamReader(is) : new InputStreamReader(is, encoding); @@ -257,6 +257,7 @@ public class ResourceReader implements Closeable { * associated with it. If the stream is already closed then invoking * this method has no effect. 
*/ + @Override public void close() throws IOException { if (reader != null) { reader.close(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java index f56bc449520..e0adf54bb18 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatter.java @@ -29,7 +29,7 @@ class BasicDurationFormatter implements DurationFormatter { * the locales and timezones of these are in sync. */ public BasicDurationFormatter(PeriodFormatter formatter, - PeriodBuilder builder, + PeriodBuilder builder, DateFormatter fallback, long fallbackLimit) { this.formatter = formatter; @@ -39,7 +39,7 @@ class BasicDurationFormatter implements DurationFormatter { } protected BasicDurationFormatter(PeriodFormatter formatter, - PeriodBuilder builder, + PeriodBuilder builder, DateFormatter fallback, long fallbackLimit, String localeName, @@ -52,17 +52,20 @@ class BasicDurationFormatter implements DurationFormatter { this.timeZone = timeZone; } + @Override public String formatDurationFromNowTo(Date targetDate) { long now = System.currentTimeMillis(); long duration = targetDate.getTime() - now; return formatDurationFrom(duration, now); } - public String formatDurationFromNow(long duration) { + @Override +public String formatDurationFromNow(long duration) { return formatDurationFrom(duration, System.currentTimeMillis()); } - public String formatDurationFrom(long duration, long referenceDate) { + @Override +public String formatDurationFrom(long duration, long referenceDate) { String s = doFallback(duration, referenceDate); if (s == null) { Period p = doBuild(duration, referenceDate); @@ -71,12 +74,13 @@ class BasicDurationFormatter implements DurationFormatter { return s; } - public DurationFormatter withLocale(String locName) { + @Override +public DurationFormatter 
withLocale(String locName) { if (!locName.equals(localeName)) { PeriodFormatter newFormatter = formatter.withLocale(locName); PeriodBuilder newBuilder = builder.withLocale(locName); - DateFormatter newFallback = fallback == null - ? null + DateFormatter newFallback = fallback == null + ? null : fallback.withLocale(locName); return new BasicDurationFormatter(newFormatter, newBuilder, newFallback, fallbackLimit, @@ -85,11 +89,12 @@ class BasicDurationFormatter implements DurationFormatter { return this; } - public DurationFormatter withTimeZone(TimeZone tz) { + @Override +public DurationFormatter withTimeZone(TimeZone tz) { if (!tz.equals(timeZone)) { PeriodBuilder newBuilder = builder.withTimeZone(tz); - DateFormatter newFallback = fallback == null - ? null + DateFormatter newFallback = fallback == null + ? null : fallback.withTimeZone(tz); return new BasicDurationFormatter(formatter, newBuilder, newFallback, fallbackLimit, @@ -99,7 +104,7 @@ class BasicDurationFormatter implements DurationFormatter { } protected String doFallback(long duration, long referenceDate) { - if (fallback != null + if (fallback != null && fallbackLimit > 0 && Math.abs(duration) >= fallbackLimit) { return fallback.format(referenceDate + duration); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java index 7375c780ba1..4fbeb64b52d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java @@ -47,6 +47,7 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { * * @return this BasicDurationFormatterFactory */ + @Override public DurationFormatterFactory setPeriodFormatter( PeriodFormatter formatter) { if (formatter != this.formatter) { @@ -63,6 +64,7 @@ class BasicDurationFormatterFactory implements 
DurationFormatterFactory { * @param builder the builder to use * @return this BasicDurationFormatterFactory */ + @Override public DurationFormatterFactory setPeriodBuilder(PeriodBuilder builder) { if (builder != this.builder) { this.builder = builder; @@ -77,6 +79,7 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { * @param fallback the fallback formatter to use, or null * @return this BasicDurationFormatterFactory */ + @Override public DurationFormatterFactory setFallback(DateFormatter fallback) { boolean doReset = fallback == null ? this.fallback != null @@ -94,6 +97,7 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { * @param fallbackLimit the fallback limit to use, or 0 if none is desired. * @return this BasicDurationFormatterFactory */ + @Override public DurationFormatterFactory setFallbackLimit(long fallbackLimit) { if (fallbackLimit < 0) { fallbackLimit = 0; @@ -106,12 +110,13 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { } /** - * Set the name of the locale that will be used when + * Set the name of the locale that will be used when * creating new formatters. * * @param localeName the name of the Locale * @return this BasicDurationFormatterFactory */ + @Override public DurationFormatterFactory setLocale(String localeName) { if (!localeName.equals(this.localeName)) { this.localeName = localeName; @@ -127,12 +132,13 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { } /** - * Set the name of the locale that will be used when + * Set the name of the locale that will be used when * creating new formatters. * * @param timeZone The time zone to use. 
* @return this BasicDurationFormatterFactory */ + @Override public DurationFormatterFactory setTimeZone(TimeZone timeZone) { if (!timeZone.equals(this.timeZone)) { this.timeZone = timeZone; @@ -149,6 +155,7 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { * * @return a BasicDurationFormatter */ + @Override public DurationFormatter getFormatter() { if (f == null) { if (fallback != null) { @@ -232,7 +239,7 @@ class BasicDurationFormatterFactory implements DurationFormatterFactory { * Create the formatter. All local fields are already initialized. */ protected BasicDurationFormatter createFormatter() { - return new BasicDurationFormatter(formatter, builder, fallback, + return new BasicDurationFormatter(formatter, builder, fallback, fallbackLimit, localeName, timeZone); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java index c34116edcc1..ade665e56d2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java @@ -22,9 +22,9 @@ import com.ibm.icu.impl.duration.impl.PeriodFormatterDataService; class BasicPeriodBuilderFactory implements PeriodBuilderFactory { private PeriodFormatterDataService ds; private Settings settings; - + private static final short allBits = 0xff; - + BasicPeriodBuilderFactory(PeriodFormatterDataService ds) { this.ds = ds; this.settings = new Settings(); @@ -50,7 +50,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { return this; } Settings result = inUse ? 
copy() : this; - + result.uset = (short)uset; if ((uset & allBits) == allBits) { @@ -67,7 +67,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { lastUnit = i; } } - if (lastUnit == -1) { + if (lastUnit == -1) { // currently empty, but this might be transient so no fail result.minUnit = result.maxUnit = null; } else { @@ -84,7 +84,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { } return (short)(uset & ~(1 << TimeUnit.MILLISECOND.ordinal)); } - + TimeUnit effectiveMinUnit() { if (allowMillis || minUnit != TimeUnit.MILLISECOND) { return minUnit; @@ -97,7 +97,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { } return TimeUnit.SECOND; // default for pathological case } - + Settings setMaxLimit(float maxLimit) { int val = maxLimit <= 0 ? 0 : (int)(maxLimit*1000); if (maxLimit == val) { @@ -150,7 +150,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { return this .setAllowZero(data.allowZero()) .setWeeksAloneOnly(data.weeksAloneOnly()) - .setAllowMilliseconds(data.useMilliseconds() != DataRecord.EMilliSupport.NO); + .setAllowMilliseconds(data.useMilliseconds() != DataRecord.EMilliSupport.NO); } Settings setInUse() { @@ -165,7 +165,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { return Period.moreThan(maxLimit/1000f, maxUnit).inPast(inPast); } } - + if (minLimit > 0) { TimeUnit emu = effectiveMinUnit(); long emud = approximateDurationOf(emu); @@ -193,6 +193,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { } } + @Override public PeriodBuilderFactory setAvailableUnitRange(TimeUnit minUnit, TimeUnit maxUnit) { int uset = 0; @@ -206,7 +207,8 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { return this; } - public PeriodBuilderFactory setUnitIsAvailable(TimeUnit unit, + @Override + public PeriodBuilderFactory setUnitIsAvailable(TimeUnit unit, boolean available) { int uset = settings.uset; if (available) { @@ -218,36 +220,43 @@ 
class BasicPeriodBuilderFactory implements PeriodBuilderFactory { return this; } + @Override public PeriodBuilderFactory setMaxLimit(float maxLimit) { settings = settings.setMaxLimit(maxLimit); return this; } + @Override public PeriodBuilderFactory setMinLimit(float minLimit) { settings = settings.setMinLimit(minLimit); return this; } + @Override public PeriodBuilderFactory setAllowZero(boolean allow) { settings = settings.setAllowZero(allow); return this; } + @Override public PeriodBuilderFactory setWeeksAloneOnly(boolean aloneOnly) { settings = settings.setWeeksAloneOnly(aloneOnly); return this; } + @Override public PeriodBuilderFactory setAllowMilliseconds(boolean allow) { settings = settings.setAllowMilliseconds(allow); return this; } + @Override public PeriodBuilderFactory setLocale(String localeName) { settings = settings.setLocale(localeName); return this; } - + + @Override public PeriodBuilderFactory setTimeZone(TimeZone timeZone) { // ignore this return this; @@ -267,6 +276,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { * @param unit the single TimeUnit with which to represent times * @return a builder */ + @Override public PeriodBuilder getFixedUnitBuilder(TimeUnit unit) { return FixedUnitBuilder.get(unit, getSettings()); } @@ -277,6 +287,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { * * @return a builder */ + @Override public PeriodBuilder getSingleUnitBuilder() { return SingleUnitBuilder.get(getSettings()); } @@ -289,6 +300,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { * * @return a builder */ + @Override public PeriodBuilder getOneOrTwoUnitBuilder() { return OneOrTwoUnitBuilder.get(getSettings()); } @@ -300,6 +312,7 @@ class BasicPeriodBuilderFactory implements PeriodBuilderFactory { * * @return a builder */ + @Override public PeriodBuilder getMultiUnitBuilder(int periodCount) { return MultiUnitBuilder.get(periodCount, getSettings()); } @@ -308,7 +321,8 @@ class 
BasicPeriodBuilderFactory implements PeriodBuilderFactory { abstract class PeriodBuilderImpl implements PeriodBuilder { protected BasicPeriodBuilderFactory.Settings settings; - + + @Override public Period create(long duration) { return createWithReferenceDate(duration, System.currentTimeMillis()); } @@ -317,6 +331,7 @@ abstract class PeriodBuilderImpl implements PeriodBuilder { return BasicPeriodBuilderFactory.approximateDurationOf(unit); } + @Override public Period createWithReferenceDate(long duration, long referenceDate) { boolean inPast = duration < 0; if (inPast) { @@ -332,11 +347,13 @@ abstract class PeriodBuilderImpl implements PeriodBuilder { return ts; } + @Override public PeriodBuilder withTimeZone(TimeZone timeZone) { // ignore the time zone return this; } + @Override public PeriodBuilder withLocale(String localeName) { BasicPeriodBuilderFactory.Settings newSettings = settings.setLocale(localeName); if (newSettings != settings) { @@ -347,7 +364,7 @@ abstract class PeriodBuilderImpl implements PeriodBuilder { protected abstract PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse); - protected abstract Period handleCreate(long duration, long referenceDate, + protected abstract Period handleCreate(long duration, long referenceDate, boolean inPast); protected PeriodBuilderImpl(BasicPeriodBuilderFactory.Settings settings) { @@ -357,7 +374,7 @@ abstract class PeriodBuilderImpl implements PeriodBuilder { class FixedUnitBuilder extends PeriodBuilderImpl { private TimeUnit unit; - + public static FixedUnitBuilder get(TimeUnit unit, BasicPeriodBuilderFactory.Settings settingsToUse) { if (settingsToUse != null && (settingsToUse.effectiveSet() & (1 << unit.ordinal)) != 0) { return new FixedUnitBuilder(unit, settingsToUse); @@ -370,11 +387,13 @@ class FixedUnitBuilder extends PeriodBuilderImpl { this.unit = unit; } + @Override protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { return get(unit, 
settingsToUse); } - protected Period handleCreate(long duration, long referenceDate, + @Override + protected Period handleCreate(long duration, long referenceDate, boolean inPast) { if (unit == null) { return null; @@ -397,11 +416,13 @@ class SingleUnitBuilder extends PeriodBuilderImpl { return new SingleUnitBuilder(settings); } + @Override protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { return SingleUnitBuilder.get(settingsToUse); } - protected Period handleCreate(long duration, long referenceDate, + @Override + protected Period handleCreate(long duration, long referenceDate, boolean inPast) { short uset = settings.effectiveSet(); for (int i = 0; i < TimeUnit.units.length; ++i) { @@ -430,11 +451,13 @@ class OneOrTwoUnitBuilder extends PeriodBuilderImpl { return new OneOrTwoUnitBuilder(settings); } + @Override protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { return OneOrTwoUnitBuilder.get(settingsToUse); } - protected Period handleCreate(long duration, long referenceDate, + @Override + protected Period handleCreate(long duration, long referenceDate, boolean inPast) { Period period = null; short uset = settings.effectiveSet(); @@ -479,11 +502,13 @@ class MultiUnitBuilder extends PeriodBuilderImpl { return null; } + @Override protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) { return MultiUnitBuilder.get(nPeriods, settingsToUse); } - protected Period handleCreate(long duration, long referenceDate, + @Override + protected Period handleCreate(long duration, long referenceDate, boolean inPast) { Period period = null; int n = 0; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java index 3b8744089d7..887752f5418 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java +++ 
b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java @@ -26,9 +26,9 @@ class BasicPeriodFormatter implements PeriodFormatter { private PeriodFormatterData data; private Customizations customs; - BasicPeriodFormatter(BasicPeriodFormatterFactory factory, + BasicPeriodFormatter(BasicPeriodFormatterFactory factory, String localeName, - PeriodFormatterData data, + PeriodFormatterData data, Customizations customs) { this.factory = factory; this.localeName = localeName; @@ -36,17 +36,19 @@ class BasicPeriodFormatter implements PeriodFormatter { this.customs = customs; } - public String format(Period period) { + @Override +public String format(Period period) { if (!period.isSet()) { throw new IllegalArgumentException("period is not set"); } return format(period.timeLimit, period.inFuture, period.counts); } + @Override public PeriodFormatter withLocale(String locName) { if (!this.localeName.equals(locName)) { PeriodFormatterData newData = factory.getData(locName); - return new BasicPeriodFormatter(factory, locName, newData, + return new BasicPeriodFormatter(factory, locName, newData, customs); } return this; @@ -60,7 +62,7 @@ class BasicPeriodFormatter implements PeriodFormatter { } } - // if the data does not allow formatting of zero periods, + // if the data does not allow formatting of zero periods, // remove these from consideration. If the result has no // periods set, return null to indicate we could not format // the duration. @@ -79,13 +81,13 @@ class BasicPeriodFormatter implements PeriodFormatter { // set, merge them with seconds and force display of seconds to // decimal with 3 places. 
boolean forceD3Seconds = false; - if (data.useMilliseconds() != EMilliSupport.YES && + if (data.useMilliseconds() != EMilliSupport.YES && (mask & (1 << TimeUnit.MILLISECOND.ordinal)) != 0) { int sx = TimeUnit.SECOND.ordinal; int mx = TimeUnit.MILLISECOND.ordinal; int sf = 1 << sx; int mf = 1 << mx; - switch (data.useMilliseconds()) { + switch (data.useMilliseconds()) { case EMilliSupport.WITH_SECONDS: { // if there are seconds, merge with seconds, otherwise leave alone if ((mask & sf) != 0) { @@ -176,7 +178,7 @@ class BasicPeriodFormatter implements PeriodFormatter { cv = ECountVariant.INTEGER; } boolean isLast = i == last; - boolean mustSkip = data.appendUnit(unit, count, cv, customs.unitVariant, + boolean mustSkip = data.appendUnit(unit, count, cv, customs.unitVariant, countSep, useDigitPrefix, multiple, isLast, wasSkipped, sb); skipped |= mustSkip; wasSkipped = false; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java index eb339976ae5..ecff5a0af95 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java @@ -81,6 +81,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { /** * Set the locale for this factory. 
*/ + @Override public PeriodFormatterFactory setLocale(String localeName) { data = null; this.localeName = localeName; @@ -93,6 +94,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { * @param display true if limits will be displayed * @return this PeriodFormatterFactory */ + @Override public PeriodFormatterFactory setDisplayLimit(boolean display) { updateCustomizations().displayLimit = display; return this; @@ -113,6 +115,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { * @param display true if past and future will be displayed * @return this PeriodFormatterFactory */ + @Override public PeriodFormatterFactory setDisplayPastFuture(boolean display) { updateCustomizations().displayDirection = display; return this; @@ -133,6 +136,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { * @param variant the variant indicating separators will be displayed * @return this PeriodFormatterFactory */ + @Override public PeriodFormatterFactory setSeparatorVariant(int variant) { updateCustomizations().separatorVariant = (byte) variant; return this; @@ -153,6 +157,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { * @param variant the variant to use * @return this PeriodFormatterFactory */ + @Override public PeriodFormatterFactory setUnitVariant(int variant) { updateCustomizations().unitVariant = (byte) variant; return this; @@ -173,6 +178,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { * @param variant the variant to use * @return this PeriodFormatterFactory */ + @Override public PeriodFormatterFactory setCountVariant(int variant) { updateCustomizations().countVariant = (byte) variant; return this; @@ -187,9 +193,10 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { return customizations.countVariant; } + @Override public PeriodFormatter getFormatter() { customizationsInUse = true; - return new 
BasicPeriodFormatter(this, localeName, getData(), + return new BasicPeriodFormatter(this, localeName, getData(), customizations); } @@ -221,7 +228,7 @@ public class BasicPeriodFormatterFactory implements PeriodFormatterFactory { byte separatorVariant = ESeparatorVariant.FULL; byte unitVariant = EUnitVariant.PLURALIZED; byte countVariant = ECountVariant.INTEGER; - + public Customizations copy() { Customizations result = new Customizations(); result.displayLimit = displayLimit; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java index c97ae618401..c840bdc4caf 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java @@ -24,7 +24,7 @@ public class BasicPeriodFormatterService implements PeriodFormatterService { /** * Return the default service instance. This uses the default data service. - * + * * @return an BasicPeriodFormatterService */ public static BasicPeriodFormatterService getInstance() { @@ -39,25 +39,29 @@ public class BasicPeriodFormatterService implements PeriodFormatterService { /** * Construct a BasicPeriodFormatterService using the given * PeriodFormatterDataService. 
- * + * * @param ds the data service to use */ public BasicPeriodFormatterService(PeriodFormatterDataService ds) { this.ds = ds; } + @Override public DurationFormatterFactory newDurationFormatterFactory() { return new BasicDurationFormatterFactory(this); } + @Override public PeriodFormatterFactory newPeriodFormatterFactory() { return new BasicPeriodFormatterFactory(ds); } + @Override public PeriodBuilderFactory newPeriodBuilderFactory() { return new BasicPeriodBuilderFactory(ds); } + @Override public Collection getAvailableLocaleNames() { return ds.getAvailableLocales(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/Period.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/Period.java index 76ceab89764..2f2a105d71f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/Period.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/Period.java @@ -17,9 +17,9 @@ import com.ibm.icu.impl.duration.impl.DataRecord.ETimeLimit; * In addition Period can either represent the duration as being into the past * or future, and as being more or less than the defined value. *

- * Use a PeriodFormatter to convert a Period to a String. + * Use a PeriodFormatter to convert a Period to a String. *

- * Periods are immutable. Mutating operations return the new + * Periods are immutable. Mutating operations return the new * result leaving the original unchanged. *

* Example:

@@ -92,7 +92,7 @@ public final class Period {
   public Period omit(TimeUnit unit) {
     return setTimeUnitInternalValue(unit, 0);
   }
-  
+
   /**
    * Mark the duration as being at the defined duration.
    *
@@ -197,9 +197,9 @@ public final class Period {
   }
 
   /**
-   * Returns true if this represents a 
+   * Returns true if this represents a
    * duration into the future.
-   * @return true if this represents a 
+   * @return true if this represents a
    * duration into the future.
    */
   public boolean isInFuture() {
@@ -207,9 +207,9 @@ public final class Period {
   }
 
   /**
-   * Returns true if this represents a 
+   * Returns true if this represents a
    * duration into the past
-   * @return true if this represents a 
+   * @return true if this represents a
    * duration into the past
    */
   public boolean isInPast  () {
@@ -236,12 +236,13 @@ public final class Period {
     return timeLimit == ETimeLimit.LT;
   }
 
-  /** 
+  /**
    * Returns true if rhs extends Period and
    * the two Periods are equal.
    * @param rhs the object to compare to
    * @return true if rhs is a Period and is equal to this
    */
+  @Override
   public boolean equals(Object rhs) {
     try {
       return equals((Period)rhs);
@@ -275,11 +276,12 @@ public final class Period {
     return false;
   }
 
-  /** 
-   * Returns the hashCode. 
+  /**
+   * Returns the hashCode.
    * @return the hashCode
    */
-  public int hashCode() {
+  @Override
+public int hashCode() {
     int hc = (timeLimit << 1) | (inFuture ? 1 : 0);
     for (int i = 0; i < counts.length; ++i) {
       hc = (hc << 2) ^ counts[i];
@@ -316,7 +318,7 @@ public final class Period {
     return setTimeUnitInternalValue(unit, (int)(value * 1000) + 1);
   }
 
-  /** 
+  /**
    * Sets the period to have the provided value, 1/1000 of the
    * unit plus 1.  Thus unset values are '0', 1' is the set value '0',
    * 2 is the set value '1/1000', 3 is the set value '2/1000' etc.
@@ -368,7 +370,7 @@ public final class Period {
    */
   private static void checkCount(float count) {
     if (count < 0) {
-      throw new IllegalArgumentException("count (" + count + 
+      throw new IllegalArgumentException("count (" + count +
                                          ") cannot be negative");
     }
   }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java
index ae2314ea5cf..e9065bc7deb 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/TimeUnit.java
@@ -10,7 +10,7 @@
 package com.ibm.icu.impl.duration;
 
 /**
- * 'Enum' for individual time units.  Not an actual enum so that it can be 
+ * 'Enum' for individual time units.  Not an actual enum so that it can be
  * used by Java 1.4.
  */
 public final class TimeUnit {
@@ -26,32 +26,33 @@ public final class TimeUnit {
     this.ordinal = (byte) ordinal;
   }
 
+  @Override
   public String toString() {
     return name;
   }
-  
-  /** Represents a year. */ 
+
+  /** Represents a year. */
   public static final TimeUnit YEAR = new TimeUnit("year", 0);
 
-  /** Represents a month. */  
+  /** Represents a month. */
   public static final TimeUnit MONTH = new TimeUnit("month", 1);
 
-  /** Represents a week. */ 
+  /** Represents a week. */
   public static final TimeUnit WEEK = new TimeUnit("week", 2);
 
-  /** Represents a day. */ 
+  /** Represents a day. */
   public static final TimeUnit DAY = new TimeUnit("day", 3);
 
-  /** Represents an hour. */ 
+  /** Represents an hour. */
   public static final TimeUnit HOUR = new TimeUnit("hour", 4);
 
-  /** Represents a minute. */ 
+  /** Represents a minute. */
   public static final TimeUnit MINUTE = new TimeUnit("minute", 5);
 
-  /** Represents a second. */ 
+  /** Represents a second. */
   public static final TimeUnit SECOND = new TimeUnit("second", 6);
 
-  /** Represents a millisecond. */ 
+  /** Represents a millisecond. */
   public static final TimeUnit MILLISECOND = new TimeUnit("millisecond", 7);
 
   /** Returns the next larger time unit, or null if this is the largest. */
@@ -81,7 +82,7 @@ public final class TimeUnit {
   // compute entire expression using 'long'.  differs from initializtion of
   // a single constant
   static final long[] approxDurations = {
-    36525L*24*60*60*10, 3045*24*60*60*10L, 7*24*60*60*1000L, 24*60*60*1000L, 
+    36525L*24*60*60*10, 3045*24*60*60*10L, 7*24*60*60*1000L, 24*60*60*1000L,
     60*60*1000L, 60*1000L, 1000L, 1L
   };
 }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java
index 7f70fbf4e18..e6f375442a4 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java
@@ -82,6 +82,7 @@ public class ResourceBasedPeriodFormatterDataService extends
         availableLocales = Collections.unmodifiableList(localeNames);
     }
 
+    @Override
     public PeriodFormatterData get(String localeName) {
         // remove tag info including calendar, we don't use the calendar
         int x = localeName.indexOf('@');
@@ -152,6 +153,7 @@ public class ResourceBasedPeriodFormatterDataService extends
         }
     }
 
+    @Override
     public Collection getAvailableLocales() {
         return availableLocales;
     }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordReader.java
index 488d5d742b9..0b8e5bf4d7b 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordReader.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordReader.java
@@ -40,6 +40,7 @@ public class XMLRecordReader implements RecordReader {
         }
     }
 
+    @Override
     public boolean open(String title) {
         if (getTag().equals(title)) {
             nameStack.add(title);
@@ -49,6 +50,7 @@ public class XMLRecordReader implements RecordReader {
         return false;
     }
 
+    @Override
     public boolean close() {
         int ix = nameStack.size() - 1;
         String name = nameStack.get(ix);
@@ -60,6 +62,7 @@ public class XMLRecordReader implements RecordReader {
         return false;
     }
 
+    @Override
     public boolean bool(String name) {
         String s = string(name);
         if (s != null) {
@@ -68,6 +71,7 @@ public class XMLRecordReader implements RecordReader {
         return false;
     }
 
+    @Override
     public boolean[] boolArray(String name) {
         String[] sa = stringArray(name);
         if (sa != null) {
@@ -80,6 +84,7 @@ public class XMLRecordReader implements RecordReader {
         return null;
     }
 
+    @Override
     public char character(String name) {
         String s = string(name);
         if (s != null) {
@@ -88,6 +93,7 @@ public class XMLRecordReader implements RecordReader {
         return '\uffff';
     }
 
+    @Override
     public char[] characterArray(String name) {
         String[] sa = stringArray(name);
         if (sa != null) {
@@ -100,6 +106,7 @@ public class XMLRecordReader implements RecordReader {
         return null;
     }
 
+    @Override
     public byte namedIndex(String name, String[] names) {
         String sa = string(name);
         if (sa != null) {
@@ -112,6 +119,7 @@ public class XMLRecordReader implements RecordReader {
         return (byte) -1;
     }
 
+    @Override
     public byte[] namedIndexArray(String name, String[] names) {
         String[] sa = stringArray(name);
         if (sa != null) {
@@ -131,6 +139,7 @@ public class XMLRecordReader implements RecordReader {
         return null;
     }
 
+    @Override
     public String string(String name) {
         if (match(name)) {
             String result = readData();
@@ -141,6 +150,7 @@ public class XMLRecordReader implements RecordReader {
         return null;
     }
 
+    @Override
     public String[] stringArray(String name) {
         if (match(name + "List")) {
             List list = new ArrayList();
@@ -158,6 +168,7 @@ public class XMLRecordReader implements RecordReader {
         return null;
     }
 
+    @Override
     public String[][] stringTable(String name) {
         if (match(name + "Table")) {
             List list = new ArrayList();
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java
index bd524490499..d2f18673eb9 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java
@@ -25,6 +25,7 @@ public class XMLRecordWriter implements RecordWriter {
         this.nameStack = new ArrayList();
     }
 
+    @Override
     public boolean open(String title) {
         newline();
         writeString("<" + title + ">");
@@ -32,6 +33,7 @@ public class XMLRecordWriter implements RecordWriter {
         return true;
     }
 
+    @Override
     public boolean close() {
         int ix = nameStack.size() - 1;
         if (ix >= 0) {
@@ -50,10 +52,12 @@ public class XMLRecordWriter implements RecordWriter {
         }
     }
 
+    @Override
     public void bool(String name, boolean value) {
         internalString(name, String.valueOf(value));
     }
 
+    @Override
     public void boolArray(String name, boolean[] values) {
         if (values != null) {
             String[] stringValues = new String[values.length];
@@ -74,12 +78,14 @@ public class XMLRecordWriter implements RecordWriter {
         return String.valueOf(value);
     }
 
+    @Override
     public void character(String name, char value) {
         if (value != '\uffff') {
             internalString(name, ctos(value));
         }
     }
 
+    @Override
     public void characterArray(String name, char[] values) {
         if (values != null) {
             String[] stringValues = new String[values.length];
@@ -95,12 +101,14 @@ public class XMLRecordWriter implements RecordWriter {
         }
     }
 
+    @Override
     public void namedIndex(String name, String[] names, int value) {
         if (value >= 0) {
             internalString(name, names[value]);
         }
     }
 
+    @Override
     public void namedIndexArray(String name, String[] names, byte[] values) {
         if (values != null) {
             String[] stringValues = new String[values.length];
@@ -182,10 +190,12 @@ public class XMLRecordWriter implements RecordWriter {
         }
     }
 
+    @Override
     public void string(String name, String value) {
         internalString(name, normalize(value));
     }
 
+    @Override
     public void stringArray(String name, String[] values) {
         if (values != null) {
             push(name + "List");
@@ -200,6 +210,7 @@ public class XMLRecordWriter implements RecordWriter {
         }
     }
 
+    @Override
     public void stringTable(String name, String[][] values) {
         if (values != null) {
             push(name + "Table");
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java
index 449317f43bc..fe29229cc1e 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/AsciiUtil.java
@@ -168,6 +168,7 @@ public final class AsciiUtil {
             _hash = AsciiUtil.toLowerString(key).hashCode();
         }
 
+        @Override
         public boolean equals(Object o) {
             if (this == o) {
                 return true;
@@ -178,6 +179,7 @@ public final class AsciiUtil {
             return false;
         }
 
+        @Override
         public int hashCode() {
             return _hash;
         }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java
index 5a754d47ea3..66deee99d63 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/BaseLocale.java
@@ -78,6 +78,7 @@ public final class BaseLocale {
         return _variant;
     }
 
+    @Override
     public boolean equals(Object obj) {
         if (this == obj) {
             return true;
@@ -93,6 +94,7 @@ public final class BaseLocale {
                 && _variant.equals(other._variant);
     }
 
+    @Override
     public String toString() {
         StringBuilder buf = new StringBuilder();
         if (_language.length() > 0) {
@@ -123,6 +125,7 @@ public final class BaseLocale {
         return buf.toString();
     }
 
+    @Override
     public int hashCode() {
         int h = _hash;
         if (h == 0) {
@@ -167,6 +170,7 @@ public final class BaseLocale {
             }
         }
 
+        @Override
         public boolean equals(Object obj) {
             if (JDKIMPL) {
                 return (this == obj) ||
@@ -184,6 +188,7 @@ public final class BaseLocale {
                     && AsciiUtil.caseIgnoreMatch(((Key)obj)._vart, this._vart);
         }
 
+        @Override
         public int compareTo(Key other) {
             int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang);
             if (res == 0) {
@@ -202,6 +207,7 @@ public final class BaseLocale {
             return res;
         }
 
+        @Override
         public int hashCode() {
             int h = _hash;
             if (h == 0) {
@@ -247,10 +253,12 @@ public final class BaseLocale {
         public Cache() {
         }
 
+        @Override
         protected Key normalizeKey(Key key) {
             return Key.normalize(key);
         }
 
+        @Override
         protected BaseLocale createObject(Key key) {
             return new BaseLocale(key._lang, key._scrt, key._regn, key._vart);
         }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java
index b52e4bc55fd..bccda569120 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/Extension.java
@@ -34,6 +34,7 @@ public class Extension {
         return _key + LanguageTag.SEP + _value;
     }
 
+    @Override
     public String toString() {
         return getID();
     }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java
index b4e3278c0ea..615156b86f7 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/InternalLocaleBuilder.java
@@ -642,10 +642,12 @@ public final class InternalLocaleBuilder {
             return _s;
         }
 
+        @Override
         public int hashCode() {
             return AsciiUtil.toLowerString(_s).hashCode();
         }
 
+        @Override
         public boolean equals(Object obj) {
             if (this == obj) {
                 return true;
@@ -668,10 +670,12 @@ public final class InternalLocaleBuilder {
             return _c;
         }
 
+        @Override
         public int hashCode() {
             return AsciiUtil.toLower(_c);
         }
 
+        @Override
         public boolean equals(Object obj) {
             if (this == obj) {
                 return true;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java
index 5b56a06c719..cea97d8597f 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java
@@ -42,6 +42,7 @@ public class KeyTypeData {
 
     private static class CodepointsTypeHandler extends SpecialTypeHandler {
         private static final Pattern pat = Pattern.compile("[0-9a-fA-F]{4,6}(-[0-9a-fA-F]{4,6})*");
+        @Override
         boolean isWellFormed(String value) {
             return pat.matcher(value).matches();
         }
@@ -49,6 +50,7 @@ public class KeyTypeData {
 
     private static class ReorderCodeTypeHandler extends SpecialTypeHandler {
         private static final Pattern pat = Pattern.compile("[a-zA-Z]{3,8}(-[a-zA-Z]{3,8})*");
+        @Override
         boolean isWellFormed(String value) {
             return pat.matcher(value).matches();
         }
@@ -56,20 +58,23 @@ public class KeyTypeData {
 
     private static class RgKeyValueTypeHandler extends SpecialTypeHandler {
         private static final Pattern pat = Pattern.compile("([a-zA-Z]{2}|[0-9]{3})[zZ]{4}");
+        @Override
         boolean isWellFormed(String value) {
             return pat.matcher(value).matches();
         }
     }
-    
+
     private static class SubdivisionKeyValueTypeHandler extends SpecialTypeHandler {
         private static final Pattern pat = Pattern.compile("([a-zA-Z]{2}|[0-9]{3})");
+        @Override
         boolean isWellFormed(String value) {
             return pat.matcher(value).matches();
         }
     }
-    
+
     private static class PrivateUseKeyValueTypeHandler extends SpecialTypeHandler {
         private static final Pattern pat = Pattern.compile("[a-zA-Z0-9]{3,8}(-[a-zA-Z0-9]{3,8})*");
+        @Override
         boolean isWellFormed(String value) {
             return pat.matcher(value).matches();
         }
@@ -658,7 +663,7 @@ typeInfo{
     public static Set getBcp47Keys() {
         return BCP47_KEYS.keySet();
     };
-    
+
     public static Set getBcp47KeyTypes(String key) {
         return BCP47_KEYS.get(key);
     };
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
index 87f6a3dedfd..786b4a00db9 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
@@ -114,38 +114,38 @@ public class LanguageTag {
 
     /*
      * BNF in RFC5464
-     *  
+     *
      * Language-Tag  = langtag             ; normal language tags
      *               / privateuse          ; private use tag
      *               / grandfathered       ; grandfathered tags
      *
-     * 
+     *
      * langtag       = language
      *                 ["-" script]
      *                 ["-" region]
      *                 *("-" variant)
      *                 *("-" extension)
      *                 ["-" privateuse]
-     * 
+     *
      * language      = 2*3ALPHA            ; shortest ISO 639 code
      *                 ["-" extlang]       ; sometimes followed by
      *                                     ; extended language subtags
      *               / 4ALPHA              ; or reserved for future use
      *               / 5*8ALPHA            ; or registered language subtag
-     * 
+     *
      * extlang       = 3ALPHA              ; selected ISO 639 codes
      *                 *2("-" 3ALPHA)      ; permanently reserved
-     * 
+     *
      * script        = 4ALPHA              ; ISO 15924 code
-     * 
+     *
      * region        = 2ALPHA              ; ISO 3166-1 code
      *               / 3DIGIT              ; UN M.49 code
-     * 
+     *
      * variant       = 5*8alphanum         ; registered variants
      *               / (DIGIT 3alphanum)
-     * 
+     *
      * extension     = singleton 1*("-" (2*8alphanum))
-     * 
+     *
      *                                     ; Single alphanumerics
      *                                     ; "x" reserved for private use
      * singleton     = DIGIT               ; 0 - 9
@@ -153,9 +153,9 @@ public class LanguageTag {
      *               / %x59-5A             ; Y - Z
      *               / %x61-77             ; a - w
      *               / %x79-7A             ; y - z
-     * 
+     *
      * privateuse    = "x" 1*("-" (1*8alphanum))
-     * 
+     *
      */
     public static LanguageTag parse(String languageTag, ParseStatus sts) {
         if (sts == null) {
@@ -201,7 +201,7 @@ public class LanguageTag {
             if (s.length() == 0) {
                 sts._errorMsg = "Empty subtag";
             } else {
-                sts._errorMsg = "Invalid subtag: " + s; 
+                sts._errorMsg = "Invalid subtag: " + s;
             }
         }
 
@@ -692,6 +692,7 @@ public class LanguageTag {
         return AsciiUtil.toLowerString(s);
     }
 
+    @Override
     public String toString() {
         StringBuilder sb = new StringBuilder();
 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java
index 6c019a979e9..6418cd91089 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleExtensions.java
@@ -199,6 +199,7 @@ public class LocaleExtensions {
     }
 
 
+    @Override
     public String toString() {
         return _id;
     }
@@ -207,10 +208,12 @@ public class LocaleExtensions {
         return _id;
     }
 
+    @Override
     public int hashCode() {
         return _id.hashCode();
     }
 
+    @Override
     public boolean equals(Object other) {
         if (this == other) {
             return true;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java
index 0cacf0a6b0d..b38da26f370 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacterNameIterator.java
@@ -34,6 +34,7 @@ class UCharacterNameIterator implements ValueIterator
     * @return true if we are not at the end of the iteration, false otherwise.
     * @see com.ibm.icu.util.ValueIterator.Element
     */
+    @Override
     public boolean next(ValueIterator.Element element)
     {
         if (m_current_ >= m_limit_) {
@@ -74,7 +75,7 @@ class UCharacterNameIterator implements ValueIterator
                             return true;
                         }
                     }
-                    /* 
+                    /*
                     // "if (m_current_ >= m_limit_)" would not return true
                     // because it can never be reached due to:
                     // 1) It has already been checked earlier
@@ -116,6 +117,7 @@ class UCharacterNameIterator implements ValueIterator
     * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously.
     * 

*/ + @Override public void reset() { m_current_ = m_start_; @@ -138,6 +140,7 @@ class UCharacterNameIterator implements ValueIterator * @exception IllegalArgumentException thrown when attempting to set an * illegal range. E.g limit <= start */ + @Override public void setRange(int start, int limit) { if (start >= limit) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/math/BigDecimal.java b/icu4j/main/classes/core/src/com/ibm/icu/math/BigDecimal.java index f3a98eca35a..b329300ceed 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/math/BigDecimal.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/math/BigDecimal.java @@ -125,7 +125,7 @@ import com.ibm.icu.lang.UCharacter; * The floating point arithmetic provided by this class is defined by the ANSI X3.274-1996 standard, and is also * documented at http://www2.hursley.ibm.com/decimal
* [This URL will change.] - * + * *

Operator methods

*

* Operations on BigDecimal numbers are controlled by a {@link MathContext} object, which provides the @@ -215,7 +215,7 @@ import com.ibm.icu.lang.UCharacter; *

* The names of methods in this class follow the conventions established by java.lang.Number, * java.math.BigInteger, and java.math.BigDecimal in Java 1.1 and Java 1.2. - * + * * @see MathContext * @author Mike Cowlishaw * @stable ICU 2.0 @@ -228,7 +228,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /* properties constant public */// useful to others /** * The BigDecimal constant "0". - * + * * @see #ONE * @see #TEN * @stable ICU 2.0 @@ -240,7 +240,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * The BigDecimal constant "1". - * + * * @see #TEN * @see #ZERO * @stable ICU 2.0 @@ -252,7 +252,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * The BigDecimal constant "10". - * + * * @see #ONE * @see #ZERO * @stable ICU 2.0 @@ -262,7 +262,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable // the rounding modes (copied here for upwards compatibility) /** * Rounding mode to round to a more positive number. - * + * * @see MathContext#ROUND_CEILING * @stable ICU 2.0 */ @@ -270,7 +270,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to round towards zero. - * + * * @see MathContext#ROUND_DOWN * @stable ICU 2.0 */ @@ -278,7 +278,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to round to a more negative number. - * + * * @see MathContext#ROUND_FLOOR * @stable ICU 2.0 */ @@ -286,7 +286,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to round to nearest neighbor, where an equidistant value is rounded down. 
- * + * * @see MathContext#ROUND_HALF_DOWN * @stable ICU 2.0 */ @@ -294,7 +294,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to round to nearest neighbor, where an equidistant value is rounded to the nearest even neighbor. - * + * * @see MathContext#ROUND_HALF_EVEN * @stable ICU 2.0 */ @@ -302,7 +302,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to round to nearest neighbor, where an equidistant value is rounded up. - * + * * @see MathContext#ROUND_HALF_UP * @stable ICU 2.0 */ @@ -310,7 +310,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to assert that no rounding is necessary. - * + * * @see MathContext#ROUND_UNNECESSARY * @stable ICU 2.0 */ @@ -318,7 +318,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Rounding mode to round away from zero. - * + * * @see MathContext#ROUND_UP * @stable ICU 2.0 */ @@ -358,7 +358,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

    *
  • ispos -- the number is positive
  • iszero -- the number is zero
  • isneg -- the number is negative *
- * + * * @serial */ private byte ind; // assumed undefined @@ -375,7 +375,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* This property is an optimization; it allows us to defer number layout until it is actually needed as a string, * hence avoiding unnecessary formatting. - * + * * @serial */ private byte form = (byte) com.ibm.icu.math.MathContext.PLAIN; // assumed PLAIN @@ -391,7 +391,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* If the first byte is 0 then the value of the number is zero (and mant.length=1, except when constructed from a * plain number, for example, 0.000). - * + * * @serial */ private byte mant[]; // assumed null @@ -400,9 +400,9 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The exponent. *

* For fixed point arithmetic, scale is -exp, and can apply to zero. - * + * * Note that this property can have a value less than MinExp when the mantissa has more than one digit. - * + * * @serial */ private int exp; @@ -422,7 +422,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* (Note: this constructor is provided only in the com.ibm.icu.math version of the BigDecimal class. * It would not be present in a java.math version.) - * + * * @param bd The BigDecimal to be translated. * @stable ICU 2.0 */ @@ -441,7 +441,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The BigDecimal will contain only decimal digits, prefixed with a leading minus sign (hyphen) if the * BigInteger is negative. A leading zero will be present only if the BigInteger is zero. - * + * * @param bi The BigInteger to be converted. * @stable ICU 2.0 */ @@ -464,7 +464,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The BigDecimal will contain only decimal digits, (with an embedded decimal point followed by * scale decimal digits if the scale is positive), prefixed with a leading minus sign (hyphen) if the * BigInteger is negative. A leading zero will be present only if the BigInteger is zero. - * + * * @param bi The BigInteger to be converted. * @param scale The int specifying the scale. * @throws NumberFormatException If the scale is negative. @@ -488,7 +488,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Using this constructor is faster than using the BigDecimal(String) constructor if the string is * already available in character array form. - * + * * @param inchars The char[] array containing the number to be converted. * @throws NumberFormatException If the parameter is not a valid number. * @stable ICU 2.0 @@ -508,7 +508,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Using this constructor is faster than using the BigDecimal(String) constructor if the string is * already available within a character array. - * + * * @param inchars The char[] array containing the number to be converted. * @param offset The int offset into the array of the start of the number to be converted. * @param length The int length of the number. @@ -620,7 +620,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable if (dvalue < 0) bad(inchars); // not base 10 } else - dvalue = ((int) (sj)) - ((int) ('0')); + dvalue = ((sj)) - (('0')); exp = (exp * 10) + dvalue; } }/* j */ @@ -676,7 +676,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable j++; // at dot sj = inchars[j]; if (sj <= '9') - mant[i] = (byte) (((int) (sj)) - ((int) ('0')));/* easy */ + mant[i] = (byte) (((sj)) - (('0')));/* easy */ else { dvalue = UCharacter.digit(sj, 10); if (dvalue < 0) @@ -696,7 +696,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable for (; $5 > 0; $5--, i++) { if (i == dotoff) j++; - mant[i] = (byte) (((int) (inchars[j])) - ((int) ('0'))); + mant[i] = (byte) (((inchars[j])) - (('0'))); j++; } }/* i */ @@ -743,7 +743,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * to a String using the Double.toString() method and then using the * {@link #BigDecimal(java.lang.String)} constructor. To get that result, use the static {@link #valueOf(double)} * method to construct a BigDecimal from a double. - * + * * @param num The double to be converted. * @throws NumberFormatException If the parameter is infinite or not a number. * @stable ICU 2.0 @@ -764,7 +764,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Constructs a BigDecimal which is the exact decimal representation of the 32-bit signed binary * integer parameter. 
The BigDecimal will contain only decimal digits, prefixed with a leading minus * sign (hyphen) if the parameter is negative. A leading zero will be present only if the parameter is zero. - * + * * @param num The int to be converted. * @stable ICU 2.0 */ @@ -843,7 +843,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Constructs a BigDecimal which is the exact decimal representation of the 64-bit signed binary * integer parameter. The BigDecimal will contain only decimal digits, prefixed with a leading minus * sign (hyphen) if the parameter is negative. A leading zero will be present only if the parameter is zero. - * + * * @param num The long to be converted. * @stable ICU 2.0 */ @@ -896,13 +896,13 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * decimal point, and exponential notation may be used. They follow conventional syntax, and may not contain blanks. *

* Some valid strings from which a BigDecimal might be constructed are: - * + * *

-     * 
+     *
      * "0" -- Zero "12" -- A whole number "-76" -- A signed whole number "12.70" -- Some decimal places "+0.003" -- Plus
      * sign is allowed "17." -- The same as 17 ".5" -- The same as 0.5 "4E+9" -- Exponential notation "0.73e-7" --
      * Exponential notation
-     * 
+     *
      * 
*

* (Exponential notation means that the number includes an optional sign and a power of ten following an @@ -920,7 +920,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Any digits in the parameter must be decimal; that is, Character.digit(c, 10) (where c * is the character in question) would not return -1. - * + * * @param string The String to be converted. * @throws NumberFormatException If the parameter is not a valid number. * @stable ICU 2.0 @@ -948,7 +948,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The same as {@link #abs(MathContext)}, where the context is new MathContext(0, MathContext.PLAIN). *

* The length of the decimal part (the scale) of the result will be this.scale() - * + * * @return A BigDecimal whose value is the absolute value of this BigDecimal. * @stable ICU 2.0 */ @@ -963,7 +963,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * If the current object is zero or positive, then the same result as invoking the {@link #plus(MathContext)} method * with the same parameter is returned. Otherwise, the same result as invoking the {@link #negate(MathContext)} * method with the same parameter is returned. - * + * * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is the absolute value of this BigDecimal. * @stable ICU 2.0 @@ -982,7 +982,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * context is new MathContext(0, MathContext.PLAIN). *

* The length of the decimal part (the scale) of the result will be the maximum of the scales of the two operands. - * + * * @param rhs The BigDecimal for the right hand side of the addition. * @return A BigDecimal whose value is this+rhs, using fixed point arithmetic. * @stable ICU 2.0 @@ -997,7 +997,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Implements the addition (+) operator (as defined in the decimal documentation, see * {@link BigDecimal class header}), and returns the result as a BigDecimal object. - * + * * @param rhs The BigDecimal for the right hand side of the addition. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is this+rhs. @@ -1218,13 +1218,14 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The same as {@link #compareTo(BigDecimal, MathContext)}, where the BigDecimal is rhs, * and the context is new MathContext(0, MathContext.PLAIN). - * + * * @param rhs The BigDecimal for the right hand side of the comparison. * @return An int whose value is -1, 0, or 1 as this is numerically less than, equal to, * or greater than rhs. * @stable ICU 2.0 */ + @Override public int compareTo(com.ibm.icu.math.BigDecimal rhs) { return this.compareTo(rhs, plainMC); } @@ -1249,7 +1250,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * *

* A {@link #compareTo(BigDecimal)} method is also provided. - * + * * @param rhs The BigDecimal for the right hand side of the comparison. * @param set The MathContext arithmetic settings. * @return An int whose value is -1, 0, or 1 as this is numerically less than, equal to, @@ -1308,10 +1309,10 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The same as {@link #divide(BigDecimal, int)}, where the BigDecimal is rhs, and the * rounding mode is {@link MathContext#ROUND_HALF_UP}. - * + * * The length of the decimal part (the scale) of the result will be the same as the scale of the current object, if * the latter were formatted without exponential notation. - * + * * @param rhs The BigDecimal for the right hand side of the division. * @return A plain BigDecimal whose value is this/rhs, using fixed point arithmetic. * @throws ArithmeticException If rhs is zero. @@ -1332,7 +1333,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The length of the decimal part (the scale) of the result will therefore be the same as the scale of the current * object, if the latter were formatted without exponential notation. *

- * + * * @param rhs The BigDecimal for the right hand side of the division. * @param round The int rounding mode to be used for the division (see the {@link MathContext} class). * @return A plain BigDecimal whose value is this/rhs, using fixed point arithmetic and @@ -1361,7 +1362,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The length of the decimal part (the scale) of the result will be the same as the scale of the current object, if * the latter were formatted without exponential notation. *

- * + * * @param rhs The BigDecimal for the right hand side of the division. * @param scale The int scale to be used for the result. * @param round The int rounding mode to be used for the division (see the {@link MathContext} class). @@ -1388,7 +1389,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Implements the division (/) operator (as defined in the decimal documentation, see * {@link BigDecimal class header}), and returns the result as a BigDecimal object. - * + * * @param rhs The BigDecimal for the right hand side of the division. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is this/rhs. @@ -1405,7 +1406,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The same as {@link #divideInteger(BigDecimal, MathContext)}, where the BigDecimal is rhs * , and the context is new MathContext(0, MathContext.PLAIN). - * + * * @param rhs The BigDecimal for the right hand side of the integer division. * @return A BigDecimal whose value is the integer part of this/rhs. * @throws ArithmeticException if rhs is zero. @@ -1422,7 +1423,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Implements the integer division operator (as defined in the decimal documentation, see {@link BigDecimal class * header}), and returns the result as a BigDecimal object. - * + * * @param rhs The BigDecimal for the right hand side of the integer division. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is the integer part of this/rhs. @@ -1441,7 +1442,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The same as {@link #max(BigDecimal, MathContext)}, where the BigDecimal is rhs, and the * context is new MathContext(0, MathContext.PLAIN). - * + * * @param rhs The BigDecimal for the right hand side of the comparison. * @return A BigDecimal whose value is the maximum of this and rhs. * @stable ICU 2.0 @@ -1461,7 +1462,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * (using the same MathContext parameter) is returned. Otherwise, the result of calling the * {@link #plus(MathContext)} method on the first parameter object (using the same MathContext * parameter) is returned. - * + * * @param rhs The BigDecimal for the right hand side of the comparison. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is the maximum of this and rhs. @@ -1480,7 +1481,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The same as {@link #min(BigDecimal, MathContext)}, where the BigDecimal is rhs, and the * context is new MathContext(0, MathContext.PLAIN). - * + * * @param rhs The BigDecimal for the right hand side of the comparison. * @return A BigDecimal whose value is the minimum of this and rhs. * @stable ICU 2.0 @@ -1500,7 +1501,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * (using the same MathContext parameter) is returned. Otherwise, the result of calling the * {@link #plus(MathContext)} method on the first parameter object (using the same MathContext * parameter) is returned. - * + * * @param rhs The BigDecimal for the right hand side of the comparison. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is the minimum of this and rhs. @@ -1522,7 +1523,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The length of the decimal part (the scale) of the result will be the sum of the scales of the operands, if they * were formatted without exponential notation. - * + * * @param rhs The BigDecimal for the right hand side of the multiplication. * @return A BigDecimal whose value is this*rhs, using fixed point arithmetic. * @stable ICU 2.0 @@ -1537,7 +1538,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Implements the multiplication (*) operator (as defined in the decimal documentation, see * {@link BigDecimal class header}), and returns the result as a BigDecimal object. - * + * * @param rhs The BigDecimal for the right hand side of the multiplication. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is this*rhs. @@ -1638,8 +1639,8 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * . *

* The length of the decimal part (the scale) of the result will be be this.scale() - * - * + * + * * @return A BigDecimal whose value is -this. * @stable ICU 2.0 */ @@ -1653,7 +1654,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Implements the negation (Prefix -) operator (as defined in the decimal documentation, see * {@link BigDecimal class header}), and returns the result as a BigDecimal object. - * + * * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is -this. * @stable ICU 2.0 @@ -1677,7 +1678,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The same as {@link #plus(MathContext)}, where the context is new MathContext(0, MathContext.PLAIN). *

* The length of the decimal part (the scale) of the result will be be this.scale() - * + * * @return A BigDecimal whose value is +this. * @stable ICU 2.0 */ @@ -1693,7 +1694,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * {@link BigDecimal class header}), and returns the result as a BigDecimal object. *

* This method is useful for rounding or otherwise applying a context to a decimal value. - * + * * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is +this. * @stable ICU 2.0 @@ -1727,7 +1728,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* In addition, the power must not be negative, as no MathContext is used and so the result would then * always be 0. - * + * * @param rhs The BigDecimal for the right hand side of the operation (the power). * @return A BigDecimal whose value is this**rhs, using fixed point arithmetic. * @throws ArithmeticException if rhs is out of range or is not a whole number. @@ -1753,7 +1754,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* If the digits setting of the MathContext parameter is 0, the power must be zero or * positive. - * + * * @param rhs The BigDecimal for the right hand side of the operation (the power). * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is this**rhs. @@ -1833,7 +1834,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * and the context is new MathContext(0, MathContext.PLAIN). *

* This is not the modulo operator -- the result may be negative. - * + * * @param rhs The BigDecimal for the right hand side of the remainder operation. * @return A BigDecimal whose value is the remainder of this/rhs, using fixed point * arithmetic. @@ -1852,7 +1853,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * and returns the result as a BigDecimal object. *

* This is not the modulo operator -- the result may be negative. - * + * * @param rhs The BigDecimal for the right hand side of the remainder operation. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is the remainder of this+rhs. @@ -1872,7 +1873,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * and the context is new MathContext(0, MathContext.PLAIN). *

* The length of the decimal part (the scale) of the result will be the maximum of the scales of the two operands. - * + * * @param rhs The BigDecimal for the right hand side of the subtraction. * @return A BigDecimal whose value is this-rhs, using fixed point arithmetic. * @stable ICU 2.0 @@ -1887,7 +1888,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Implements the subtraction (-) operator (as defined in the decimal documentation, see * {@link BigDecimal class header}), and returns the result as a BigDecimal object. - * + * * @param rhs The BigDecimal for the right hand side of the subtraction. * @param set The MathContext arithmetic settings. * @return A BigDecimal whose value is this-rhs. @@ -1914,7 +1915,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Converts this BigDecimal to a byte. If the BigDecimal has a non-zero * decimal part or is out of the possible range for a byte (8-bit signed integer) result then an * ArithmeticException is thrown. - * + * * @return A byte equal in value to this. * @throws ArithmeticException if this has a non-zero decimal part, or will not fit in a byte. * @stable ICU 2.0 @@ -1936,11 +1937,12 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The double produced is identical to result of expressing the BigDecimal as a String and * then converting it using the Double(String) constructor; this can result in values of * Double.NEGATIVE_INFINITY or Double.POSITIVE_INFINITY. - * + * * @return A double corresponding to this. * @stable ICU 2.0 */ + @Override public double doubleValue() { // We go via a String [as does BigDecimal in JDK 1.2] // Next line could possibly raise NumberFormatException @@ -1957,7 +1959,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * are identical (they have the same characters in the same sequence). *

* The {@link #compareTo(BigDecimal, MathContext)} method should be used for more general comparisons. - * + * * @param obj The Object for the right hand side of the comparison. * @return A boolean whose value true if and only if the operands have identical string * representations. @@ -1967,6 +1969,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * @see #compareTo(BigDecimal, MathContext) */ + @Override public boolean equals(java.lang.Object obj) { com.ibm.icu.math.BigDecimal rhs; int i = 0; @@ -2018,11 +2021,12 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The float produced is identical to result of expressing the BigDecimal as a String and * then converting it using the Float(String) constructor; this can result in values of * Float.NEGATIVE_INFINITY or Float.POSITIVE_INFINITY. - * + * * @return A float corresponding to this. * @stable ICU 2.0 */ + @Override public float floatValue() { return java.lang.Float.valueOf(this.toString()).floatValue(); } @@ -2056,7 +2060,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Other rounding methods, and the use of exponential notation, can be selected by using * {@link #format(int,int,int,int,int,int)}. Using the two-parameter form of the method has exactly the same effect * as using the six-parameter form with the final four parameters all being -1. - * + * * @param before The int specifying the number of places before the decimal point. Use -1 for 'as many as are needed'. * @param after The int specifying the number of places after the decimal point. Use -1 for 'as many as are needed'. 
* @return A String representing this BigDecimal, laid out according to the specified parameters @@ -2118,7 +2122,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The special value MathContext.ROUND_UNNECESSARY may be used to detect whether non-zero digits are * discarded -- if exround has this value than if non-zero digits would be discarded (rounded) during * formatting then an ArithmeticException is thrown. - * + * * @param before The int specifying the number of places before the decimal point. Use -1 for 'as many as * are needed'. * @param after The int specifying the number of places after the decimal point. Use -1 for 'as many as @@ -2303,7 +2307,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable newa[i] = ' '; } }/* i */ - java.lang.System.arraycopy((java.lang.Object) a, 0, (java.lang.Object) newa, i, a.length); + java.lang.System.arraycopy(a, 0, newa, i, a.length); a = newa; } // [if p=before then it's just the right length] @@ -2322,7 +2326,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable // p is now offset of 'E', or 0 if (p == 0) { // no E part; add trailing blanks newa = new char[(a.length + explaces) + 2]; - java.lang.System.arraycopy((java.lang.Object) a, 0, (java.lang.Object) newa, 0, a.length); + java.lang.System.arraycopy(a, 0, newa, 0, a.length); { int $14 = explaces + 2; i = a.length; @@ -2337,7 +2341,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable badarg("format", 3, java.lang.String.valueOf(explaces)); if (places < explaces) { // need to insert zeros newa = new char[(a.length + explaces) - places]; - java.lang.System.arraycopy((java.lang.Object) a, 0, (java.lang.Object) newa, 0, p + 2); // through E + java.lang.System.arraycopy(a, 0, newa, 0, p + 2); // through E // and sign { int $15 = explaces - places; @@ -2346,7 +2350,7 @@ public class BigDecimal extends java.lang.Number implements 
java.io.Serializable newa[i] = '0'; } }/* i */ - java.lang.System.arraycopy((java.lang.Object) a, p + 2, (java.lang.Object) newa, i, places); // remainder + java.lang.System.arraycopy(a, p + 2, newa, i, places); // remainder // of // exponent a = newa; @@ -2364,11 +2368,12 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Note that two BigDecimal objects are only guaranteed to produce the same hashcode if they are * exactly equal (that is, the String representations of the BigDecimal numbers are * identical -- they have the same characters in the same sequence). - * + * * @return An int that is the hashcode for this. * @stable ICU 2.0 */ + @Override public int hashCode() { // Maybe calculate ourselves, later. If so, note that there can be // more than one internal representation for a given toString() result. @@ -2381,11 +2386,12 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * (32-bit signed integer) result then only the low-order 32 bits are used. (That is, the number may be * decapitated.) To avoid unexpected errors when these conditions occur, use the {@link #intValueExact} * method. - * + * * @return An int converted from this, truncated and decapitated if necessary. * @stable ICU 2.0 */ + @Override public int intValue() { return toBigInteger().intValue(); } @@ -2394,7 +2400,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Converts this BigDecimal to an int. If the BigDecimal has a non-zero * decimal part or is out of the possible range for an int (32-bit signed integer) result then an * ArithmeticException is thrown. - * + * * @return An int equal in value to this. * @throws ArithmeticException if this has a non-zero decimal part, or will not fit in an int. 
* @stable ICU 2.0 @@ -2465,11 +2471,12 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * (64-bit signed integer) result then only the low-order 64 bits are used. (That is, the number may be * decapitated.) To avoid unexpected errors when these conditions occur, use the {@link #longValueExact} * method. - * + * * @return A long converted from this, truncated and decapitated if necessary. * @stable ICU 2.0 */ + @Override public long longValue() { return toBigInteger().longValue(); } @@ -2478,7 +2485,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Converts this BigDecimal to a long. If the BigDecimal has a non-zero * decimal part or is out of the possible range for a long (64-bit signed integer) result then an * ArithmeticException is thrown. - * + * * @return A long equal in value to this. * @throws ArithmeticException if this has a non-zero decimal part, or will not fit in a long. * @stable ICU 2.0 @@ -2517,7 +2524,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable // note that we could safely use the 'test for wrap to negative' // algorithm here, but instead we parallel the intValueExact // algorithm for ease of checking and maintenance. - result = (long) 0; + result = 0; { int $17 = lodigit + useexp; i = 0; @@ -2556,7 +2563,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* n may be negative, in which case the method returns the same result as movePointRight(-n) * . - * + * * @param n The int specifying the number of places to move the decimal point leftwards. * @return A BigDecimal derived from this, with the decimal point moved n * places to the left. @@ -2580,7 +2587,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* n may be negative, in which case the method returns the same result as movePointLeft(-n) * . - * + * * @param n The int specifying the number of places to move the decimal point rightwards. * @return A BigDecimal derived from this, with the decimal point moved n * places to the right. @@ -2598,7 +2605,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Returns the scale of this BigDecimal. Returns a non-negative int which is the scale of * the number. The scale is the number of digits in the decimal part of the number if the number were formatted * without exponential notation. - * + * * @return An int whose value is the scale of this BigDecimal. * @stable ICU 2.0 */ @@ -2621,7 +2628,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* The same as {@link #setScale(int, int)}, where the first parameter is the scale, and the second is * MathContext.ROUND_UNNECESSARY. - * + * * @param scale The int specifying the scale of the resulting BigDecimal. * @return A plain BigDecimal with the given scale. * @throws ArithmeticException if scale is negative. @@ -2646,7 +2653,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* If round is MathContext.ROUND_UNNECESSARY, an ArithmeticException is * thrown if any discarded digits are non-zero. - * + * * @param scale The int specifying the scale of the resulting BigDecimal. * @param round The int rounding mode to be used for the division (see the {@link MathContext} class). * @return A plain BigDecimal with the given scale. @@ -2698,7 +2705,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Converts this BigDecimal to a short. If the BigDecimal has a non-zero * decimal part or is out of the possible range for a short (16-bit signed integer) result then an * ArithmeticException is thrown. - * + * * @return A short equal in value to this. * @throws ArithmeticException if this has a non-zero decimal part, or will not fit in a short. * @stable ICU 2.0 @@ -2716,14 +2723,14 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Returns the sign of this BigDecimal, as an int. This returns the signum function * value that represents the sign of this BigDecimal. That is, -1 if the BigDecimal is * negative, 0 if it is numerically equal to zero, or 1 if it is positive. - * + * * @return An int which is -1 if the BigDecimal is negative, 0 if it is numerically equal * to zero, or 1 if it is positive. * @stable ICU 2.0 */ public int signum() { - return (int) this.ind; // [note this assumes values for ind.] + return this.ind; // [note this assumes values for ind.] } /** @@ -2735,7 +2742,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* (Note: this method is provided only in the com.ibm.icu.math version of the BigDecimal class. It * would not be present in a java.math version.) - * + * * @return The java.math.BigDecimal equal in value to this BigDecimal. * @stable ICU 2.0 */ @@ -2749,7 +2756,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* Any decimal part is truncated (discarded). If an exception is desired should the decimal part be non-zero, use * {@link #toBigIntegerExact()}. - * + * * @return The java.math.BigInteger equal in value to the integer part of this BigDecimal. * @stable ICU 2.0 */ @@ -2773,7 +2780,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable res = clone(this); // safe copy newlen = res.mant.length + res.exp; newmant = new byte[newlen]; // [shorter] - java.lang.System.arraycopy((java.lang.Object) res.mant, 0, (java.lang.Object) newmant, 0, + java.lang.System.arraycopy(res.mant, 0, newmant, 0, newlen); res.mant = newmant; res.form = (byte) com.ibm.icu.math.MathContext.PLAIN; @@ -2789,7 +2796,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Converts this BigDecimal to a java.math.BigInteger. *

* An exception is thrown if the decimal part (if any) is non-zero. - * + * * @return The java.math.BigInteger equal in value to the integer part of this BigDecimal. * @throws ArithmeticException if this has a non-zero decimal part. * @stable ICU 2.0 @@ -2809,7 +2816,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Returns the BigDecimal as a character array. The result of this method is the same as using the * sequence toString().toCharArray(), but avoids creating the intermediate String and * char[] objects. - * + * * @return The char[] array corresponding to this BigDecimal. * @stable ICU 2.0 */ @@ -2825,7 +2832,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable *

* By definition, using the {@link #BigDecimal(String)} constructor on the result String will create a * BigDecimal that is exactly equal to the original BigDecimal. - * + * * @return The String exactly corresponding to this BigDecimal. * @see #format(int, int) * @see #format(int, int, int, int, int, int) @@ -2833,6 +2840,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * @stable ICU 2.0 */ + @Override public java.lang.String toString() { return new java.lang.String(layout()); } @@ -2841,7 +2849,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Returns the number as a BigInteger after removing the scale. That is, the number is expressed as a * plain number, any decimal point is then removed (retaining the digits of any decimal part), and the result is * then converted to a BigInteger. - * + * * @return The java.math.BigInteger equal in value to this BigDecimal multiplied by ten to * the power of this.scale(). * @stable ICU 2.0 @@ -2868,7 +2876,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * The number is constructed as though num had been converted to a String using the * Double.toString() method and the {@link #BigDecimal(java.lang.String)} constructor had then been used. * This is typically not an exact conversion. - * + * * @param dub The double to be translated. * @return The BigDecimal equal in value to dub. * @throws NumberFormatException if the parameter is infinite or not a number. @@ -2885,7 +2893,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /** * Translates a long to a BigDecimal. That is, returns a plain BigDecimal * whose value is equal to the given long. - * + * * @param lint The long to be translated. * @return The BigDecimal equal in value to lint. 
* @stable ICU 2.0 @@ -2905,7 +2913,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * (new BigDecimal(lint)).divide(TEN.pow(new BigDecimal(scale))) *

* A NumberFormatException is thrown if scale is negative. - * + * * @param lint The long to be translated. * @param scale The int scale to be applied. * @return The BigDecimal equal in value to lint. @@ -2963,7 +2971,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable int $18 = mant.length; i = 0; for (; $18 > 0; $18--, i++) { - cmant[i] = (char) (mant[i] + ((int) ('0'))); + cmant[i] = (char) (mant[i] + (('0'))); } }/* i */ @@ -3019,7 +3027,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable return cmant; // non-negative integer rec = new char[cmant.length + 1]; rec[0] = '-'; - java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, 1, cmant.length); + java.lang.System.arraycopy(cmant, 0, rec, 1, cmant.length); return rec; } @@ -3045,7 +3053,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable rec[i] = '0'; } }/* i */ - java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, (needsign + 2) - mag, + java.lang.System.arraycopy(cmant, 0, rec, (needsign + 2) - mag, cmant.length); return rec; } @@ -3055,7 +3063,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable rec = new char[len]; if (needsign != 0) rec[0] = '-'; - java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, needsign, cmant.length); + java.lang.System.arraycopy(cmant, 0, rec, needsign, cmant.length); { int $21 = mag - cmant.length; i = needsign + cmant.length; @@ -3071,9 +3079,9 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable rec = new char[len]; if (needsign != 0) rec[0] = '-'; - java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, needsign, mag); + java.lang.System.arraycopy(cmant, 0, rec, needsign, mag); rec[needsign + mag] = '.'; - java.lang.System.arraycopy((java.lang.Object) cmant, mag, (java.lang.Object) rec, (needsign + mag) + 1, + 
java.lang.System.arraycopy(cmant, mag, rec, (needsign + mag) + 1, cmant.length - mag); return rec; } @@ -3096,9 +3104,9 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /* * Arg1 is operation code: D=divide, I=integer divide, R=remainder Arg2 is the rhs. Arg3 is the context. Arg4 is * explicit scale iff code='D' or 'I' (-1 if none). - * + * * Underlying algorithm (complications for Remainder function and scaled division are omitted for clarity): - * + * * Test for x/0 and then 0/x Exp =Exp1 - Exp2 Exp =Exp +len(var1) -len(var2) Sign=Sign1 Sign2 Pad accumulator (Var1) * to double-length with 0's (pad1) Pad Var2 to same length as Var1 B2B=1st two digits of var2, +1 to allow for * roundup have=0 Do until (have=digits+1 OR residue=0) if exp<0 then if integer divide/residue then leave @@ -3108,7 +3116,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * if mult=0 then mult=1 this_digit=this_digit+mult subtract end inner_loop if have\=0 | this_digit\=0 then do * output this_digit have=have+1; end var2=var2/10 exp=exp-1 end outer_loop exp=exp+1 -- set the proper exponent if * have=0 then generate answer=0 Return to FINISHED Result defined by MATHV1 - * + * * For extended commentary, see DMSRCN. */ @@ -3248,7 +3256,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable } while (false); }/* compare */ /* prepare for subtraction. 
Estimate BA (lengths the same) */ - ba = (int) var1[0]; // use only first digit + ba = var1[0]; // use only first digit } // lengths the same else {/* lhs longer than rhs */ /* use first two digits for estimate */ @@ -3281,7 +3289,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable if (start == 0) continue inner; // shift left - java.lang.System.arraycopy((java.lang.Object) var1, start, (java.lang.Object) var1, 0, var1len); + java.lang.System.arraycopy(var1, start, var1, 0, var1len); } }/* inner */ @@ -3351,7 +3359,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable }/* i */ if (d < var1.length) {/* need to reduce */ newvar1 = new byte[d]; - java.lang.System.arraycopy((java.lang.Object) var1, 0, (java.lang.Object) newvar1, 0, d); // shorten + java.lang.System.arraycopy(var1, 0, newvar1, 0, d); // shorten var1 = newvar1; } res.mant = var1; @@ -3404,7 +3412,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable // [we could let finish do this, during strip, if we adjusted // the exponent; however, truncation avoids the strip loop] newmant = new byte[have]; // shorten - java.lang.System.arraycopy((java.lang.Object) res.mant, 0, (java.lang.Object) newmant, 0, have); + java.lang.System.arraycopy(res.mant, 0, newmant, 0, have); res.mant = newmant; } return res.finish(set, true); @@ -3429,7 +3437,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /* * Extend byte array to given length, padding with 0s. If no extension is required then return the same * array. 
- * + * * Arg1 is the source byte array Arg2 is the new length (longer) */ @@ -3438,7 +3446,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable if (inarr.length == newlen) return inarr; newarr = new byte[newlen]; - java.lang.System.arraycopy((java.lang.Object) inarr, 0, (java.lang.Object) newarr, 0, inarr.length); + java.lang.System.arraycopy(inarr, 0, newarr, 0, inarr.length); // 0 padding is carried out by the JVM on allocation initialization return newarr; } @@ -3447,15 +3455,15 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable * Add or subtract two >=0 integers in byte arrays

This routine performs the calculation:

 C=A+(B*M)
      * 
Where M is in the range -9 through +9

If M<0 then A>=B must be true, so the result is always * non-negative. - * + * * Leading zeros are not removed after a subtraction. The result is either the same length as the longer of A and B, * or 1 longer than that (if a carry occurred). - * + * * A is not altered unless Arg6 is 1. B is never altered. - * + * * Arg1 is A Arg2 is A length to use (if longer than A, pad with 0's) Arg3 is B Arg4 is B length to use (if longer * than B, pad with 0's) Arg5 is M, the multiplier Arg6 is 1 if A can be used to build the result (if it fits) - * + * * This routine is severely performance-critical;any change here must be measured (timed) to assure no performance * degradation. */ @@ -3488,7 +3496,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable maxarr = bp; if (maxarr < ap) maxarr = ap; - reb = (byte[]) null; // result byte array + reb = null; // result byte array if (reuse) if ((maxarr + 1) == alength) reb = a; // OK to reuse A @@ -3543,7 +3551,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable // if digit<0 then signal ArithmeticException("internal.error ["digit"]") /* We have carry -- need to make space for the extra digit */ - newarr = (byte[]) null; + newarr = null; if (reuse) if ((maxarr + 2) == a.length) newarr = a; // OK to reuse A @@ -3559,7 +3567,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable } }/* i */ else - java.lang.System.arraycopy((java.lang.Object) reb, 0, (java.lang.Object) newarr, 1, maxarr + 1); + java.lang.System.arraycopy(reb, 0, newarr, 1, maxarr + 1); return newarr; } @@ -3637,9 +3645,9 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /* * Round to specified digits, if necessary. Arg1 is requested length (digits to round to) [may be <=0 when * called from format, dodivide, etc.] 
Arg2 is rounding mode returns this, for convenience - * + * * ind and exp are adjusted, but not cleared for a mantissa of zero - * + * * The length of the mantissa returned will be Arg1, except when Arg1 is 0, in which case the returned mantissa * length will be 1. */ @@ -3657,12 +3665,12 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable return this; // nowt to do exp = exp + adjust; // exponent of result - sign = (int) ind; // save [assumes -1, 0, 1] + sign = ind; // save [assumes -1, 0, 1] oldmant = mant; // save if (len > 0) { // remove the unwanted digits mant = new byte[len]; - java.lang.System.arraycopy((java.lang.Object) oldmant, 0, (java.lang.Object) mant, 0, len); + java.lang.System.arraycopy(oldmant, 0, mant, 0, len); reuse = true; // can reuse mantissa first = oldmant[len]; // first of discarded digits } else {/* len<=0 */ @@ -3736,7 +3744,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable // drop rightmost digit and raise exponent exp++; // mant is already the correct length - java.lang.System.arraycopy((java.lang.Object) newmant, 0, (java.lang.Object) mant, 0, + java.lang.System.arraycopy(newmant, 0, mant, 0, mant.length); } else mant = newmant; @@ -3752,7 +3760,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable /* * Test if rightmost digits are all 0. 
Arg1 is a mantissa array to test Arg2 is the offset of first digit to * check [may be negative; if so, digits to left are 0's] returns 1 if all the digits starting at Arg2 are 0 - * + * * Arg2 may be beyond array bounds, in which case 1 is returned */ @@ -3809,7 +3817,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable }/* i */ if (d < this.mant.length) {/* need to reduce */ newmant = new byte[d]; - java.lang.System.arraycopy((java.lang.Object) this.mant, 0, (java.lang.Object) newmant, 0, d); + java.lang.System.arraycopy(this.mant, 0, newmant, 0, d); this.mant = newmant; } } @@ -3827,7 +3835,7 @@ public class BigDecimal extends java.lang.Number implements java.io.Serializable if (i > 0) { do { newmant = new byte[this.mant.length - i]; - java.lang.System.arraycopy((java.lang.Object) this.mant, i, (java.lang.Object) newmant, 0, + java.lang.System.arraycopy(this.mant, i, newmant, 0, this.mant.length - i); this.mant = newmant; } while (false); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/math/MathContext.java b/icu4j/main/classes/core/src/com/ibm/icu/math/MathContext.java index 690cc2f2716..d777c4bec13 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/math/MathContext.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/math/MathContext.java @@ -81,7 +81,7 @@ package com.ibm.icu.math; public final class MathContext implements java.io.Serializable{ //private static final java.lang.String $0="MathContext.nrx"; - + /* ----- Properties ----- */ /* properties public constant */ /** @@ -96,7 +96,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int PLAIN=0; // [no exponent] - + /** * Standard floating point notation (with scientific exponential * format, where there is one digit before any decimal point). @@ -110,7 +110,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int SCIENTIFIC=1; // 1 digit before . 
- + /** * Standard floating point notation (with engineering exponential * format, where the power of ten is a multiple of 3). @@ -124,7 +124,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ENGINEERING=2; // 1-3 digits before . - + // The rounding modes match the original BigDecimal class values /** * Rounding mode to round to a more positive number. @@ -136,7 +136,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_CEILING=2; - + /** * Rounding mode to round towards zero. * Used as a setting to control the rounding mode used during a @@ -147,7 +147,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_DOWN=1; - + /** * Rounding mode to round to a more negative number. * Used as a setting to control the rounding mode used during a @@ -158,7 +158,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_FLOOR=3; - + /** * Rounding mode to round to nearest neighbor, where an equidistant * value is rounded down. @@ -172,7 +172,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_HALF_DOWN=5; - + /** * Rounding mode to round to nearest neighbor, where an equidistant * value is rounded to the nearest even neighbor. @@ -190,7 +190,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_HALF_EVEN=6; - + /** * Rounding mode to round to nearest neighbor, where an equidistant * value is rounded up. @@ -204,7 +204,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_HALF_UP=4; - + /** * Rounding mode to assert that no rounding is necessary. 
* Used as a setting to control the rounding mode used during a @@ -216,7 +216,7 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_UNNECESSARY=7; - + /** * Rounding mode to round away from zero. * Used as a setting to control the rounding mode used during a @@ -227,8 +227,8 @@ public final class MathContext implements java.io.Serializable{ * @stable ICU 2.0 */ public static final int ROUND_UP=0; - - + + /* properties shared */ /** * The number of digits (precision) to be used for an operation. @@ -245,7 +245,7 @@ public final class MathContext implements java.io.Serializable{ * @serial */ int digits; - + /** * The form of results from an operation. *

@@ -259,7 +259,7 @@ public final class MathContext implements java.io.Serializable{ * @serial */ int form; // values for this must fit in a byte - + /** * Controls whether lost digits checking is enabled for an * operation. @@ -275,7 +275,7 @@ public final class MathContext implements java.io.Serializable{ * @serial */ boolean lostDigits; - + /** * The rounding algorithm to be used for an operation. *

@@ -296,33 +296,33 @@ public final class MathContext implements java.io.Serializable{ * @serial */ int roundingMode; - + /* properties private constant */ // default settings private static final int DEFAULT_FORM=SCIENTIFIC; private static final int DEFAULT_DIGITS=9; private static final boolean DEFAULT_LOSTDIGITS=false; private static final int DEFAULT_ROUNDINGMODE=ROUND_HALF_UP; - + /* properties private constant */ - + private static final int MIN_DIGITS=0; // smallest value for DIGITS. private static final int MAX_DIGITS=999999999; // largest value for DIGITS. If increased, // the BigDecimal class may need update. // list of valid rounding mode values, most common two first private static final int ROUNDS[]=new int[]{ROUND_HALF_UP,ROUND_UNNECESSARY,ROUND_CEILING,ROUND_DOWN,ROUND_FLOOR,ROUND_HALF_DOWN,ROUND_HALF_EVEN,ROUND_UP}; - - + + private static final java.lang.String ROUNDWORDS[]=new java.lang.String[]{"ROUND_HALF_UP","ROUND_UNNECESSARY","ROUND_CEILING","ROUND_DOWN","ROUND_FLOOR","ROUND_HALF_DOWN","ROUND_HALF_EVEN","ROUND_UP"}; // matching names of the ROUNDS values - - - - + + + + /* properties private constant unused */ - + // Serialization version private static final long serialVersionUID=7163376998892515376L; - + /* properties public constant */ /** * A MathContext object initialized to the default @@ -336,11 +336,11 @@ public final class MathContext implements java.io.Serializable{ */ public static final com.ibm.icu.math.MathContext DEFAULT=new com.ibm.icu.math.MathContext(DEFAULT_DIGITS,DEFAULT_FORM,DEFAULT_LOSTDIGITS,DEFAULT_ROUNDINGMODE); - - - + + + /* ----- Constructors ----- */ - + /** * Constructs a new MathContext with a specified * precision. @@ -356,12 +356,12 @@ public final class MathContext implements java.io.Serializable{ * @throws IllegalArgumentException parameter out of range. 
* @stable ICU 2.0 */ - + public MathContext(int setdigits){ this(setdigits,DEFAULT_FORM,DEFAULT_LOSTDIGITS,DEFAULT_ROUNDINGMODE); return;} - + /** * Constructs a new MathContext with a specified * precision and form. @@ -381,7 +381,7 @@ public final class MathContext implements java.io.Serializable{ * @throws IllegalArgumentException parameter out of range. * @stable ICU 2.0 */ - + public MathContext(int setdigits,int setform){ this(setdigits,setform,DEFAULT_LOSTDIGITS,DEFAULT_ROUNDINGMODE); return;} @@ -407,7 +407,7 @@ public final class MathContext implements java.io.Serializable{ * @throws IllegalArgumentException parameter out of range. * @stable ICU 2.0 */ - + public MathContext(int setdigits,int setform,boolean setlostdigits){ this(setdigits,setform,setlostdigits,DEFAULT_ROUNDINGMODE); return;} @@ -433,16 +433,16 @@ public final class MathContext implements java.io.Serializable{ * @throws IllegalArgumentException parameter out of range. * @stable ICU 2.0 */ - + public MathContext(int setdigits,int setform,boolean setlostdigits,int setroundingmode){super(); - - + + // set values, after checking - if (setdigits!=DEFAULT_DIGITS) + if (setdigits!=DEFAULT_DIGITS) { - if (setdigits<MIN_DIGITS) + if (setdigits<MIN_DIGITS) throw new java.lang.IllegalArgumentException("Digits too small:"+" "+setdigits); - if (setdigits>MAX_DIGITS) + if (setdigits>MAX_DIGITS) throw new java.lang.IllegalArgumentException("Digits too large:"+" "+setdigits); } {/*select*/ @@ -454,7 +454,7 @@ public final class MathContext implements java.io.Serializable{ throw new java.lang.IllegalArgumentException("Bad form value:"+" "+setform); } } - if ((!(isValidRound(setroundingmode)))) + if ((!(isValidRound(setroundingmode)))) throw new java.lang.IllegalArgumentException("Bad roundingMode value:"+" "+setroundingmode); digits=setdigits; form=setform; @@ -470,7 +470,7 @@ public final class MathContext implements java.io.Serializable{ * setting * @stable ICU 2.0 */ - + public int getDigits(){ return digits; } @@ -485,7 +485,7 @@ public final class MathContext implements java.io.Serializable{ * @return an int which is the value of the
form setting * @stable ICU 2.0 */ - + public int getForm(){ return form; } @@ -499,7 +499,7 @@ public final class MathContext implements java.io.Serializable{ * setting * @stable ICU 2.0 */ - + public boolean getLostDigits(){ return lostDigits; } @@ -520,7 +520,7 @@ public final class MathContext implements java.io.Serializable{ * setting * @stable ICU 2.0 */ - + public int getRoundingMode(){ return roundingMode; } @@ -562,7 +562,8 @@ public final class MathContext implements java.io.Serializable{ * @return a String representing the context settings. * @stable ICU 2.0 */ - + + @Override public java.lang.String toString(){ java.lang.String formstr=null; int r=0; @@ -577,7 +578,7 @@ public final class MathContext implements java.io.Serializable{ } } {int $1=ROUNDS.length;r=0;r:for(;$1>0;$1--,r++){ - if (roundingMode==ROUNDS[r]) + if (roundingMode==ROUNDS[r]) { roundword=ROUNDWORDS[r]; break r; @@ -587,14 +588,14 @@ public final class MathContext implements java.io.Serializable{ return "digits="+digits+" "+"form="+formstr+" "+"lostDigits="+(lostDigits?"1":"0")+" "+"roundingMode="+roundword; } - + /* Test whether round is valid. */ // This could be made shared for use by BigDecimal for setScale. 
- + private static boolean isValidRound(int testround){ int r=0; {int $2=ROUNDS.length;for(r=0;$2>0;$2--,r++){ - if (testround==ROUNDS[r]) + if (testround==ROUNDS[r]) return true; } }/*r*/ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ArabicShaping.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ArabicShaping.java index c39bbc89bf8..1944de6f22e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ArabicShaping.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ArabicShaping.java @@ -61,7 +61,7 @@ public final class ArabicShaping { private char tailChar; /** - * Convert a range of text in the source array, putting the result + * Convert a range of text in the source array, putting the result * into a range of text in the destination array, and return the number * of characters written. * @@ -69,14 +69,14 @@ public final class ArabicShaping { * @param sourceStart The start of the range of text to convert * @param sourceLength The length of the range of text to convert * @param dest The destination array that will receive the result. - * It may be NULL only if destSize is 0. + * It may be NULL only if destSize is 0. * @param destStart The start of the range of the destination buffer to use. * @param destSize The size (capacity) of the destination buffer. * If destSize is 0, then no output is produced, * but the necessary buffer size is returned ("preflighting"). This - * does not validate the text against the options, for example, + * does not validate the text against the options, for example, * if letters are being unshaped, and spaces are being consumed - * following lamalef, this will not detect a lamalef without a + * following lamalef, this will not detect a lamalef without a * corresponding space. An error will be thrown when the actual * conversion is attempted. * @return The number of chars written to the destination buffer. 
@@ -100,8 +100,8 @@ public final class ArabicShaping { } if ((destSize != 0) && (destStart < 0 || destSize < 0 || destStart + destSize > dest.length)) { - throw new IllegalArgumentException("bad dest start (" + destStart + - ") or size (" + destSize + + throw new IllegalArgumentException("bad dest start (" + destStart + + ") or size (" + destSize + ") for buffer of length " + dest.length); } /* Validate input options */ @@ -124,7 +124,7 @@ public final class ArabicShaping { throw new IllegalArgumentException("Wrong Lam Alef argument"); } ///CLOVER:ON - + /* Validate Tashkeel (Tashkeel replacement options should be enabled in shaping mode only)*/ if(((options&TASHKEEL_MASK) != 0) && (options&LETTERS_MASK) == LETTERS_UNSHAPE) { throw new IllegalArgumentException("Tashkeel replacement should not be enabled in deshaping mode "); @@ -174,7 +174,7 @@ public final class ArabicShaping { * Construct ArabicShaping using the options flags. * The flags are as follows:
* 'LENGTH' flags control whether the text can change size, and if not, - * how to maintain the size of the text when LamAlef ligatures are + * how to maintain the size of the text when LamAlef ligatures are * formed or broken.
* 'TEXT_DIRECTION' flags control whether the text is read and written * in visual order or in logical order.
@@ -191,7 +191,7 @@ public final class ArabicShaping { if ((options & DIGITS_MASK) > 0x80) { throw new IllegalArgumentException("bad DIGITS options"); } - + isLogical = ( (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL ); /* Validate options */ spacesRelativeToTextBeginEnd = ( (options & SPACES_RELATIVE_TO_TEXT_MASK) == SPACES_RELATIVE_TO_TEXT_BEGIN_END ); @@ -201,11 +201,11 @@ public final class ArabicShaping { tailChar = OLD_TAIL_CHAR; } } - - /* Seen Tail options */ + + /* Seen Tail options */ /** * Memory option: the result must have the same length as the source. - * Shaping mode: The SEEN family character will expand into two characters using space near + * Shaping mode: The SEEN family character will expand into two characters using space near * the SEEN family character(i.e. the space after the character). * if there are no spaces found, ArabicShapingException will be thrown * @@ -216,35 +216,35 @@ public final class ArabicShaping { */ public static final int SEEN_TWOCELL_NEAR = 0x200000; - /** Bit mask for Seen memory options. + /** Bit mask for Seen memory options. * @stable ICU 4.2 */ public static final int SEEN_MASK = 0x700000; - /* YehHamza options */ + /* YehHamza options */ /** * Memory option: the result must have the same length as the source. - * Shaping mode: The YEHHAMZA character will expand into two characters using space near it + * Shaping mode: The YEHHAMZA character will expand into two characters using space near it * (i.e. the space after the character) * if there are no spaces found, ArabicShapingException will be thrown * * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be * replaced by one cell YehHamza and space will replace the Hamza. * Affects: YehHamza options - * @stable ICU 4.2 + * @stable ICU 4.2 */ public static final int YEHHAMZA_TWOCELL_NEAR = 0x1000000; - /** Bit mask for YehHamza memory options. + /** Bit mask for YehHamza memory options. 
* @stable ICU 4.2 */ public static final int YEHHAMZA_MASK = 0x3800000; - /* New Tashkeel options */ + /* New Tashkeel options */ /** * Memory option: the result must have the same length as the source. - * Shaping mode: Tashkeel characters will be replaced by spaces. + * Shaping mode: Tashkeel characters will be replaced by spaces. * Spaces will be placed at beginning of the buffer * * De-shaping mode: N/A @@ -255,7 +255,7 @@ public final class ArabicShaping { /** * Memory option: the result must have the same length as the source. - * Shaping mode: Tashkeel characters will be replaced by spaces. + * Shaping mode: Tashkeel characters will be replaced by spaces. * Spaces will be placed at end of the buffer * * De-shaping mode: N/A @@ -266,8 +266,8 @@ public final class ArabicShaping { /** * Memory option: allow the result to have a different length than the source. - * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. - * De-shaping mode: N/A + * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. + * De-shaping mode: N/A * * Affects: Tashkeel options * @stable ICU 4.2 @@ -285,42 +285,42 @@ public final class ArabicShaping { */ public static final int TASHKEEL_REPLACE_BY_TATWEEL = 0xC0000; - /** Bit mask for Tashkeel replacement with Space or Tatweel memory options. + /** Bit mask for Tashkeel replacement with Space or Tatweel memory options. * @stable ICU 4.2 */ public static final int TASHKEEL_MASK = 0xE0000; - - /* Space location Control options */ + + /* Space location Control options */ /** * This option effects the meaning of BEGIN and END options. if this option is not used the default - * for BEGIN and END will be as following: + * for BEGIN and END will be as following: * The Default (for both Visual LTR, Visual RTL and Logical Text) * 1. BEGIN always refers to the start address of physical memory. * 2. END always refers to the end address of physical memory. 
* - * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text. + * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text. * * The affect on BEGIN and END Memory Options will be as following: - * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text + * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text * (corresponding to the physical memory address end, same as END in default behavior) - * B. BEGIN For Logical text: Same as BEGIN in default behavior. - * C. END For Visual LTR text: This will be the end (left side) of the visual text. (corresponding to - * the physical memory address beginning, same as BEGIN in default behavior) - * D. END For Logical text: Same as END in default behavior. + * B. BEGIN For Logical text: Same as BEGIN in default behavior. + * C. END For Visual LTR text: This will be the end (left side) of the visual text. (corresponding to + * the physical memory address beginning, same as BEGIN in default behavior) + * D. END For Logical text: Same as END in default behavior. * Affects: All LamAlef BEGIN, END and AUTO options. - * @stable ICU 4.2 + * @stable ICU 4.2 */ public static final int SPACES_RELATIVE_TO_TEXT_BEGIN_END = 0x4000000; - /** Bit mask for swapping BEGIN and END for Visual LTR text + /** Bit mask for swapping BEGIN and END for Visual LTR text * @stable ICU 4.2 */ public static final int SPACES_RELATIVE_TO_TEXT_MASK = 0x4000000; - + /** - * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73). + * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73). * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 
0x200B) - * De-shaping will not use this option as it will always search for both the new Unicode code point for the + * De-shaping will not use this option as it will always search for both the new Unicode code point for the * TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the * Seen-Family letter accordingly. * @@ -331,7 +331,7 @@ public final class ArabicShaping { */ public static final int SHAPE_TAIL_NEW_UNICODE = 0x8000000; - /** Bit mask for new Unicode Tail option + /** Bit mask for new Unicode Tail option * @stable ICU 4.2 */ public static final int SHAPE_TAIL_TYPE_MASK = 0x8000000; @@ -349,7 +349,7 @@ public final class ArabicShaping { * @stable ICU 4.2 */ public static final int LAMALEF_RESIZE = 0; - + /** * Memory option: the result must have the same length as the source. * If more room is necessary, then try to consume spaces next to modified characters. @@ -365,7 +365,7 @@ public final class ArabicShaping { * @stable ICU 4.2 */ public static final int LAMALEF_NEAR = 1 ; - + /** * Memory option: the result must have the same length as the source. * If more room is necessary, then try to consume spaces at the end of the text. @@ -382,7 +382,7 @@ public final class ArabicShaping { * @stable ICU 4.2 */ public static final int LAMALEF_END = 2; - + /** * Memory option: the result must have the same length as the source. * If more room is necessary, then try to consume spaces at the beginning of the text. @@ -397,7 +397,7 @@ public final class ArabicShaping { * This option is an alias to LENGTH_FIXED_SPACES_AT_BEGINNING * @stable ICU 4.2 */ - public static final int LAMALEF_BEGIN = 3; + public static final int LAMALEF_BEGIN = 3; /** * Memory option: the result must have the same length as the source. @@ -406,26 +406,26 @@ public final class ArabicShaping { * is no space at beginning of the buffer, use spaces at the near (i.e. the space * after the LAMALEF character). 
* - * Deshaping Mode: Perform the same function as the flag equals LAMALEF_END. + * Deshaping Mode: Perform the same function as the flag equals LAMALEF_END. * Affects: LamAlef options * @stable ICU 4.2 */ - public static final int LAMALEF_AUTO = 0x10000; - - /** - * Bit mask for memory options. + public static final int LAMALEF_AUTO = 0x10000; + + /** + * Bit mask for memory options. * @stable ICU 2.0 */ public static final int LENGTH_MASK = 0x10003; - /** Bit mask for LamAlef memory options. + /** Bit mask for LamAlef memory options. * @stable ICU 4.2 */ public static final int LAMALEF_MASK = 0x10003; - /** - * Direction indicator: the source is in logical (keyboard) order. + /** + * Direction indicator: the source is in logical (keyboard) order. * @stable ICU 2.0 */ public static final int TEXT_DIRECTION_LOGICAL = 0; @@ -437,28 +437,28 @@ public final class ArabicShaping { * @stable ICU 4.2 */ public static final int TEXT_DIRECTION_VISUAL_RTL = 0; - - /** + + /** * Direction indicator: the source is in visual (display) order, that is, * the leftmost displayed character is stored first. * @stable ICU 2.0 */ public static final int TEXT_DIRECTION_VISUAL_LTR = 4; - /** - * Bit mask for direction indicators. + /** + * Bit mask for direction indicators. * @stable ICU 2.0 */ public static final int TEXT_DIRECTION_MASK = 4; /** - * Letter shaping option: do not perform letter shaping. + * Letter shaping option: do not perform letter shaping. * @stable ICU 2.0 */ public static final int LETTERS_NOOP = 0; - /** + /** * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block, * by shaped ones in the U+FE70 (Presentation Forms B) block. Performs Lam-Alef ligature * substitution. @@ -466,7 +466,7 @@ public final class ArabicShaping { */ public static final int LETTERS_SHAPE = 8; - /** + /** * Letter shaping option: replace shaped letter characters in the U+FE70 (Presentation Forms B) block * by normative ones in the U+0600 (Arabic) block. 
Converts Lam-Alef ligatures to pairs of Lam and * Alef characters, consuming spaces if required. @@ -483,15 +483,15 @@ public final class ArabicShaping { */ public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18; - /** - * Bit mask for letter shaping options. + /** + * Bit mask for letter shaping options. * @stable ICU 2.0 */ public static final int LETTERS_MASK = 0x18; - /** - * Digit shaping option: do not perform digit shaping. + /** + * Digit shaping option: do not perform digit shaping. * @stable ICU 2.0 */ public static final int DIGITS_NOOP = 0; @@ -512,7 +512,7 @@ public final class ArabicShaping { * Digit shaping option: * Replace European digits (U+0030...U+0039) by Arabic-Indic digits * if the most recent strongly directional character - * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC). + * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC). * The initial state at the start of the text is assumed to be not an Arabic, * letter, so European digits at the start of the text will not change. * Compare to DIGITS_ALEN2AN_INIT_AL. @@ -524,7 +524,7 @@ public final class ArabicShaping { * Digit shaping option: * Replace European digits (U+0030...U+0039) by Arabic-Indic digits * if the most recent strongly directional character - * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC). + * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC). * The initial state at the start of the text is assumed to be an Arabic, * letter, so European digits at the start of the text will change. * Compare to DIGITS_ALEN2AN_INT_LR. @@ -535,26 +535,26 @@ public final class ArabicShaping { /** Not a valid option value. */ //private static final int DIGITS_RESERVED = 0xa0; - /** - * Bit mask for digit shaping options. + /** + * Bit mask for digit shaping options. * @stable ICU 2.0 */ public static final int DIGITS_MASK = 0xe0; - /** - * Digit type option: Use Arabic-Indic digits (U+0660...U+0669). 
+ /** + * Digit type option: Use Arabic-Indic digits (U+0660...U+0669). * @stable ICU 2.0 */ public static final int DIGIT_TYPE_AN = 0; - /** - * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). + /** + * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). * @stable ICU 2.0 */ public static final int DIGIT_TYPE_AN_EXTENDED = 0x100; - /** - * Bit mask for digit type options. + /** + * Bit mask for digit type options. * @stable ICU 2.0 */ public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00? @@ -571,7 +571,7 @@ public final class ArabicShaping { private static final char LAM_CHAR = '\u0644'; private static final char SPACE_CHAR = '\u0020'; private static final char SHADDA_CHAR = '\uFE7C'; - private static final char SHADDA06_CHAR = '\u0651'; + private static final char SHADDA06_CHAR = '\u0651'; private static final char TATWEEL_CHAR = '\u0640'; private static final char SHADDA_TATWEEL_CHAR = '\uFE7D'; private static final char NEW_TAIL_CHAR = '\uFE73'; @@ -582,9 +582,10 @@ public final class ArabicShaping { /** * @stable ICU 2.0 */ + @Override public boolean equals(Object rhs) { - return rhs != null && - rhs.getClass() == ArabicShaping.class && + return rhs != null && + rhs.getClass() == ArabicShaping.class && options == ((ArabicShaping)rhs).options; } @@ -592,6 +593,7 @@ public final class ArabicShaping { * @stable ICU 2.0 */ ///CLOVER:OFF + @Override public int hashCode() { return options; } @@ -599,6 +601,7 @@ public final class ArabicShaping { /** * @stable ICU 2.0 */ + @Override public String toString() { StringBuilder buf = new StringBuilder(super.toString()); buf.append('['); @@ -662,23 +665,23 @@ public final class ArabicShaping { private static final int LINKL = 2; private static final int LINK_MASK = 3; - private static final int irrelevantPos[] = { - 0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE + private static final int irrelevantPos[] = { + 0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE }; /* private 
static final char convertLamAlef[] = { - '\u0622', // FEF5 + '\u0622', // FEF5 '\u0622', // FEF6 '\u0623', // FEF7 '\u0623', // FEF8 '\u0625', // FEF9 '\u0625', // FEFA '\u0627', // FEFB - '\u0627' // FEFC + '\u0627' // FEFC }; */ - + private static final int tailFamilyIsolatedFinal[] = { /* FEB1 */ 1, /* FEB2 */ 1, @@ -923,7 +926,7 @@ public final class ArabicShaping { * Function: Converts the Alef characters into an equivalent * LamAlef location in the 0x06xx Range, this is an * intermediate stage in the operation of the program - * later it'll be converted into the 0xFExx LamAlefs + * later it'll be converted into the 0xFExx LamAlefs * in the shaping function. */ private static char changeLamAlef(char ch) { @@ -942,7 +945,7 @@ public final class ArabicShaping { * function, this function returns 1 or 2 for these special characters */ private static int specialChar(char ch) { - if ((ch > '\u0621' && ch < '\u0626') || + if ((ch > '\u0621' && ch < '\u0626') || (ch == '\u0627') || (ch > '\u062E' && ch < '\u0633') || (ch > '\u0647' && ch < '\u064A') || @@ -950,7 +953,7 @@ public final class ArabicShaping { return 1; } else if (ch >= '\u064B' && ch<= '\u0652') { return 2; - } else if (ch >= 0x0653 && ch <= 0x0655 || + } else if (ch >= 0x0653 && ch <= 0x0655 || ch == 0x0670 || ch >= 0xFE70 && ch <= 0xFE7F) { return 3; @@ -958,10 +961,10 @@ public final class ArabicShaping { return 0; } } - + /* * Name : getLink - * Function: Resolves the link between the characters as + * Function: Resolves the link between the characters as * Arabic characters have four forms : * Isolated, Initial, Middle and Final Form */ @@ -984,7 +987,7 @@ public final class ArabicShaping { * Function: Counts the number of spaces * at each end of the logical buffer */ - private static int countSpacesLeft(char[] dest, + private static int countSpacesLeft(char[] dest, int start, int count) { for (int i = start, e = start + count; i < e; ++i) { @@ -1017,7 +1020,7 @@ public final class ArabicShaping { 
/* *Name : isSeenTailFamilyChar - *Function : returns 1 if the character is a seen family isolated character + *Function : returns 1 if the character is a seen family isolated character * in the FE range otherwise returns 0 */ @@ -1044,7 +1047,7 @@ public final class ArabicShaping { /* *Name : isTailChar - *Function : returns true if the character matches one of the tail characters + *Function : returns true if the character matches one of the tail characters * (0xfe73 or 0x200b) otherwise returns false */ @@ -1055,15 +1058,15 @@ public final class ArabicShaping { return false; } } - + /* *Name : isAlefMaksouraChar - *Function : returns true if the character is a Alef Maksoura Final or isolated + *Function : returns true if the character is a Alef Maksoura Final or isolated * otherwise returns false */ private static boolean isAlefMaksouraChar(char ch) { return ( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); - } + } /* * Name : isYehHamzaChar @@ -1076,22 +1079,22 @@ public final class ArabicShaping { }else{ return false; } - } + } /* *Name : isTashkeelCharFE *Function : Returns true for Tashkeel characters in FE range else return false */ - + private static boolean isTashkeelCharFE(char ch) { return ( ch!=0xFE75 &&(ch>=0xFE70 && ch<= 0xFE7F) ); } - /* + /* * Name: isTashkeelOnTatweelChar - * Function: Checks if the Tashkeel Character is on Tatweel or not,if the - * Tashkeel on tatweel (FE range), it returns 1 else if the - * Tashkeel with shadda on tatweel (FC range)return 2 otherwise + * Function: Checks if the Tashkeel Character is on Tatweel or not,if the + * Tashkeel on tatweel (FE range), it returns 1 else if the + * Tashkeel with shadda on tatweel (FC range)return 2 otherwise * returns 0 */ private static int isTashkeelOnTatweelChar(char ch){ @@ -1104,11 +1107,11 @@ public final class ArabicShaping { return 0; } } - + /* * Name: isIsolatedTashkeelChar - * Function: Checks if the Tashkeel Character is in the isolated form - * (i.e. 
Unicode FE range) returns 1 else if the Tashkeel + * Function: Checks if the Tashkeel Character is in the isolated form + * (i.e. Unicode FE range) returns 1 else if the Tashkeel * with shadda is in the isolated form (i.e. Unicode FC range) * returns 1 otherwise returns 0 */ @@ -1121,7 +1124,7 @@ public final class ArabicShaping { return 0; } } - + /* * Name : isAlefChar * Function: Returns 1 for Alef characters else return 0 @@ -1129,7 +1132,7 @@ public final class ArabicShaping { private static boolean isAlefChar(char ch) { return ch == '\u0622' || ch == '\u0623' || ch == '\u0625' || ch == '\u0627'; } - + /* * Name : isLamAlefChar * Function: Returns true for LamAlef characters else return false @@ -1150,9 +1153,9 @@ public final class ArabicShaping { private int calculateSize(char[] source, int sourceStart, int sourceLength) { - + int destSize = sourceLength; - + switch (options & LETTERS_MASK) { case LETTERS_SHAPE: case LETTERS_SHAPE_TASHKEEL_ISOLATED: @@ -1185,8 +1188,8 @@ public final class ArabicShaping { return destSize; } - - + + /* * Name : countSpaceSub * Function: Counts number of times the subChar appears in the array @@ -1199,10 +1202,10 @@ public final class ArabicShaping { count++; } i++; - } - return count; + } + return count; } - + /* * Name : shiftArray * Function: Shifts characters to replace space sub characters @@ -1239,12 +1242,12 @@ public final class ArabicShaping { } return w; } - + /* * Name : handleTashkeelWithTatweel - * Function : Replaces Tashkeel as following: - * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. - * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace + * Function : Replaces Tashkeel as following: + * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. + * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace * it with Shadda on Tatweel. * Case 3: if the Tashkeel is isolated replace it with Space. 
* @@ -1266,53 +1269,53 @@ public final class ArabicShaping { /* *Name : handleGeneratedSpaces *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, - * and Tashkeel to space. - * handleGeneratedSpaces function puts these generated spaces + * and Tashkeel to space. + * handleGeneratedSpaces function puts these generated spaces * according to the options the user specifies. LamAlef and Tashkeel - * spaces can be replaced at begin, at end, at near or decrease the + * spaces can be replaced at begin, at end, at near or decrease the * buffer size. * * There is also Auto option for LamAlef and tashkeel, which will put - * the spaces at end of the buffer (or end of text if the user used + * the spaces at end of the buffer (or end of text if the user used * the option SPACES_RELATIVE_TO_TEXT_BEGIN_END). * - * If the text type was visual_LTR and the option - * SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END + * If the text type was visual_LTR and the option + * SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END * option will place the space at the beginning of the buffer and - * BEGIN will place the space at the end of the buffer. + * BEGIN will place the space at the end of the buffer. 
*/ - private int handleGeneratedSpaces(char[] dest, + private int handleGeneratedSpaces(char[] dest, int start, int length) { - + int lenOptionsLamAlef = options & LAMALEF_MASK; int lenOptionsTashkeel = options & TASHKEEL_MASK; boolean lamAlefOn = false; boolean tashkeelOn = false; - + if (!isLogical & !spacesRelativeToTextBeginEnd) { switch (lenOptionsLamAlef) { case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break; case LAMALEF_END: lenOptionsLamAlef = LAMALEF_BEGIN; break; default: break; - } + } switch (lenOptionsTashkeel){ case TASHKEEL_BEGIN: lenOptionsTashkeel = TASHKEEL_END; break; case TASHKEEL_END: lenOptionsTashkeel = TASHKEEL_BEGIN; break; default: break; } } - - + + if (lenOptionsLamAlef == LAMALEF_NEAR) { for (int i = start, e = i + length; i < e; ++i) { if (dest[i] == LAMALEF_SPACE_SUB) { dest[i] = SPACE_CHAR; } } - + } else { - + final int e = start + length; int wL = countSpaceSub(dest, length, LAMALEF_SPACE_SUB); int wT = countSpaceSub(dest, length, TASHKEEL_SPACE_SUB); @@ -1325,11 +1328,11 @@ public final class ArabicShaping { } - if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_END)) { + if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_END)) { shiftArray(dest, start, e, LAMALEF_SPACE_SUB); while (wL > start) { dest[--wL] = SPACE_CHAR; - } + } } if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_END)){ @@ -1338,32 +1341,32 @@ public final class ArabicShaping { dest[--wT] = SPACE_CHAR; } } - - lamAlefOn = false; + + lamAlefOn = false; tashkeelOn = false; - + if (lenOptionsLamAlef == LAMALEF_RESIZE){ lamAlefOn = true; } if (lenOptionsTashkeel == TASHKEEL_RESIZE){ tashkeelOn = true; } - + if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_RESIZE)){ shiftArray(dest, start, e, LAMALEF_SPACE_SUB); wL = flipArray(dest,start,e, wL); length = wL - start; } - if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_RESIZE)) { + if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_RESIZE)) { shiftArray(dest, start, e, TASHKEEL_SPACE_SUB); wT = flipArray(dest,start,e, 
wT); length = wT - start; - } + } - lamAlefOn = false; - tashkeelOn = false; - - if ((lenOptionsLamAlef == LAMALEF_BEGIN) || + lamAlefOn = false; + tashkeelOn = false; + + if ((lenOptionsLamAlef == LAMALEF_BEGIN) || (lenOptionsLamAlef == LAMALEF_AUTO)){ lamAlefOn = true; } @@ -1387,15 +1390,15 @@ public final class ArabicShaping { } } } - + return length; } - - + + /* *Name :expandCompositCharAtBegin *Function :Expands the LamAlef character to Lam and Alef consuming the required - * space from beginning of the buffer. If the text type was visual_LTR + * space from beginning of the buffer. If the text type was visual_LTR * and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected * the spaces will be located at end of buffer. * If there are no spaces to expand the LamAlef, an exception is thrown. @@ -1403,7 +1406,7 @@ public final class ArabicShaping { private boolean expandCompositCharAtBegin(char[] dest,int start, int length, int lacount) { boolean spaceNotFound = false; - + if (lacount > countSpacesRight(dest, start, length)) { spaceNotFound = true; return spaceNotFound; @@ -1418,22 +1421,22 @@ public final class ArabicShaping { } } return spaceNotFound; - + } /* *Name : expandCompositCharAtEnd - *Function : Expands the LamAlef character to Lam and Alef consuming the + *Function : Expands the LamAlef character to Lam and Alef consuming the * required space from end of the buffer. If the text type was * Visual LTR and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END - * was used, the spaces will be consumed from begin of buffer. If - * there are no spaces to expand the LamAlef, an exception is thrown. + * was used, the spaces will be consumed from begin of buffer. If + * there are no spaces to expand the LamAlef, an exception is thrown. 
*/ private boolean expandCompositCharAtEnd(char[] dest,int start, int length, int lacount){ boolean spaceNotFound = false; - + if (lacount > countSpacesLeft(dest, start, length)) { spaceNotFound = true; return spaceNotFound; @@ -1453,22 +1456,22 @@ public final class ArabicShaping { /* *Name : expandCompositCharAtNear *Function : Expands the LamAlef character into Lam + Alef, YehHamza character - * into Yeh + Hamza, SeenFamily character into SeenFamily character - * + Tail, while consuming the space next to the character. + * into Yeh + Hamza, SeenFamily character into SeenFamily character + * + Tail, while consuming the space next to the character. */ private boolean expandCompositCharAtNear(char[] dest,int start, int length, int yehHamzaOption, int seenTailOption, int lamAlefOption){ - + boolean spaceNotFound = false; - - - + + + if (isNormalizedLamAlefChar(dest[start])) { spaceNotFound = true; return spaceNotFound; } - for (int i = start + length; --i >=start;) { + for (int i = start + length; --i >=start;) { char ch = dest[i]; if (lamAlefOption == 1 && isNormalizedLamAlefChar(ch)) { if (i>start &&dest[i-1] == SPACE_CHAR) { @@ -1486,7 +1489,7 @@ public final class ArabicShaping { return spaceNotFound; } }else if(yehHamzaOption == 1 && isYehHamzaChar(ch)){ - + if(i>start &&dest[i-1] == SPACE_CHAR){ dest[i] = yehHamzaToYeh[ch - YEH_HAMZAFE_CHAR]; dest[i-1] = HAMZAFE_CHAR; @@ -1494,14 +1497,14 @@ public final class ArabicShaping { spaceNotFound = true; return spaceNotFound; } - - + + } } return false; } - + /* * Name : expandCompositChar * Function: LamAlef needs special handling as the LamAlef is @@ -1520,9 +1523,9 @@ public final class ArabicShaping { int lenOptionsLamAlef = options & LAMALEF_MASK; int lenOptionsSeen = options & SEEN_MASK; - int lenOptionsYehHamza = options & YEHHAMZA_MASK; + int lenOptionsYehHamza = options & YEHHAMZA_MASK; boolean spaceNotFound = false; - + if (!isLogical && !spacesRelativeToTextBeginEnd) { switch (lenOptionsLamAlef) { case 
LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break; @@ -1530,7 +1533,7 @@ public final class ArabicShaping { default: break; } } - + if(shapingMode == 1){ if(lenOptionsLamAlef == LAMALEF_AUTO){ if(isLogical){ @@ -1565,11 +1568,11 @@ public final class ArabicShaping { spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount); if(spaceNotFound){ throw new ArabicShapingException("No spacefor lamalef"); - } + } }else if(lenOptionsLamAlef == LAMALEF_NEAR){ spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1); if(spaceNotFound){ - throw new ArabicShapingException("No spacefor lamalef"); + throw new ArabicShapingException("No spacefor lamalef"); } }else if(lenOptionsLamAlef == LAMALEF_RESIZE){ for (int r = start + length, w = r + lacount; --r >= start;) { @@ -1587,20 +1590,20 @@ public final class ArabicShaping { if(lenOptionsSeen == SEEN_TWOCELL_NEAR){ spaceNotFound = expandCompositCharAtNear(dest, start, length,0,1,0); if(spaceNotFound){ - throw new ArabicShapingException("No space for Seen tail expansion"); + throw new ArabicShapingException("No space for Seen tail expansion"); } } if(lenOptionsYehHamza == YEHHAMZA_TWOCELL_NEAR){ spaceNotFound = expandCompositCharAtNear(dest, start, length,1,0,0); if(spaceNotFound){ - throw new ArabicShapingException("No space for YehHamza expansion"); + throw new ArabicShapingException("No space for YehHamza expansion"); } - } + } } return length; } - + /* Convert the input buffer from FExx Range into 06xx Range * to put all characters into the 06xx range * even the lamalef is converted to the special region in @@ -1623,9 +1626,9 @@ public final class ArabicShaping { /* * Name : deshapeNormalize * Function: Convert the input buffer from FExx Range into 06xx Range - * even the lamalef is converted to the special region in the 06xx range. 
- * According to the options the user enters, all seen family characters - * followed by a tail character are merged to seen tail family character and + * even the lamalef is converted to the special region in the 06xx range. + * According to the options the user enters, all seen family characters + * followed by a tail character are merged to seen tail family character and * any yeh followed by a hamza character are merged to yehhamza character. * Method returns the number of lamalef chars found. */ @@ -1636,15 +1639,15 @@ public final class ArabicShaping { yehHamzaComposeEnabled = ((options&YEHHAMZA_MASK) == YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; seenComposeEnabled = ((options&SEEN_MASK) == SEEN_TWOCELL_NEAR)? 1 : 0; - + for (int i = start, e = i + length; i < e; ++i) { char ch = dest[i]; - - if( (yehHamzaComposeEnabled == 1) && ((ch == HAMZA06_CHAR) || (ch == HAMZAFE_CHAR)) + + if( (yehHamzaComposeEnabled == 1) && ((ch == HAMZA06_CHAR) || (ch == HAMZAFE_CHAR)) && (i < (length - 1)) && isAlefMaksouraChar(dest[i+1] )) { dest[i] = SPACE_CHAR; dest[i+1] = YEH_HAMZA_CHAR; - } else if ( (seenComposeEnabled == 1) && (isTailChar(ch)) && (i< (length - 1)) + } else if ( (seenComposeEnabled == 1) && (isTailChar(ch)) && (i< (length - 1)) && (isSeenTailFamilyChar(dest[i+1])==1) ) { dest[i] = SPACE_CHAR; } @@ -1663,12 +1666,12 @@ public final class ArabicShaping { * Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped * arabic Unicode buffer in FExx Range */ - private int shapeUnicode(char[] dest, + private int shapeUnicode(char[] dest, int start, int length, int destSize, int tashkeelFlag)throws ArabicShapingException { - + int lamalef_count = normalize(dest, start, length); // resolve the link between the characters. 
@@ -1708,7 +1711,7 @@ public final class ArabicShaping { } if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) { - lamalef_found = true; + lamalef_found = true; char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f if (wLamalef != '\u0000') { // replace alef by marker, it will be removed later @@ -1721,7 +1724,7 @@ public final class ArabicShaping { currLink = getLink(wLamalef); // requires '\u0000', unfortunately } if ((i > 0) && (dest[i-1] == SPACE_CHAR)) - { + { if ( isSeenFamilyChar(dest[i]) == 1){ seenfam_found = true; } else if (dest[i] == YEH_HAMZA_CHAR) { @@ -1751,31 +1754,31 @@ public final class ArabicShaping { shape &= 0x1; } else if (flag == 2) { if (tashkeelFlag == 0 && - ((lastLink & LINKL) != 0) && - ((nextLink & LINKR) != 0) && - dest[i] != '\u064C' && + ((lastLink & LINKL) != 0) && + ((nextLink & LINKR) != 0) && + dest[i] != '\u064C' && dest[i] != '\u064D' && - !((nextLink & ALEFTYPE) == ALEFTYPE && + !((nextLink & ALEFTYPE) == ALEFTYPE && (lastLink & LAMTYPE) == LAMTYPE)) { - + shape = 1; - + } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){ shape = 1; - + } else { shape = 0; } } if (flag == 2) { - if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR) { + if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR) { dest[i] = TASHKEEL_SPACE_SUB; tashkeel_found = true; } else{ dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape); } - // else leave tashkeel alone + // else leave tashkeel alone } else { dest[i] = (char)('\uFE70' + (currLink >> 8) + shape); } @@ -1798,7 +1801,7 @@ public final class ArabicShaping { } } - // If we found a lam/alef pair in the buffer + // If we found a lam/alef pair in the buffer // call handleGeneratedSpaces to remove the spaces that were added destSize = length; @@ -1816,17 +1819,17 @@ public final class ArabicShaping { * Function: Converts an Arabic Unicode buffer in FExx Range into unshaped * arabic Unicode buffer in 06xx Range */ - private int deShapeUnicode(char[] dest, + 
private int deShapeUnicode(char[] dest, int start, int length, int destSize) throws ArabicShapingException { - int lamalef_count = deshapeNormalize(dest, start, length); + int lamalef_count = deshapeNormalize(dest, start, length); // If there was a lamalef in the buffer call expandLamAlef if (lamalef_count != 0) { // need to adjust dest to fit expanded buffer... !!! - destSize = expandCompositChar(dest, start, length, lamalef_count,DESHAPE_MODE); + destSize = expandCompositChar(dest, start, length, lamalef_count,DESHAPE_MODE); } else { destSize = length; } @@ -1834,7 +1837,7 @@ public final class ArabicShaping { return destSize; } - private int internalShape(char[] source, + private int internalShape(char[] source, int sourceStart, int sourceLength, char[] dest, @@ -1848,7 +1851,7 @@ public final class ArabicShaping { if (destSize == 0) { if (((options & LETTERS_MASK) != LETTERS_NOOP) && ((options & LAMALEF_MASK) == LAMALEF_RESIZE)) { - + return calculateSize(source, sourceStart, sourceLength); } else { return sourceLength; // by definition @@ -1871,7 +1874,7 @@ public final class ArabicShaping { break; case LETTERS_SHAPE: - if( ((options&TASHKEEL_MASK) != 0) && + if( ((options&TASHKEEL_MASK) != 0) && ((options&TASHKEEL_MASK) !=TASHKEEL_REPLACE_BY_TATWEEL)) { /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 2); @@ -1888,12 +1891,12 @@ public final class ArabicShaping { case LETTERS_UNSHAPE: outputSize = deShapeUnicode(temp, 0, sourceLength, destSize); - break; + break; default: break; } - + if (outputSize > destSize) { throw new ArabicShapingException("not enough room for result data"); } @@ -1955,9 +1958,9 @@ public final class ArabicShaping { if (isLogical) { invertBuffer(temp, 0, outputSize); } - + System.arraycopy(temp, 0, dest, destStart, outputSize); - + return outputSize; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BidiRun.java 
b/icu4j/main/classes/core/src/com/ibm/icu/text/BidiRun.java index ede9f05541c..011d810159c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BidiRun.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BidiRun.java @@ -148,6 +148,7 @@ public class BidiRun { * String to display run * @stable ICU 3.8 */ + @Override public String toString() { return "BidiRun " + start + " - " + limit + " @ " + level; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIterator.java index fdc6d729570..fefeea0f3d8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIterator.java @@ -231,6 +231,7 @@ public abstract class BreakIterator implements Cloneable * @return The clone. * @stable ICU 2.0 */ + @Override public Object clone() { try { @@ -380,52 +381,52 @@ public abstract class BreakIterator implements Cloneable */ public abstract int current(); - - /** - * Tag value for "words" that do not fit into any of other categories. - * Includes spaces and most punctuation. + + /** + * Tag value for "words" that do not fit into any of other categories. + * Includes spaces and most punctuation. * @stable ICU 53 */ public static final int WORD_NONE = 0; /** - * Upper bound for tags for uncategorized words. + * Upper bound for tags for uncategorized words. * @stable ICU 53 */ public static final int WORD_NONE_LIMIT = 100; /** - * Tag value for words that appear to be numbers, lower limit. + * Tag value for words that appear to be numbers, lower limit. * @stable ICU 53 */ public static final int WORD_NUMBER = 100; - /** + /** * Tag value for words that appear to be numbers, upper limit. * @stable ICU 53 */ public static final int WORD_NUMBER_LIMIT = 200; - /** + /** * Tag value for words that contain letters, excluding - * hiragana, katakana or ideographic characters, lower limit. 
+ * hiragana, katakana or ideographic characters, lower limit. * @stable ICU 53 */ public static final int WORD_LETTER = 200; - /** - * Tag value for words containing letters, upper limit + /** + * Tag value for words containing letters, upper limit * @stable ICU 53 */ public static final int WORD_LETTER_LIMIT = 300; - /** + /** * Tag value for words containing kana characters, lower limit * @stable ICU 53 */ public static final int WORD_KANA = 300; - /** + /** * Tag value for words containing kana characters, upper limit * @stable ICU 53 */ @@ -764,11 +765,11 @@ s */ * {@icu} Registers a new break iterator of the indicated kind, to use in the given * locale. Clones of the iterator will be returned if a request for a break iterator * of the given kind matches or falls back to this locale. - * + * *

Because ICU may choose to cache BreakIterator objects internally, this must * be called at application startup, prior to any calls to * BreakIterator.getInstance to avoid undefined behavior. - * + * * @param iter the BreakIterator instance to adopt. * @param locale the Locale for which this instance is to be registered * @param kind the type of iterator for which this instance is to be registered @@ -783,11 +784,11 @@ s */ * {@icu} Registers a new break iterator of the indicated kind, to use in the given * locale. Clones of the iterator will be returned if a request for a break iterator * of the given kind matches or falls back to this locale. - * + * *

Because ICU may choose to cache BreakIterator objects internally, this must * be called at application startup, prior to any calls to * BreakIterator.getInstance to avoid undefined behavior. - * + * * @param iter the BreakIterator instance to adopt. * @param locale the Locale for which this instance is to be registered * @param kind the type of iterator for which this instance is to be registered diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java index 1857c3c511f..141e3e04238 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java @@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UScript; class BurmeseBreakEngine extends DictionaryBreakEngine { - + // Constants for BurmeseBreakIterator // How many words in a row are "good enough"? private static final byte BURMESE_LOOKAHEAD = 3; @@ -27,13 +27,13 @@ class BurmeseBreakEngine extends DictionaryBreakEngine { private static final byte BURMESE_PREFIX_COMBINE_THRESHOLD = 3; // Minimum word size private static final byte BURMESE_MIN_WORD = 2; - + private DictionaryMatcher fDictionary; private static UnicodeSet fBurmeseWordSet; private static UnicodeSet fEndWordSet; private static UnicodeSet fBeginWordSet; private static UnicodeSet fMarkSet; - + static { // Initialize UnicodeSets fBurmeseWordSet = new UnicodeSet(); @@ -52,14 +52,14 @@ class BurmeseBreakEngine extends DictionaryBreakEngine { fMarkSet.compact(); fEndWordSet.compact(); fBeginWordSet.compact(); - + // Freeze the static UnicodeSet fBurmeseWordSet.freeze(); fMarkSet.freeze(); fEndWordSet.freeze(); fBeginWordSet.freeze(); } - + public BurmeseBreakEngine() throws IOException { super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE); setCharacters(fBurmeseWordSet); @@ -67,16 +67,19 @@ class BurmeseBreakEngine extends DictionaryBreakEngine 
{ fDictionary = DictionaryData.loadDictionaryFor("Mymr"); } + @Override public boolean equals(Object obj) { // Normally is a singleton, but it's possible to have duplicates // during initialization. All are equivalent. return obj instanceof BurmeseBreakEngine; } + @Override public int hashCode() { return getClass().hashCode(); } - + + @Override public boolean handles(int c, int breakType) { if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) { int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT); @@ -85,10 +88,11 @@ class BurmeseBreakEngine extends DictionaryBreakEngine { return false; } + @Override public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd, DequeI foundBreaks) { - - + + if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD) { return 0; // Not enough characters for word } @@ -159,7 +163,7 @@ class BurmeseBreakEngine extends DictionaryBreakEngine { // no preceding word, or the non-word shares less than the minimum threshold // of characters with a dictionary word, then scan to resynchronize if (words[wordsFound%BURMESE_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 && - (wordLength == 0 || + (wordLength == 0 || words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) { // Look for a plausible word boundary int remaining = rangeEnd - (current + wordLength); @@ -205,7 +209,7 @@ class BurmeseBreakEngine extends DictionaryBreakEngine { // Look ahead for possible suffixes if a dictionary word does not follow. // We do this in code rather than using a rule so that the heuristic - // resynch continues to function. For example, one of the suffix characters + // resynch continues to function. For example, one of the suffix characters // could be a typo in the middle of a word. 
// NOT CURRENTLY APPLICABLE TO BURMESE diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BytesDictionaryMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BytesDictionaryMatcher.java index 6e5979cf342..b7c5f5c3dda 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BytesDictionaryMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BytesDictionaryMatcher.java @@ -17,7 +17,7 @@ import com.ibm.icu.util.BytesTrie.Result; class BytesDictionaryMatcher extends DictionaryMatcher { private final byte[] characters; private final int transform; - + public BytesDictionaryMatcher(byte[] chars, int transform) { characters = chars; Assert.assrt((transform & DictionaryData.TRANSFORM_TYPE_MASK) == DictionaryData.TRANSFORM_TYPE_OFFSET); @@ -26,9 +26,9 @@ class BytesDictionaryMatcher extends DictionaryMatcher { // than adding a "transform type" variable this.transform = transform; } - + private int transform(int c) { - if (c == 0x200D) { + if (c == 0x200D) { return 0xFF; } else if (c == 0x200C) { return 0xFE; @@ -41,6 +41,7 @@ class BytesDictionaryMatcher extends DictionaryMatcher { return delta; } + @Override public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) { UCharacterIterator text = UCharacterIterator.getInstance(text_); BytesTrie bt = new BytesTrie(characters, 0); @@ -83,6 +84,7 @@ class BytesDictionaryMatcher extends DictionaryMatcher { return numChars; } + @Override public int getType() { return DictionaryData.TRIE_TYPE_BYTES; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsDictionaryMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsDictionaryMatcher.java index ec13d545410..f1c2a91acd7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsDictionaryMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsDictionaryMatcher.java @@ -15,11 +15,12 @@ import com.ibm.icu.util.CharsTrie; class CharsDictionaryMatcher extends 
DictionaryMatcher { private CharSequence characters; - + public CharsDictionaryMatcher(CharSequence chars) { characters = chars; } + @Override public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) { UCharacterIterator text = UCharacterIterator.getInstance(text_); CharsTrie uct = new CharsTrie(characters, 0); @@ -53,7 +54,7 @@ class CharsDictionaryMatcher extends DictionaryMatcher { } c = text.nextCodePoint(); if (c == UCharacterIterator.DONE) { - break; + break; } ++numChars; result = uct.nextForCodePoint(c); @@ -62,6 +63,7 @@ class CharsDictionaryMatcher extends DictionaryMatcher { return numChars; } + @Override public int getType() { return DictionaryData.TRIE_TYPE_UCHARS; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetMatch.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetMatch.java index 938581ade4e..c695233d116 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetMatch.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetMatch.java @@ -30,13 +30,13 @@ import java.io.Reader; */ public class CharsetMatch implements Comparable { - + /** * Create a java.io.Reader for reading the Unicode character data corresponding * to the original byte data supplied to the Charset detect operation. *

* CAUTION: if the source of the byte data was an InputStream, a Reader - * can be created for only one matching char set using this method. If more + * can be created for only one matching char set using this method. If more * than one charset needs to be tried, the caller will need to reset * the InputStream and create InputStreamReaders itself, based on the charset name. * @@ -46,11 +46,11 @@ public class CharsetMatch implements Comparable { */ public Reader getReader() { InputStream inputStream = fInputStream; - + if (inputStream == null) { inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength); } - + try { inputStream.reset(); return new InputStreamReader(inputStream, getName()); @@ -94,14 +94,14 @@ public class CharsetMatch implements Comparable { Reader reader = getReader(); int max = maxLength < 0? Integer.MAX_VALUE : maxLength; int bytesRead = 0; - + while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) { sb.append(buffer, 0, bytesRead); max -= bytesRead; } - + reader.close(); - + return sb.toString(); } else { String name = getName(); @@ -119,7 +119,7 @@ public class CharsetMatch implements Comparable { return result; } - + /** * Get an indication of the confidence in the charset detected. * Confidence values range from 0-100, with larger numbers indicating @@ -135,7 +135,7 @@ public class CharsetMatch implements Comparable { } /** - * Get the name of the detected charset. + * Get the name of the detected charset. * The name will be one that can be used with other APIs on the * platform that accept charset names. It is the "Canonical name" * as defined by the class java.nio.charset.Charset; for @@ -152,9 +152,9 @@ public class CharsetMatch implements Comparable { public String getName() { return fCharsetName; } - + /** - * Get the ISO code for the language of the detected charset. + * Get the ISO code for the language of the detected charset. * * @return The ISO code for the language or null if the language cannot be determined. 
* @@ -166,17 +166,18 @@ public class CharsetMatch implements Comparable { /** * Compare to other CharsetMatch objects. - * Comparison is based on the match confidence value, which - * allows CharsetDetector.detectAll() to order its results. + * Comparison is based on the match confidence value, which + * allows CharsetDetector.detectAll() to order its results. * * @param other the CharsetMatch object to compare against. - * @return a negative integer, zero, or a positive integer as the + * @return a negative integer, zero, or a positive integer as the * confidence level of this CharsetMatch * is less than, equal to, or greater than that of * the argument. * @throws ClassCastException if the argument is not a CharsetMatch. * @stable ICU 4.4 */ + @Override public int compareTo (CharsetMatch other) { int compareResult = 0; if (this.fConfidence > other.fConfidence) { @@ -192,7 +193,7 @@ public class CharsetMatch implements Comparable { */ CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) { fConfidence = conf; - + // The references to the original application input data must be copied out // of the charset recognizer to here, in case the application resets the // recognizer before using this CharsetMatch. @@ -212,7 +213,7 @@ public class CharsetMatch implements Comparable { */ CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) { fConfidence = conf; - + // The references to the original application input data must be copied out // of the charset recognizer to here, in case the application resets the // recognizer before using this CharsetMatch. @@ -227,7 +228,7 @@ public class CharsetMatch implements Comparable { fLang = lang; } - + // // Private Data // @@ -238,7 +239,7 @@ public class CharsetMatch implements Comparable { private InputStream fInputStream = null; // User's input stream, or null if the user // gave us a byte array. 
- + private String fCharsetName; // The name of the charset this CharsetMatch // represents. Filled in by the recognizer. private String fLang; // The language, if one was determined by diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_2022.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_2022.java index c3eb262f6e6..712d4f2ace8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_2022.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_2022.java @@ -13,19 +13,19 @@ package com.ibm.icu.text; * This is a superclass for the individual detectors for * each of the detectable members of the ISO 2022 family * of encodings. - * + * * The separate classes are nested within this class. */ abstract class CharsetRecog_2022 extends CharsetRecognizer { - + /** * Matching function shared among the 2022 detectors JP, CN and KR * Counts up the number of legal an unrecognized escape sequences in * the sample of text, and computes a score based on the total number & * the proportion that fit the encoding. - * - * + * + * * @param text the byte buffer containing text to analyse * @param textLen the size of the text in the byte. * @param escapeSequences the byte escape sequences to test for. @@ -44,59 +44,59 @@ abstract class CharsetRecog_2022 extends CharsetRecognizer { checkEscapes: for (escN=0; escN= 3 && + + if (det.fRawLength >= 3 && (input[0] & 0xFF) == 0xef && (input[1] & 0xFF) == 0xbb && (input[2] & 0xFF) == 0xbf) { hasBOM = true; } - + // Scan for multi-byte sequences for (i=0; i numInvalid*10) { confidence = 80; } else if (numValid > 3 && numInvalid == 0) { - confidence = 100; + confidence = 100; } else if (numValid > 0 && numInvalid == 0) { confidence = 80; } else if (numValid == 0 && numInvalid == 0) { // Plain ASCII. Confidence must be > 10, it's more likely than UTF-16, which // accepts ASCII with confidence = 10. // TODO: add plain ASCII as an explicitly detected type. 
- confidence = 15; + confidence = 15; } else if (numValid > numInvalid*10) { // Probably corruput utf-8 data. Valid sequences aren't likely by chance. confidence = 25; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_Unicode.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_Unicode.java index 75288dc6c5d..d46779007d5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_Unicode.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_Unicode.java @@ -19,22 +19,24 @@ abstract class CharsetRecog_Unicode extends CharsetRecognizer { /* (non-Javadoc) * @see com.ibm.icu.text.CharsetRecognizer#getName() */ + @Override abstract String getName(); /* (non-Javadoc) * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector) */ + @Override abstract CharsetMatch match(CharsetDetector det); - + static int codeUnit16FromBytes(byte hi, byte lo) { return ((hi & 0xff) << 8) | (lo & 0xff); } - + // UTF-16 confidence calculation. Very simple minded, but better than nothing. // Any 8 bit non-control characters bump the confidence up. These have a zero high byte, // and are very likely to be UTF-16, although they could also be part of a UTF-32 code. // NULs are a contra-indication, they will appear commonly if the actual encoding is UTF-32. - // NULs should be rare in actual text. + // NULs should be rare in actual text. 
static int adjustConfidence(int codeUnit, int confidence) { if (codeUnit == 0) { confidence -= 10; @@ -48,19 +50,21 @@ abstract class CharsetRecog_Unicode extends CharsetRecognizer { } return confidence; } - + static class CharsetRecog_UTF_16_BE extends CharsetRecog_Unicode { + @Override String getName() { return "UTF-16BE"; } - + + @Override CharsetMatch match(CharsetDetector det) { byte[] input = det.fRawInput; int confidence = 10; - + int bytesToCheck = Math.min(input.length, 30); for (int charIndex=0; charIndex= 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) { numInvalid += 1; } else { numValid += 1; } } - - + + // Cook up some sort of confidence score, based on presence of a BOM // and the existence of valid and/or invalid multi-byte sequences. if (hasBOM && numInvalid==0) { @@ -157,41 +165,45 @@ abstract class CharsetRecog_Unicode extends CharsetRecognizer { } else if (hasBOM && numValid > numInvalid*10) { confidence = 80; } else if (numValid > 3 && numInvalid == 0) { - confidence = 100; + confidence = 100; } else if (numValid > 0 && numInvalid == 0) { confidence = 80; } else if (numValid > numInvalid*10) { // Probably corrupt UTF-32BE data. Valid sequences aren't likely by chance. confidence = 25; } - + return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); } } - + static class CharsetRecog_UTF_32_BE extends CharsetRecog_UTF_32 { + @Override int getChar(byte[] input, int index) { return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 | (input[index + 2] & 0xFF) << 8 | (input[index + 3] & 0xFF); } - + + @Override String getName() { return "UTF-32BE"; } } - + static class CharsetRecog_UTF_32_LE extends CharsetRecog_UTF_32 { + @Override int getChar(byte[] input, int index) { return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 | (input[index + 1] & 0xFF) << 8 | (input[index + 0] & 0xFF); } - + + @Override String getName() { return "UTF-32LE"; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_mbcs.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_mbcs.java index 3a389588bc9..034e4a821ec 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_mbcs.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_mbcs.java @@ -29,13 +29,14 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { * Get the IANA name of this charset. * @return the charset name. */ + @Override abstract String getName() ; - - + + /** * Test the match of this charset with the input text data * which is obtained via the CharsetDetector object. - * + * * @param det The CharsetDetector, which contains the input text * to be checked for being in this charset. 
* @return Two values packed into one int (Damn java, anyhow) @@ -53,15 +54,15 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { int totalCharCount = 0; int confidence = 0; iteratedChar iter = new iteratedChar(); - + detectBlock: { for (iter.reset(); nextChar(iter, det);) { totalCharCount++; if (iter.error) { - badCharCount++; + badCharCount++; } else { long cv = iter.charValue & 0xFFFFFFFFL; - + if (cv <= 0xff) { singleByteCharCount++; } else { @@ -79,7 +80,7 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { break detectBlock; } } - + if (doubleByteCharCount <= 10 && badCharCount== 0) { // Not many multi-byte chars. if (doubleByteCharCount == 0 && totalCharCount < 10) { @@ -93,10 +94,10 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { // but is not incompatible with our encoding, so don't give it a zero. confidence = 10; } - + break detectBlock; } - + // // No match if there are too many characters that don't fit the encoding scheme. // (should we have zero tolerance for these?) @@ -105,7 +106,7 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { confidence = 0; break detectBlock; } - + if (commonChars == null) { // We have no statistics on frequently occuring characters. // Assess confidence purely on having a reasonable number of @@ -124,10 +125,10 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { confidence = Math.min(confidence, 100); } } // end of detectBlock: - + return confidence; } - + // "Character" iterated character class. 
// Recognizers for specific mbcs encodings make their "characters" available // by providing a nextChar() function that fills in an instance of iteratedChar @@ -144,29 +145,29 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { int nextIndex = 0; boolean error = false; boolean done = false; - + void reset() { charValue = 0; nextIndex = 0; error = false; done = false; } - + int nextByte(CharsetDetector det) { if (nextIndex >= det.fRawLength) { done = true; return -1; } - int byteValue = (int)det.fRawInput[nextIndex++] & 0x00ff; + int byteValue = det.fRawInput[nextIndex++] & 0x00ff; return byteValue; - } + } } - + /** * Get the next character (however many bytes it is) from the input data * Subclasses for specific charset encodings must implement this function * to get characters according to the rules of their encoding scheme. - * + * * This function is not a method of class iteratedChar only because * that would require a lot of extra derived classes, which is awkward. * @param it The iteratedChar "struct" into which the returned char is placed. @@ -175,42 +176,43 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { * @return True if a character was returned, false at end of input. */ abstract boolean nextChar(iteratedChar it, CharsetDetector det); - - - + + + /** - * Shift-JIS charset recognizer. + * Shift-JIS charset recognizer. * */ static class CharsetRecog_sjis extends CharsetRecog_mbcs { - static int [] commonChars = + static int [] commonChars = // TODO: This set of data comes from the character frequency- // of-occurence analysis tool. The data needs to be moved // into a resource and loaded from there. 
- {0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, - 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, - 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, - 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, - 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, + {0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, + 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, + 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, + 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, + 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa}; - - boolean nextChar(iteratedChar it, CharsetDetector det) { + + @Override + boolean nextChar(iteratedChar it, CharsetDetector det) { it.error = false; int firstByte; firstByte = it.charValue = it.nextByte(det); if (firstByte < 0) { return false; } - + if (firstByte <= 0x7f || (firstByte>0xa0 && firstByte<=0xdf)) { return true; } - + int secondByte = it.nextByte(det); if (secondByte < 0) { - return false; + return false; } it.charValue = (firstByte << 8) | secondByte; if (! ((secondByte>=0x40 && secondByte<=0x7f) || (secondByte>=0x80 && secondByte<=0xff))) { @@ -219,61 +221,65 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { } return true; } - - CharsetMatch match(CharsetDetector det) { + + @Override + CharsetMatch match(CharsetDetector det) { int confidence = match(det, commonChars); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } - - String getName() { + + @Override + String getName() { return "Shift_JIS"; } - - public String getLanguage() + + @Override + public String getLanguage() { return "ja"; } - + } - - + + /** - * Big5 charset recognizer. 
+ * Big5 charset recognizer. * */ static class CharsetRecog_big5 extends CharsetRecog_mbcs { - static int [] commonChars = + static int [] commonChars = // TODO: This set of data comes from the character frequency- // of-occurence analysis tool. The data needs to be moved // into a resource and loaded from there. - {0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, - 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, - 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, - 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, - 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, - 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, - 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, - 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, - 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, + {0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, + 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, + 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, + 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, + 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, + 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, + 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, + 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, + 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f}; - - boolean nextChar(iteratedChar it, CharsetDetector det) { + + @Override + boolean nextChar(iteratedChar it, CharsetDetector det) { it.error = false; int 
firstByte; firstByte = it.charValue = it.nextByte(det); if (firstByte < 0) { return false; } - + if (firstByte <= 0x7f || firstByte==0xff) { // single byte character. return true; } - + int secondByte = it.nextByte(det); if (secondByte < 0) { - return false; + return false; } it.charValue = (it.charValue << 8) | secondByte; @@ -284,47 +290,51 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { } return true; } - - CharsetMatch match(CharsetDetector det) { + + @Override + CharsetMatch match(CharsetDetector det) { int confidence = match(det, commonChars); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } - - String getName() { + + @Override + String getName() { return "Big5"; } - - - public String getLanguage() + + + @Override + public String getLanguage() { return "zh"; } } - - + + /** * EUC charset recognizers. One abstract class that provides the common function * for getting the next character according to the EUC encoding scheme, - * and nested derived classes for EUC_KR, EUC_JP, EUC_CN. + * and nested derived classes for EUC_KR, EUC_JP, EUC_CN. * */ abstract static class CharsetRecog_euc extends CharsetRecog_mbcs { - + /* * (non-Javadoc) * Get the next character value for EUC based encodings. * Character "value" is simply the raw bytes that make up the character * packed into an int. 
*/ - boolean nextChar(iteratedChar it, CharsetDetector det) { + @Override + boolean nextChar(iteratedChar it, CharsetDetector det) { it.error = false; int firstByte = 0; int secondByte = 0; int thirdByte = 0; //int fourthByte = 0; - + buildChar: { - firstByte = it.charValue = it.nextByte(det); + firstByte = it.charValue = it.nextByte(det); if (firstByte < 0) { // Ran off the end of the input data it.done = true; @@ -334,10 +344,10 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { // single byte char break buildChar; } - + secondByte = it.nextByte(det); it.charValue = (it.charValue << 8) | secondByte; - + if (firstByte >= 0xA1 && firstByte <= 0xfe) { // Two byte Char if (secondByte < 0xa1) { @@ -351,13 +361,13 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { // In EUC-TW, total char size is 4 bytes, three bytes contribute to char value. // We don't know which we've got. // Treat it like EUC-JP. If the data really was EUC-TW, the following two - // bytes will look like a well formed 2 byte char. + // bytes will look like a well formed 2 byte char. if (secondByte < 0xa1) { it.error = true; } - break buildChar; + break buildChar; } - + if (firstByte == 0x8f) { // Code set 3. // Three byte total char size, two bytes of actual char value. @@ -368,175 +378,185 @@ abstract class CharsetRecog_mbcs extends CharsetRecognizer { } } } - + return (it.done == false); } - + /** * The charset recognize for EUC-JP. A singleton instance of this class * is created and kept by the public CharsetDetector class */ static class CharsetRecog_euc_jp extends CharsetRecog_euc { - static int [] commonChars = + static int [] commonChars = // TODO: This set of data comes from the character frequency- // of-occurence analysis tool. The data needs to be moved // into a resource and loaded from there. 
- {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, - 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, - 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, - 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, - 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, - 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, - 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, - 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, - 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, - 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1}; - String getName() { + {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, + 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, + 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, + 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, + 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, + 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, + 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, + 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, + 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, + 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1}; + @Override + String getName() { return "EUC-JP"; } - - CharsetMatch match(CharsetDetector det) { + + @Override + CharsetMatch match(CharsetDetector det) { int confidence = match(det, commonChars); return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); } - - public String getLanguage() + + @Override + public String getLanguage() { return "ja"; } } - + /** * The charset recognize for EUC-KR. A singleton instance of this class * is created and kept by the public CharsetDetector class */ static class CharsetRecog_euc_kr extends CharsetRecog_euc { - static int [] commonChars = + static int [] commonChars = // TODO: This set of data comes from the character frequency- // of-occurence analysis tool. The data needs to be moved // into a resource and loaded from there. - {0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, - 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, - 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, - 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, - 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, - 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, - 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, - 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, - 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, + {0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, + 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, + 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, + 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, + 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, + 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, + 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, + 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, + 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 
0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad}; - - String getName() { + + @Override + String getName() { return "EUC-KR"; } - - CharsetMatch match(CharsetDetector det) { + + @Override + CharsetMatch match(CharsetDetector det) { int confidence = match(det, commonChars); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } - - public String getLanguage() + + @Override + public String getLanguage() { return "ko"; } } } - + /** - * - * GB-18030 recognizer. Uses simplified Chinese statistics. + * + * GB-18030 recognizer. Uses simplified Chinese statistics. * */ static class CharsetRecog_gb_18030 extends CharsetRecog_mbcs { - + /* * (non-Javadoc) * Get the next character value for EUC based encodings. * Character "value" is simply the raw bytes that make up the character * packed into an int. */ - boolean nextChar(iteratedChar it, CharsetDetector det) { + @Override + boolean nextChar(iteratedChar it, CharsetDetector det) { it.error = false; int firstByte = 0; int secondByte = 0; int thirdByte = 0; int fourthByte = 0; - + buildChar: { - firstByte = it.charValue = it.nextByte(det); - + firstByte = it.charValue = it.nextByte(det); + if (firstByte < 0) { // Ran off the end of the input data it.done = true; break buildChar; } - + if (firstByte <= 0x80) { // single byte char break buildChar; } - + secondByte = it.nextByte(det); it.charValue = (it.charValue << 8) | secondByte; - + if (firstByte >= 0x81 && firstByte <= 0xFE) { // Two byte Char if ((secondByte >= 0x40 && secondByte <= 0x7E) || (secondByte >=80 && secondByte <=0xFE)) { break buildChar; } - + // Four byte char if (secondByte >= 0x30 && secondByte <= 0x39) { thirdByte = it.nextByte(det); - + if (thirdByte >= 0x81 && thirdByte <= 0xFE) { fourthByte = it.nextByte(det); - + if (fourthByte >= 0x30 && fourthByte <= 0x39) { it.charValue = (it.charValue << 16) | (thirdByte << 8) | fourthByte; break buildChar; } } } - + 
it.error = true; break buildChar; } } - + return (it.done == false); } - - static int [] commonChars = + + static int [] commonChars = // TODO: This set of data comes from the character frequency- // of-occurence analysis tool. The data needs to be moved // into a resource and loaded from there. - {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, - 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, - 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, - 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6, - 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6, - 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, - 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, - 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5, - 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2, + {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, + 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, + 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, + 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6, + 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6, + 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, + 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, + 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5, + 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2, 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0}; - - String getName() { + + @Override + String getName() { return "GB18030"; } - - CharsetMatch match(CharsetDetector det) { + + @Override + 
CharsetMatch match(CharsetDetector det) { int confidence = match(det, commonChars); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } - - public String getLanguage() + + @Override + public String getLanguage() { return "zh"; } } - - + + } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java index d55c372e87a..ea42f5b7c76 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java @@ -19,6 +19,7 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { /* (non-Javadoc) * @see com.ibm.icu.text.CharsetRecognizer#getName() */ + @Override abstract String getName(); static class NGramParser @@ -28,36 +29,36 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { protected int byteIndex = 0; private int ngram = 0; - + private int[] ngramList; protected byte[] byteMap; - + private int ngramCount; private int hitCount; - + protected byte spaceChar; - + public NGramParser(int[] theNgramList, byte[] theByteMap) { ngramList = theNgramList; byteMap = theByteMap; - + ngram = 0; - + ngramCount = hitCount = 0; } - + /* * Binary search for value in table, which must have exactly 64 entries. 
*/ private static int search(int[] table, int value) { int index = 0; - + if (table[index + 32] <= value) { index += 32; } - + if (table[index + 16] <= value) { index += 16; } @@ -81,57 +82,57 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { if (table[index] > value) { index -= 1; } - + if (index < 0 || table[index] != value) { return -1; } - + return index; } private void lookup(int thisNgram) { ngramCount += 1; - + if (search(ngramList, thisNgram) >= 0) { hitCount += 1; } - + } - + protected void addByte(int b) { ngram = ((ngram << 8) + (b & 0xFF)) & N_GRAM_MASK; lookup(ngram); } - + private int nextByte(CharsetDetector det) { if (byteIndex >= det.fInputLen) { return -1; } - + return det.fInputBytes[byteIndex++] & 0xFF; } - + protected void parseCharacters(CharsetDetector det) { int b; boolean ignoreSpace = false; - + while ((b = nextByte(det)) >= 0) { byte mb = byteMap[b]; - + // TODO: 0x20 might not be a space in all character sets... if (mb != 0) { if (!(mb == spaceChar && ignoreSpace)) { - addByte(mb); + addByte(mb); } - + ignoreSpace = (mb == spaceChar); } } - + } public int parse(CharsetDetector det) @@ -140,60 +141,60 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { } public int parse(CharsetDetector det, byte spaceCh) { - + this.spaceChar = spaceCh; - + parseCharacters(det); - + // TODO: Is this OK? The buffer could have ended in the middle of a word... addByte(spaceChar); double rawPercent = (double) hitCount / (double) ngramCount; - + // if (rawPercent <= 2.0) { // return 0; // } - + // TODO - This is a bit of a hack to take care of a case // were we were getting a confidence of 135... 
if (rawPercent > 0.33) { return 98; } - + return (int) (rawPercent * 300.0); } } - + static class NGramParser_IBM420 extends NGramParser { private byte alef = 0x00; - + protected static byte[] unshapeMap = { /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ -/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x42, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x47, (byte) 0x49, (byte) 0x4A, (byte) 0x4B, (byte) 0x4C, (byte) 0x4D, (byte) 0x4E, (byte) 0x4F, -/* 5- */ (byte) 0x50, (byte) 0x49, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x56, (byte) 0x58, (byte) 0x58, (byte) 0x5A, (byte) 0x5B, (byte) 0x5C, (byte) 0x5D, (byte) 0x5E, (byte) 0x5F, -/* 6- */ (byte) 0x60, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x63, (byte) 0x65, (byte) 0x65, (byte) 0x67, (byte) 0x67, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, -/* 7- */ (byte) 0x69, (byte) 0x71, (byte) 0x71, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x77, (byte) 0x79, (byte) 0x7A, (byte) 0x7B, (byte) 0x7C, (byte) 0x7D, (byte) 0x7E, (byte) 
0x7F, -/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x80, (byte) 0x8B, (byte) 0x8B, (byte) 0x8D, (byte) 0x8D, (byte) 0x8F, -/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9E, (byte) 0x9E, -/* A- */ (byte) 0x9E, (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x9E, (byte) 0xAB, (byte) 0xAB, (byte) 0xAD, (byte) 0xAD, (byte) 0xAF, -/* B- */ (byte) 0xAF, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, (byte) 0xB9, (byte) 0xB1, (byte) 0xBB, (byte) 0xBB, (byte) 0xBD, (byte) 0xBD, (byte) 0xBF, -/* C- */ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xBF, (byte) 0xCC, (byte) 0xBF, (byte) 0xCE, (byte) 0xCF, -/* D- */ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDA, (byte) 0xDC, (byte) 0xDC, (byte) 0xDC, (byte) 0xDF, -/* E- */ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, -/* F- */ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, +/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 1- */ (byte) 0x40, (byte) 
0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x42, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x47, (byte) 0x49, (byte) 0x4A, (byte) 0x4B, (byte) 0x4C, (byte) 0x4D, (byte) 0x4E, (byte) 0x4F, +/* 5- */ (byte) 0x50, (byte) 0x49, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x56, (byte) 0x58, (byte) 0x58, (byte) 0x5A, (byte) 0x5B, (byte) 0x5C, (byte) 0x5D, (byte) 0x5E, (byte) 0x5F, +/* 6- */ (byte) 0x60, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x63, (byte) 0x65, (byte) 0x65, (byte) 0x67, (byte) 0x67, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, +/* 7- */ (byte) 0x69, (byte) 0x71, (byte) 0x71, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x77, (byte) 0x79, (byte) 0x7A, (byte) 0x7B, (byte) 0x7C, (byte) 0x7D, (byte) 0x7E, (byte) 0x7F, +/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x80, (byte) 0x8B, (byte) 0x8B, (byte) 0x8D, (byte) 0x8D, (byte) 0x8F, +/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9E, (byte) 0x9E, +/* A- */ (byte) 0x9E, (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, 
(byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x9E, (byte) 0xAB, (byte) 0xAB, (byte) 0xAD, (byte) 0xAD, (byte) 0xAF, +/* B- */ (byte) 0xAF, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, (byte) 0xB9, (byte) 0xB1, (byte) 0xBB, (byte) 0xBB, (byte) 0xBD, (byte) 0xBD, (byte) 0xBF, +/* C- */ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xBF, (byte) 0xCC, (byte) 0xBF, (byte) 0xCE, (byte) 0xCF, +/* D- */ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDA, (byte) 0xDC, (byte) 0xDC, (byte) 0xDC, (byte) 0xDF, +/* E- */ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, +/* F- */ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, }; - + public NGramParser_IBM420(int[] theNgramList, byte[] theByteMap) { super(theNgramList, theByteMap); } - + private byte isLamAlef(byte b) { if(b == (byte)0xb2 || b == (byte)0xb3){ return (byte)0x47; @@ -204,7 +205,7 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { }else return (byte)0x00; } - + /* * Arabic shaping needs to be done manually. Cannot call ArabicShaping class * because CharsetDetector is dealing with bytes not Unicode code points. 
We could @@ -216,70 +217,71 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { { if (byteIndex >= det.fInputLen || det.fInputBytes[byteIndex] == 0) { return -1; - } + } int next; - + alef = isLamAlef(det.fInputBytes[byteIndex]); if(alef != (byte)0x00) next = 0xB1 & 0xFF; else next = unshapeMap[det.fInputBytes[byteIndex]& 0xFF] & 0xFF; - + byteIndex++; - + return next; } - + + @Override protected void parseCharacters(CharsetDetector det) { - int b; + int b; boolean ignoreSpace = false; - + while ((b = nextByte(det)) >= 0) { byte mb = byteMap[b]; - + // TODO: 0x20 might not be a space in all character sets... if (mb != 0) { if (!(mb == spaceChar && ignoreSpace)) { - addByte(mb); + addByte(mb); } - + ignoreSpace = (mb == spaceChar); } if(alef != (byte)0x00){ mb = byteMap[alef & 0xFF]; - + // TODO: 0x20 might not be a space in all character sets... if (mb != 0) { if (!(mb == spaceChar && ignoreSpace)) { - addByte(mb); + addByte(mb); } - + ignoreSpace = (mb == spaceChar); } - + } } } } - - + + int match(CharsetDetector det, int[] ngrams, byte[] byteMap) { return match (det, ngrams, byteMap, (byte)0x20); } - + int match(CharsetDetector det, int[] ngrams, byte[] byteMap, byte spaceChar) { NGramParser parser = new NGramParser(ngrams, byteMap); return parser.parse(det, spaceChar); } - + int matchIBM420(CharsetDetector det, int[] ngrams, byte[] byteMap, byte spaceChar){ NGramParser_IBM420 parser = new NGramParser_IBM420(ngrams, byteMap); return parser.parse(det, spaceChar); } - + static class NGramsPlusLang { int[] fNGrams; String fLang; @@ -292,134 +294,135 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { static class CharsetRecog_8859_1 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, 
(byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 
0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, }; - - + + private static NGramsPlusLang[] ngrams_8859_1 = new NGramsPlusLang[] { new NGramsPlusLang( - "da", + "da", new int[] { - 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, - 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, - 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, - 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, + 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, + 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, + 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, + 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, }), new NGramsPlusLang( 
"de", new int[] { - 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, - 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, - 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, - 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, + 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, + 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, + 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, + 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, }), new NGramsPlusLang( "en", new int[] { - 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, - 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, - 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, - 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 
0x747320, + 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, + 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, + 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, + 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, }), new NGramsPlusLang( "es", new int[] { - 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, - 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, - 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, - 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, + 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, + 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, + 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, }), 
- + new NGramsPlusLang( "fr", new int[] { - 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, - 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, - 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, + 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, + 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, + 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, }), new NGramsPlusLang( "it", new int[] { - 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, - 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, - 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, - 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, + 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 
0x612064, 0x612070, 0x612073, + 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, + 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, + 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, }), - + new NGramsPlusLang( "nl", new int[] { - 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, - 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, - 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, - 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, + 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, + 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, + 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, + 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, }), - + new NGramsPlusLang( "no", new int[] { - 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 
0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, - 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, - 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, - 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, + 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, + 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, + 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, }), - + new NGramsPlusLang( "pt", new int[] { - 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, - 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, - 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, - 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, + 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 
0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, + 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, + 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, }), - + new NGramsPlusLang( "sv", new int[] { - 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, - 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, - 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, - 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, + 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, + 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, + 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, }), - + }; - + + @Override public CharsetMatch match(CharsetDetector det) { String name = det.fC1Bytes ? 
"windows-1252" : "ISO-8859-1"; @@ -435,86 +438,88 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang); } - + + @Override public String getName() { return "ISO-8859-1"; } } - + static class CharsetRecog_8859_2 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 
0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0x20, - (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF, - (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, - (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, 
(byte) 0x20, + (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF, + (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, + (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20, }; private static NGramsPlusLang[] ngrams_8859_2 = new NGramsPlusLang[] { new NGramsPlusLang( - "cs", + "cs", new int[] { - 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, - 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, - 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, - 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, + 0x206120, 0x206279, 0x20646F, 0x206A65, 
0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, + 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, + 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, + 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, }), new NGramsPlusLang( - "hu", + "hu", new int[] { - 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, - 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, - 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, - 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, + 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, + 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, + 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, + 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, }), new NGramsPlusLang( - "pl", + "pl", new 
int[] { - 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, - 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, - 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, - 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, + 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, + 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, + 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, + 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, }), new NGramsPlusLang( - "ro", + "ro", new int[] { - 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, - 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, - 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, - 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 
0xEE6E20, + 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, + 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, + 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, + 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, }) }; + @Override public CharsetMatch match(CharsetDetector det) { String name = det.fC1Bytes ? "windows-1250" : "ISO-8859-2"; @@ -530,199 +535,209 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang); } + @Override public String getName() { return "ISO-8859-2"; } } - - + + abstract static class CharsetRecog_8859_5 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, - (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, - (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 
0xD6, (byte) 0xD7, - (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, 
(byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, }; + @Override public String getName() { return 
"ISO-8859-5"; } } - + static class CharsetRecog_8859_5_ru extends CharsetRecog_8859_5 { private static int[] ngrams = { - 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, - 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, - 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, - 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, + 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, + 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, + 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, + 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, }; + @Override public String getLanguage() { return "ru"; } - + + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap); return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); } } - + abstract static class CharsetRecog_8859_6 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, - (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, - (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, - (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, }; + @Override public String getName() { return "ISO-8859-6"; } } - + static class CharsetRecog_8859_6_ar extends CharsetRecog_8859_6 { private static int[] ngrams = { - 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, - 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, - 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, - 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, + 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, + 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 
0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, + 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, + 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, }; + @Override public String getLanguage() { return "ar"; } - + + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } } - + abstract static class CharsetRecog_8859_7 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0xA1, (byte) 0xA2, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xDC, (byte) 0x20, - (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0x20, (byte) 0xFC, (byte) 0x20, (byte) 0xFD, (byte) 0xFE, - (byte) 0xC0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0x20, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, 
(byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xA1, (byte) 0xA2, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xDC, (byte) 0x20, + (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0x20, (byte) 0xFC, (byte) 0x20, (byte) 0xFD, (byte) 0xFE, + (byte) 0xC0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0x20, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20, }; + @Override public String getName() { return "ISO-8859-7"; } } - + static class CharsetRecog_8859_7_el extends CharsetRecog_8859_7 { private static int[] ngrams = { - 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, - 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 
0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, - 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, - 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, + 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, + 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, + 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, + 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, }; + @Override public String getLanguage() { return "el"; } - + + @Override public CharsetMatch match(CharsetDetector det) { String name = det.fC1Bytes ? "windows-1253" : "ISO-8859-7"; @@ -730,69 +745,73 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence, name, "el"); } } - + abstract static class CharsetRecog_8859_8 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, 
(byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, }; + @Override public String getName() { return "ISO-8859-8"; } } - + static class CharsetRecog_8859_8_I_he extends CharsetRecog_8859_8 { private static int[] ngrams = { - 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, - 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, - 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, - 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, + 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, + 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 
0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, + 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, + 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, }; + @Override public String getName() { return "ISO-8859-8-I"; } + @Override public String getLanguage() { return "he"; } - + + @Override public CharsetMatch match(CharsetDetector det) { String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8-I"; @@ -800,21 +819,23 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "he"); } } - + static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8 { private static int[] ngrams = { - 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, - 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, - 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, - 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, + 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, + 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, + 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 
0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, + 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, }; + @Override public String getLanguage() { return "he"; } - + + @Override public CharsetMatch match(CharsetDetector det) { String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8"; @@ -823,64 +844,67 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { } } - + abstract static class CharsetRecog_8859_9 extends CharsetRecog_sbcs { protected static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, 
(byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x69, (byte) 0xFE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 
0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x69, (byte) 0xFE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, }; + @Override public String getName() { return "ISO-8859-9"; } } - + static class CharsetRecog_8859_9_tr extends CharsetRecog_8859_9 { private static int[] ngrams = { - 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, - 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, - 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 
0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, - 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, + 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, + 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, + 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, + 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, }; + @Override public String getLanguage() { return "tr"; } - + + @Override public CharsetMatch match(CharsetDetector det) { String name = det.fC1Bytes ? "windows-1254" : "ISO-8859-9"; @@ -888,237 +912,250 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence, name, "tr"); } } - + static class CharsetRecog_windows_1251 extends CharsetRecog_sbcs { private static int[] ngrams = { - 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, - 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, - 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, - 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, + 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, + 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, + 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, + 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, }; private static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x90, (byte) 0x83, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, - (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, - (byte) 0x20, (byte) 0xA2, (byte) 0xA2, (byte) 0xBC, (byte) 0x20, (byte) 0xB4, (byte) 0x20, (byte) 0x20, - (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xBF, - (byte) 0x20, (byte) 0x20, (byte) 0xB3, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x20, (byte) 0x20, - (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, 
(byte) 0xBC, (byte) 0xBE, (byte) 0xBE, (byte) 0xBF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, - (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 
0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x90, (byte) 0x83, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, + (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, + (byte) 0x20, (byte) 0xA2, (byte) 0xA2, (byte) 0xBC, (byte) 0x20, (byte) 0xB4, (byte) 0x20, (byte) 0x20, + (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xBF, + (byte) 0x20, (byte) 0x20, (byte) 0xB3, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0xBC, (byte) 0xBE, (byte) 0xBE, (byte) 0xBF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, 
(byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, + (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF, }; + @Override public String getName() { return "windows-1251"; } - + + @Override public String getLanguage() { return "ru"; } - + + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } } - + static class CharsetRecog_windows_1256 extends CharsetRecog_sbcs { private static int[] ngrams = { - 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, - 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, - 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, - 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, + 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, + 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, + 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, + 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 
0xEDD120, 0xEDE420, }; private static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x81, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x88, (byte) 0x20, (byte) 0x8A, (byte) 0x20, (byte) 0x9C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F, - (byte) 0x90, (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x98, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x20, (byte) 0x20, (byte) 0x9F, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, - (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, - (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0x20, - (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, - (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, - (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xF4, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0xF9, (byte) 0x20, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0x20, (byte) 0xFF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, 
(byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x81, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x88, (byte) 0x20, (byte) 0x8A, (byte) 0x20, (byte) 0x9C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F, + (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x98, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x20, (byte) 0x20, (byte) 0x9F, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
+ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0x20, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, + (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xF4, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0xF9, (byte) 0x20, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0x20, (byte) 0xFF, }; + @Override public String getName() { return "windows-1256"; } - + + @Override public String getLanguage() { return "ar"; } - + + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap); return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); } } - + static class CharsetRecog_KOI8_R extends CharsetRecog_sbcs { private static int[] ngrams = { - 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, - 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, - 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, - 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, + 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, + 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, + 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, + 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, }; private static byte[] byteMap = { - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, 
(byte) 0x00, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, - (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, - (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, - (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, - (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 
0x20, (byte) 0x20, (byte) 0x20, - (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, - (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, - (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, - (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, - (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, - (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, - (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, - (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, 
(byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, + (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F, + (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, + (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, + (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, + (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, + (byte) 0xC8, (byte) 0xC9, (byte) 
0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, + (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, + (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, }; - + + @Override public String getName() { return "KOI8-R"; } - + + @Override public String getLanguage() { return "ru"; } - + + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } } - + abstract static class CharsetRecog_IBM424_he extends CharsetRecog_sbcs { protected static byte[] byteMap = { /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ -/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 4- */ (byte) 0x40, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 
0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 7- */ (byte) 0x40, (byte) 0x71, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x00, (byte) 0x40, (byte) 0x40, -/* 8- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 9- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* B- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, 
(byte) 0x40, (byte) 0x40, (byte) 0x40, -/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 4- */ (byte) 0x40, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 7- */ (byte) 0x40, (byte) 0x71, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x00, (byte) 0x40, (byte) 
0x40, +/* 8- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 9- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* B- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, }; + @Override public String getLanguage() { return "he"; } } - static class CharsetRecog_IBM424_he_rtl extends CharsetRecog_IBM424_he + static class CharsetRecog_IBM424_he_rtl extends CharsetRecog_IBM424_he { + @Override public String getName() { 
return "IBM424_rtl"; } private static int[] ngrams = { - 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, - 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, - 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, - 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, + 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, + 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, + 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, + 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, }; + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap, (byte)0x40); return confidence == 0 ? 
null : new CharsetMatch(det, this, confidence); } } - static class CharsetRecog_IBM424_he_ltr extends CharsetRecog_IBM424_he + static class CharsetRecog_IBM424_he_ltr extends CharsetRecog_IBM424_he { + @Override public String getName() { return "IBM424_ltr"; @@ -1130,44 +1167,46 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651 }; + @Override public CharsetMatch match(CharsetDetector det) { int confidence = match(det, ngrams, byteMap, (byte)0x40); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } } - + abstract static class CharsetRecog_IBM420_ar extends CharsetRecog_sbcs { protected static byte[] byteMap = { /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ -/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 5- */ (byte) 0x40, (byte) 0x51, 
(byte) 0x52, (byte) 0x40, (byte) 0x40, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 7- */ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x78, (byte) 0x79, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, -/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F, -/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, -/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, (byte) 0xAD, (byte) 0xAE, (byte) 0xAF, -/* B- */ (byte) 0xB0, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x40, (byte) 0x40, (byte) 0xB8, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0xBD, (byte) 0xBE, (byte) 0xBF, -/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0xCB, (byte) 0x40, (byte) 0xCD, (byte) 0x40, (byte) 0xCF, -/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, -/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 
0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xEA, (byte) 0xEB, (byte) 0x40, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, -/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x40, +/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x40, (byte) 0x40, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 7- */ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, 
(byte) 0x77, (byte) 0x78, (byte) 0x79, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, +/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F, +/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F, +/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, (byte) 0xAD, (byte) 0xAE, (byte) 0xAF, +/* B- */ (byte) 0xB0, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x40, (byte) 0x40, (byte) 0xB8, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0xBD, (byte) 0xBE, (byte) 0xBF, +/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0xCB, (byte) 0x40, (byte) 0xCD, (byte) 0x40, (byte) 0xCF, +/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, +/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xEA, (byte) 0xEB, (byte) 0x40, (byte) 0xED, (byte) 0xEE, (byte) 0xEF, +/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x40, }; - + + @Override public String getLanguage() { return "ar"; } - + } - static class CharsetRecog_IBM420_ar_rtl extends 
CharsetRecog_IBM420_ar + static class CharsetRecog_IBM420_ar_rtl extends CharsetRecog_IBM420_ar { private static int[] ngrams = { 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, @@ -1176,35 +1215,39 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, }; + @Override public String getName() { return "IBM420_rtl"; } + @Override public CharsetMatch match(CharsetDetector det) { int confidence = matchIBM420(det, ngrams, byteMap, (byte)0x40); return confidence == 0 ? null : new CharsetMatch(det, this, confidence); } - + } - static class CharsetRecog_IBM420_ar_ltr extends CharsetRecog_IBM420_ar + static class CharsetRecog_IBM420_ar_ltr extends CharsetRecog_IBM420_ar { private static int[] ngrams = { - 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, + 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156 }; + @Override public String getName() { return "IBM420_ltr"; } + @Override public CharsetMatch match(CharsetDetector det) { int confidence = matchIBM420(det, ngrams, byteMap, (byte)0x40); return confidence == 0 
? null : new CharsetMatch(det, this, confidence); } - + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormat.java index 657efa614a5..735fac6606d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormat.java @@ -48,7 +48,7 @@ import com.ibm.icu.util.ULocale; public class ChineseDateFormat extends SimpleDateFormat { // Generated by serialver from JDK 1.4.1_01 static final long serialVersionUID = -4610300753104099899L; - + // TODO Finish the constructors /** @@ -88,7 +88,7 @@ public class ChineseDateFormat extends SimpleDateFormat { */ @Deprecated public ChineseDateFormat(String pattern, String override, ULocale locale) { - super(pattern, new ChineseDateFormatSymbols(locale), + super(pattern, new ChineseDateFormatSymbols(locale), new ChineseCalendar(TimeZone.getDefault(), locale), locale, true, override); } @@ -113,13 +113,14 @@ public class ChineseDateFormat extends SimpleDateFormat { // default: // return super.subFormat(ch, count, beginOffset, pos, formatData, cal); // } -// } +// } /** * {@inheritDoc} * @internal * @deprecated This API is ICU internal only. 
*/ + @Override @Deprecated protected void subFormat(StringBuffer buf, char ch, int count, int beginOffset, @@ -141,10 +142,11 @@ public class ChineseDateFormat extends SimpleDateFormat { /** * {@inheritDoc} - * + * * @deprecated ICU 50 */ @Deprecated + @Override protected int subParse(String text, int start, char ch, int count, boolean obeyCount, boolean allowNegative, boolean[] ambiguousYear, Calendar cal) { // Logic to handle numeric 'G' eras for chinese calendar, and to skip special 2-digit year @@ -158,9 +160,10 @@ public class ChineseDateFormat extends SimpleDateFormat { /** * {@inheritDoc} - * + * * @deprecated ICU 50 */ + @Override @Deprecated protected DateFormat.Field patternCharToDateFormatField(char ch) { // no longer any field corresponding to pattern char 'l' @@ -194,10 +197,10 @@ public class ChineseDateFormat extends SimpleDateFormat { * the ChineseCalendar field which this attribute represents. * Use -1 for calendarField if this field does not have a * corresponding ChineseCalendar field. - * + * * @param name Name of the attribute * @param calendarField Calendar field constant - * + * * @deprecated ICU 50 */ @Deprecated @@ -209,13 +212,13 @@ public class ChineseDateFormat extends SimpleDateFormat { * Returns the Field constant that corresponds to the * ChineseCalendar field calendarField. If there is no * corresponding Field is available, null is returned. - * + * * @param calendarField ChineseCalendar field constant * @return Field associated with the calendarField, * or null if no associated Field is available. * @throws IllegalArgumentException if calendarField is not * a valid Calendar field constant. 
- * + * * @deprecated ICU 50 */ @Deprecated @@ -230,9 +233,10 @@ public class ChineseDateFormat extends SimpleDateFormat { /** * {@inheritDoc} - * + * * @deprecated ICU 50 */ + @Override @Deprecated ///CLOVER:OFF protected Object readResolve() throws InvalidObjectException { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormatSymbols.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormatSymbols.java index 7cf8a54a690..a4eddfee3f4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormatSymbols.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ChineseDateFormatSymbols.java @@ -24,13 +24,13 @@ import com.ibm.icu.util.ULocale.Category; * @see ChineseDateFormat * @see com.ibm.icu.util.ChineseCalendar * @author Alan Liu - * @deprecated ICU 50 + * @deprecated ICU 50 */ @Deprecated public class ChineseDateFormatSymbols extends DateFormatSymbols { // Generated by serialver from JDK 1.4.1_01 static final long serialVersionUID = 6827816119783952890L; - + /* * Package-private array that ChineseDateFormat needs to be able to * read. 
@@ -111,6 +111,7 @@ public class ChineseDateFormatSymbols extends DateFormatSymbols { initializeIsLeapMonth(); } + @Override void initializeData(DateFormatSymbols dfs) { super.initializeData(dfs); if (dfs instanceof ChineseDateFormatSymbols) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java index 192940166f8..b2c4c61b7fb 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java @@ -27,7 +27,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { fHanWordSet.applyPattern("[:Han:]"); fKatakanaWordSet.applyPattern("[[:Katakana:]\\uff9e\\uff9f]"); fHiraganaWordSet.applyPattern("[:Hiragana:]"); - + // freeze them all fHangulWordSet.freeze(); fHanWordSet.freeze(); @@ -36,7 +36,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { } private DictionaryMatcher fDictionary = null; - + public CjkBreakEngine(boolean korean) throws IOException { super(BreakIterator.KIND_WORD); fDictionary = DictionaryData.loadDictionaryFor("Hira"); @@ -53,6 +53,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { } } + @Override public boolean equals(Object obj) { if (obj instanceof CjkBreakEngine) { CjkBreakEngine other = (CjkBreakEngine)obj; @@ -61,10 +62,11 @@ class CjkBreakEngine extends DictionaryBreakEngine { return false; } + @Override public int hashCode() { return getClass().hashCode(); } - + private static final int kMaxKatakanaLength = 8; private static final int kMaxKatakanaGroupLength = 20; private static final int maxSnlp = 255; @@ -73,12 +75,13 @@ class CjkBreakEngine extends DictionaryBreakEngine { int katakanaCost[] = new int[] { 8192, 984, 408, 240, 204, 252, 300, 372, 480 }; return (wordlength > kMaxKatakanaLength) ? 
8192 : katakanaCost[wordlength]; } - + private static boolean isKatakana(int value) { return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) || (value >= 0xFF66 && value <= 0xFF9F); } - + + @Override public int divideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos, DequeI foundBreaks) { if (startPos >= endPos) { @@ -124,7 +127,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { charPositions[numChars] = index; } } - + // From here on out, do the algorithm. Note that our indices // refer to indices within the normalized string. int[] bestSnlp = new int[numChars + 1]; @@ -137,7 +140,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { for (int i = 0; i <= numChars; i++) { prev[i] = -1; } - + final int maxWordSize = 20; int values[] = new int[numChars]; int lengths[] = new int[numChars]; @@ -148,16 +151,16 @@ class CjkBreakEngine extends DictionaryBreakEngine { if (bestSnlp[i] == kint32max) { continue; } - + int maxSearchLength = (i + maxWordSize < numChars) ? maxWordSize : (numChars - i); int[] count_ = new int[1]; fDictionary.matches(text, maxSearchLength, lengths, count_, maxSearchLength, values); int count = count_[0]; - - // if there are no single character matches found in the dictionary + + // if there are no single character matches found in the dictionary // starting with this character, treat character as a 1-character word // with the highest value possible (i.e. the least likely to occur). - // Exclude Korean characters from this treatment, as they should be + // Exclude Korean characters from this treatment, as they should be // left together by default. text.setIndex(i); // fDictionary.matches() advances the text position; undo that. if ((count == 0 || lengths[0] != 1) && current32(text) != DONE32 && !fHangulWordSet.contains(current32(text))) { @@ -173,11 +176,11 @@ class CjkBreakEngine extends DictionaryBreakEngine { prev[lengths[j] + i] = i; } } - + // In Japanese, single-character Katakana words are pretty rare. 
// So we apply the following heuristic to Katakana: any continuous // run of Katakana characters is considered a candidate word with - // a default cost specified in the katakanaCost table according + // a default cost specified in the katakanaCost table according // to its length. boolean is_katakana = isKatakana(current32(text)); if (!is_prev_katakana && is_katakana) { @@ -187,7 +190,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { next32(text); ++j; } - + if ((j - i) < kMaxKatakanaGroupLength) { int newSnlp = bestSnlp[i] + getKatakanaCost(j - i); if (newSnlp < bestSnlp[j]) { @@ -229,7 +232,7 @@ class CjkBreakEngine extends DictionaryBreakEngine { foundBreaks.pop(); correctedNumBreaks--; } - if (!foundBreaks.isEmpty()) + if (!foundBreaks.isEmpty()) inText.setIndex(foundBreaks.peek()); return correctedNumBreaks; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java index e2bf705a8e2..f5021533586 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormatSymbols.java @@ -1422,6 +1422,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * Overrides clone. * @stable ICU 2.0 */ + @Override public Object clone() { try { @@ -1439,6 +1440,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * Generates a hash code for the DateFormatSymbols object. * @stable ICU 2.0 */ + @Override public int hashCode() { // Is this sufficient? return requestedLocale.toString().hashCode(); @@ -1448,6 +1450,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { * Overrides equals. 
* @stable ICU 2.0 */ + @Override public boolean equals(Object obj) { if (this == obj) return true; @@ -2013,7 +2016,7 @@ public class DateFormatSymbols implements Serializable, Cloneable { } UResourceBundle contextTransformsBundle = null; try { - contextTransformsBundle = (UResourceBundle)rb.getWithFallback("contextTransforms"); + contextTransformsBundle = rb.getWithFallback("contextTransforms"); } catch (MissingResourceException e) { contextTransformsBundle = null; // probably redundant diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java index 5a05638916b..99ed238fd8e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java @@ -14,7 +14,7 @@ import java.util.BitSet; import com.ibm.icu.impl.CharacterIteration; abstract class DictionaryBreakEngine implements LanguageBreakEngine { - + /* Helper class for improving readability of the Thai/Lao/Khmer word break * algorithm. */ @@ -82,7 +82,7 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine { mark = current; } } - + /** * A deque-like structure holding raw ints. * Partial, limited implementation, only what is needed by the dictionary implementation. @@ -93,55 +93,55 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine { private int[] data = new int[50]; private int lastIdx = 4; // or base of stack. Index of element. private int firstIdx = 4; // or Top of Stack. Index of element + 1. - + int size() { return firstIdx - lastIdx; } - + boolean isEmpty() { return size() == 0; } - + private void grow() { int[] newData = new int[data.length * 2]; System.arraycopy(data, 0, newData, 0, data.length); data = newData; } - + void offer(int v) { // Note that the actual use cases of offer() add at most one element. // We make no attempt to handle more than a few. 
assert lastIdx > 0; data[--lastIdx] = v; } - + void push(int v) { if (firstIdx >= data.length) { grow(); } data[firstIdx++] = v; } - + int pop() { assert size() > 0; return data[--firstIdx]; } - + int peek() { assert size() > 0; return data[firstIdx - 1]; } - + int peekLast() { assert size() > 0; return data[lastIdx]; } - + int pollLast() { assert size() > 0; return data[lastIdx++]; } - + boolean contains(int v) { for (int i=lastIdx; i< firstIdx; i++) { if (data[i] == v) { @@ -151,13 +151,13 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine { return false; } } - + UnicodeSet fSet = new UnicodeSet(); private BitSet fTypes = new BitSet(32); /** * @param breakTypes The types of break iterators that can use this engine. - * For example, BreakIterator.KIND_LINE + * For example, BreakIterator.KIND_LINE */ public DictionaryBreakEngine(Integer... breakTypes) { for (Integer type: breakTypes) { @@ -165,15 +165,17 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine { } } + @Override public boolean handles(int c, int breakType) { return fTypes.get(breakType) && // this type can use us fSet.contains(c); // we recognize the character } - public int findBreaks(CharacterIterator text, int startPos, int endPos, + @Override + public int findBreaks(CharacterIterator text, int startPos, int endPos, boolean reverse, int breakType, DequeI foundBreaks) { int result = 0; - + // Find the span of characters included in the set. // The span to break begins at the current position int the text, and // extends towards the start or end of the text, depending on 'reverse'. 
@@ -206,7 +208,7 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine { return result; } - + void setCharacters(UnicodeSet set) { fSet = new UnicodeSet(set); fSet.compact(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DurationFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DurationFormat.java index ae54faa36f3..3a0fcd2db1e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DurationFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DurationFormat.java @@ -39,7 +39,7 @@ public abstract class DurationFormat extends UFormat { public static DurationFormat getInstance(ULocale locale) { return BasicDurationFormat.getInstance(locale); } - + /** * Subclass interface @@ -49,7 +49,7 @@ public abstract class DurationFormat extends UFormat { @Deprecated protected DurationFormat() { } - + /** * Subclass interface * @internal @@ -70,6 +70,7 @@ public abstract class DurationFormat extends UFormat { * @deprecated ICU 56 */ @Deprecated + @Override public abstract StringBuffer format(Object object, StringBuffer toAppend, FieldPosition pos); @@ -77,6 +78,7 @@ public abstract class DurationFormat extends UFormat { * DurationFormat cannot parse, by default. This method will throw an UnsupportedOperationException. * @deprecated ICU 56 */ + @Override @Deprecated public Object parseObject(String source, ParsePosition pos) { throw new UnsupportedOperationException(); @@ -89,7 +91,7 @@ public abstract class DurationFormat extends UFormat { * formatDurationFrom(long, long) using now * as the reference date, and the difference between now and * targetDate.getTime() as the duration. - * + * * @param targetDate the ending date * @return the formatted time * @deprecated ICU 56 @@ -102,7 +104,7 @@ public abstract class DurationFormat extends UFormat { *

* This is a convenience method that calls formatDurationFrom * using the current system time as the reference date. - * + * * @param duration the duration in milliseconds * @return the formatted time * @deprecated ICU 56 @@ -119,7 +121,7 @@ public abstract class DurationFormat extends UFormat { * The duration is expressed as the number of milliseconds in the * past (negative values) or future (positive values) with respect * to a reference date (expressed as milliseconds in epoch). - * + * * @param duration the duration in milliseconds * @param referenceDate the date from which to compute the duration * @return the formatted time diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/LocaleDisplayNames.java b/icu4j/main/classes/core/src/com/ibm/icu/text/LocaleDisplayNames.java index 75443c1e68c..005e52ad707 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/LocaleDisplayNames.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/LocaleDisplayNames.java @@ -111,7 +111,7 @@ public abstract class LocaleDisplayNames { if (FACTORY_DISPLAYCONTEXT != null) { try { result = (LocaleDisplayNames) FACTORY_DISPLAYCONTEXT.invoke(null, - locale, (Object[])contexts); + locale, contexts); } catch (InvocationTargetException e) { // fall through } catch (IllegalAccessException e) { @@ -397,6 +397,7 @@ public abstract class LocaleDisplayNames { this.collator = collator; this.useSelf = useSelf; } + @Override public int compare(UiListItem o1, UiListItem o2) { int result = useSelf ? 
collator.compare(o1.nameInSelf, o2.nameInSelf) : collator.compare(o1.nameInDisplayLocale, o2.nameInDisplayLocale); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java index 471364699f8..9b2dd308e49 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java @@ -307,7 +307,7 @@ import com.ibm.icu.util.ULocale.Category; * System.out.println(msgFmt.format(args)); * args.put("num_files", 3); * System.out.println(msgFmt.format(args)); - * + * * output: * There are no files on disk "MyDisk". * There are 3 files on "MyDisk". @@ -437,7 +437,7 @@ public class MessageFormat extends UFormat { public ULocale getULocale() { return ulocale; } - + /** * Sets the pattern used by this message format. * Parses the pattern and caches Format objects for simple argument types. @@ -1053,6 +1053,7 @@ public class MessageFormat extends UFormat { * an array of Object and this format uses named arguments * @stable ICU 3.0 */ + @Override public final StringBuffer format(Object arguments, StringBuffer result, FieldPosition pos) { @@ -1096,6 +1097,7 @@ public class MessageFormat extends UFormat { * expected by the corresponding argument or custom Format object. * @stable ICU 3.8 */ + @Override public AttributedCharacterIterator formatToCharacterIterator(Object arguments) { if (arguments == null) { throw new NullPointerException( @@ -1148,7 +1150,7 @@ public class MessageFormat extends UFormat { "This method is not available in MessageFormat objects " + "that use named argument."); } - + // Count how many slots we need in the array. int maxArgId = -1; for (int partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { @@ -1167,7 +1169,7 @@ public class MessageFormat extends UFormat { return resultArray; } - + /** * {@icu} Parses the string, returning the results in a Map. 
* This is similar to the version that returns an array @@ -1188,9 +1190,9 @@ public class MessageFormat extends UFormat { if (pos.getIndex() == backupStartPos) { return null; } - return result; + return result; } - + /** * Parses text from the beginning of the given string to produce an object * array. @@ -1265,7 +1267,7 @@ public class MessageFormat extends UFormat { // We do not support parsing Plural formats. (No REPLACE_NUMBER here.) assert type==Part.Type.ARG_START : "Unexpected Part "+part+" in parsed message."; int argLimit=msgPattern.getLimitPartIndex(i); - + ArgType argType=part.getArgType(); part=msgPattern.getPart(++i); // Compute the argId, so we can use it as a key. @@ -1405,6 +1407,7 @@ public class MessageFormat extends UFormat { * @throws NullPointerException if pos is null. * @stable ICU 3.0 */ + @Override public Object parseObject(String source, ParsePosition pos) { if (!msgPattern.hasNamedArguments()) { return parse(source, pos); @@ -1429,7 +1432,7 @@ public class MessageFormat extends UFormat { } else { other.customFormatArgStarts = null; } - + if (cachedFormatters != null) { other.cachedFormatters = new HashMap(); Iterator> it = cachedFormatters.entrySet().iterator(); @@ -1440,7 +1443,7 @@ public class MessageFormat extends UFormat { } else { other.cachedFormatters = null; } - + other.msgPattern = msgPattern == null ? null : (MessagePattern)msgPattern.clone(); other.stockDateFormatter = stockDateFormatter == null ? null : (DateFormat) stockDateFormatter.clone(); @@ -1510,6 +1513,7 @@ public class MessageFormat extends UFormat { * * @stable ICU 3.8 */ + @Override protected Object readResolve() throws InvalidObjectException { if (this.getClass() != MessageFormat.Field.class) { throw new InvalidObjectException( @@ -1824,7 +1828,7 @@ public class MessageFormat extends UFormat { * as soon as it finds an argument, or it reaches the end of the string. * @param from Index in the pattern string to start from. 
* @return A substring from the pattern string representing the longest possible - * substring with no arguments. + * substring with no arguments. */ private String getLiteralStringUntilNextArgument(int from) { StringBuilder b = new StringBuilder(); @@ -2080,6 +2084,7 @@ public class MessageFormat extends UFormat { msgFormat = mf; this.type = type; } + @Override public String select(Object ctx, double number) { if(rules == null) { rules = PluralRules.forLocale(msgFormat.ulocale, type); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/MessagePattern.java b/icu4j/main/classes/core/src/com/ibm/icu/text/MessagePattern.java index 0c0a5e32417..13316725411 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/MessagePattern.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MessagePattern.java @@ -874,6 +874,7 @@ public final class MessagePattern implements Cloneable, Freezable * *

To a user of your program, however, both of these sequences should be - * treated as the same "user-level" character "A with acute accent". When you - * are searching or comparing text, you must ensure that these two sequences are + * treated as the same "user-level" character "A with acute accent". When you + * are searching or comparing text, you must ensure that these two sequences are * treated equivalently. In addition, you must handle characters with more than * one accent. Sometimes the order of a character's combining accents is * significant, while in other cases accent sequences in different orders are @@ -75,9 +75,9 @@ import com.ibm.icu.util.ICUCloneNotSupportedException; * into the corresponding semantic characters. When sorting and searching, you * will often want to use these mappings. * - *

normalize helps solve these problems by transforming text into - * the canonical composed and decomposed forms as shown in the first example - * above. In addition, you can have it perform compatibility decompositions so + *

normalize helps solve these problems by transforming text into + * the canonical composed and decomposed forms as shown in the first example + * above. In addition, you can have it perform compatibility decompositions so * that you can treat compatibility characters the same as their equivalents. * Finally, normalize rearranges accents into the proper canonical * order, so that you do not have to worry about accent rearrangement on your @@ -85,22 +85,22 @@ import com.ibm.icu.util.ICUCloneNotSupportedException; * *

Form FCD, "Fast C or D", is also designed for collation. * It allows to work on strings that are not necessarily normalized - * with an algorithm (like in collation) that works under "canonical closure", - * i.e., it treats precomposed characters and their decomposed equivalents the + * with an algorithm (like in collation) that works under "canonical closure", + * i.e., it treats precomposed characters and their decomposed equivalents the * same. * - *

It is not a normalization form because it does not provide for uniqueness of - * representation. Multiple strings may be canonically equivalent (their NFDs + *

It is not a normalization form because it does not provide for uniqueness of + * representation. Multiple strings may be canonically equivalent (their NFDs * are identical) and may all conform to FCD without being identical themselves. * - *

The form is defined such that the "raw decomposition", the recursive - * canonical decomposition of each character, results in a string that is - * canonically ordered. This means that precomposed characters are allowed for + *

The form is defined such that the "raw decomposition", the recursive + * canonical decomposition of each character, results in a string that is + * canonically ordered. This means that precomposed characters are allowed for * as long as their decompositions do not need canonical reordering. * *

Its advantage for a process like collation is that all NFD and most NFC texts - * - and many unnormalized texts - already conform to FCD and do not need to be - * normalized (NFD) for such a process. The FCD quick check will return YES for + * - and many unnormalized texts - already conform to FCD and do not need to be + * normalized (NFD) for such a process. The FCD quick check will return YES for * most strings in practice. * *

normalize(FCD) may be implemented with NFD. @@ -108,9 +108,9 @@ import com.ibm.icu.util.ICUCloneNotSupportedException; *

For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence in Applications): * http://www.unicode.org/notes/tn5/#FCD * - *

ICU collation performs either NFD or FCD normalization automatically if - * normalization is turned on for the collator object. Beyond collation and - * string search, normalized strings may be useful for string equivalence + *

ICU collation performs either NFD or FCD normalization automatically if + * normalization is turned on for the collator object. Beyond collation and + * string search, normalized strings may be useful for string equivalence * comparisons, transliteration/transcription, unique representations, etc. * *

The W3C generally recommends to exchange texts in NFC. @@ -246,40 +246,46 @@ public final class Normalizer implements Cloneable { } private static final class NONEMode extends Mode { + @Override protected Normalizer2 getNormalizer2(int options) { return Norm2AllModes.NOOP_NORMALIZER2; } } private static final class NFDMode extends Mode { + @Override protected Normalizer2 getNormalizer2(int options) { return (options&UNICODE_3_2) != 0 ? NFD32ModeImpl.INSTANCE.normalizer2 : NFDModeImpl.INSTANCE.normalizer2; } } private static final class NFKDMode extends Mode { + @Override protected Normalizer2 getNormalizer2(int options) { return (options&UNICODE_3_2) != 0 ? NFKD32ModeImpl.INSTANCE.normalizer2 : NFKDModeImpl.INSTANCE.normalizer2; } } private static final class NFCMode extends Mode { + @Override protected Normalizer2 getNormalizer2(int options) { return (options&UNICODE_3_2) != 0 ? NFC32ModeImpl.INSTANCE.normalizer2 : NFCModeImpl.INSTANCE.normalizer2; } } private static final class NFKCMode extends Mode { + @Override protected Normalizer2 getNormalizer2(int options) { return (options&UNICODE_3_2) != 0 ? NFKC32ModeImpl.INSTANCE.normalizer2 : NFKCModeImpl.INSTANCE.normalizer2; } } private static final class FCDMode extends Mode { + @Override protected Normalizer2 getNormalizer2(int options) { return (options&UNICODE_3_2) != 0 ? FCD32ModeImpl.INSTANCE.normalizer2 : FCDModeImpl.INSTANCE.normalizer2; } } - /** + /** * No decomposition/composition. * * @deprecated ICU 56 Use {@link Normalizer2} instead. @@ -287,7 +293,7 @@ public final class Normalizer implements Cloneable { @Deprecated public static final Mode NONE = new NONEMode(); - /** + /** * Canonical decomposition. * * @deprecated ICU 56 Use {@link Normalizer2} instead. @@ -295,7 +301,7 @@ public final class Normalizer implements Cloneable { @Deprecated public static final Mode NFD = new NFDMode(); - /** + /** * Compatibility decomposition. * * @deprecated ICU 56 Use {@link Normalizer2} instead. 
@@ -303,7 +309,7 @@ public final class Normalizer implements Cloneable { @Deprecated public static final Mode NFKD = new NFKDMode(); - /** + /** * Canonical decomposition followed by canonical composition. * * @deprecated ICU 56 Use {@link Normalizer2} instead. @@ -311,15 +317,15 @@ public final class Normalizer implements Cloneable { @Deprecated public static final Mode NFC = new NFCMode(); - /** + /** * Default normalization. * * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated - public static final Mode DEFAULT = NFC; + public static final Mode DEFAULT = NFC; - /** + /** * Compatibility decomposition followed by canonical composition. * * @deprecated ICU 56 Use {@link Normalizer2} instead. @@ -327,7 +333,7 @@ public final class Normalizer implements Cloneable { @Deprecated public static final Mode NFKC =new NFKCMode(); - /** + /** * "Fast C or D" form. * * @deprecated ICU 56 Use {@link Normalizer2} instead. @@ -354,13 +360,13 @@ public final class Normalizer implements Cloneable { /** * Canonical decomposition followed by canonical composition. Used with the - * {@link com.ibm.icu.text.Normalizer constructors} and the static - * {@link #normalize normalize} method to determine the operation to be + * {@link com.ibm.icu.text.Normalizer constructors} and the static + * {@link #normalize normalize} method to determine the operation to be * performed. *

* If all optional features (e.g. {@link #IGNORE_HANGUL}) are turned * off, this operation produces output that is in - * Unicode Canonical + * Unicode Canonical * Form * C. *

@@ -374,12 +380,12 @@ public final class Normalizer implements Cloneable { /** * Compatibility decomposition followed by canonical composition. * Used with the {@link com.ibm.icu.text.Normalizer constructors} and the static - * {@link #normalize normalize} method to determine the operation to be + * {@link #normalize normalize} method to determine the operation to be * performed. *

* If all optional features (e.g. {@link #IGNORE_HANGUL}) are turned * off, this operation produces output that is in - * Unicode Canonical + * Unicode Canonical * Form * KC. *

@@ -398,7 +404,7 @@ public final class Normalizer implements Cloneable { *

* If all optional features (e.g. {@link #IGNORE_HANGUL}) are turned * off, this operation produces output that is in - * Unicode Canonical + * Unicode Canonical * Form * D. *

@@ -411,13 +417,13 @@ public final class Normalizer implements Cloneable { /** * Compatibility decomposition. This value is passed to the - * {@link com.ibm.icu.text.Normalizer constructors} and the static + * {@link com.ibm.icu.text.Normalizer constructors} and the static * {@link #normalize normalize} * method to determine the operation to be performed. *

* If all optional features (e.g. {@link #IGNORE_HANGUL}) are turned * off, this operation produces output that is in - * Unicode Canonical + * Unicode Canonical * Form * KD. *

@@ -448,7 +454,7 @@ public final class Normalizer implements Cloneable { */ @Deprecated public static final int IGNORE_HANGUL = 0x0001; - + /** * Result values for quickCheck(). * For details see Unicode Technical Report 15. @@ -460,46 +466,46 @@ public final class Normalizer implements Cloneable { //resultValue=value; } } - /** + /** * Indicates that string is not in the normalized format * @stable ICU 2.8 */ public static final QuickCheckResult NO = new QuickCheckResult(0); - - /** + + /** * Indicates that string is in the normalized format * @stable ICU 2.8 */ public static final QuickCheckResult YES = new QuickCheckResult(1); - /** - * Indicates it cannot be determined if string is in the normalized + /** + * Indicates it cannot be determined if string is in the normalized * format without further thorough checks. * @stable ICU 2.8 */ public static final QuickCheckResult MAYBE = new QuickCheckResult(2); - + /** * Option bit for compare: * Case sensitively compare the strings * @stable ICU 2.8 */ public static final int FOLD_CASE_DEFAULT = UCharacter.FOLD_CASE_DEFAULT; - + /** * Option bit for compare: * Both input strings are assumed to fulfill FCD conditions. * @stable ICU 2.8 */ public static final int INPUT_IS_FCD = 0x20000; - + /** * Option bit for compare: * Perform case-insensitive comparison. * @stable ICU 2.8 */ public static final int COMPARE_IGNORE_CASE = 0x10000; - + /** * Option bit for compare: * Compare strings in code point order instead of code unit order. @@ -507,7 +513,7 @@ public final class Normalizer implements Cloneable { */ public static final int COMPARE_CODE_POINT_ORDER = 0x8000; - /** + /** * Option value for case folding: * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I * and dotless i appropriately for Turkic languages (tr, az). 
@@ -533,7 +539,7 @@ public final class Normalizer implements Cloneable { */ @Deprecated public static final int COMPARE_NORM_OPTIONS_SHIFT = 20; - + //------------------------------------------------------------------------- // Iterator constructors //------------------------------------------------------------------------- @@ -559,7 +565,7 @@ public final class Normalizer implements Cloneable { @Deprecated public Normalizer(String str, Mode mode, int opt) { this.text = UCharacterIterator.getInstance(str); - this.mode = mode; + this.mode = mode; this.options=opt; norm2 = mode.getNormalizer2(opt); buffer = new StringBuilder(); @@ -659,23 +665,23 @@ public final class Normalizer implements Cloneable { * Compose a string. * The string will be composed to according to the specified mode. * @param str The string to compose. - * @param compat If true the string will be composed according to - * NFKC rules and if false will be composed according to + * @param compat If true the string will be composed according to + * NFKC rules and if false will be composed according to * NFC rules. * @return String The composed string * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated public static String compose(String str, boolean compat) { - return compose(str,compat,0); + return compose(str,compat,0); } - + /** * Compose a string. * The string will be composed to according to the specified mode. * @param str The string to compose. - * @param compat If true the string will be composed according to - * NFKC rules and if false will be composed according to + * @param compat If true the string will be composed according to + * NFKC rules and if false will be composed according to * NFC rules. 
* @param options The only recognized option is UNICODE_3_2 * @return String The composed string @@ -685,19 +691,19 @@ public final class Normalizer implements Cloneable { public static String compose(String str, boolean compat, int options) { return getComposeNormalizer2(compat, options).normalize(str); } - + /** * Compose a string. * The string will be composed to according to the specified mode. * @param source The char array to compose. * @param target A char buffer to receive the normalized text. - * @param compat If true the char array will be composed according to - * NFKC rules and if false will be composed according to + * @param compat If true the char array will be composed according to + * NFKC rules and if false will be composed according to * NFC rules. * @param options The normalization options, ORed together (0 for no options). - * @return int The total buffer size needed;if greater than length of + * @return int The total buffer size needed;if greater than length of * result, the output was truncated. - * @exception IndexOutOfBoundsException if target.length is less than the + * @exception IndexOutOfBoundsException if target.length is less than the * required length * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @@ -705,7 +711,7 @@ public final class Normalizer implements Cloneable { public static int compose(char[] source,char[] target, boolean compat, int options) { return compose(source, 0, source.length, target, 0, target.length, compat, options); } - + /** * Compose a string. * The string will be composed to according to the specified mode. 
@@ -713,15 +719,15 @@ public final class Normalizer implements Cloneable { * @param srcStart Start index of the source * @param srcLimit Limit index of the source * @param dest The char buffer to fill in - * @param destStart Start index of the destination buffer + * @param destStart Start index of the destination buffer * @param destLimit End index of the destination buffer - * @param compat If true the char array will be composed according to - * NFKC rules and if false will be composed according to + * @param compat If true the char array will be composed according to + * NFKC rules and if false will be composed according to * NFC rules. * @param options The normalization options, ORed together (0 for no options). - * @return int The total buffer size needed;if greater than length of + * @return int The total buffer size needed;if greater than length of * result, the output was truncated. - * @exception IndexOutOfBoundsException if target.length is less than the + * @exception IndexOutOfBoundsException if target.length is less than the * required length * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @@ -739,23 +745,23 @@ public final class Normalizer implements Cloneable { * Decompose a string. * The string will be decomposed to according to the specified mode. * @param str The string to decompose. - * @param compat If true the string will be decomposed according to NFKD - * rules and if false will be decomposed according to NFD + * @param compat If true the string will be decomposed according to NFKD + * rules and if false will be decomposed according to NFD * rules. * @return String The decomposed string * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated public static String decompose(String str, boolean compat) { - return decompose(str,compat,0); + return decompose(str,compat,0); } - + /** * Decompose a string. * The string will be decomposed to according to the specified mode. * @param str The string to decompose. 
- * @param compat If true the string will be decomposed according to NFKD - * rules and if false will be decomposed according to NFD + * @param compat If true the string will be decomposed according to NFKD + * rules and if false will be decomposed according to NFD * rules. * @param options The normalization options, ORed together (0 for no options). * @return String The decomposed string @@ -771,10 +777,10 @@ public final class Normalizer implements Cloneable { * The string will be decomposed to according to the specified mode. * @param source The char array to decompose. * @param target A char buffer to receive the normalized text. - * @param compat If true the char array will be decomposed according to NFKD - * rules and if false will be decomposed according to + * @param compat If true the char array will be decomposed according to NFKD + * rules and if false will be decomposed according to * NFD rules. - * @return int The total buffer size needed;if greater than length of + * @return int The total buffer size needed;if greater than length of * result,the output was truncated. * @param options The normalization options, ORed together (0 for no options). * @exception IndexOutOfBoundsException if the target capacity is less than @@ -785,7 +791,7 @@ public final class Normalizer implements Cloneable { public static int decompose(char[] source,char[] target, boolean compat, int options) { return decompose(source, 0, source.length, target, 0, target.length, compat, options); } - + /** * Decompose a string. * The string will be decomposed to according to the specified mode. 
@@ -793,13 +799,13 @@ public final class Normalizer implements Cloneable { * @param srcStart Start index of the source * @param srcLimit Limit index of the source * @param dest The char buffer to fill in - * @param destStart Start index of the destination buffer + * @param destStart Start index of the destination buffer * @param destLimit End index of the destination buffer - * @param compat If true the char array will be decomposed according to NFKD - * rules and if false will be decomposed according to + * @param compat If true the char array will be decomposed according to NFKD + * rules and if false will be decomposed according to * NFD rules. * @param options The normalization options, ORed together (0 for no options). - * @return int The total buffer size needed;if greater than length of + * @return int The total buffer size needed;if greater than length of * result,the output was truncated. * @exception IndexOutOfBoundsException if the target capacity is less than * the required length @@ -834,35 +840,35 @@ public final class Normalizer implements Cloneable { public static String normalize(String str, Mode mode, int options) { return mode.getNormalizer2(options).normalize(str); } - + /** * Normalize a string. - * The string will be normalized according to the specified normalization + * The string will be normalized according to the specified normalization * mode and options. * @param src The string to normalize. - * @param mode The normalization mode; one of Normalizer.NONE, - * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, + * @param mode The normalization mode; one of Normalizer.NONE, + * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, * Normalizer.NFKD, Normalizer.DEFAULT * @return the normalized string * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated public static String normalize(String src,Mode mode) { - return normalize(src, mode, 0); + return normalize(src, mode, 0); } /** * Normalize a string. 
- * The string will be normalized according to the specified normalization + * The string will be normalized according to the specified normalization * mode and options. * @param source The char array to normalize. * @param target A char buffer to receive the normalized text. - * @param mode The normalization mode; one of Normalizer.NONE, - * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, + * @param mode The normalization mode; one of Normalizer.NONE, + * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, * Normalizer.NFKD, Normalizer.DEFAULT * @param options The normalization options, ORed together (0 for no options). - * @return int The total buffer size needed;if greater than length of + * @return int The total buffer size needed;if greater than length of * result, the output was truncated. - * @exception IndexOutOfBoundsException if the target capacity is less + * @exception IndexOutOfBoundsException if the target capacity is less * than the required length * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @@ -879,20 +885,20 @@ public final class Normalizer implements Cloneable { * @param srcStart Start index of the source * @param srcLimit Limit index of the source * @param dest The char buffer to fill in - * @param destStart Start index of the destination buffer + * @param destStart Start index of the destination buffer * @param destLimit End index of the destination buffer - * @param mode The normalization mode; one of Normalizer.NONE, - * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, + * @param mode The normalization mode; one of Normalizer.NONE, + * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC, * Normalizer.NFKD, Normalizer.DEFAULT - * @param options The normalization options, ORed together (0 for no options). - * @return int The total buffer size needed;if greater than length of + * @param options The normalization options, ORed together (0 for no options). 
+ * @return int The total buffer size needed;if greater than length of * result, the output was truncated. - * @exception IndexOutOfBoundsException if the target capacity is + * @exception IndexOutOfBoundsException if the target capacity is * less than the required length * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated - public static int normalize(char[] src,int srcStart, int srcLimit, + public static int normalize(char[] src,int srcStart, int srcLimit, char[] dest,int destStart, int destLimit, Mode mode, int options) { CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart); @@ -939,9 +945,9 @@ public final class Normalizer implements Cloneable { * Convenience method. * * @param source string for determining if it is in a normalized format - * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, + * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, * Normalizer.NFKC,Normalizer.NFKD) - * @return Return code to specify if the text is normalized or not + * @return Return code to specify if the text is normalized or not * (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE) * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @@ -951,22 +957,22 @@ public final class Normalizer implements Cloneable { } /** - * Performing quick check on a string, to quickly determine if the string is + * Performing quick check on a string, to quickly determine if the string is * in a particular normalization format. * Three types of result can be returned Normalizer.YES, Normalizer.NO or * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument * string is in the desired normalized format, Normalizer.NO determines that - * argument string is not in the desired normalized format. A - * Normalizer.MAYBE result indicates that a more thorough check is required, - * the user may have to put the string in its normalized form and compare + * argument string is not in the desired normalized format. 
A + * Normalizer.MAYBE result indicates that a more thorough check is required, + * the user may have to put the string in its normalized form and compare * the results. * * @param source string for determining if it is in a normalized format - * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, + * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, * Normalizer.NFKC,Normalizer.NFKD) * @param options Options for use with exclusion set and tailored Normalization - * The only option that is currently recognized is UNICODE_3_2 - * @return Return code to specify if the text is normalized or not + * The only option that is currently recognized is UNICODE_3_2 + * @return Return code to specify if the text is normalized or not * (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE) * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @@ -978,13 +984,13 @@ public final class Normalizer implements Cloneable { /** * Convenience method. * - * @param source Array of characters for determining if it is in a + * @param source Array of characters for determining if it is in a * normalized format - * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, + * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, * Normalizer.NFKC,Normalizer.NFKD) * @param options Options for use with exclusion set and tailored Normalization * The only option that is currently recognized is UNICODE_3_2 - * @return Return code to specify if the text is normalized or not + * @return Return code to specify if the text is normalized or not * (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE) * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @@ -994,31 +1000,31 @@ public final class Normalizer implements Cloneable { } /** - * Performing quick check on a string, to quickly determine if the string is + * Performing quick check on a string, to quickly determine if the string is * in a particular normalization format. 
* Three types of result can be returned Normalizer.YES, Normalizer.NO or * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument * string is in the desired normalized format, Normalizer.NO determines that - * argument string is not in the desired normalized format. A - * Normalizer.MAYBE result indicates that a more thorough check is required, - * the user may have to put the string in its normalized form and compare + * argument string is not in the desired normalized format. A + * Normalizer.MAYBE result indicates that a more thorough check is required, + * the user may have to put the string in its normalized form and compare * the results. * * @param source string for determining if it is in a normalized format * @param start the start index of the source * @param limit the limit index of the source it is equal to the length - * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, + * @param mode normalization format (Normalizer.NFC,Normalizer.NFD, * Normalizer.NFKC,Normalizer.NFKD) * @param options Options for use with exclusion set and tailored Normalization - * The only option that is currently recognized is UNICODE_3_2 - * @return Return code to specify if the text is normalized or not + * The only option that is currently recognized is UNICODE_3_2 + * @return Return code to specify if the text is normalized or not * (Normalizer.YES, Normalizer.NO or * Normalizer.MAYBE) * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated - public static QuickCheckResult quickCheck(char[] source,int start, - int limit, Mode mode,int options) { + public static QuickCheckResult quickCheck(char[] source,int start, + int limit, Mode mode,int options) { CharBuffer srcBuffer = CharBuffer.wrap(source, start, limit - start); return mode.getNormalizer2(options).quickCheck(srcBuffer); } @@ -1032,20 +1038,20 @@ public final class Normalizer implements Cloneable { * For NFD, NFKD, and FCD, both functions work exactly the same. 
* For NFC and NFKC where quickCheck may return "maybe", this function will * perform further tests to arrive at a true/false result. - * @param src The input array of characters to be checked to see if + * @param src The input array of characters to be checked to see if * it is normalized * @param start The strart index in the source * @param limit The limit index in the source * @param mode the normalization mode * @param options Options for use with exclusion set and tailored Normalization - * The only option that is currently recognized is UNICODE_3_2 + * The only option that is currently recognized is UNICODE_3_2 * @return Boolean value indicating whether the source string is in the * "mode" normalization form * @deprecated ICU 56 Use {@link Normalizer2} instead. */ @Deprecated public static boolean isNormalized(char[] src,int start, - int limit, Mode mode, + int limit, Mode mode, int options) { CharBuffer srcBuffer = CharBuffer.wrap(src, start, limit - start); return mode.getNormalizer2(options).isNormalized(srcBuffer); @@ -1060,11 +1066,11 @@ public final class Normalizer implements Cloneable { * For NFD, NFKD, and FCD, both functions work exactly the same. * For NFC and NFKC where quickCheck may return "maybe", this function will * perform further tests to arrive at a true/false result. - * @param str the input string to be checked to see if it is + * @param str the input string to be checked to see if it is * normalized * @param mode the normalization mode * @param options Options for use with exclusion set and tailored Normalization - * The only option that is currently recognized is UNICODE_3_2 + * The only option that is currently recognized is UNICODE_3_2 * @see #isNormalized * @deprecated ICU 56 Use {@link Normalizer2} instead. 
*/ @@ -1075,11 +1081,11 @@ public final class Normalizer implements Cloneable { /** * Convenience Method - * @param char32 the input code point to be checked to see if it is + * @param char32 the input code point to be checked to see if it is * normalized * @param mode the normalization mode * @param options Options for use with exclusion set and tailored Normalization - * The only option that is currently recognized is UNICODE_3_2 + * The only option that is currently recognized is UNICODE_3_2 * * @see #isNormalized * @deprecated ICU 56 Use {@link Normalizer2} instead. @@ -1100,8 +1106,8 @@ public final class Normalizer implements Cloneable { * (and optionally case-folding) both strings entirely, * improving performance significantly. * - * Bulk normalization is only necessary if the strings do not fulfill the - * FCD conditions. Only in this case, and only if the strings are relatively + * Bulk normalization is only necessary if the strings do not fulfill the + * FCD conditions. Only in this case, and only if the strings are relatively * long, is memory allocated temporarily. * For FCD strings and short non-FCD strings there is no memory allocation. * @@ -1116,14 +1122,14 @@ public final class Normalizer implements Cloneable { * @param s2 Second source character array. * @param s2Start start index of the source * @param s2Limit limit of the source - * + * * @param options A bit set of options: * - FOLD_CASE_DEFAULT or 0 is used for default options: * Case-sensitive comparison in code unit order, and the input strings * are quick-checked for FCD. * * - INPUT_IS_FCD - * Set if the caller knows that both s1 and s2 fulfill the FCD + * Set if the caller knows that both s1 and s2 fulfill the FCD * conditions.If not set, the function will quickCheck for FCD * and normalize if necessary. 
* @@ -1145,16 +1151,16 @@ public final class Normalizer implements Cloneable { public static int compare(char[] s1, int s1Start, int s1Limit, char[] s2, int s2Start, int s2Limit, int options) { - if( s1==null || s1Start<0 || s1Limit<0 || + if( s1==null || s1Start<0 || s1Limit<0 || s2==null || s2Start<0 || s2Limit<0 || s1Limit * - * For details see concatenate + * For details see concatenate * * @param left Left source string. * @param right Right source string. @@ -1502,8 +1508,8 @@ public final class Normalizer implements Cloneable { return DONE; } } - - + + /** * Return the previous character in the normalized text and decrement * the iteration position by one. If the beginning @@ -1521,7 +1527,7 @@ public final class Normalizer implements Cloneable { return DONE; } } - + /** * Reset the index to the beginning of the text. * This is equivalent to setIndexOnly(startIndex)). @@ -1533,7 +1539,7 @@ public final class Normalizer implements Cloneable { currentIndex=nextIndex=0; clearBuffer(); } - + /** * Set the iteration position in the input text that is being normalized, * without any immediate normalization. @@ -1549,7 +1555,7 @@ public final class Normalizer implements Cloneable { currentIndex=nextIndex=index; clearBuffer(); } - + /** * Set the iteration position in the input text that is being normalized * and return the first normalized character at that position. @@ -1579,8 +1585,8 @@ public final class Normalizer implements Cloneable { } ///CLOVER:ON /** - * Retrieve the index of the start of the input text. This is the begin - * index of the CharacterIterator or the start (i.e. 0) of the + * Retrieve the index of the start of the input text. This is the begin + * index of the CharacterIterator or the start (i.e. 0) of the * String over which this Normalizer is iterating * @deprecated ICU 2.2. Use startIndex() instead. 
* @return The codepoint as an int @@ -1614,7 +1620,7 @@ public final class Normalizer implements Cloneable { reset(); return next(); } - + /** * Return the last character in the normalized text. This resets * the Normalizer's position to be just before the @@ -1655,8 +1661,8 @@ public final class Normalizer implements Cloneable { } /** - * Retrieve the index of the start of the input text. This is the begin - * index of the CharacterIterator or the start (i.e. 0) of the + * Retrieve the index of the start of the input text. This is the begin + * index of the CharacterIterator or the start (i.e. 0) of the * String over which this Normalizer is iterating * @return The current iteration position * @deprecated ICU 56 @@ -1764,7 +1770,7 @@ public final class Normalizer implements Cloneable { return 0; } } - + /** * Gets the underlying text storage * @param fillIn the char buffer to fill the UTF-16 units. @@ -1778,7 +1784,7 @@ public final class Normalizer implements Cloneable { public int getText(char[] fillIn) { return text.getText(fillIn); } - + /** * Gets the length of underlying text storage * @return the length @@ -1788,7 +1794,7 @@ public final class Normalizer implements Cloneable { public int getLength() { return text.getLength(); } - + /** * Returns the text under iteration as a string * @return a copy of the text under iteration. @@ -1798,7 +1804,7 @@ public final class Normalizer implements Cloneable { public String getText() { return text.getText(); } - + /** * Set the input text over which this Normalizer will iterate. * The iteration position is set to the beginning of the input text. 
@@ -1810,7 +1816,7 @@ public final class Normalizer implements Cloneable { UCharacterIterator newIter = UCharacterIterator.getInstance(newText); if (newIter == null) { throw new IllegalStateException("Could not create a new UCharacterIterator"); - } + } text = newIter; reset(); } @@ -1826,7 +1832,7 @@ public final class Normalizer implements Cloneable { UCharacterIterator newIter = UCharacterIterator.getInstance(newText); if (newIter == null) { throw new IllegalStateException("Could not create a new UCharacterIterator"); - } + } text = newIter; reset(); } @@ -1842,7 +1848,7 @@ public final class Normalizer implements Cloneable { UCharacterIterator newIter = UCharacterIterator.getInstance(newText); if (newIter == null) { throw new IllegalStateException("Could not create a new UCharacterIterator"); - } + } text = newIter; reset(); } @@ -1858,7 +1864,7 @@ public final class Normalizer implements Cloneable { UCharacterIterator newIter = UCharacterIterator.getInstance(newText); if (newIter == null) { throw new IllegalStateException("Could not create a new UCharacterIterator"); - } + } text = newIter; reset(); } @@ -1870,7 +1876,7 @@ public final class Normalizer implements Cloneable { * @deprecated ICU 56 */ @Deprecated - public void setText(UCharacterIterator newText) { + public void setText(UCharacterIterator newText) { try{ UCharacterIterator newIter = (UCharacterIterator)newText.clone(); if (newIter == null) { @@ -1992,7 +1998,7 @@ public final class Normalizer implements Cloneable { } return cmpEquivFold(s1, s2, options); - } + } /* * Compare two strings for canonical equivalence. 
@@ -2517,6 +2523,7 @@ public final class Normalizer implements Cloneable { throw new IndexOutOfBoundsException(Integer.toString(len)); } } + @Override public Appendable append(char c) { if(offsetLanguage Plural Rules page at - * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html + * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html * * *

Usage of PluralFormat

@@ -90,8 +90,8 @@ import com.ibm.icu.util.ULocale.Category; * between the {curly braces} and their sub-message, * and between the '=' and the number of an explicitValue. *

- * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and - * 'other'. You always have to define a message text for the default plural case + * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and + * 'other'. You always have to define a message text for the default plural case * "other" which is contained in every rule set. * If you do not specify a message text for a particular plural case, the * message text of the plural case "other" gets assigned to this @@ -155,7 +155,7 @@ public class PluralFormat extends UFormat { * The MessagePattern which contains the parsed structure of the pattern string. */ transient private MessagePattern msgPattern; - + /** * Obsolete with use of MessagePattern since ICU 4.8. Used to be: * The format messages for each plural case. It is a mapping: @@ -552,6 +552,7 @@ public class PluralFormat extends UFormat { // We could avoid this adapter class if we made PluralSelector public // (or at least publicly visible) and had PluralRules implement PluralSelector. private final class PluralSelectorAdapter implements PluralSelector { + @Override public String select(Object context, double number) { FixedDecimal dec = (FixedDecimal) context; assert dec.source == (dec.isNegative ? -number : number); @@ -591,6 +592,7 @@ public class PluralFormat extends UFormat { * @throws IllegalArgumentException if number is not an instance of Number * @stable ICU 3.8 */ + @Override public StringBuffer format(Object number, StringBuffer toAppendTo, FieldPosition pos) { if (!(number instanceof Number)) { @@ -687,6 +689,7 @@ public class PluralFormat extends UFormat { * @throws UnsupportedOperationException will always be thrown by this method. 
* @stable ICU 3.8 */ + @Override public Object parseObject(String source, ParsePosition pos) { throw new UnsupportedOperationException(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRanges.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRanges.java index 3177d6af05e..b1b8e31c6bd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRanges.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRanges.java @@ -18,7 +18,7 @@ import com.ibm.icu.util.Output; /** * Utility class for returning the plural category for a range of numbers, such as 1–5, so that appropriate messages can * be chosen. The rules for determining this value vary widely across locales. - * + * * @author markdavis * @internal * @deprecated This API is ICU internal only. @@ -32,7 +32,7 @@ public final class PluralRanges implements Freezable, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

, Comparable

{ * @return result * @stable ICU 3.8 */ + @Override public String transform(String source); } \ No newline at end of file diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeZoneFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeZoneFormat.java index e393bd3f8c5..fb9b73dcdce 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeZoneFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeZoneFormat.java @@ -60,7 +60,7 @@ import com.ibm.icu.util.ULocale; * Unicode Locale Data Markup Language (LDML). {@link TimeZoneNames} represents the * time zone display name data model and this class implements the algorithm for actual * formatting and parsing. - * + * * @see SimpleDateFormat * @see TimeZoneNames * @stable ICU 49 @@ -74,7 +74,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Time zone display format style enum used by format/parse APIs in TimeZoneFormat. - * + * * @see TimeZoneFormat#format(Style, TimeZone, long) * @see TimeZoneFormat#format(Style, TimeZone, long, Output) * @see TimeZoneFormat#parse(Style, String, ParsePosition, Output) @@ -210,7 +210,7 @@ public class TimeZoneFormat extends UFormat implements Freezable EXEMPLAR_LOCATION (0x0800); final int flag; - + private Style(int flag) { this.flag = flag; } @@ -218,7 +218,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Offset pattern type enum. - * + * * @see TimeZoneFormat#getGMTOffsetPattern(GMTOffsetPatternType) * @see TimeZoneFormat#setGMTOffsetPattern(GMTOffsetPatternType, String) * @stable ICU 49 @@ -281,7 +281,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Time type enum used for receiving time type (standard time, daylight time or unknown) * in TimeZoneFormat APIs. 
- * + * * @stable ICU 49 */ public enum TimeType { @@ -318,7 +318,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * When parsing a time zone display name in {@link Style#SPECIFIC_SHORT}, * look for the IANA tz database compatible zone abbreviations in addition * to the localized names coming from the {@link TimeZoneNames} currently - * used by the {@link TimeZoneFormat}. + * used by the {@link TimeZoneFormat}. * @stable ICU 54 */ TZ_DATABASE_ABBREVIATIONS; @@ -372,7 +372,7 @@ public class TimeZoneFormat extends UFormat implements Freezable private static final String UNKNOWN_LOCATION = "Unknown"; // Order of GMT offset pattern parsing, *_HMS must be evaluated first - // because *_HM is most likely a substring of *_HMS + // because *_HM is most likely a substring of *_HMS private static final GMTOffsetPatternType[] PARSE_GMT_OFFSET_TYPES = { GMTOffsetPatternType.POSITIVE_HMS, GMTOffsetPatternType.NEGATIVE_HMS, GMTOffsetPatternType.POSITIVE_HM, GMTOffsetPatternType.NEGATIVE_HM, @@ -480,7 +480,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

Note: The instance returned by this method is frozen. If you want to * customize a TimeZoneFormat, you must use {@link #cloneAsThawed()} to get a * thawed copy first. - * + * * @param locale the locale. * @return a frozen instance of TimeZoneFormat for the given locale. * @stable ICU 49 @@ -498,7 +498,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

Note: The instance returned by this method is frozen. If you want to * customize a TimeZoneFormat, you must use {@link #cloneAsThawed()} to get a * thawed copy first. - * + * * @param locale the {@link Locale}. * @return a frozen instance of TimeZoneFormat for the given locale. * @stable ICU 54 @@ -509,7 +509,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the time zone display name data used by this instance. - * + * * @return the time zone display name data. * @see #setTimeZoneNames(TimeZoneNames) * @stable ICU 49 @@ -557,7 +557,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Sets the time zone display name data to this instance. - * + * * @param tznames the time zone display name data. * @return this object. * @throws UnsupportedOperationException when this object is frozen. @@ -576,7 +576,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the localized GMT format pattern. - * + * * @return the localized GMT format pattern. * @see #setGMTPattern(String) * @stable ICU 49 @@ -588,7 +588,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Sets the localized GMT format pattern. The pattern must contain * a single argument {0}, for example "GMT {0}". - * + * * @param pattern the localized GMT format pattern string * @return this object. * @throws IllegalArgumentException when the pattern string does not contain "{0}" @@ -606,7 +606,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the offset pattern used for localized GMT format. - * + * * @param type the offset pattern enum * @see #setGMTOffsetPattern(GMTOffsetPatternType, String) * @stable ICU 49 @@ -617,7 +617,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Sets the offset pattern for the given offset type. - * + * * @param type the offset pattern. * @param pattern the pattern string. * @return this object. 
@@ -646,7 +646,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the decimal digit characters used for localized GMT format in a single string * containing from 0 to 9 in the ascending order. - * + * * @return the decimal digits for localized GMT format. * @see #setGMTOffsetDigits(String) * @stable ICU 49 @@ -661,7 +661,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Sets the decimal digit characters used for localized GMT format. - * + * * @param digits a string contains the decimal digit characters from 0 to 9 n the ascending order. * @return this object. * @throws IllegalArgumentException when the string did not contain ten characters. @@ -686,7 +686,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the localized GMT format string for GMT(UTC) itself (GMT offset is 0). - * + * * @return the localized GMT string string for GMT(UTC) itself. * @see #setGMTZeroFormat(String) * @stable ICU 49 @@ -697,7 +697,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Sets the localized GMT format string for GMT(UTC) itself (GMT offset is 0). - * + * * @param gmtZeroFormat the localized GMT format string for GMT(UTC). * @return this object. * @throws UnsupportedOperationException when this object is frozen. @@ -723,7 +723,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

* Note: By default, an instance of TimeZoneFormat * created by {#link {@link #getInstance(ULocale)} has no parse options set. - * + * * @param options the default parse options. * @return this object. * @see ParseOption @@ -755,7 +755,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the ISO 8601 basic time zone string for the given offset. * For example, "-08", "-0830" and "Z" - * + * * @param offset the offset from GMT(UTC) in milliseconds. * @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0. * @param isShort true if shortest form is used. @@ -774,7 +774,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the ISO 8601 extended time zone string for the given offset. * For example, "-08:00", "-08:30" and "Z" - * + * * @param offset the offset from GMT(UTC) in milliseconds. * @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0. * @param isShort true if shortest form is used. @@ -839,7 +839,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns the display name of the time zone at the given date for * the style. - * + * *

Note: A style may have fallback styles defined. For example, * when GENERIC_LONG is requested, but there is no display name * data available for GENERIC_LONG style, the implementation @@ -847,7 +847,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * See UTS#35 UNICODE LOCALE DATA MARKUP LANGUAGE (LDML) * Appendix J: Time Zone Display Name * for the details. - * + * * @param style the style enum (e.g. GENERIC_LONG, LOCALIZED_GMT...) * @param tz the time zone. * @param date the date. @@ -866,7 +866,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * in addition to the argument list of {@link #format(Style, TimeZone, long)}. * The argument is used for receiving the time type (standard time * or daylight saving time, or unknown) actually used for the display name. - * + * * @param style the style enum (e.g. GENERIC_LONG, LOCALIZED_GMT...) * @param tz the time zone. * @param date the date. @@ -1005,7 +1005,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * basic or extended time zone string. When the given string is not an ISO 8601 time * zone string, this method sets the current position as the error index * to ParsePosition pos and returns 0. - * + * * @param text the text contains ISO 8601 style time zone string (e.g. "-08", "-0800", "-08:00", and "Z") * at the position. * @param pos the position. @@ -1024,7 +1024,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * offset format string. When the given string cannot be parsed, this method * sets the current position as the error index to ParsePosition pos * and returns 0. - * + * * @param text the text contains a localized GMT offset string at the position. * @param pos the position. * @return the offset from GMT(UTC) in milliseconds for the given localized GMT @@ -1041,7 +1041,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * offset format string. 
When the given string cannot be parsed, this method * sets the current position as the error index to ParsePosition pos * and returns 0. - * + * * @param text the text contains a short localized GMT offset string at the position. * @param pos the position. * @return the offset from GMT(UTC) in milliseconds for the given short localized GMT @@ -1056,7 +1056,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Returns a TimeZone by parsing the time zone string according to * the parse position, the style and the parse options. - * + * * @param text the text contains a time zone string at the position. * @param style the format style. * @param pos the position. @@ -1513,7 +1513,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

* Note: This method is equivalent to {@link #parse(Style, String, ParsePosition, EnumSet, Output) * parse(style, text, pos, null, timeType)}. - * + * * @param text the text contains a time zone string at the position. * @param style the format style * @param pos the position. @@ -1536,7 +1536,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

* Note: This method is equivalent to {@link #parse(Style, String, ParsePosition, EnumSet, Output) * parse(Style.GENERIC_LOCATION, text, pos, EnumSet.of(ParseOption.ALL_STYLES), timeType)}. - * + * * @param text the text contains a time zone string at the position. * @param pos the position. * @return A TimeZone, or null if the input could not be parsed. @@ -1569,7 +1569,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * {@inheritDoc} - * + * * @stable ICU 49 */ @Override @@ -1600,7 +1600,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * {@inheritDoc} - * + * * @stable ICU 49 */ @Override @@ -1618,7 +1618,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * {@inheritDoc} - * + * * @stable ICU 49 */ @Override @@ -1769,7 +1769,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Private method returning the time zone's specific format string. - * + * * @param tz the time zone * @param stdType the name type used for standard time * @param dstType the name type used for daylight time @@ -1795,7 +1795,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Private method returning the time zone's exemplar location string. * This method will never return null. - * + * * @param tz the time zone * @return the time zone's exemplar location name. */ @@ -1816,7 +1816,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * Private method returns a time zone ID. If tzID is not null, the value of tzID is returned. * If tzID is null, then this method look up a time zone ID for the current region. This is a * small helper method used by the parse implementation method - * + * * @param tzID * the time zone ID or null * @param mzID @@ -1842,7 +1842,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * the locale of this instance. 
When a generic name is coming from * a meta zone, this region is used for checking if the time zone * is a reference zone of the meta zone. - * + * * @return the target region */ private synchronized String getTargetRegion() { @@ -1883,7 +1883,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * Parses the localized GMT pattern string and initialize * localized gmt pattern fields including {{@link #_gmtPatternTokens}. * This method must be also called at deserialization time. - * + * * @param gmtPattern the localized GMT pattern string such as "GMT {0}" * @throws IllegalArgumentException when the pattern string does not contain "{0}" */ @@ -1900,7 +1900,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Unquotes the message format style pattern. - * + * * @param s the pattern * @return the unquoted pattern string */ @@ -1933,7 +1933,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * Initialize localized GMT format offset hour/min/sec patterns. * This method parses patterns into optimized run-time format. * This method must be called at deserialization time. - * + * * @param gmtOffsetPatterns patterns, String[4] * @throws IllegalArgumentException when patterns are not valid */ @@ -2005,7 +2005,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Parse the GMT offset pattern into runtime optimized format - * + * * @param pattern the offset pattern string * @param letters the required pattern letters such as "Hm" * @return An array of Object. 
Each array entry is either String (representing @@ -2110,7 +2110,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Appends seconds field to the offset pattern with hour/minute - * + * * @param offsetHM the offset pattern including hours and minutes fields * @return the offset pattern including hours, minutes and seconds fields */ @@ -2130,7 +2130,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Truncates minutes field from the offset pattern with hour/minute - * + * * @param offsetHM the offset pattern including hours and minutes fields * @return the offset pattern including only hours field */ @@ -2155,7 +2155,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * Appends localized digits to the buffer. *

* Note: This code assumes that the input number is 0 - 59 - * + * * @param buf the target buffer * @param n the integer number * @param minDigits the minimum digits width @@ -2190,7 +2190,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * offset format string. When the given string cannot be parsed, this method * sets the current position as the error index to ParsePosition pos * and returns 0. - * + * * @param text the text contains a localized GMT offset string at the position. * @param pos the position. * @param isShort true if this parser to try the short format first @@ -2276,7 +2276,7 @@ public class TimeZoneFormat extends UFormat implements Freezable break; } idx += len; - + // Offset part int[] offsetLen = new int[1]; offset = parseOffsetFields(text, idx, false, offsetLen); @@ -2285,7 +2285,7 @@ public class TimeZoneFormat extends UFormat implements Freezable break; } idx += offsetLen[0]; - + // Suffix part len = _gmtPatternSuffix.length(); if (len > 0 && !text.regionMatches(true, idx, _gmtPatternSuffix, 0, len)) { @@ -2302,7 +2302,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Parses localized GMT offset fields into offset. - * + * * @param text the input text * @param start the start index * @param isShort true if this is a short format - currently not used @@ -2376,7 +2376,7 @@ public class TimeZoneFormat extends UFormat implements Freezable /** * Parses localized GMT offset fields with the given pattern - * + * * @param text the input text * @param start the start index * @param patternItems the pattern (already itemized) @@ -2631,7 +2631,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * Reads an offset field value. 
This method will stop parsing when * 1) number of digits reaches maxDigits * 2) just before already parsed number exceeds maxVal - * + * * @param text the text * @param start the start offset * @param minDigits the minimum number of required digits @@ -2714,7 +2714,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * Break input String into String[]. Each array element represents * a code point. This method is used for parsing localized digit * characters and support characters in Unicode supplemental planes. - * + * * @param str the string * @return the array of code points in String[] */ @@ -2737,7 +2737,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * (basic format, extended format, or UTC indicator). When the given string is not an ISO 8601 time * zone string, this method sets the current position as the error index * to ParsePosition pos and returns 0. - * + * * @param text the text contains ISO 8601 style time zone string (e.g. "-08", "-08:00", "Z") * at the position. * @param pos the position. @@ -2805,7 +2805,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

* Note: This method expects the input position is already at the start of * ASCII digits and does not parse sign (+/-). - * + * * @param text The text contains a sequence of ASCII digits * @param pos The parse position * @param minFields The minimum Fields to be parsed @@ -2897,7 +2897,7 @@ public class TimeZoneFormat extends UFormat implements Freezable *

* Note: This method expects the input position is already at the start of * ASCII digits and does not parse sign (+/-). - * + * * @param text The text * @param pos The parse position * @param sep The separator character @@ -3142,7 +3142,7 @@ public class TimeZoneFormat extends UFormat implements Freezable }; /** - * + * * @param oos the object output stream * @throws IOException */ @@ -3161,7 +3161,7 @@ public class TimeZoneFormat extends UFormat implements Freezable } /** - * + * * @param ois the object input stream * @throws ClassNotFoundException * @throws IOException @@ -3256,6 +3256,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * {@inheritDoc} * @stable ICU 49 */ + @Override public boolean isFrozen() { return _frozen; } @@ -3264,6 +3265,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * {@inheritDoc} * @stable ICU 49 */ + @Override public TimeZoneFormat freeze() { _frozen = true; return this; @@ -3273,6 +3275,7 @@ public class TimeZoneFormat extends UFormat implements Freezable * {@inheritDoc} * @stable ICU 49 */ + @Override public TimeZoneFormat cloneAsThawed() { TimeZoneFormat copy = (TimeZoneFormat)super.clone(); copy._frozen = false; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java index 36b21d10a75..cfcf0a9f145 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java @@ -8,7 +8,6 @@ */ package com.ibm.icu.text; - import java.text.CharacterIterator; import com.ibm.icu.impl.CharacterIteratorWrapper; @@ -16,136 +15,145 @@ import com.ibm.icu.impl.ReplaceableUCharacterIterator; import com.ibm.icu.impl.UCharArrayIterator; import com.ibm.icu.impl.UCharacterIteratorWrapper; - /** - * Abstract class that defines an API for iteration on text objects.This is an - * interface for forward and backward iteration and random 
access into a text - * object. Forward iteration is done with post-increment and backward iteration - * is done with pre-decrement semantics, while the - * java.text.CharacterIterator interface methods provided forward - * iteration with "pre-increment" and backward iteration with pre-decrement - * semantics. This API is more efficient for forward iteration over code points. - * The other major difference is that this API can do both code unit and code point - * iteration, java.text.CharacterIterator can only iterate over - * code units and is limited to BMP (0 - 0xFFFF) + * Abstract class that defines an API for iteration on text objects.This is an interface for forward and backward + * iteration and random access into a text object. Forward iteration is done with post-increment and backward iteration + * is done with pre-decrement semantics, while the java.text.CharacterIterator interface methods provided + * forward iteration with "pre-increment" and backward iteration with pre-decrement semantics. This API is more + * efficient for forward iteration over code points. The other major difference is that this API can do both code unit + * and code point iteration, java.text.CharacterIterator can only iterate over code units and is limited to + * BMP (0 - 0xFFFF) + * * @author Ram * @stable ICU 2.4 */ -public abstract class UCharacterIterator - implements Cloneable,UForwardCharacterIterator { +public abstract class UCharacterIterator implements Cloneable, UForwardCharacterIterator { /** * Protected default constructor for the subclasses + * * @stable ICU 2.4 */ - protected UCharacterIterator(){ + protected UCharacterIterator() { } - + // static final methods ---------------------------------------------------- - + /** - * Returns a UCharacterIterator object given a - * Replaceable object. - * @param source a valid source as a Replaceable object + * Returns a UCharacterIterator object given a Replaceable object. 
+ * + * @param source + * a valid source as a Replaceable object * @return UCharacterIterator object - * @exception IllegalArgumentException if the argument is null + * @exception IllegalArgumentException + * if the argument is null * @stable ICU 2.4 */ - public static final UCharacterIterator getInstance(Replaceable source){ - return new ReplaceableUCharacterIterator(source); - } - - /** - * Returns a UCharacterIterator object given a - * source string. - * @param source a string - * @return UCharacterIterator object - * @exception IllegalArgumentException if the argument is null - * @stable ICU 2.4 - */ - public static final UCharacterIterator getInstance(String source){ - return new ReplaceableUCharacterIterator(source); - } - - /** - * Returns a UCharacterIterator object given a - * source character array. - * @param source an array of UTF-16 code units - * @return UCharacterIterator object - * @exception IllegalArgumentException if the argument is null - * @stable ICU 2.4 - */ - public static final UCharacterIterator getInstance(char[] source){ - return getInstance(source,0,source.length); - } - - /** - * Returns a UCharacterIterator object given a - * source character array. - * @param source an array of UTF-16 code units - * @return UCharacterIterator object - * @exception IllegalArgumentException if the argument is null - * @stable ICU 2.4 - */ - public static final UCharacterIterator getInstance(char[] source, int start, int limit){ - return new UCharArrayIterator(source,start,limit); - } - /** - * Returns a UCharacterIterator object given a - * source StringBuffer. 
- * @param source an string buffer of UTF-16 code units - * @return UCharacterIterator object - * @exception IllegalArgumentException if the argument is null - * @stable ICU 2.4 - */ - public static final UCharacterIterator getInstance(StringBuffer source){ + public static final UCharacterIterator getInstance(Replaceable source) { return new ReplaceableUCharacterIterator(source); } /** - * Returns a UCharacterIterator object given a - * CharacterIterator. - * @param source a valid CharacterIterator object. + * Returns a UCharacterIterator object given a source string. + * + * @param source + * a string * @return UCharacterIterator object - * @exception IllegalArgumentException if the argument is null + * @exception IllegalArgumentException + * if the argument is null * @stable ICU 2.4 - */ - public static final UCharacterIterator getInstance(CharacterIterator source){ + */ + public static final UCharacterIterator getInstance(String source) { + return new ReplaceableUCharacterIterator(source); + } + + /** + * Returns a UCharacterIterator object given a source character array. + * + * @param source + * an array of UTF-16 code units + * @return UCharacterIterator object + * @exception IllegalArgumentException + * if the argument is null + * @stable ICU 2.4 + */ + public static final UCharacterIterator getInstance(char[] source) { + return getInstance(source, 0, source.length); + } + + /** + * Returns a UCharacterIterator object given a source character array. + * + * @param source + * an array of UTF-16 code units + * @return UCharacterIterator object + * @exception IllegalArgumentException + * if the argument is null + * @stable ICU 2.4 + */ + public static final UCharacterIterator getInstance(char[] source, int start, int limit) { + return new UCharArrayIterator(source, start, limit); + } + + /** + * Returns a UCharacterIterator object given a source StringBuffer. 
+ * + * @param source + * an string buffer of UTF-16 code units + * @return UCharacterIterator object + * @exception IllegalArgumentException + * if the argument is null + * @stable ICU 2.4 + */ + public static final UCharacterIterator getInstance(StringBuffer source) { + return new ReplaceableUCharacterIterator(source); + } + + /** + * Returns a UCharacterIterator object given a CharacterIterator. + * + * @param source + * a valid CharacterIterator object. + * @return UCharacterIterator object + * @exception IllegalArgumentException + * if the argument is null + * @stable ICU 2.4 + */ + public static final UCharacterIterator getInstance(CharacterIterator source) { return new CharacterIteratorWrapper(source); } - + // public methods ---------------------------------------------------------- /** - * Returns a java.text.CharacterIterator object for - * the underlying text of this iterator. The returned iterator is - * independent of this iterator. + * Returns a java.text.CharacterIterator object for the underlying text of this iterator. The returned + * iterator is independent of this iterator. + * * @return java.text.CharacterIterator object - * @stable ICU 2.4 + * @stable ICU 2.4 */ - public CharacterIterator getCharacterIterator(){ + public CharacterIterator getCharacterIterator() { return new UCharacterIteratorWrapper(this); - } - + } + /** - * Returns the code unit at the current index. If index is out - * of range, returns DONE. Index is not changed. + * Returns the code unit at the current index. If index is out of range, returns DONE. Index is not changed. + * * @return current code unit * @stable ICU 2.4 */ public abstract int current(); - + /** - * Returns the codepoint at the current index. - * If the current index is invalid, DONE is returned. - * If the current index points to a lead surrogate, and there is a following - * trail surrogate, then the code point is returned. Otherwise, the code - * unit at index is returned. Index is not changed. 
+ * Returns the codepoint at the current index. If the current index is invalid, DONE is returned. If the current + * index points to a lead surrogate, and there is a following trail surrogate, then the code point is returned. + * Otherwise, the code unit at index is returned. Index is not changed. + * * @return current codepoint * @stable ICU 2.4 */ - public int currentCodePoint(){ + public int currentCodePoint() { int ch = current(); - if(UTF16.isLeadSurrogate((char)ch)){ + if (UTF16.isLeadSurrogate((char) ch)) { // advance the index to get the // next code point next(); @@ -156,61 +164,58 @@ public abstract class UCharacterIterator // current should never change // the current index so back off previous(); - - if(UTF16.isTrailSurrogate((char)ch2)){ - // we found a surrogate pair + + if (UTF16.isTrailSurrogate((char) ch2)) { + // we found a surrogate pair // return the codepoint - return Character.toCodePoint((char)ch, (char)ch2); + return Character.toCodePoint((char) ch, (char) ch2); } } return ch; } - + /** * Returns the length of the text + * * @return length of the text * @stable ICU 2.4 */ public abstract int getLength(); - /** * Gets the current index in text. + * * @return current index in text. * @stable ICU 2.4 */ public abstract int getIndex(); - /** - * Returns the UTF16 code unit at index, and increments to the next - * code unit (post-increment semantics). If index is out of - * range, DONE is returned, and the iterator is reset to the limit - * of the text. - * @return the next UTF16 code unit, or DONE if the index is at the limit - * of the text. - * @stable ICU 2.4 + * Returns the UTF16 code unit at index, and increments to the next code unit (post-increment semantics). If index + * is out of range, DONE is returned, and the iterator is reset to the limit of the text. + * + * @return the next UTF16 code unit, or DONE if the index is at the limit of the text. 
+ * @stable ICU 2.4 */ + @Override public abstract int next(); /** - * Returns the code point at index, and increments to the next code - * point (post-increment semantics). If index does not point to a - * valid surrogate pair, the behavior is the same as - * next(). Otherwise the iterator is incremented past - * the surrogate pair, and the code point represented by the pair - * is returned. - * @return the next codepoint in text, or DONE if the index is at - * the limit of the text. - * @stable ICU 2.4 + * Returns the code point at index, and increments to the next code point (post-increment semantics). If index does + * not point to a valid surrogate pair, the behavior is the same as next(). Otherwise the iterator is + * incremented past the surrogate pair, and the code point represented by the pair is returned. + * + * @return the next codepoint in text, or DONE if the index is at the limit of the text. + * @stable ICU 2.4 */ - public int nextCodePoint(){ + @Override + public int nextCodePoint() { int ch1 = next(); - if(UTF16.isLeadSurrogate((char)ch1)){ + if (UTF16.isLeadSurrogate((char) ch1)) { int ch2 = next(); - if(UTF16.isTrailSurrogate((char)ch2)){ - return Character.toCodePoint((char)ch1, (char)ch2); - }else if (ch2 != DONE) { + if (UTF16.isTrailSurrogate((char) ch2)) { + return Character.toCodePoint((char) ch1, (char) ch2); + } else if (ch2 != DONE) { // unmatched surrogate so back out previous(); } @@ -219,61 +224,60 @@ public abstract class UCharacterIterator } /** - * Decrement to the position of the previous code unit in the - * text, and return it (pre-decrement semantics). If the - * resulting index is less than 0, the index is reset to 0 and - * DONE is returned. - * @return the previous code unit in the text, or DONE if the new - * index is before the start of the text. - * @stable ICU 2.4 + * Decrement to the position of the previous code unit in the text, and return it (pre-decrement semantics). 
If the + * resulting index is less than 0, the index is reset to 0 and DONE is returned. + * + * @return the previous code unit in the text, or DONE if the new index is before the start of the text. + * @stable ICU 2.4 */ public abstract int previous(); - /** - * Retreat to the start of the previous code point in the text, - * and return it (pre-decrement semantics). If the index is not - * preceeded by a valid surrogate pair, the behavior is the same - * as previous(). Otherwise the iterator is - * decremented to the start of the surrogate pair, and the code - * point represented by the pair is returned. - * @return the previous code point in the text, or DONE if the new - * index is before the start of the text. - * @stable ICU 2.4 + * Retreat to the start of the previous code point in the text, and return it (pre-decrement semantics). If the + * index is not preceeded by a valid surrogate pair, the behavior is the same as previous(). Otherwise + * the iterator is decremented to the start of the surrogate pair, and the code point represented by the pair is + * returned. + * + * @return the previous code point in the text, or DONE if the new index is before the start of the text. + * @stable ICU 2.4 */ - public int previousCodePoint(){ + public int previousCodePoint() { int ch1 = previous(); - if(UTF16.isTrailSurrogate((char)ch1)){ + if (UTF16.isTrailSurrogate((char) ch1)) { int ch2 = previous(); - if(UTF16.isLeadSurrogate((char)ch2)){ - return Character.toCodePoint((char)ch2, (char)ch1); - }else if (ch2 != DONE) { - //unmatched trail surrogate so back out + if (UTF16.isLeadSurrogate((char) ch2)) { + return Character.toCodePoint((char) ch2, (char) ch1); + } else if (ch2 != DONE) { + // unmatched trail surrogate so back out next(); - } + } } return ch1; } /** * Sets the index to the specified index in the text. - * @param index the index within the text. 
- * @exception IndexOutOfBoundsException is thrown if an invalid index is - * supplied + * + * @param index + * the index within the text. + * @exception IndexOutOfBoundsException + * is thrown if an invalid index is supplied * @stable ICU 2.4 */ public abstract void setIndex(int index); /** * Sets the current index to the limit. + * * @stable ICU 2.4 */ public void setToLimit() { setIndex(getLength()); } - + /** * Sets the current index to the start. + * * @stable ICU 2.4 */ public void setToStart() { @@ -281,17 +285,15 @@ public abstract class UCharacterIterator } /** - * Fills the buffer with the underlying text storage of the iterator - * If the buffer capacity is not enough a exception is thrown. The capacity - * of the fill in buffer should at least be equal to length of text in the - * iterator obtained by calling getLength()). - * Usage: - * + * Fills the buffer with the underlying text storage of the iterator If the buffer capacity is not enough a + * exception is thrown. The capacity of the fill in buffer should at least be equal to length of text in the + * iterator obtained by calling getLength()). Usage: + * *

      *         UChacterIterator iter = new UCharacterIterator.getInstance(text);
      *         char[] buf = new char[iter.getLength()];
      *         iter.getText(buf);
-     *         
+     *
      *         OR
      *         char[] buf= new char[1];
      *         int len = 0;
@@ -305,32 +307,34 @@ public abstract class UCharacterIterator
      *         }
      * 
* - * @param fillIn an array of chars to fill with the underlying UTF-16 code - * units. - * @param offset the position within the array to start putting the data. + * @param fillIn + * an array of chars to fill with the underlying UTF-16 code units. + * @param offset + * the position within the array to start putting the data. * @return the number of code units added to fillIn, as a convenience - * @exception IndexOutOfBoundsException exception if there is not enough - * room after offset in the array, or if offset < 0. - * @stable ICU 2.4 + * @exception IndexOutOfBoundsException + * exception if there is not enough room after offset in the array, or if offset < 0. + * @stable ICU 2.4 */ - public abstract int getText(char[] fillIn, int offset); + public abstract int getText(char[] fillIn, int offset); /** - * Convenience override for getText(char[], int) that provides - * an offset of 0. - * @param fillIn an array of chars to fill with the underlying UTF-16 code - * units. + * Convenience override for getText(char[], int) that provides an offset of 0. + * + * @param fillIn + * an array of chars to fill with the underlying UTF-16 code units. * @return the number of code units added to fillIn, as a convenience - * @exception IndexOutOfBoundsException exception if there is not enough - * room in the array. - * @stable ICU 2.4 + * @exception IndexOutOfBoundsException + * exception if there is not enough room in the array. + * @stable ICU 2.4 */ public final int getText(char[] fillIn) { return getText(fillIn, 0); } - + /** * Convenience method for returning the underlying text storage as as string + * * @return the underlying text storage in the iterator as a string * @stable ICU 2.4 */ @@ -339,22 +343,19 @@ public abstract class UCharacterIterator getText(text); return new String(text); } - + /** - * Moves the current position by the number of code units - * specified, either forward or backward depending on the sign - * of delta (positive or negative respectively). 
If the resulting - * index would be less than zero, the index is set to zero, and if - * the resulting index would be greater than limit, the index is - * set to limit. + * Moves the current position by the number of code units specified, either forward or backward depending on the + * sign of delta (positive or negative respectively). If the resulting index would be less than zero, the index is + * set to zero, and if the resulting index would be greater than limit, the index is set to limit. * - * @param delta the number of code units to move the current - * index. + * @param delta + * the number of code units to move the current index. * @return the new index. - * @exception IndexOutOfBoundsException is thrown if an invalid index is - * supplied - * @stable ICU 2.4 - * + * @exception IndexOutOfBoundsException + * is thrown if an invalid index is supplied + * @stable ICU 2.4 + * */ public int moveIndex(int delta) { int x = Math.max(0, Math.min(getIndex() + delta, getLength())); @@ -363,42 +364,46 @@ public abstract class UCharacterIterator } /** - * Moves the current position by the number of code points - * specified, either forward or backward depending on the sign of - * delta (positive or negative respectively). If the current index - * is at a trail surrogate then the first adjustment is by code - * unit, and the remaining adjustments are by code points. If the - * resulting index would be less than zero, the index is set to - * zero, and if the resulting index would be greater than limit, - * the index is set to limit. - * @param delta the number of code units to move the current index. - * @return the new index - * @exception IndexOutOfBoundsException is thrown if an invalid delta is - * supplied + * Moves the current position by the number of code points specified, either forward or backward depending on the + * sign of delta (positive or negative respectively). 
If the current index is at a trail surrogate then the first + * adjustment is by code unit, and the remaining adjustments are by code points. If the resulting index would be + * less than zero, the index is set to zero, and if the resulting index would be greater than limit, the index is + * set to limit. + * + * @param delta + * the number of code units to move the current index. + * @return the new index + * @exception IndexOutOfBoundsException + * is thrown if an invalid delta is supplied * @stable ICU 2.4 */ - public int moveCodePointIndex(int delta){ - if(delta>0){ - while(delta>0 && nextCodePoint() != DONE){delta--;} - }else{ - while(delta<0 && previousCodePoint() != DONE){delta++;} + public int moveCodePointIndex(int delta) { + if (delta > 0) { + while (delta > 0 && nextCodePoint() != DONE) { + delta--; + } + } else { + while (delta < 0 && previousCodePoint() != DONE) { + delta++; + } } - if(delta!=0){ + if (delta != 0) { throw new IndexOutOfBoundsException(); } - + return getIndex(); } /** - * Creates a copy of this iterator, independent from other iterators. - * If it is not possible to clone the iterator, returns null. + * Creates a copy of this iterator, independent from other iterators. If it is not possible to clone the iterator, + * returns null. + * * @return copy of this iterator * @stable ICU 2.4 */ - public Object clone() throws CloneNotSupportedException{ + @Override + public Object clone() throws CloneNotSupportedException { return super.clone(); - } - -} + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeFilter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeFilter.java index 0c201707ed8..3f88e687a30 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeFilter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeFilter.java @@ -34,6 +34,7 @@ public abstract class UnicodeFilter implements UnicodeMatcher { * filters. Matches a single 16-bit code unit at offset. 
* @stable ICU 2.0 */ + @Override public int matches(Replaceable text, int[] offset, int limit, diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java index e2a9847fbec..1d844ce6577 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java @@ -507,6 +507,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * Return a new set that is equivalent to this one. * @stable ICU 2.0 */ + @Override public Object clone() { if (isFrozen()) { return this; @@ -694,6 +695,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * will produce another set that is equal to this one. * @stable ICU 2.0 */ + @Override public String toPattern(boolean escapeUnprintable) { if (pat != null && !escapeUnprintable) { return pat; @@ -864,6 +866,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * indexing. * @stable ICU 2.0 */ + @Override public boolean matchesIndexValue(int v) { /* The index value v, in the range [0,255], is contained in this set if * it is contained in any pair of this set. Pairs either have the high @@ -905,6 +908,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * longest possible multichar string. * @stable ICU 2.0 */ + @Override public int matches(Replaceable text, int[] offset, int limit, @@ -915,7 +919,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa // about them here. If we ever allow zero-length strings // we much check for them here. if (contains(UnicodeMatcher.ETHER)) { - return incremental ? U_PARTIAL_MATCH : U_MATCH; + return incremental ? U_PARTIAL_MATCH : U_MATCH; } else { return U_MISMATCH; } @@ -951,7 +955,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa // Strings are sorted, so we can optimize in the // forward direction. 
if (forward && c > firstChar) break; - if (c != firstChar) continue; + if (c != firstChar) continue; int length = matchRest(text, offset[0], limit, trial); @@ -1030,7 +1034,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa } /** - * Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1. + * Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1. * @internal * @deprecated This API is ICU internal only. */ @@ -1098,6 +1102,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @param toUnionTo the set into which to union the source characters * @stable ICU 2.2 */ + @Override public void addMatchSetTo(UnicodeSet toUnionTo) { toUnionTo.addAll(this); } @@ -1264,7 +1269,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa // c is before start of next range list[i] = c; // if we touched the HIGH mark, then add a new one - if (c == MAX_VALUE) { + if (c == MAX_VALUE) { ensureCapacity(len+1); list[len++] = HIGH; } @@ -1356,7 +1361,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa if (s.length() == 1) return s.charAt(0); // at this point, len = 2 - int cp = UTF16.charAt(s, 0); + int cp = UTF16.charAt(s, 0); if (cp > 0xFFFF) { // is surrogate pair return cp; } @@ -1499,7 +1504,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa */ public final UnicodeSet retain(CharSequence cs) { - int cp = getSingleCP(cs); + int cp = getSingleCP(cs); if (cp < 0) { String s = cs.toString(); boolean isIn = strings.contains(s); @@ -1661,6 +1666,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @return true if the test condition is met * @stable ICU 2.0 */ + @Override public boolean contains(int c) { if (c < MIN_VALUE || c > MAX_VALUE) { throw new IllegalArgumentException("Invalid code 
point U+" + Utility.hex(c, 6)); @@ -2325,6 +2331,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @return true if the specified Object is equal to this set. * @stable ICU 2.0 */ + @Override public boolean equals(Object o) { if (o == null) { return false; @@ -2352,6 +2359,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @see java.lang.Object#hashCode() * @stable ICU 2.0 */ + @Override public int hashCode() { int result = len; for (int i = 0; i < len; ++i) { @@ -2365,6 +2373,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * Return a programmer-readable string representation of this object. * @stable ICU 2.0 */ + @Override public String toString() { return toPattern(true); } @@ -2436,17 +2445,17 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa // Add constants to make the applyPattern() code easier to follow. - private static final int LAST0_START = 0, - LAST1_RANGE = 1, + private static final int LAST0_START = 0, + LAST1_RANGE = 1, LAST2_SET = 2; - private static final int MODE0_NONE = 0, - MODE1_INBRACKET = 1, + private static final int MODE0_NONE = 0, + MODE1_INBRACKET = 1, MODE2_OUTBRACKET = 2; - private static final int SETMODE0_NONE = 0, - SETMODE1_UNICODESET = 1, - SETMODE2_PROPERTYPAT = 2, + private static final int SETMODE0_NONE = 0, + SETMODE1_UNICODESET = 1, + SETMODE2_PROPERTYPAT = 2, SETMODE3_PREPARSED = 3; /** @@ -2885,7 +2894,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa } /** - * Add the contents of the collection (as strings) into this UnicodeSet. + * Add the contents of the collection (as strings) into this UnicodeSet. * The collection must not contain null. 
* @param source the collection to add * @return a reference to this object @@ -2916,7 +2925,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa private void ensureCapacity(int newLen) { if (newLen <= list.length) return; - int[] temp = new int[newLen + GROW_EXTRA]; + int[] temp = new int[newLen + GROW_EXTRA]; System.arraycopy(list, 0, temp, 0, len); list = temp; } @@ -3175,6 +3184,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa private static class NumericValueFilter implements Filter { double value; NumericValueFilter(double value) { this.value = value; } + @Override public boolean contains(int ch) { return UCharacter.getUnicodeNumericValue(ch) == value; } @@ -3183,6 +3193,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa private static class GeneralCategoryMaskFilter implements Filter { int mask; GeneralCategoryMaskFilter(int mask) { this.mask = mask; } + @Override public boolean contains(int ch) { return ((1 << UCharacter.getType(ch)) & mask) != 0; } @@ -3195,6 +3206,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa this.prop = prop; this.value = value; } + @Override public boolean contains(int ch) { return UCharacter.getIntPropertyValue(ch, prop) == value; } @@ -3203,6 +3215,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa private static class ScriptExtensionsFilter implements Filter { int script; ScriptExtensionsFilter(int script) { this.script = script; } + @Override public boolean contains(int c) { return UScript.hasScript(c, script); } @@ -3214,6 +3227,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa private static class VersionFilter implements Filter { VersionInfo version; VersionFilter(VersionInfo version) { this.version = version; } + @Override public boolean contains(int ch) { VersionInfo v = UCharacter.getAge(ch); // Reference comparison ok; VersionInfo caches and reuses @@ -3923,6 
+3937,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @draft ICU3.8 (retain) * @provisional This API might change or be removed in a future release. */ + @Override public UnicodeMatcher lookupMatcher(int i) { return null; } @@ -3933,7 +3948,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa *

* This routine will be called whenever the parsing of a UnicodeSet pattern finds such a * propertyName+propertyValue combination. - * + * * @param propertyName * the name of the property * @param propertyValue @@ -3954,6 +3969,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @draft ICU3.8 (retain) * @provisional This API might change or be removed in a future release. */ + @Override public char[] lookup(String s) { return null; } @@ -3962,6 +3978,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @draft ICU3.8 (retain) * @provisional This API might change or be removed in a future release. */ + @Override public String parseReference(String text, ParsePosition pos, int limit) { return null; } @@ -3969,20 +3986,22 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /** * Is this frozen, according to the Freezable interface? - * + * * @return value * @stable ICU 3.8 */ + @Override public boolean isFrozen() { return (bmpSet != null || stringSpan != null); } /** * Freeze this class, according to the Freezable interface. - * + * * @return this * @stable ICU 4.4 */ + @Override public UnicodeSet freeze() { if (!isFrozen()) { // Do most of what compact() does before freezing because @@ -4190,6 +4209,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @return the clone, not frozen * @stable ICU 4.4 */ + @Override public UnicodeSet cloneAsThawed() { UnicodeSet result = new UnicodeSet(this); assert !result.isFrozen(); @@ -4210,19 +4230,19 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /** * A struct-like class used for iteration through ranges, for faster iteration than by String. * Read about the restrictions on usage in {@link UnicodeSet#ranges()}. - * + * * @stable ICU 54 */ public static class EntryRange { /** * The starting code point of the range. 
- * + * * @stable ICU 54 */ public int codepoint; /** * The ending code point of the range - * + * * @stable ICU 54 */ public int codepointEnd; @@ -4232,13 +4252,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /** * {@inheritDoc} - * + * * @stable ICU 54 */ @Override public String toString() { StringBuilder b = new StringBuilder(); - return ( + return ( codepoint == codepointEnd ? _appendToPat(b, codepoint, false) : _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false)) .toString(); @@ -4252,9 +4272,9 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * *

Warning: To iterate over the full contents, you have to also iterate over the strings. * - *

Warning: For speed, UnicodeSet iteration does not check for concurrent modification. + *

Warning: For speed, UnicodeSet iteration does not check for concurrent modification. * Do not alter the UnicodeSet while iterating. - * + * *

      * // Sample code
      * for (EntryRange range : us1.ranges()) {
@@ -4264,7 +4284,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa
      *     // do something with each string;
      * }
      * 
- * + * * @stable ICU 54 */ public Iterable ranges() { @@ -4272,6 +4292,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa } private class EntryRangeIterable implements Iterable { + @Override public Iterator iterator() { return new EntryRangeIterator(); } @@ -4281,9 +4302,11 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa int pos; EntryRange result = new EntryRange(); + @Override public boolean hasNext() { return pos < len-1; } + @Override public EntryRange next() { if (pos < len-1) { result.codepoint = list[pos++]; @@ -4293,6 +4316,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa } return result; } + @Override public void remove() { throw new UnsupportedOperationException(); } @@ -4301,16 +4325,17 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /** * Returns a string iterator. Uses the same order of iteration as {@link UnicodeSetIterator}. - *

Warning: For speed, UnicodeSet iteration does not check for concurrent modification. + *

Warning: For speed, UnicodeSet iteration does not check for concurrent modification. * Do not alter the UnicodeSet while iterating. * @see java.util.Set#iterator() * @stable ICU 4.4 */ + @Override public Iterator iterator() { return new UnicodeSetIterator2(this); } - // Cover for string iteration. + // Cover for string iteration. private static class UnicodeSetIterator2 implements Iterator { // Invariants: // sourceList != null then sourceList[item] is a valid character @@ -4341,6 +4366,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /* (non-Javadoc) * @see java.util.Iterator#hasNext() */ + @Override public boolean hasNext() { return sourceList != null || stringIterator.hasNext(); } @@ -4348,6 +4374,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /* (non-Javadoc) * @see java.util.Iterator#next() */ + @Override public String next() { if (sourceList == null) { return stringIterator.next(); @@ -4382,9 +4409,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /* (non-Javadoc) * @see java.util.Iterator#remove() */ + @Override public void remove() { throw new UnsupportedOperationException(); - } + } } /** @@ -4485,6 +4513,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * @see java.lang.Comparable#compareTo(java.lang.Object) * @stable ICU 4.4 */ + @Override public int compareTo(UnicodeSet o) { return compareTo(o, ComparisonStyle.SHORTER_FIRST); } @@ -4535,7 +4564,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /** * Utility to compare a string to a code point. * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString()) - * and comparing, but much faster (no object creation). + * and comparing, but much faster (no object creation). * Actually, there is one difference; a null compares as less. 
* Note that this (=String) order is UTF-16 order -- *not* code point order. * @stable ICU 4.4 @@ -4547,7 +4576,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa /** * Utility to compare a string to a code point. - * Same results as turning the code point into a string and comparing, but much faster (no object creation). + * Same results as turning the code point into a string and comparing, but much faster (no object creation). * Actually, there is one difference; a null compares as less. * Note that this (=String) order is UTF-16 order -- *not* code point order. * @stable ICU 4.4 @@ -4657,8 +4686,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa } /** - * Simplify the ranges in a Unicode set by merging any ranges that are only separated by characters in the dontCare set. - * For example, the ranges: \\u2E80-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3000-\\u303E change to \\u2E80-\\u303E + * Simplify the ranges in a Unicode set by merging any ranges that are only separated by characters in the dontCare set. + * For example, the ranges: \\u2E80-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3000-\\u303E change to \\u2E80-\\u303E * if the dontCare set includes unassigned characters (for a particular version of Unicode). * @param dontCare Set with the don't-care characters for spanning * @return the input set, modified @@ -4854,14 +4883,14 @@ public class UnicodeSet extends UnicodeFilter implements Iterable, Compa * Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable} * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}. - * + * * @param xSymbolTable the new default symbol table. * @internal * @deprecated This API is ICU internal only. 
*/ @Deprecated public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) { - INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated. + INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated. XSYMBOL_TABLE = xSymbolTable; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/AnnualTimeZoneRule.java b/icu4j/main/classes/core/src/com/ibm/icu/util/AnnualTimeZoneRule.java index 16b77d149c1..5ae8516ad1a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/AnnualTimeZoneRule.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/AnnualTimeZoneRule.java @@ -16,7 +16,7 @@ import com.ibm.icu.impl.Grego; * AnnualTimeZoneRule is a class used for representing a time zone * rule which takes effect annually. Years used in this class are * all Gregorian calendar years. - * + * * @stable ICU 3.8 */ public class AnnualTimeZoneRule extends TimeZoneRule { @@ -37,7 +37,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * Constructs a AnnualTimeZoneRule with the name, the GMT offset of its * standard time, the amount of daylight saving offset adjustment, * the annual start time rule and the start/until years. - * + * * @param name The time zone name. * @param rawOffset The GMT offset of its standard time in milliseconds. * @param dstSavings The amount of daylight saving offset adjustment in @@ -47,7 +47,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * @param startYear The first year when this rule takes effect. * @param endYear The last year when this rule takes effect. If this * rule is effective forever in future, specify MAX_YEAR. - * + * * @stable ICU 3.8 */ public AnnualTimeZoneRule(String name, int rawOffset, int dstSavings, @@ -60,10 +60,10 @@ public class AnnualTimeZoneRule extends TimeZoneRule { /** * Gets the start date/time rule associated used by this rule. 
- * + * * @return An AnnualDateTimeRule which represents the start date/time * rule used by this time zone rule. - * + * * @stable ICU 3.8 */ public DateTimeRule getRule() { @@ -72,10 +72,10 @@ public class AnnualTimeZoneRule extends TimeZoneRule { /** * Gets the first year when this rule takes effect. - * + * * @return The start year of this rule. The year is in Gregorian calendar * with 0 == 1 BCE, -1 == 2 BCE, etc. - * + * * @stable ICU 3.8 */ public int getStartYear() { @@ -84,10 +84,10 @@ public class AnnualTimeZoneRule extends TimeZoneRule { /** * Gets the end year when this rule takes effect. - * + * * @return The end year of this rule (inclusive). The year is in Gregorian calendar * with 0 == 1 BCE, -1 == 2 BCE, etc. - * + * * @stable ICU 3.8 */ public int getEndYear() { @@ -96,16 +96,16 @@ public class AnnualTimeZoneRule extends TimeZoneRule { /** * Gets the time when this rule takes effect in the given year. - * + * * @param year The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. * @param prevRawOffset The standard time offset from UTC before this rule * takes effect in milliseconds. * @param prevDSTSavings The amount of daylight saving offset from the * standard time. - * + * * @return The time when this rule takes effect in the year, or * null if this rule is not applicable in the year. 
- * + * * @stable ICU 3.8 */ public Date getStartInYear(int year, int prevRawOffset, int prevDSTSavings) { @@ -127,7 +127,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { ruleDay += 7 * (weeks - 1); } else { after = false; - ruleDay = Grego.fieldsToDay(year, dateTimeRule.getRuleMonth(), + ruleDay = Grego.fieldsToDay(year, dateTimeRule.getRuleMonth(), Grego.monthLength(year, dateTimeRule.getRuleMonth())); ruleDay += 7 * (weeks + 1); } @@ -168,6 +168,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getFirstStart(int prevRawOffset, int prevDSTSavings) { return getStartInYear(startYear, prevRawOffset, prevDSTSavings); } @@ -176,6 +177,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getFinalStart(int prevRawOffset, int prevDSTSavings) { if (endYear == MAX_YEAR) { return null; @@ -187,6 +189,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getNextStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive) { int[] fields = Grego.timeToFields(base, null); int year = fields[0]; @@ -204,6 +207,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getPreviousStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive) { int[] fields = Grego.timeToFields(base, null); int year = fields[0]; @@ -221,6 +225,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public boolean isEquivalentTo(TimeZoneRule other) { if (!(other instanceof AnnualTimeZoneRule)) { return false; @@ -239,6 +244,7 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * Note: This method in AnnualTimeZoneRule always returns true. 
* @stable ICU 3.8 */ + @Override public boolean isTransitionRule() { return true; } @@ -247,9 +253,10 @@ public class AnnualTimeZoneRule extends TimeZoneRule { * Returns a String representation of this AnnualTimeZoneRule object. * This method is used for debugging purpose only. The string representation can be changed * in future version of ICU without any notice. - * + * * @stable ICU 3.8 */ + @Override public String toString() { StringBuilder buf = new StringBuilder(); buf.append(super.toString()); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrie.java index 1deab13626b..ba3d7f73e0e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/BytesTrie.java @@ -423,6 +423,7 @@ public final class BytesTrie implements Cloneable, Iterable { * @return A new BytesTrie.Iterator. * @stable ICU 4.8 */ + @Override public Iterator iterator() { return new Iterator(bytes_, pos_, remainingMatchLength_, 0); } @@ -564,6 +565,7 @@ public final class BytesTrie implements Cloneable, Iterable { * @return true if there are more elements. * @stable ICU 4.8 */ + @Override public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); } /** @@ -577,6 +579,7 @@ public final class BytesTrie implements Cloneable, Iterable { * @throws NoSuchElementException - iteration has no more elements. 
* @stable ICU 4.8 */ + @Override public Entry next() { int pos=pos_; if(pos<0) { @@ -646,6 +649,7 @@ public final class BytesTrie implements Cloneable, Iterable { * @throws UnsupportedOperationException (always) * @stable ICU 4.8 */ + @Override public void remove() { throw new UnsupportedOperationException(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/CharsTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/util/CharsTrie.java index e5c4f60143f..6ffe5b026c4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/CharsTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/CharsTrie.java @@ -385,6 +385,7 @@ public final class CharsTrie implements Cloneable, Iterable { * @return A new CharsTrie.Iterator. * @stable ICU 4.8 */ + @Override public Iterator iterator() { return new Iterator(chars_, pos_, remainingMatchLength_, 0); } @@ -480,6 +481,7 @@ public final class CharsTrie implements Cloneable, Iterable { * @return true if there are more elements. * @stable ICU 4.8 */ + @Override public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); } /** @@ -493,6 +495,7 @@ public final class CharsTrie implements Cloneable, Iterable { * @throws NoSuchElementException - iteration has no more elements. 
* @stable ICU 4.8 */ + @Override public Entry next() { int pos=pos_; if(pos<0) { @@ -578,6 +581,7 @@ public final class CharsTrie implements Cloneable, Iterable { * @throws UnsupportedOperationException (always) * @stable ICU 4.8 */ + @Override public void remove() { throw new UnsupportedOperationException(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java b/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java index e9e79432b6e..06300ca3dfd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java @@ -78,15 +78,15 @@ public class Currency extends MeasureUnit { * @stable ICU 2.6 */ public static final int LONG_NAME = 1; - + /** - * Selector for getName() indicating the plural long name for a - * currency, such as "US dollar" for USD in "1 US dollar", + * Selector for getName() indicating the plural long name for a + * currency, such as "US dollar" for USD in "1 US dollar", * and "US dollars" for USD in "2 US dollars". * @stable ICU 4.2 */ public static final int PLURAL_LONG_NAME = 2; - + private static final EquivalenceRelation EQUIVALENT_CURRENCY_SYMBOLS = new EquivalenceRelation() .add("\u00a5", "\uffe5") @@ -105,7 +105,7 @@ public class Currency extends MeasureUnit { * @stable ICU 54 */ STANDARD, - + /** * a setting to specify currency usage which determines currency digit and rounding * for cash usage, for example: "50 NT$" @@ -113,7 +113,7 @@ public class Currency extends MeasureUnit { */ CASH } - + // begin registry stuff // shim for service code @@ -177,7 +177,7 @@ public class Currency extends MeasureUnit { /** * Returns an array of Strings which contain the currency - * identifiers that are valid for the given locale on the + * identifiers that are valid for the given locale on the * given date. If there are no such identifiers, returns null. * Returned identifiers are in preference order. 
* @param loc the locale for which to retrieve currency codes. @@ -189,7 +189,7 @@ public class Currency extends MeasureUnit { String region = ULocale.getRegionForSupplementalData(loc, false); CurrencyFilter filter = CurrencyFilter.onDate(d).withRegion(region); List list = getTenderCurrencies(filter); - // Note: Prior to 4.4 the spec didn't say that we return null if there are no results, but + // Note: Prior to 4.4 the spec didn't say that we return null if there are no results, but // the test assumed it did. Kept the behavior and amended the spec. if (list.isEmpty()) { return null; @@ -215,10 +215,10 @@ public class Currency extends MeasureUnit { * Returns the set of available currencies. The returned set of currencies contains all of the * available currencies, including obsolete ones. The result set can be modified without * affecting the available currencies in the runtime. - * + * * @return The set of available currencies. The returned set could be empty if there is no * currency data available. - * + * * @stable ICU 49 */ public static Set getAvailableCurrencies() { @@ -302,8 +302,8 @@ public class Currency extends MeasureUnit { } return (Currency) MeasureUnit.internalGetInstance("currency", theISOCode.toUpperCase(Locale.ENGLISH)); } - - + + private static boolean isAlpha3Code(String code) { if (code.length() != 3) { return false; @@ -321,11 +321,11 @@ public class Currency extends MeasureUnit { /** * Registers a new currency for the provided locale. The returned object * is a key that can be used to unregister this currency object. - * + * *

Because ICU may choose to cache Currency objects internally, this must * be called at application startup, prior to any calls to * Currency.getInstance to avoid undefined behavior. - * + * * @param currency the currency to register * @param locale the ulocale under which to register the currency * @return a registry key that can be used to unregister this currency @@ -386,7 +386,7 @@ public class Currency extends MeasureUnit { /** * Given a key and a locale, returns an array of values for the key for which data * exists. If commonlyUsed is true, these are the values that typically are used - * with this locale, otherwise these are all values for which data exists. + * with this locale, otherwise these are all values for which data exists. * This is a common service API. *

* The only supported key is "currency", other values return an empty array. @@ -398,11 +398,11 @@ public class Currency extends MeasureUnit { * If commonlyUsed is true, only the currencies known to be in use as of the current date * are returned. When there are more than one, these are returned in preference order * (typically, this occurs when a country is transitioning to a new currency, and the - * newer currency is preferred), see - * Unicode TR#35 Sec. C1. + * newer currency is preferred), see + * Unicode TR#35 Sec. C1. * If commonlyUsed is false, all currencies ever used in any locale are returned, in no * particular order. - * + * * @param key key whose values to look up. the only recognized key is "currency" * @param locale the locale * @param commonlyUsed if true, return only values that are currently used in the locale. @@ -411,19 +411,19 @@ public class Currency extends MeasureUnit { * array will be empty. * @stable ICU 4.2 */ - public static final String[] getKeywordValuesForLocale(String key, ULocale locale, + public static final String[] getKeywordValuesForLocale(String key, ULocale locale, boolean commonlyUsed) { - + // The only keyword we recognize is 'currency' if (!"currency".equals(key)) { return EMPTY_STRING_ARRAY; } - + if (!commonlyUsed) { // Behavior change from 4.3.3, no longer sort the currencies return getAllTenderCurrencies().toArray(new String[0]); } - + // Don't resolve region if the requested locale is 'und', it will resolve to US // which we don't want. 
if (UND.equals(locale)) { @@ -432,11 +432,11 @@ public class Currency extends MeasureUnit { String prefRegion = ULocale.getRegionForSupplementalData(locale, true); CurrencyFilter filter = CurrencyFilter.now().withRegion(prefRegion); - + // currencies are in region's preferred order when we're filtering on region, which // matches our spec List result = getTenderCurrencies(filter); - + // No fallback anymore (change from 4.3.3) if (result.size() == 0) { return EMPTY_STRING_ARRAY; @@ -444,7 +444,7 @@ public class Currency extends MeasureUnit { return result.toArray(new String[result.size()]); } - + private static final ULocale UND = new ULocale("und"); private static final String[] EMPTY_STRING_ARRAY = new String[0]; @@ -514,9 +514,9 @@ public class Currency extends MeasureUnit { /** * Returns the display name for the given currency in the - * given locale. - * This is a convenient method for - * getName(ULocale, int, boolean[]); + * given locale. + * This is a convenient method for + * getName(ULocale, int, boolean[]); * @stable ICU 3.2 */ public String getName(Locale locale, @@ -531,7 +531,7 @@ public class Currency extends MeasureUnit { * currency object in the en_US locale is "$". * @param locale locale in which to display currency * @param nameStyle selector for which kind of name to return. - * The nameStyle should be either SYMBOL_NAME or + * The nameStyle should be either SYMBOL_NAME or * LONG_NAME. Otherwise, throw IllegalArgumentException. * @param isChoiceFormat fill-in; isChoiceFormat[0] is set to true * if the returned value is a ChoiceFormat pattern; otherwise it @@ -564,7 +564,7 @@ public class Currency extends MeasureUnit { } /** - * Returns the display name for the given currency in the given locale. + * Returns the display name for the given currency in the given locale. 
* This is a convenience overload of getName(ULocale, int, String, boolean[]); * @stable ICU 4.2 */ @@ -577,7 +577,7 @@ public class Currency extends MeasureUnit { * Returns the display name for the given currency in the * given locale. For example, the SYMBOL_NAME for the USD * currency object in the en_US locale is "$". - * The PLURAL_LONG_NAME for the USD currency object when the currency + * The PLURAL_LONG_NAME for the USD currency object when the currency * amount is plural is "US dollars", such as in "3.00 US dollars"; * while the PLURAL_LONG_NAME for the USD currency object when the currency * amount is singular is "US dollar", such as in "1.00 US dollar". @@ -607,7 +607,7 @@ public class Currency extends MeasureUnit { if (isChoiceFormat != null) { isChoiceFormat[0] = false; } - + CurrencyDisplayNames names = CurrencyDisplayNames.getInstance(locale); return names.getPluralName(subType, pluralCount); } @@ -620,7 +620,7 @@ public class Currency extends MeasureUnit { * Note: This method is a convenience equivalent for * {@link java.util.Currency#getDisplayName()} and is equivalent to * getName(Locale.getDefault(), LONG_NAME, null). - * + * * @return The display name of this currency * @see #getDisplayName(Locale) * @see #getName(Locale, int, boolean[]) @@ -639,7 +639,7 @@ public class Currency extends MeasureUnit { * Note: This method is a convenience equivalent for * {@link java.util.Currency#getDisplayName(java.util.Locale)} and is equivalent * to getName(locale, LONG_NAME, null). 
- * + * * @param locale locale in which to display currency * @return The display name of this currency for the specified locale * @see #getDisplayName(Locale) @@ -676,9 +676,9 @@ public class Currency extends MeasureUnit { public static String parse(ULocale locale, String text, int type, ParsePosition pos) { List> currencyTrieVec = CURRENCY_NAME_CACHE.get(locale); if (currencyTrieVec == null) { - TextTrieMap currencyNameTrie = + TextTrieMap currencyNameTrie = new TextTrieMap(true); - TextTrieMap currencySymbolTrie = + TextTrieMap currencySymbolTrie = new TextTrieMap(false); currencyTrieVec = new ArrayList>(); currencyTrieVec.add(currencySymbolTrie); @@ -686,7 +686,7 @@ public class Currency extends MeasureUnit { setupCurrencyTrieVec(locale, currencyTrieVec); CURRENCY_NAME_CACHE.put(locale, currencyTrieVec); } - + int maxLength = 0; String isoResult = null; @@ -711,7 +711,7 @@ public class Currency extends MeasureUnit { return isoResult; } - private static void setupCurrencyTrieVec(ULocale locale, + private static void setupCurrencyTrieVec(ULocale locale, List> trieVec) { TextTrieMap symTrie = trieVec.get(0); @@ -753,16 +753,17 @@ public class Currency extends MeasureUnit { } } - private static class CurrencyNameResultHandler + private static class CurrencyNameResultHandler implements TextTrieMap.ResultHandler { // The length of longest matching key private int bestMatchLength; // The currency ISO code of longest matching key private String bestCurrencyISOCode; - + // As the trie is traversed, handlePrefixMatch is called at each node. matchLength is the // length length of the key at the current node; values is the list of all the values mapped to // that key. matchLength increases with each call as trie is traversed. 
+ @Override public boolean handlePrefixMatch(int matchLength, Iterator values) { if (values.hasNext()) { // Since the best match criteria is only based on length of key in trie and since all the @@ -776,7 +777,7 @@ public class Currency extends MeasureUnit { public String getBestCurrencyISOCode() { return bestCurrencyISOCode; } - + public int getBestMatchLength() { return bestMatchLength; } @@ -854,6 +855,7 @@ public class Currency extends MeasureUnit { * Returns the ISO 4217 code for this currency. * @stable ICU 2.2 */ + @Override public String toString() { return subType; } @@ -861,7 +863,7 @@ public class Currency extends MeasureUnit { /** * Constructs a currency object for the given ISO 4217 3-letter * code. This constructor assumes that the code is valid. - * + * * @param theISOCode The iso code used to construct the currency. * @stable ICU 3.4 */ @@ -874,8 +876,8 @@ public class Currency extends MeasureUnit { } // POW10[i] = 10^i - private static final int[] POW10 = { - 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 + private static final int[] POW10 = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; @@ -896,7 +898,7 @@ public class Currency extends MeasureUnit { } return all; } - + private static synchronized Set getAllCurrenciesAsSet() { Set all = (ALL_CODES_AS_SET == null) ? null : ALL_CODES_AS_SET.get(); if (all == null) { @@ -914,7 +916,7 @@ public class Currency extends MeasureUnit { * Note: For checking availability of a currency on a specific date, specify the date on both from and * to. When both from and to are null, this method checks if the specified * currency is available all time. - * + * * @param code * The ISO 4217 3-letter code. * @param from @@ -925,7 +927,7 @@ public class Currency extends MeasureUnit { * the currency any date after from * @return true if the given ISO 4217 3-letter code is supported on the specified date range. 
* @throws IllegalArgumentException when to is before from. - * + * * @stable ICU 4.6 */ public static boolean isAvailable(String code, Date from, Date to) { @@ -961,11 +963,11 @@ public class Currency extends MeasureUnit { CurrencyMetaInfo info = CurrencyMetaInfo.getInstance(); return info.currencies(filter.withTender()); } - + private static final class EquivalenceRelation { - + private Map> data = new HashMap>(); - + @SuppressWarnings("unchecked") // See ticket #11395, this is safe. public EquivalenceRelation add(T... items) { Set group = new HashSet(); @@ -980,7 +982,7 @@ public class Currency extends MeasureUnit { } return this; } - + public Set get(T item) { Set result = data.get(item); if (result == null) { @@ -989,7 +991,7 @@ public class Currency extends MeasureUnit { return Collections.unmodifiableSet(result); } } - + private Object writeReplace() throws ObjectStreamException { return new MeasureUnitProxy(type, subType); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/DateTimeRule.java b/icu4j/main/classes/core/src/com/ibm/icu/util/DateTimeRule.java index 20f364f2232..e609082beae 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/DateTimeRule.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/DateTimeRule.java @@ -14,7 +14,7 @@ import java.io.Serializable; * DateTimeRule is a class representing a time in a year by * a rule specified by month, day of month, day of week and * time in the day. - * + * * @stable ICU 3.8 */ public class DateTimeRule implements Serializable { @@ -24,7 +24,7 @@ public class DateTimeRule implements Serializable { /** * Date rule type defined by exact day of month. * For example, March 14. - * + * * @stable ICU 3.8 */ public static final int DOM = 0; @@ -32,7 +32,7 @@ public class DateTimeRule implements Serializable { /** * Date rule type defined by day of week in month. * For example, 2nd Sunday in March. 
- * + * * @stable ICU 3.8 */ public static final int DOW = 1; @@ -41,7 +41,7 @@ public class DateTimeRule implements Serializable { * Date rule type defined by first day of week on or * after exact day of month. * For example, 1st Monday on or after March 15. - * + * * @stable ICU 3.8 */ public static final int DOW_GEQ_DOM = 2; @@ -50,28 +50,28 @@ public class DateTimeRule implements Serializable { * Date rule type defined by last day of week on or * before exact day of month. * For example, last Saturday on or before March 15. - * + * * @stable ICU 3.8 */ public static final int DOW_LEQ_DOM = 3; - + /** * Time rule type for local wall time. - * + * * @stable ICU 3.8 */ public static final int WALL_TIME = 0; /** * Time rule type for local standard time. - * + * * @stable ICU 3.8 */ public static final int STANDARD_TIME = 1; /** * Time rule type for coordinated universal time. - * + * * @stable ICU 3.8 */ public static final int UTC_TIME = 2; @@ -90,13 +90,13 @@ public class DateTimeRule implements Serializable { * Constructs a DateTimeRule by the day of month and * the time rule. The date rule type for an instance created by * this constructor is DOM. - * + * * @param month The rule month, for example, Calendar.JANUARY * @param dayOfMonth The day of month, 1-based. * @param millisInDay The milliseconds in the rule date. * @param timeType The time type, WALL_TIME or STANDARD_TIME * or UTC_TIME. - * + * * @stable ICU 3.8 */ public DateTimeRule(int month, int dayOfMonth, @@ -107,7 +107,7 @@ public class DateTimeRule implements Serializable { this.millisInDay = millisInDay; this.timeRuleType = timeType; - + // not used by this rule type this.dayOfWeek = 0; this.weekInMonth = 0; @@ -117,7 +117,7 @@ public class DateTimeRule implements Serializable { * Constructs a DateTimeRule by the day of week and its oridinal * number and the time rule. The date rule type for an instance created * by this constructor is DOW. 
- * + * * @param month The rule month, for example, Calendar.JANUARY. * @param weekInMonth The ordinal number of the day of week. Negative number * may be used for specifying a rule date counted from the @@ -126,7 +126,7 @@ public class DateTimeRule implements Serializable { * @param millisInDay The milliseconds in the rule date. * @param timeType The time type, WALL_TIME or STANDARD_TIME * or UTC_TIME. - * + * * @stable ICU 3.8 */ public DateTimeRule(int month, int weekInMonth, int dayOfWeek, @@ -148,7 +148,7 @@ public class DateTimeRule implements Serializable { * on or after/before the day of month and the time rule. The date rule * type for an instance created by this constructor is either * DOM_GEQ_DOM or DOM_LEQ_DOM. - * + * * @param month The rule month, for example, Calendar.JANUARY * @param dayOfMonth The day of month, 1-based. * @param dayOfWeek The day of week, for example, Calendar.SUNDAY. @@ -156,7 +156,7 @@ public class DateTimeRule implements Serializable { * @param millisInDay The milliseconds in the rule date. * @param timeType The time type, WALL_TIME or STANDARD_TIME * or UTC_TIME. - * + * * @stable ICU 3.8 */ public DateTimeRule(int month, int dayOfMonth, int dayOfWeek, boolean after, @@ -175,9 +175,9 @@ public class DateTimeRule implements Serializable { /** * Gets the date rule type, such as DOM - * + * * @return The date rule type. - * + * * @stable ICU 3.8 */ public int getDateRuleType() { @@ -186,9 +186,9 @@ public class DateTimeRule implements Serializable { /** * Gets the rule month. - * + * * @return The rule month. - * + * * @stable ICU 3.8 */ public int getRuleMonth() { @@ -198,9 +198,9 @@ public class DateTimeRule implements Serializable { /** * Gets the rule day of month. When the date rule type * is DOW, the value is always 0. - * + * * @return The rule day of month - * + * * @stable ICU 3.8 */ public int getRuleDayOfMonth() { @@ -210,9 +210,9 @@ public class DateTimeRule implements Serializable { /** * Gets the rule day of week. 
When the date rule type * is DOM, the value is always 0. - * + * * @return The rule day of week. - * + * * @stable ICU 3.8 */ public int getRuleDayOfWeek() { @@ -223,9 +223,9 @@ public class DateTimeRule implements Serializable { * Gets the rule day of week ordinal number in the month. * When the date rule type is not DOW, the value is * always 0. - * + * * @return The rule day of week ordinal number in the month. - * + * * @stable ICU 3.8 */ public int getRuleWeekInMonth() { @@ -234,10 +234,10 @@ public class DateTimeRule implements Serializable { /** * Gets the time rule type - * + * * @return The time rule type, either WALL_TIME or STANDARD_TIME * or UTC_TIME. - * + * * @stable ICU 3.8 */ public int getTimeRuleType() { @@ -246,15 +246,15 @@ public class DateTimeRule implements Serializable { /** * Gets the rule time in the rule day. - * + * * @return The time in the rule day in milliseconds. - * + * * @stable ICU 3.8 */ public int getRuleMillisInDay() { return millisInDay; } - + private static final String[] DOWSTR = {"", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; private static final String[] MONSTR = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; @@ -262,9 +262,10 @@ public class DateTimeRule implements Serializable { * Returns a String representation of this DateTimeRule object. * This method is used for debugging purpose only. The string representation can be changed * in future version of ICU without any notice. 
- * + * * @stable ICU 3.8 */ + @Override public String toString() { String sDate = null; String sTimeRuleType = null; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/EasterHoliday.java b/icu4j/main/classes/core/src/com/ibm/icu/util/EasterHoliday.java index 838d61123f3..f950a8b05f1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/EasterHoliday.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/EasterHoliday.java @@ -14,7 +14,7 @@ import java.util.Date; /** * Note: The Holiday framework is a technology preview. * Despite its age, is still draft API, and clients should treat it as such. - * + * * A Holiday subclass which represents holidays that occur * a fixed number of days before or after Easter. Supports both the * Western and Orthodox methods for calculating Easter. @@ -160,17 +160,19 @@ class EasterRule implements DateRule { } /** - * Return the first occurrance of this rule on or after the given date + * Return the first occurrence of this rule on or after the given date */ + @Override public Date firstAfter(Date start) { return doFirstBetween(start, null); } /** - * Return the first occurrance of this rule on or after + * Return the first occurrence of this rule on or after * the given start date and before the given end date. */ + @Override public Date firstBetween(Date start, Date end) { return doFirstBetween(start, end); @@ -179,6 +181,7 @@ class EasterRule implements DateRule { /** * Return true if the given Date is on the same day as Easter */ + @Override public boolean isOn(Date date) { synchronized(calendar) { @@ -194,6 +197,7 @@ class EasterRule implements DateRule { /** * Return true if Easter occurs between the two dates given */ + @Override public boolean isBetween(Date start, Date end) { return firstBetween(start, end) != null; // TODO: optimize? 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/Holiday.java b/icu4j/main/classes/core/src/com/ibm/icu/util/Holiday.java index d8c342597aa..53e61c1ec16 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/Holiday.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/Holiday.java @@ -19,7 +19,7 @@ import com.ibm.icu.util.ULocale.Category; /** * Note: The Holiday framework is a technology preview. * Despite its age, is still draft API, and clients should treat it as such. - * + * * An abstract class representing a holiday. * @draft ICU 2.8 (retainAll) * @provisional This API might change or be removed in a future release. @@ -74,6 +74,7 @@ public abstract class Holiday implements DateRule * @draft ICU 2.8 * @provisional This API might change or be removed in a future release. */ + @Override public Date firstAfter(Date start) { return rule.firstAfter(start); } @@ -92,6 +93,7 @@ public abstract class Holiday implements DateRule * @draft ICU 2.8 * @provisional This API might change or be removed in a future release. */ + @Override public Date firstBetween(Date start, Date end) { return rule.firstBetween(start, end); } @@ -106,6 +108,7 @@ public abstract class Holiday implements DateRule * @draft ICU 2.8 * @provisional This API might change or be removed in a future release. */ + @Override public boolean isOn(Date date) { //System.out.println(name + ".isOn(" + date.toString() + "):"); return rule.isOn(date); @@ -117,6 +120,7 @@ public abstract class Holiday implements DateRule * @draft ICU 2.8 * @provisional This API might change or be removed in a future release. 
*/ + @Override public boolean isBetween(Date start, Date end) { return rule.isBetween(start, end); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/InitialTimeZoneRule.java b/icu4j/main/classes/core/src/com/ibm/icu/util/InitialTimeZoneRule.java index c56f16b7c86..acb482e149f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/InitialTimeZoneRule.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/InitialTimeZoneRule.java @@ -14,7 +14,7 @@ import java.util.Date; * InitialTimeZoneRule represents a time zone rule * representing a time zone effective from the beginning and * has no actual start times. - * + * * @stable ICU 3.8 */ public class InitialTimeZoneRule extends TimeZoneRule { @@ -24,12 +24,12 @@ public class InitialTimeZoneRule extends TimeZoneRule { /** * Constructs a InitialTimeZoneRule with the name, the GMT offset of its * standard time and the amount of daylight saving offset adjustment. - * + * * @param name The time zone name. * @param rawOffset The UTC offset of its standard time in milliseconds. * @param dstSavings The amount of daylight saving offset adjustment in milliseconds. * If this ia a rule for standard time, the value of this argument is 0. - * + * * @stable ICU 3.8 */ public InitialTimeZoneRule(String name, int rawOffset, int dstSavings) { @@ -38,22 +38,24 @@ public class InitialTimeZoneRule extends TimeZoneRule { /** * {@inheritDoc} - * + * * @stable ICU 3.8 */ + @Override public boolean isEquivalentTo(TimeZoneRule other) { if (other instanceof InitialTimeZoneRule) { return super.isEquivalentTo(other); } return false; } - + /** * {@inheritDoc}

* Note: This method in InitialTimeZoneRule always returns null. - * + * * @stable ICU 3.8 */ + @Override public Date getFinalStart(int prevRawOffset, int prevDSTSavings) { // No start time available return null; @@ -62,9 +64,10 @@ public class InitialTimeZoneRule extends TimeZoneRule { /** * {@inheritDoc}

* Note: This method in InitialTimeZoneRule always returns null. - * + * * @stable ICU 3.8 */ + @Override public Date getFirstStart(int prevRawOffset, int prevDSTSavings) { // No start time available return null; @@ -73,9 +76,10 @@ public class InitialTimeZoneRule extends TimeZoneRule { /** * {@inheritDoc}

* Note: This method in InitialTimeZoneRule always returns null. - * + * * @stable ICU 3.8 */ + @Override public Date getNextStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive) { // No start time available @@ -85,9 +89,10 @@ public class InitialTimeZoneRule extends TimeZoneRule { /** * {@inheritDoc}

* Note: This method in InitialTimeZoneRule always returns null. - * + * * @stable ICU 3.8 */ + @Override public Date getPreviousStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive) { // No start time available @@ -99,6 +104,7 @@ public class InitialTimeZoneRule extends TimeZoneRule { * Note: This method in InitialTimeZoneRule always returns false. * @stable ICU 3.8 */ + @Override public boolean isTransitionRule() { return false; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleData.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleData.java index 07becb0667f..be06c10d926 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleData.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleData.java @@ -333,28 +333,21 @@ public final class LocaleData { * otherwise known as Metric system. * @stable ICU 2.8 */ - public static final MeasurementSystem SI = new MeasurementSystem(0); + public static final MeasurementSystem SI = new MeasurementSystem(); /** * Measurement system followed in the United States of America. * @stable ICU 2.8 */ - public static final MeasurementSystem US = new MeasurementSystem(1); + public static final MeasurementSystem US = new MeasurementSystem(); /** * Mix of metric and imperial units used in Great Britain. 
* @stable ICU 55 */ - public static final MeasurementSystem UK = new MeasurementSystem(2); + public static final MeasurementSystem UK = new MeasurementSystem(); - private int systemID; - private MeasurementSystem(int id){ - systemID = id; - } - - private boolean equals(int id){ - return systemID == id; - } + private MeasurementSystem() {} } /** @@ -367,19 +360,15 @@ public final class LocaleData { public static final MeasurementSystem getMeasurementSystem(ULocale locale){ UResourceBundle sysBundle = measurementTypeBundleForLocale(locale, MEASUREMENT_SYSTEM); - int system = sysBundle.getInt(); - if(MeasurementSystem.US.equals(system)){ - return MeasurementSystem.US; + switch (sysBundle.getInt()) { + case 0: return MeasurementSystem.SI; + case 1: return MeasurementSystem.US; + case 2: return MeasurementSystem.UK; + default: + // return null if the object is null or is not an instance + // of integer indicating an error + return null; } - if(MeasurementSystem.UK.equals(system)){ - return MeasurementSystem.UK; - } - if(MeasurementSystem.SI.equals(system)){ - return MeasurementSystem.SI; - } - // return null if the object is null or is not an instance - // of integer indicating an error - return null; } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/Measure.java b/icu4j/main/classes/core/src/com/ibm/icu/util/Measure.java index 5993c571a2c..f641fec1f27 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/Measure.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/Measure.java @@ -31,7 +31,7 @@ package com.ibm.icu.util; * @stable ICU 3.0 */ public class Measure { - + private final Number number; private final MeasureUnit unit; @@ -48,12 +48,13 @@ public class Measure { this.number = number; this.unit = unit; } - + /** * Returns true if the given object is equal to this object. 
* @return true if this object is equal to the given object * @stable ICU 3.0 */ + @Override public boolean equals(Object obj) { if (obj == this) { return true; @@ -64,7 +65,7 @@ public class Measure { Measure m = (Measure) obj; return unit.equals(m.unit) && numbersEqual(number, m.number); } - + /* * See if two numbers are identical or have the same double value. * @param a A number @@ -87,6 +88,7 @@ public class Measure { * @return a 32-bit hash * @stable ICU 3.0 */ + @Override public int hashCode() { return 31 * Double.valueOf(number.doubleValue()).hashCode() + unit.hashCode(); } @@ -97,6 +99,7 @@ public class Measure { * code together with the numeric amount * @stable ICU 3.0 */ + @Override public String toString() { return number.toString() + ' ' + unit.toString(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/Output.java b/icu4j/main/classes/core/src/com/ibm/icu/util/Output.java index 18b8264c83f..85a5cc478aa 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/Output.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/Output.java @@ -24,6 +24,7 @@ public class Output { * {@inheritDoc} * @stable ICU 4.8 */ + @Override public String toString() { return value == null ? "null" : value.toString(); } @@ -33,7 +34,7 @@ public class Output { * @stable ICU 4.8 */ public Output() { - + } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/OutputInt.java b/icu4j/main/classes/core/src/com/ibm/icu/util/OutputInt.java index 4c5e5f3a07e..5abec5b5dce 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/OutputInt.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/OutputInt.java @@ -54,6 +54,7 @@ public class OutputInt { * @deprecated This API is ICU internal only. 
*/ @Deprecated + @Override public String toString() { return Integer.toString(value); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java index 02d19a93e9a..f91d80f76b0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java @@ -791,6 +791,7 @@ public class SimpleTimeZone extends BasicTimeZone { * @internal * @deprecated This API is ICU internal only. */ + @Override @Deprecated public void getOffsetFromLocal(long date, int nonExistingTimeOpt, int duplicatedTimeOpt, int[] offsets) { @@ -964,6 +965,7 @@ public class SimpleTimeZone extends BasicTimeZone { * {@inheritDoc} * @stable ICU 49 */ + @Override public boolean observesDaylightTime() { return useDaylight; } @@ -1418,6 +1420,7 @@ public class SimpleTimeZone extends BasicTimeZone { * {@inheritDoc} * @stable ICU 49 */ + @Override public boolean isFrozen() { return isFrozen; } @@ -1426,6 +1429,7 @@ public class SimpleTimeZone extends BasicTimeZone { * {@inheritDoc} * @stable ICU 49 */ + @Override public TimeZone freeze() { isFrozen = true; return this; @@ -1435,6 +1439,7 @@ public class SimpleTimeZone extends BasicTimeZone { * {@inheritDoc} * @stable ICU 49 */ + @Override public TimeZone cloneAsThawed() { SimpleTimeZone tz = (SimpleTimeZone)super.cloneAsThawed(); tz.isFrozen = false; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeArrayTimeZoneRule.java b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeArrayTimeZoneRule.java index c3c144d2c38..9785c7a5e96 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeArrayTimeZoneRule.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeArrayTimeZoneRule.java @@ -13,7 +13,7 @@ import java.util.Date; /** * TimeArrayTimeZoneRule represents a time zone rule whose start times are * defined by an array of milliseconds since the standard base time. 
- * + * * @stable ICU 3.8 */ public class TimeArrayTimeZoneRule extends TimeZoneRule { @@ -27,7 +27,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * Constructs a TimeArrayTimeZoneRule with the name, the GMT offset of its * standard time, the amount of daylight saving offset adjustment and * the array of times when this rule takes effect. - * + * * @param name The time zone name. * @param rawOffset The UTC offset of its standard time in milliseconds. * @param dstSavings The amount of daylight saving offset adjustment in @@ -38,7 +38,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * @param timeType The time type of the start times, which is one of * DataTimeRule.WALL_TIME, STANDARD_TIME * and UTC_TIME. - * + * * @stable ICU 3.8 */ public TimeArrayTimeZoneRule(String name, int rawOffset, int dstSavings, long[] startTimes, int timeType) { @@ -54,7 +54,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { /** * Gets the array of start times used by this rule. - * + * * @return An array of the start times in milliseconds since the base time * (January 1, 1970, 00:00:00 GMT). * @stable ICU 3.8 @@ -67,7 +67,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * Gets the time type of the start times used by this rule. The return value * is either DateTimeRule.WALL_TIME or DateTimeRule.STANDARD_TIME * or DateTimeRule.UTC_TIME. - * + * * @return The time type used of the start times used by this rule. 
* @stable ICU 3.8 */ @@ -79,6 +79,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getFirstStart(int prevRawOffset, int prevDSTSavings) { return new Date(getUTC(startTimes[0], prevRawOffset, prevDSTSavings)); } @@ -87,6 +88,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getFinalStart(int prevRawOffset, int prevDSTSavings) { return new Date(getUTC(startTimes[startTimes.length - 1], prevRawOffset, prevDSTSavings)); } @@ -95,6 +97,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getNextStart(long base, int prevOffset, int prevDSTSavings, boolean inclusive) { int i = startTimes.length - 1; for (; i >= 0; i--) { @@ -113,6 +116,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public Date getPreviousStart(long base, int prevOffset, int prevDSTSavings, boolean inclusive) { int i = startTimes.length - 1; for (; i >= 0; i--) { @@ -128,6 +132,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * {@inheritDoc} * @stable ICU 3.8 */ + @Override public boolean isEquivalentTo(TimeZoneRule other) { if (!(other instanceof TimeArrayTimeZoneRule)) { return false; @@ -144,6 +149,7 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * Note: This method in TimeArrayTimeZoneRule always returns true. * @stable ICU 3.8 */ + @Override public boolean isTransitionRule() { return true; } @@ -163,9 +169,10 @@ public class TimeArrayTimeZoneRule extends TimeZoneRule { * Returns a String representation of this TimeArrayTimeZoneRule object. * This method is used for debugging purpose only. The string representation can be changed * in future version of ICU without any notice. 
- * + * * @stable ICU 3.8 */ + @Override public String toString() { StringBuilder buf = new StringBuilder(); buf.append(super.toString()); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java index f323d7e5fbc..02c05a097d5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java @@ -71,7 +71,7 @@ import com.ibm.icu.util.ULocale.Category; * offset from GMT(=UTC) and does not observe daylight saving * time. For example, you might specify GMT+14:00 as a custom * time zone ID to create a TimeZone representing 14 hours ahead - * of GMT (with no daylight saving time). In addition, + * of GMT (with no daylight saving time). In addition, * getCanonicalID can also be used to * normalize a custom time zone ID. * @@ -228,7 +228,7 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable GENERIC_LOCATION) { throw new IllegalArgumentException("Illegal style: " + style); } - + return _getDisplayName(style, daylight, locale); } @@ -670,7 +670,7 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezablefalse even when {@link #useDaylightTime()} returns * true. - * + * * @return true if this time zone is in daylight saving time or will observe * daylight saving time at any future time. * @see #useDaylightTime @@ -745,7 +745,7 @@ abstract public class TimeZone implements Serializable, Cloneable, FreezableTimeZone for the given ID and the timezone type. * @param id time zone ID - * @param type time zone implementation type, TIMEZONE_JDK or TIMEZONE_ICU + * @param type time zone implementation type, TIMEZONE_JDK or TIMEZONE_ICU * @param frozen specify if the returned object can be frozen * @return the specified TimeZone or UNKNOWN_ZONE if the given ID * cannot be understood. 
@@ -756,7 +756,7 @@ abstract public class TimeZone implements Serializable, Cloneable, FreezableNote:A Set returned by this method is * immutable. * @param zoneType The system time zone type. - * @param region The ISO 3166 two-letter country code or UN M.49 three-digit area code. - * When null, no filtering done by region. - * @param rawOffset An offset from GMT in milliseconds, ignoring the effect of daylight savings - * time, if any. When null, no filtering done by zone offset. + * @param region The ISO 3166 two-letter country code or UN M.49 three-digit area code. + * When null, no filtering done by region. + * @param rawOffset An offset from GMT in milliseconds, ignoring the effect of daylight savings + * time, if any. When null, no filtering done by zone offset. * @return an immutable set of system time zone IDs. * @see SystemTimeZoneType - * + * * @stable ICU 4.8 - */ + */ public static Set getAvailableIDs(SystemTimeZoneType zoneType, String region, Integer rawOffset) { return ZoneMeta.getAvailableIDs(zoneType, region, rawOffset); @@ -837,7 +837,7 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezableid is not a known system ID. - * @see #getAvailableIDs(String) - * + /** + * {@icu} Returns the region code associated with the given + * system time zone ID. The region code is either ISO 3166 + * 2-letter country code or UN M.49 3-digit area code. + * When the time zone is not associated with a specific location, + * for example - "Etc/UTC", "EST5EDT", then this method returns + * "001" (UN M.49 area code for World). + * @param id the system time zone ID. + * @return the region code associated with the given + * system time zone ID. + * @throws IllegalArgumentException if id is not a known system ID. 
+ * @see #getAvailableIDs(String) + * * @stable ICU 4.8 - */ + */ public static String getRegion(String id) { String region = null; // "Etc/Unknown" is not a system time zone ID, @@ -1132,21 +1135,21 @@ abstract public class TimeZone implements Serializable, Cloneable, FreezableThere are system time zones that cannot be mapped to Windows zones. When the input * system time zone ID is unknown or unmappable to a Windows time zone, then this * method returns null. - * + * *

This implementation utilizes * Zone-Tzid mapping data. The mapping data is updated time to time. To get the latest changes, * please read the ICU user guide section * Updating the Time Zone Data. - * + * * @param id A system time zone ID * @return A Windows time zone ID mapped from the input system time zone ID, * or null when the input ID is unknown or unmappable. * @see #getIDForWindowsID(String, String) - * + * * @stable ICU 52 */ public static String getWindowsID(String id) { @@ -1155,7 +1158,7 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezablenull), * "America/Vancouver" is returned for the same Windows ID "Pacific Standard Time" and * region "CA". - * + * *

Not all Windows time zones can be mapped to system time zones. When the input * Windows time zone ID is unknown or unmappable to a system time zone, then this * method returns null. @@ -1207,7 +1210,7 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezablenull when the input ID is unknown or unmappable. * @see #getWindowsID(String) - * + * * @stable ICU 52 */ public static String getIDForWindowsID(String winid, String region) { @@ -1249,6 +1252,7 @@ abstract public class TimeZone implements Serializable, Cloneable, FreezableTimeZoneRule is an abstract class representing a rule for time zone. * TimeZoneRule has a set of time zone attributes, such as zone name, * raw offset (UTC offset for standard time) and daylight saving time offset. - * + * * @see com.ibm.icu.util.TimeZoneTransition * @see com.ibm.icu.util.RuleBasedTimeZone - * + * * @stable ICU 3.8 */ public abstract class TimeZoneRule implements Serializable { @@ -32,12 +32,12 @@ public abstract class TimeZoneRule implements Serializable { /** * Constructs a TimeZoneRule with the name, the GMT offset of its * standard time and the amount of daylight saving offset adjustment. - * + * * @param name The time zone name. * @param rawOffset The UTC offset of its standard time in milliseconds. * @param dstSavings The amount of daylight saving offset adjustment in milliseconds. * If this is a rule for standard time, the value of this argument is 0. - * + * * @stable ICU 3.8 */ public TimeZoneRule(String name, int rawOffset, int dstSavings) { @@ -48,9 +48,9 @@ public abstract class TimeZoneRule implements Serializable { /** * Gets the name of this time zone. - * + * * @return The name of this time zone. - * + * * @stable ICU 3.8 */ public String getName() { @@ -59,9 +59,9 @@ public abstract class TimeZoneRule implements Serializable { /** * Gets the standard time offset. - * + * * @return The standard time offset from UTC in milliseconds. 
- * + * * @stable ICU 3.8 */ public int getRawOffset() { @@ -70,10 +70,10 @@ public abstract class TimeZoneRule implements Serializable { /** * Gets the amount of daylight saving delta time from the standard time. - * + * * @return The amount of daylight saving offset used by this rule * in milliseconds. - * + * * @stable ICU 3.8 */ public int getDSTSavings() { @@ -87,7 +87,7 @@ public abstract class TimeZoneRule implements Serializable { * * @param other The TimeZoneRule object to be compared with. * @return true if the other TimeZoneRule is the same as this one. - * + * * @stable ICU 3.8 */ public boolean isEquivalentTo(TimeZoneRule other) { @@ -96,76 +96,76 @@ public abstract class TimeZoneRule implements Serializable { } return false; } - + /** * Gets the very first time when this rule takes effect. - * + * * @param prevRawOffset The standard time offset from UTC before this rule * takes effect in milliseconds. * @param prevDSTSavings The amount of daylight saving offset from the - * standard time. - * + * standard time. + * * @return The very first time when this rule takes effect. - * + * * @stable ICU 3.8 */ public abstract Date getFirstStart(int prevRawOffset, int prevDSTSavings); /** * Gets the final time when this rule takes effect. - * + * * @param prevRawOffset The standard time offset from UTC before this rule * takes effect in milliseconds. * @param prevDSTSavings The amount of daylight saving offset from the - * standard time. - * + * standard time. + * * @return The very last time when this rule takes effect, * or null if this rule is applied for future dates infinitely. - * + * * @stable ICU 3.8 */ public abstract Date getFinalStart(int prevRawOffset, int prevDSTSavings); /** * Gets the first time when this rule takes effect after the specified time. - * + * * @param base The first time after this time is returned. * @param prevRawOffset The standard time offset from UTC before this rule * takes effect in milliseconds. 
* @param prevDSTSavings The amount of daylight saving offset from the - * standard time. + * standard time. * @param inclusive Whether the base time is inclusive or not. - * + * * @return The first time when this rule takes effect after the specified time, * or null when this rule never takes effect after the specified time. - * + * * @stable ICU 3.8 */ public abstract Date getNextStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive); /** * Gets the most recent time when this rule takes effect before the specified time. - * + * * @param base The most recent time when this rule takes effect before * this time is returned. * @param prevRawOffset The standard time offset from UTC before this rule * takes effect in milliseconds. * @param prevDSTSavings The amount of daylight saving offset from the - * standard time. + * standard time. * @param inclusive Whether the base time is inclusive or not. - * + * * @return The most recent time when this rule takes effect before the specified time, * or null when this rule never takes effect before the specified time. - * + * * @stable ICU 3.8 */ public abstract Date getPreviousStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive); /** * Returns if this TimeZoneRule has one or more start times. - * + * * @return true if this TimeZoneRule has one or more start times. - * + * * @stable ICU 3.8 */ public abstract boolean isTransitionRule(); @@ -176,6 +176,7 @@ public abstract class TimeZoneRule implements Serializable { * in future version of ICU without any notice. 
* @stable ICU 3.8 */ + @Override public String toString() { StringBuilder buf = new StringBuilder(); buf.append("name=" + name); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZoneTransition.java b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZoneTransition.java index fc3ab3a1f5d..6d8de3a6f14 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZoneTransition.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZoneTransition.java @@ -11,7 +11,7 @@ package com.ibm.icu.util; * TimeZoneTransition is a class representing a time zone transition. * An instance has a time of transition and rules for both before and * after the transition. - * + * * @stable ICU 3.8 */ public class TimeZoneTransition { @@ -22,11 +22,11 @@ public class TimeZoneTransition { /** * Constructs a TimeZoneTransition with the time and the rules before/after * the transition. - * + * * @param time The time of transition in milliseconds since the base time. * @param from The time zone rule used before the transition. * @param to The time zone rule used after the transition. - * + * * @stable ICU 3.8 */ public TimeZoneTransition(long time, TimeZoneRule from, TimeZoneRule to) { @@ -37,9 +37,9 @@ public class TimeZoneTransition { /** * Returns the time of transition in milliseconds since the base time. - * + * * @return The time of the transition in milliseconds since the base time. - * + * * @stable ICU 3.8 */ public long getTime() { @@ -48,9 +48,9 @@ public class TimeZoneTransition { /** * Returns the rule used after the transition. - * + * * @return The time zone rule used after the transition. - * + * * @stable ICU 3.8 */ public TimeZoneRule getTo() { @@ -59,9 +59,9 @@ public class TimeZoneTransition { /** * Returns the rule used before the transition. - * + * * @return The time zone rule used after the transition. 
- * + * * @stable ICU 3.8 */ public TimeZoneRule getFrom() { @@ -72,9 +72,10 @@ public class TimeZoneTransition { * Returns a String representation of this TimeZoneTransition object. * This method is used for debugging purpose only. The string representation can be changed * in future version of ICU without any notice. - * + * * @stable ICU 3.8 */ + @Override public String toString() { StringBuilder buf = new StringBuilder(); buf.append("time=" + time); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/UResourceBundle.java b/icu4j/main/classes/core/src/com/ibm/icu/util/UResourceBundle.java index 3246066fda7..16a25d301de 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/UResourceBundle.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/UResourceBundle.java @@ -74,14 +74,14 @@ import com.ibm.icu.impl.ResourceBundleWrapper; * change. To open ICU style organization use: * *

- *      UResourceBundle bundle = 
- *          UResourceBundle.getBundleInstance("com/mycompany/resources", 
+ *      UResourceBundle bundle =
+ *          UResourceBundle.getBundleInstance("com/mycompany/resources",
  *                                            "en_US", myClassLoader);
  * 
* To open Java/JDK style organization use: *
- *      UResourceBundle bundle = 
- *          UResourceBundle.getBundleInstance("com.mycompany.resources.LocaleElements", 
+ *      UResourceBundle bundle =
+ *          UResourceBundle.getBundleInstance("com.mycompany.resources.LocaleElements",
  *                                            "en_US", myClassLoader);
  * 
* @@ -107,7 +107,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @stable ICU 3.0 */ public static UResourceBundle getBundleInstance(String baseName, String localeName){ - return getBundleInstance(baseName, localeName, ICUResourceBundle.ICU_DATA_CLASS_LOADER, + return getBundleInstance(baseName, localeName, ICUResourceBundle.ICU_DATA_CLASS_LOADER, false); } @@ -123,7 +123,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @return a resource bundle for the given base name and locale * @stable ICU 3.0 */ - public static UResourceBundle getBundleInstance(String baseName, String localeName, + public static UResourceBundle getBundleInstance(String baseName, String localeName, ClassLoader root){ return getBundleInstance(baseName, localeName, root, false); } @@ -144,7 +144,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @stable ICU 3.0 * */ - protected static UResourceBundle getBundleInstance(String baseName, String localeName, + protected static UResourceBundle getBundleInstance(String baseName, String localeName, ClassLoader root, boolean disableFallback) { return instantiateBundle(baseName, localeName, root, disableFallback); } @@ -187,7 +187,7 @@ public abstract class UResourceBundle extends ResourceBundle { baseName = ICUData.ICU_BASE_NAME; } ULocale uloc = ULocale.getDefault(); - return getBundleInstance(baseName, uloc.getBaseName(), ICUResourceBundle.ICU_DATA_CLASS_LOADER, + return getBundleInstance(baseName, uloc.getBaseName(), ICUResourceBundle.ICU_DATA_CLASS_LOADER, false); } @@ -244,7 +244,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @return a resource bundle for the given base name and locale * @stable ICU 3.8 */ - public static UResourceBundle getBundleInstance(String baseName, Locale locale, + public static UResourceBundle getBundleInstance(String baseName, Locale locale, ClassLoader loader) { if (baseName == null) { baseName = ICUData.ICU_BASE_NAME; @@ -267,7 +267,7 @@ 
public abstract class UResourceBundle extends ResourceBundle { * @return a resource bundle for the given base name and locale * @stable ICU 3.8 */ - public static UResourceBundle getBundleInstance(String baseName, ULocale locale, + public static UResourceBundle getBundleInstance(String baseName, ULocale locale, ClassLoader loader) { if (baseName == null) { baseName = ICUData.ICU_BASE_NAME; @@ -316,6 +316,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @return the locale of this resource bundle * @stable ICU 3.0 */ + @Override public Locale getLocale(){ return getULocale().toLocale(); } @@ -374,18 +375,18 @@ public abstract class UResourceBundle extends ResourceBundle { return ICUResourceBundle.getBundleInstance(baseName, localeName, root, disableFallback); case JAVA: - return ResourceBundleWrapper.getBundleInstance(baseName, localeName, root, + return ResourceBundleWrapper.getBundleInstance(baseName, localeName, root, disableFallback); case MISSING: default: UResourceBundle b; try{ - b = ICUResourceBundle.getBundleInstance(baseName, localeName, root, + b = ICUResourceBundle.getBundleInstance(baseName, localeName, root, disableFallback); setRootType(baseName, RootType.ICU); }catch(MissingResourceException ex){ - b = ResourceBundleWrapper.getBundleInstance(baseName, localeName, root, + b = ResourceBundleWrapper.getBundleInstance(baseName, localeName, root, disableFallback); setRootType(baseName, RootType.JAVA); } @@ -568,7 +569,7 @@ public abstract class UResourceBundle extends ResourceBundle { public UResourceBundle get(int index) { UResourceBundle obj = handleGet(index, null, this); if (obj == null) { - obj = (ICUResourceBundle) getParent(); + obj = getParent(); if (obj != null) { obj = obj.get(index); } @@ -614,6 +615,7 @@ public abstract class UResourceBundle extends ResourceBundle { * which is empty if this is not a bundle or a table resource * @stable ICU 3.8 */ + @Override public Enumeration getKeys() { return 
Collections.enumeration(keySet()); } @@ -625,6 +627,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @internal * @deprecated This API is ICU internal only. */ + @Override @Deprecated public Set keySet() { // TODO: Java 6 ResourceBundle has keySet() which calls handleKeySet() @@ -676,6 +679,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @internal * @deprecated This API is ICU internal only. */ + @Override @Deprecated protected Set handleKeySet() { return Collections.emptySet(); @@ -684,7 +688,7 @@ public abstract class UResourceBundle extends ResourceBundle { /** * {@icu} Returns the size of a resource. Size for scalar types is always 1, and for * vector/table types is the number of child resources. - * + * *
Note: Integer array is treated as a scalar type. There are no APIs to * access individual members of an integer array. It is always returned as a whole. * @return number of resources in a given resource. @@ -795,7 +799,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @return UResourceBundle a resource associated with the key * @stable ICU 3.8 */ - protected UResourceBundle handleGet(String aKey, HashMap aliasesVisited, + protected UResourceBundle handleGet(String aKey, HashMap aliasesVisited, UResourceBundle requested) { return null; } @@ -811,7 +815,7 @@ public abstract class UResourceBundle extends ResourceBundle { * @return UResourceBundle a resource associated with the index * @stable ICU 3.8 */ - protected UResourceBundle handleGet(int index, HashMap aliasesVisited, + protected UResourceBundle handleGet(int index, HashMap aliasesVisited, UResourceBundle requested) { return null; } @@ -844,6 +848,7 @@ public abstract class UResourceBundle extends ResourceBundle { // this method is declared in ResourceBundle class // so cannot change the signature // Override this method + @Override protected Object handleGetObject(String aKey) { return handleGetObjectImpl(aKey, this); } diff --git a/icu4j/main/classes/currdata/src/com/ibm/icu/impl/ICUCurrencyMetaInfo.java b/icu4j/main/classes/currdata/src/com/ibm/icu/impl/ICUCurrencyMetaInfo.java index 3337f1d20e6..0bb4620eaef 100644 --- a/icu4j/main/classes/currdata/src/com/ibm/icu/impl/ICUCurrencyMetaInfo.java +++ b/icu4j/main/classes/currdata/src/com/ibm/icu/impl/ICUCurrencyMetaInfo.java @@ -67,7 +67,7 @@ public class ICUCurrencyMetaInfo extends CurrencyMetaInfo { return new CurrencyDigits(data[0], data[1]); } } - + private List collect(Collector collector, CurrencyFilter filter) { // We rely on the fact that the data lists the regions in order, and the // priorities in order within region. 
This means we don't need @@ -169,7 +169,7 @@ public class ICUCurrencyMetaInfo extends CurrencyMetaInfo { return defaultValue; } int[] values = b.getIntVector(); - return ((long) values[0] << 32) | (((long) values[1]) & MASK); + return ((long) values[0] << 32) | ((values[1]) & MASK); } // Utility, just because I don't like the n^2 behavior of using list.contains to build a @@ -199,14 +199,17 @@ public class ICUCurrencyMetaInfo extends CurrencyMetaInfo { // about duplicates. private List result = new ArrayList(); + @Override public void collect(String region, String currency, long from, long to, int priority, boolean tender) { result.add(new CurrencyInfo(region, currency, from, to, priority, tender)); } + @Override public List getList() { return Collections.unmodifiableList(result); } + @Override public int collects() { return Everything; } @@ -215,15 +218,18 @@ public class ICUCurrencyMetaInfo extends CurrencyMetaInfo { private static class RegionCollector implements Collector { private final UniqueList result = UniqueList.create(); + @Override public void collect( String region, String currency, long from, long to, int priority, boolean tender) { result.add(region); } + @Override public int collects() { return Region; } + @Override public List getList() { return result.list(); } @@ -232,15 +238,18 @@ public class ICUCurrencyMetaInfo extends CurrencyMetaInfo { private static class CurrencyCollector implements Collector { private final UniqueList result = UniqueList.create(); + @Override public void collect( String region, String currency, long from, long to, int priority, boolean tender) { result.add(currency); } + @Override public int collects() { return Currency; } + @Override public List getList() { return result.list(); } diff --git a/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/CurrencyNameProviderICU.java b/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/CurrencyNameProviderICU.java index 80c76e0607a..8093b779d72 100644 --- 
a/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/CurrencyNameProviderICU.java +++ b/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/CurrencyNameProviderICU.java @@ -26,7 +26,7 @@ public class CurrencyNameProviderICU extends CurrencyNameProvider { return sym; } - //@Override + @Override public String getDisplayName(String currencyCode, Locale locale) { CurrencyDisplayNames curDispNames = CurrencyDisplayNames.getInstance(ICULocaleServiceProvider.toULocaleNoSpecialVariant(locale)); String name = curDispNames.getName(currencyCode); diff --git a/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/LocaleNameProviderICU.java b/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/LocaleNameProviderICU.java index ef622c50478..ffa44788704 100644 --- a/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/LocaleNameProviderICU.java +++ b/icu4j/main/classes/localespi/src/com/ibm/icu/impl/javaspi/util/LocaleNameProviderICU.java @@ -39,7 +39,7 @@ public class LocaleNameProviderICU extends LocaleNameProvider { return disp; } - //@Override + @Override public String getDisplayScript(String scriptCode, Locale locale) { scriptCode = AsciiUtil.toTitleString(scriptCode); String disp = LocaleDisplayNames.getInstance(ICULocaleServiceProvider.toULocaleNoSpecialVariant(locale)) diff --git a/icu4j/main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CollatorICU.java b/icu4j/main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CollatorICU.java index 48a28d21d73..3a92ccfd54c 100644 --- a/icu4j/main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CollatorICU.java +++ b/icu4j/main/classes/localespi/src/com/ibm/icu/impl/jdkadapter/CollatorICU.java @@ -32,6 +32,7 @@ public class CollatorICU extends java.text.Collator { return fIcuCollator; } + @Override public Object clone() { CollatorICU other = (CollatorICU)super.clone(); try { @@ -45,14 +46,17 @@ public class CollatorICU extends java.text.Collator { return other; } + @Override 
public int compare(Object o1, Object o2) { return fIcuCollator.compare(o1, o2); } + @Override public int compare(String source, String target) { return fIcuCollator.compare(source, target); } + @Override public boolean equals(Object that) { if (that instanceof CollatorICU) { return ((CollatorICU)that).fIcuCollator.equals(fIcuCollator); @@ -60,15 +64,18 @@ public class CollatorICU extends java.text.Collator { return false; } + @Override public boolean equals(String source, String target) { return fIcuCollator.equals(source, target); } + @Override public CollationKey getCollationKey(String source) { com.ibm.icu.text.CollationKey icuCollKey = fIcuCollator.getCollationKey(source); return CollationKeyICU.wrap(icuCollKey); } + @Override public int getDecomposition() { int mode = java.text.Collator.NO_DECOMPOSITION; @@ -89,6 +96,7 @@ public class CollatorICU extends java.text.Collator { return mode; } + @Override public int getStrength() { int strength; int icuStrength = fIcuCollator.getStrength(); @@ -116,10 +124,12 @@ public class CollatorICU extends java.text.Collator { return strength; } + @Override public int hashCode() { return fIcuCollator.hashCode(); } + @Override public void setDecomposition(int decompositionMode) { switch (decompositionMode) { case java.text.Collator.CANONICAL_DECOMPOSITION: @@ -138,6 +148,7 @@ public class CollatorICU extends java.text.Collator { } } + @Override public void setStrength(int newStrength) { switch (newStrength) { case java.text.Collator.IDENTICAL: diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java index b41429a8e17..bc2bc2d030b 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java @@ -66,7 +66,7 @@ class AnyTransliterator extends Transliterator { * The target script code. Never USCRIPT_INVALID_CODE. 
*/ private int targetScript; - + /** * Special code for handling width characters */ @@ -75,6 +75,7 @@ class AnyTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. */ + @Override protected void handleTransliterate(Replaceable text, Position pos, boolean isIncremental) { int allStart = pos.start; @@ -149,7 +150,7 @@ class AnyTransliterator extends Transliterator { * @param id the ID of the form S-T or S-T/V, where T is theTarget * and V is theVariant. Must not be empty. * @param filter The Unicode filter. - * @param target2 the target name. + * @param target2 the target name. * @param targetScript2 the script code corresponding to theTarget. * @param widthFix2 The Transliterator width fix. * @param cache2 The Map object for cache. @@ -257,7 +258,7 @@ class AnyTransliterator extends Transliterator { for (Enumeration v = Transliterator.getAvailableVariants(source, target); v.hasMoreElements(); ) { String variant = v.nextElement(); - + // Only process each target/variant pair once if (seenVariants.contains(variant)) { continue; diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java index 9fbcd54a39d..3c9ed7d7052 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/BreakTransliterator.java @@ -73,6 +73,7 @@ final class BreakTransliterator extends Transliterator { | (1< end) { @@ -267,6 +271,7 @@ final class BreakTransliterator extends Transliterator { * Implements CharacterIterator.current() for String. * @see CharacterIterator#current */ + @Override public char current() { if (pos >= begin && pos < end) { @@ -281,6 +286,7 @@ final class BreakTransliterator extends Transliterator { * Implements CharacterIterator.next() for String. 
* @see CharacterIterator#next */ + @Override public char next() { if (pos < end - 1) { @@ -297,6 +303,7 @@ final class BreakTransliterator extends Transliterator { * Implements CharacterIterator.previous() for String. * @see CharacterIterator#previous */ + @Override public char previous() { if (pos > begin) { @@ -312,6 +319,7 @@ final class BreakTransliterator extends Transliterator { * Implements CharacterIterator.getBeginIndex() for String. * @see CharacterIterator#getBeginIndex */ + @Override public int getBeginIndex() { return begin; @@ -321,6 +329,7 @@ final class BreakTransliterator extends Transliterator { * Implements CharacterIterator.getEndIndex() for String. * @see CharacterIterator#getEndIndex */ + @Override public int getEndIndex() { return end; @@ -330,6 +339,7 @@ final class BreakTransliterator extends Transliterator { * Implements CharacterIterator.getIndex() for String. * @see CharacterIterator#getIndex */ + @Override public int getIndex() { return pos; @@ -341,6 +351,7 @@ final class BreakTransliterator extends Transliterator { * @return true if the given obj is the same as this * ReplaceableCharacterIterator object; false otherwise. */ + @Override public boolean equals(Object obj) { if (this == obj) { @@ -368,6 +379,7 @@ final class BreakTransliterator extends Transliterator { * Computes a hashcode for this iterator. * @return A hash code */ + @Override public int hashCode() { return text.hashCode() ^ pos ^ begin ^ end; @@ -377,6 +389,7 @@ final class BreakTransliterator extends Transliterator { * Creates a copy of this iterator. 
* @return A copy of this */ + @Override public Object clone() { try { diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/CaseFoldTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/CaseFoldTransliterator.java index 5cc39b0678e..c7b936eb850 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/CaseFoldTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/CaseFoldTransliterator.java @@ -21,7 +21,7 @@ class CaseFoldTransliterator extends Transliterator{ * Package accessible ID. */ static final String _ID = "Any-CaseFold"; - + // TODO: Add variants for tr, az, lt, default = default locale /** @@ -29,6 +29,7 @@ class CaseFoldTransliterator extends Transliterator{ */ static void register() { Transliterator.registerFactory(_ID, new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new CaseFoldTransliterator(); } @@ -55,6 +56,7 @@ class CaseFoldTransliterator extends Transliterator{ /** * Implements {@link Transliterator#handleTransliterate}. 
*/ + @Override protected synchronized void handleTransliterate(Replaceable text, Position offsets, boolean isIncremental) { if(csp==null) { @@ -63,7 +65,7 @@ class CaseFoldTransliterator extends Transliterator{ if(offsets.start >= offsets.limit) { return; - } + } iter.setText(text); result.setLength(0); @@ -105,9 +107,9 @@ class CaseFoldTransliterator extends Transliterator{ } offsets.start = offsets.limit; } - + static SourceTargetUtility sourceTargetUtility = null; - + /* (non-Javadoc) * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) */ @@ -116,6 +118,7 @@ class CaseFoldTransliterator extends Transliterator{ synchronized (UppercaseTransliterator.class) { if (sourceTargetUtility == null) { sourceTargetUtility = new SourceTargetUtility(new Transform() { + @Override public String transform(String source) { return UCharacter.foldCase(source, true); } diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/CompoundTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/CompoundTransliterator.java index 0301c678b3a..709e6a1893c 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/CompoundTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/CompoundTransliterator.java @@ -117,7 +117,7 @@ class CompoundTransliterator extends Transliterator { /** * Internal method for safeClone... * @param id - * @param filter2 + * @param filter2 * @param trans2 * @param numAnonymousRBTs2 */ @@ -126,7 +126,7 @@ class CompoundTransliterator extends Transliterator { trans = trans2; numAnonymousRBTs = numAnonymousRBTs2; } - + /** * Finish constructing a transliterator: only to be called by * constructors. Before calling init(), set trans and filter to NULL. @@ -263,6 +263,7 @@ class CompoundTransliterator extends Transliterator { * U+000A, U+0020..U+007E. 
* @return the rule string */ + @Override public String toRules(boolean escapeUnprintable) { // We do NOT call toRules() on our component transliterators, in // general. If we have several rule-based transliterators, this @@ -339,6 +340,7 @@ class CompoundTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. */ + @Override protected void handleTransliterate(Replaceable text, Position index, boolean incremental) { /* Call each transliterator with the same start value and diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/EscapeTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/EscapeTransliterator.java index 9c3bb7dfc3e..2ceec92f4dd 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/EscapeTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/EscapeTransliterator.java @@ -82,31 +82,35 @@ class EscapeTransliterator extends Transliterator { static void register() { // Unicode: "U+10FFFF" hex, min=4, max=6 Transliterator.registerFactory("Any-Hex/Unicode", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex/Unicode", "U+", "", 16, 4, true, null); } }); - + // Java: "\\uFFFF" hex, min=4, max=4 Transliterator.registerFactory("Any-Hex/Java", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex/Java", "\\u", "", 16, 4, false, null); } }); - + // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 Transliterator.registerFactory("Any-Hex/C", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex/C", "\\u", "", 16, 4, true, new EscapeTransliterator("", "\\U", "", 16, 8, true, null)); } }); - + // XML: "􏿿" hex, min=1, max=6 Transliterator.registerFactory("Any-Hex/XML", new Transliterator.Factory() { + @Override public Transliterator 
getInstance(String ID) { return new EscapeTransliterator("Any-Hex/XML", "&#x", ";", 16, 1, true, null); @@ -115,6 +119,7 @@ class EscapeTransliterator extends Transliterator { // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") Transliterator.registerFactory("Any-Hex/XML10", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex/XML10", "&#", ";", 10, 1, true, null); @@ -123,6 +128,7 @@ class EscapeTransliterator extends Transliterator { // Perl: "\\x{263A}" hex, min=1, max=6 Transliterator.registerFactory("Any-Hex/Perl", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex/Perl", "\\x{", "}", 16, 1, true, null); @@ -131,14 +137,16 @@ class EscapeTransliterator extends Transliterator { // Plain: "FFFF" hex, min=4, max=6 Transliterator.registerFactory("Any-Hex/Plain", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex/Plain", "", "", 16, 4, true, null); } }); - + // Generic Transliterator.registerFactory("Any-Hex", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new EscapeTransliterator("Any-Hex", "\\u", "", 16, 4, false, null); @@ -166,6 +174,7 @@ class EscapeTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. 
*/ + @Override protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) { int start = pos.start; diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/FunctionReplacer.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/FunctionReplacer.java index 3ba3eb311fe..a11f453ad68 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/FunctionReplacer.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/FunctionReplacer.java @@ -46,6 +46,7 @@ class FunctionReplacer implements UnicodeReplacer { /** * UnicodeReplacer API */ + @Override public int replace(Replaceable text, int start, int limit, @@ -64,6 +65,7 @@ class FunctionReplacer implements UnicodeReplacer { /** * UnicodeReplacer API */ + @Override public String toReplacerPattern(boolean escapeUnprintable) { StringBuilder rule = new StringBuilder("&"); rule.append(translit.getID()); @@ -78,6 +80,7 @@ class FunctionReplacer implements UnicodeReplacer { * into the given set. * @param toUnionTo the set into which to union the output characters */ + @Override public void addReplacementSetTo(UnicodeSet toUnionTo) { toUnionTo.addAll(translit.getTargetSet()); } diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/NameUnicodeTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/NameUnicodeTransliterator.java index 274f3e19aeb..8c4383b7ad8 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/NameUnicodeTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/NameUnicodeTransliterator.java @@ -29,6 +29,7 @@ class NameUnicodeTransliterator extends Transliterator { */ static void register() { Transliterator.registerFactory(_ID, new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new NameUnicodeTransliterator(null); } @@ -45,6 +46,7 @@ class NameUnicodeTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. 
*/ + @Override protected void handleTransliterate(Replaceable text, Position offsets, boolean isIncremental) { @@ -64,7 +66,7 @@ class NameUnicodeTransliterator extends Transliterator { // 1 - after open delimiter int mode = 0; int openPos = -1; // open delim candidate pos - + int c; while (cursor < limit) { c = text.char32At(cursor); @@ -88,7 +90,7 @@ class NameUnicodeTransliterator extends Transliterator { // to a single space. If closeDelimiter is found, exit // the loop. If any other character is found, exit the // loop. If the limit is reached, exit the loop. - + // Convert \s+ => SPACE. This assumes there are no // runs of >1 space characters in names. if (PatternProps.isWhiteSpace(c)) { @@ -108,7 +110,7 @@ class NameUnicodeTransliterator extends Transliterator { if (c == CLOSE_DELIM) { int len = name.length(); - + // Delete trailing space, if any if (len > 0 && name.charAt(len-1) == SPACE) { @@ -175,7 +177,7 @@ class NameUnicodeTransliterator extends Transliterator { public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.contains(CLOSE_DELIM)) { - return; // we have to contain both prefix and suffix + return; // we have to contain both prefix and suffix } UnicodeSet items = new UnicodeSet() .addAll('0', '9') diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java index e89a7a78d3b..8877882e4e3 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/NormalizationTransliterator.java @@ -28,31 +28,37 @@ final class NormalizationTransliterator extends Transliterator { */ static void register() { Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() { + 
@Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFC", Normalizer2.getNFCInstance()); } }); Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFD", Normalizer2.getNFDInstance()); } }); Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFKC", Normalizer2.getNFKCInstance()); } }); Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFKD", Normalizer2.getNFKDInstance()); } }); Transliterator.registerFactory("Any-FCD", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("FCD", Norm2AllModes.getFCDNormalizer2()); } }); Transliterator.registerFactory("Any-FCC", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("FCC", Norm2AllModes.getNFCInstance().fcc); } @@ -74,6 +80,7 @@ final class NormalizationTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. 
*/ + @Override protected void handleTransliterate(Replaceable text, Position offsets, boolean isIncremental) { // start and limit of the input range @@ -130,16 +137,17 @@ final class NormalizationTransliterator extends Transliterator { } static final Map SOURCE_CACHE = new HashMap(); - + // TODO Get rid of this if Normalizer2 becomes a Transform static class NormalizingTransform implements Transform { final Normalizer2 norm2; public NormalizingTransform(Normalizer2 norm2) { this.norm2 = norm2; } + @Override public String transform(String source) { return norm2.normalize(source); - } + } } /* (non-Javadoc) diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/NullTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/NullTransliterator.java index a1bb6be7ebb..c95386e2da7 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/NullTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/NullTransliterator.java @@ -28,6 +28,7 @@ class NullTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. */ + @Override protected void handleTransliterate(Replaceable text, Position offsets, boolean incremental) { offsets.start = offsets.limit; diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/RemoveTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/RemoveTransliterator.java index 5bebbfb10ee..e9b38d6899a 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/RemoveTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/RemoveTransliterator.java @@ -25,6 +25,7 @@ class RemoveTransliterator extends Transliterator { */ static void register() { Transliterator.registerFactory(_ID, new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new RemoveTransliterator(); } @@ -42,6 +43,7 @@ class RemoveTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. 
*/ + @Override protected void handleTransliterate(Replaceable text, Position index, boolean incremental) { // Our caller (filteredTransliterate) has already narrowed us diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java index 09179421bba..0c42ff82cd5 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java @@ -276,7 +276,7 @@ import java.util.Map; @Deprecated public class RuleBasedTransliterator extends Transliterator { - private Data data; + private final Data data; // /** // * Constructs a new transliterator from the given rules. @@ -325,6 +325,7 @@ public class RuleBasedTransliterator extends Transliterator { * @internal * @deprecated This API is ICU internal only. */ + @Override @Deprecated protected void handleTransliterate(Replaceable text, Position index, boolean incremental) { @@ -442,6 +443,7 @@ public class RuleBasedTransliterator extends Transliterator { * @internal * @deprecated This API is ICU internal only. */ + @Override @Deprecated public String toRules(boolean escapeUnprintable) { return data.ruleSet.toRules(escapeUnprintable); @@ -462,7 +464,7 @@ public class RuleBasedTransliterator extends Transliterator { // public UnicodeSet getTargetSet() { // return data.ruleSet.getSourceTargetSet(true, unicodeFilter); // } - + /** * @internal * @deprecated This API is ICU internal only. diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/StringMatcher.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/StringMatcher.java index d98856bfa69..fb56884518c 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/StringMatcher.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/StringMatcher.java @@ -38,7 +38,7 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer { * match. 
*/ private int matchStart; - + /** * Limit offset, in the match text, of the rightmost * match. @@ -98,6 +98,7 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer { /** * Implement UnicodeMatcher */ + @Override public int matches(Replaceable text, int[] offset, int limit, @@ -174,6 +175,7 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer { /** * Implement UnicodeMatcher */ + @Override public String toPattern(boolean escapeUnprintable) { StringBuffer result = new StringBuffer(); StringBuffer quoteBuf = new StringBuffer(); @@ -202,6 +204,7 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer { /** * Implement UnicodeMatcher */ + @Override public boolean matchesIndexValue(int v) { if (pattern.length() == 0) { return true; @@ -217,6 +220,7 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer { * set. * @param toUnionTo the set into which to union the source characters */ + @Override public void addMatchSetTo(UnicodeSet toUnionTo) { int ch; for (int i=0; i 0); StringBuffer rule = new StringBuffer("$"); @@ -279,6 +285,7 @@ class StringMatcher implements UnicodeMatcher, UnicodeReplacer { * into the given set. * @param toUnionTo the set into which to union the output characters */ + @Override public void addReplacementSetTo(UnicodeSet toUnionTo) { // The output of this replacer varies; it is the source text between // matchStart and matchLimit. 
Since this varies depending on the diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/StringReplacer.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/StringReplacer.java index ee995ec6712..3bd8d0eff4a 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/StringReplacer.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/StringReplacer.java @@ -110,6 +110,7 @@ class StringReplacer implements UnicodeReplacer { /** * UnicodeReplacer API */ + @Override public int replace(Replaceable text, int start, int limit, @@ -224,7 +225,7 @@ class StringReplacer implements UnicodeReplacer { // Delete the old text (the key) text.replace(start + outLen, limit + outLen, ""); - } + } if (hasCursor) { // Adjust the cursor for positions outside the key. These @@ -264,6 +265,7 @@ class StringReplacer implements UnicodeReplacer { /** * UnicodeReplacer API */ + @Override public String toReplacerPattern(boolean escapeUnprintable) { StringBuffer rule = new StringBuffer(); StringBuffer quoteBuf = new StringBuffer(); @@ -318,6 +320,7 @@ class StringReplacer implements UnicodeReplacer { * into the given set. * @param toUnionTo the set into which to union the output characters */ + @Override public void addReplacementSetTo(UnicodeSet toUnionTo) { int ch; for (int i=0; i x diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java index de0de7130b2..7e324c9c1ce 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java @@ -35,16 +35,16 @@ import com.ibm.icu.util.UResourceBundle; * changes Russian text written in Cyrillic characters to phonetically equivalent Latin characters. It does not * translate Russian to English! Transliteration, unlike translation, operates on characters, without reference * to the meanings of words and sentences. - * + * *

* Although script conversion is its most common use, a transliterator can actually perform a more general class of * tasks. In fact, Transliterator defines a very general API which specifies only that a segment of the * input text is replaced by new text. The particulars of this conversion are determined entirely by subclasses of * Transliterator. - * + * *

* Transliterators are stateless - * + * *

* Transliterator objects are stateless; they retain no information between calls to * transliterate(). As a result, threads may share transliterators without synchronizing them. This might @@ -52,56 +52,56 @@ import com.ibm.icu.util.UResourceBundle; * transliterations by delaying the replacement of text until it is known that no other replacements are possible. In * other words, although the Transliterator objects are stateless, the source text itself embodies all the * needed information, and delayed operation allows arbitrary complexity. - * + * *

* Batch transliteration - * + * *

* The simplest way to perform transliteration is all at once, on a string of existing text. This is referred to as * batch transliteration. For example, given a string input and a transliterator t, * the call - * + * *

String result = t.transliterate(input); *
- * + * * will transliterate it and return the result. Other methods allow the client to specify a substring to be * transliterated and to use {@link Replaceable} objects instead of strings, in order to preserve out-of-band * information (such as text styles). - * + * *

* Keyboard transliteration - * + * *

* Somewhat more involved is keyboard, or incremental transliteration. This is the transliteration of text that * is arriving from some source (typically the user's keyboard) one character at a time, or in some other piecemeal * fashion. - * + * *

* In keyboard transliteration, a Replaceable buffer stores the text. As text is inserted, as much as * possible is transliterated on the fly. This means a GUI that displays the contents of the buffer may show text being * modified as each new character arrives. - * + * *

* Consider the simple RuleBasedTransliterator: - * + * *

* th>{theta}
* t>{tau} *
- * + * * When the user types 't', nothing will happen, since the transliterator is waiting to see if the next character is * 'h'. To remedy this, we introduce the notion of a cursor, marked by a '|' in the output string: - * + * *
* t>|{tau}
* {tau}h>{theta} *
- * + * * Now when the user types 't', tau appears, and if the next character is 'h', the tau changes to a theta. This is * accomplished by maintaining a cursor position (independent of the insertion point, and invisible in the GUI) across * calls to transliterate(). Typically, the cursor will be coincident with the insertion point, but in a * case like the one above, it will precede the insertion point. - * + * *

* Keyboard transliteration methods maintain a set of three indices that are updated with each call to * transliterate(), including the cursor, start, and limit. These indices are changed by the method, and @@ -113,48 +113,48 @@ import com.ibm.icu.util.UResourceBundle; * RuleBasedTransliterator. Any characters before the cursor index are frozen; future keyboard * transliteration calls within this input sequence will not change them. New text is inserted at the limit * index, which marks the end of the substring that the transliterator looks at. - * + * *

* Because keyboard transliteration assumes that more characters are to arrive, it is conservative in its operation. It * only transliterates when it can do so unambiguously. Otherwise it waits for more characters to arrive. When the * client code knows that no more characters are forthcoming, perhaps because the user has performed some input * termination operation, then it should call finishTransliteration() to complete any pending * transliterations. - * + * *

* Inverses - * + * *

* Pairs of transliterators may be inverses of one another. For example, if transliterator A transliterates * characters by incrementing their Unicode value (so "abc" -> "def"), and transliterator B decrements character * values, then A is an inverse of B and vice versa. If we compose A with B in a compound * transliterator, the result is the indentity transliterator, that is, a transliterator that does not change its input * text. - * + * * The Transliterator method getInverse() returns a transliterator's inverse, if one exists, * or null otherwise. However, the result of getInverse() usually will not be a true * mathematical inverse. This is because true inverse transliterators are difficult to formulate. For example, consider * two transliterators: AB, which transliterates the character 'A' to 'B', and BA, which transliterates * 'B' to 'A'. It might seem that these are exact inverses, since - * + * *

"A" x AB -> "B"
* "B" x BA -> "A"
- * + * * where 'x' represents transliteration. However, - * + * *
"ABCD" x AB -> "BBCD"
* "BBCD" x BA -> "AACD"
- * + * * so AB composed with BA is not the identity. Nonetheless, BA may be usefully considered to be * AB's inverse, and it is on this basis that AB.getInverse() could legitimately return * BA. - * + * *

* Filtering *

Each transliterator has a filter, which restricts changes to those characters selected by the filter. The * filter affects just the characters that are changed -- the characters outside of the filter are still part of the * context for the filter. For example, in the following even though 'x' is filtered out, and doesn't convert to y, it does affect the conversion of 'a'. - * + * *

  * String rules = "x > y; x{a} > b; ";
  * Transliterator tempTrans = Transliterator.createFromRules("temp", rules, Transliterator.FORWARD);
@@ -164,7 +164,7 @@ import com.ibm.icu.util.UResourceBundle;
  *
*

* IDs and display names - * + * *

* A transliterator is designated by a short identifier string or ID. IDs follow the format * source-destination, where source describes the entity being replaced, and destination @@ -173,27 +173,27 @@ import com.ibm.icu.util.UResourceBundle; * Russian to Latin might be named "Russian-Latin". A transliterator from keyboard escape sequences to Latin-1 * characters might be named "KeyboardEscape-Latin1". By convention, system entity names are in English, with the * initial letters of words capitalized; user entity names may follow any format so long as they do not contain dashes. - * + * *

* In addition to programmatic IDs, transliterator objects have display names for presentation in user interfaces, * returned by {@link #getDisplayName}. - * + * *

* Factory methods and registration - * + * *

* In general, client code should use the factory method getInstance() to obtain an instance of a * transliterator given its ID. Valid IDs may be enumerated using getAvailableIDs(). Since transliterators * are stateless, multiple calls to getInstance() with the same ID will return the same object. - * + * *

* In addition to the system transliterators registered at startup, user transliterators may be registered by calling * registerInstance() at run time. To register a transliterator subclass without instantiating it (until it * is needed), users may call registerClass(). - * + * *

* Composed transliterators - * + * *

* In addition to built-in system transliterators like "Latin-Greek", there are also built-in composed * transliterators. These are implemented by composing two or more component transliterators. For example, if we have @@ -204,26 +204,26 @@ import com.ibm.icu.util.UResourceBundle; * 2 - n, so as n gets larger the gain becomes significant. With 9 scripts, it's 18 vs. 72 * rule sets, a big difference.) Note the use of "~" rather than "-" for the script separator here; this indicates that * the given transliterator is intended to be composed with others, rather than be used as is. - * + * *

* Composed transliterators can be instantiated as usual. For example, the system transliterator "Devanagari-Gujarati" * is a composed transliterator built internally as "Devanagari~InterIndic;InterIndic~Gujarati". When this * transliterator is instantiated, it appears externally to be a standard transliterator (e.g., getID() returns * "Devanagari-Gujarati"). - * + * *

* Subclassing - * + * *

* Subclasses must implement the abstract method handleTransliterate(). *

* Subclasses should override the transliterate() method taking a Replaceable and the * transliterate() method taking a String and StringBuffer if the performance of * these methods can be improved over the performance obtained by the default implementations in this class. - * + * *

* Copyright © IBM Corporation 1999. All rights reserved. - * + * * @author Alan Liu * @stable ICU 2.0 */ @@ -362,6 +362,7 @@ public abstract class Transliterator implements StringTransform { * Returns true if this Position is equal to the given object. * @stable ICU 2.6 */ + @Override public boolean equals(Object obj) { if (obj instanceof Position) { Position pos = (Position) obj; @@ -372,13 +373,14 @@ public abstract class Transliterator implements StringTransform { } return false; } - + /** * Mock implementation of hashCode(). This implementation always returns a constant * value. When Java assertion is enabled, this method triggers an assertion failure. * @internal * @deprecated This API is ICU internal only. */ + @Override @Deprecated public int hashCode() { assert false : "hashCode not designed"; @@ -389,6 +391,7 @@ public abstract class Transliterator implements StringTransform { * Returns a string representation of this Position. * @stable ICU 2.6 */ + @Override public String toString() { return "[cs=" + contextStart + ", s=" + start @@ -1650,7 +1653,7 @@ public abstract class Transliterator implements StringTransform { } /** - * Returns the intersectionof this instance's filter intersected with an external filter. + * Returns the intersectionof this instance's filter intersected with an external filter. * The externalFilter must be frozen (it is frozen if not). * The result may be frozen, so don't attempt to modify. * @internal @@ -1719,11 +1722,11 @@ public abstract class Transliterator implements StringTransform { /** * Register a factory object with the given ID. The factory * method should return a new instance of the given transliterator. - * + * *

Because ICU may choose to cache Transliterator objects internally, this must * be called at application startup, prior to any calls to * Transliterator.getInstance to avoid undefined behavior. - * + * * @param ID the ID of this transliterator * @param factory the factory object * @stable ICU 2.0 @@ -1734,11 +1737,11 @@ public abstract class Transliterator implements StringTransform { /** * Register a Transliterator object with the given ID. - * + * *

Because ICU may choose to cache Transliterator objects internally, this must * be called at application startup, prior to any calls to * Transliterator.getInstance to avoid undefined behavior. - * + * * @param trans the Transliterator object * @stable ICU 2.2 */ @@ -1748,11 +1751,11 @@ public abstract class Transliterator implements StringTransform { /** * Register a Transliterator object. - * + * *

Because ICU may choose to cache Transliterator objects internally, this must * be called at application startup, prior to any calls to * Transliterator.getInstance to avoid undefined behavior. - * + * * @param trans the Transliterator object */ static void registerInstance(Transliterator trans, boolean visible) { @@ -1763,11 +1766,11 @@ public abstract class Transliterator implements StringTransform { * Register an ID as an alias of another ID. Instantiating * alias ID produces the same result as instantiating the original ID. * This is generally used to create short aliases of compound IDs. - * + * *

Because ICU may choose to cache Transliterator objects internally, this must * be called at application startup, prior to any calls to * Transliterator.getInstance to avoid undefined behavior. - * + * * @param aliasID The new ID being registered. * @param realID The existing ID that the new ID should be an alias of. * @stable ICU 3.6 @@ -1902,13 +1905,13 @@ public abstract class Transliterator implements StringTransform { * is the ID of the system transliterator being defined. These * are public IDs enumerated by Transliterator.getAvailableIDs(), * unless the second field is "internal". - * + * * is a ResourceReader resource name. Currently these refer * to file names under com/ibm/text/resources. This string is passed * directly to ResourceReader, together with . - * + * * is either "FORWARD" or "REVERSE". - * + * * is a string to be passed directly to * Transliterator.getInstance(). The returned Transliterator object * then has its ID changed to and is returned. @@ -1977,7 +1980,7 @@ public abstract class Transliterator implements StringTransform { BreakTransliterator.register(); AnyTransliterator.register(); // do this last! } - + /** * Register the script-based "Any" transliterators: Any-Latin, Any-Greek * @internal @@ -2005,13 +2008,14 @@ public abstract class Transliterator implements StringTransform { */ Transliterator getInstance(String ID); } - + /** * Implements StringTransform via this method. 
* @param source text to be transformed (eg lowercased) * @return result * @stable ICU 3.8 */ + @Override public String transform(String source) { return transliterate(source); } diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java index 3de35a0dea2..5398021bf56 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorParser.java @@ -79,7 +79,7 @@ class TransliteratorParser { /** * Vector of StringMatcher objects for segments. Used during the - * parsing of a single rule. + * parsing of a single rule. * segmentStandins.charAt(0) is the standin for "$1" and corresponds * to StringMatcher object segmentObjects.elementAt(0), etc. */ @@ -179,7 +179,7 @@ class TransliteratorParser { private static final char ALT_FORWARD_RULE_OP = '\u2192'; // Right Arrow private static final char ALT_FWDREV_RULE_OP = '\u2194'; // Left Right Arrow private static final char ALT_FUNCTION = '\u2206'; // Increment (~Greek Capital Delta) - + // Special characters disallowed at the top level private static UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]"); @@ -204,6 +204,7 @@ class TransliteratorParser { /** * Implement SymbolTable API. */ + @Override public char[] lookup(String name) { return variableNames.get(name); } @@ -211,6 +212,7 @@ class TransliteratorParser { /** * Implement SymbolTable API. */ + @Override public UnicodeMatcher lookupMatcher(int ch) { // Note that we cannot use data.lookup() because the // set array has not been constructed yet. @@ -225,6 +227,7 @@ class TransliteratorParser { * Implement SymbolTable API. Parse out a symbol reference * name. 
*/ + @Override public String parseReference(String text, ParsePosition pos, int limit) { int start = pos.getIndex(); int i = start; @@ -329,9 +332,11 @@ class TransliteratorParser { String[] array; int i; public RuleArray(String[] array) { this.array = array; i = 0; } + @Override public String handleNextLine() { return (i < array.length) ? array[i++] : null; } + @Override public void reset() { i = 0; } @@ -480,7 +485,7 @@ class TransliteratorParser { } pp.setIndex(pos-1); // Backup to opening '[' buf.append(parser.parseSet(rule, pp)); - pos = pp.getIndex(); + pos = pp.getIndex(); continue; } // Handle escapes @@ -527,7 +532,7 @@ class TransliteratorParser { } } quoteLimit = buf.length(); - + for (iq=quoteStart; iq end || start < 0 || end > 0xFFFF) { throw new IllegalIcuArgumentException("Invalid variable range " + start + ", " + end); } - + curData.variablesBase = (char) start; // first private use if (dataVector.size() == 0) { @@ -1385,7 +1389,7 @@ class TransliteratorParser { // know that pos points to /use\s/i; we can skip 4 characters // immediately pos += 4; - + // Here are the pragmas we recognize: // use variable range 0xE000 0xEFFF; // use maximum backup 16; @@ -1497,7 +1501,7 @@ class TransliteratorParser { } return c; } - + /** * Set the object for segment seg (1-based). 
*/ diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java index b743d1df672..a8dea71df8e 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java @@ -397,10 +397,12 @@ class TransliteratorRegistry { en = e; } + @Override public boolean hasMoreElements() { return en != null && en.hasMoreElements(); } + @Override public String nextElement() { return (en.nextElement()).getString(); } @@ -871,10 +873,10 @@ class TransliteratorRegistry { TransliteratorParser parser = new TransliteratorParser(); try { - + ResourceEntry re = (ResourceEntry) entry; parser.parse(re.resource, re.direction); - + } catch (ClassCastException e) { // If we pull a rule from a locale resource bundle it will // be a LocaleEntry. diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/UnescapeTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/UnescapeTransliterator.java index 434c7d4a2ea..15b00f50435 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/UnescapeTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/UnescapeTransliterator.java @@ -51,6 +51,7 @@ class UnescapeTransliterator extends Transliterator { static void register() { // Unicode: "U+10FFFF" hex, min=4, max=6 Transliterator.registerFactory("Hex-Any/Unicode", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any/Unicode", new char[] { 2, 0, 16, 4, 6, 'U', '+', @@ -58,9 +59,10 @@ class UnescapeTransliterator extends Transliterator { }); } }); - + // Java: "\\uFFFF" hex, min=4, max=4 Transliterator.registerFactory("Hex-Any/Java", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any/Java", new char[] 
{ 2, 0, 16, 4, 4, '\\', 'u', @@ -68,9 +70,10 @@ class UnescapeTransliterator extends Transliterator { }); } }); - + // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 Transliterator.registerFactory("Hex-Any/C", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any/C", new char[] { 2, 0, 16, 4, 4, '\\', 'u', @@ -79,9 +82,10 @@ class UnescapeTransliterator extends Transliterator { }); } }); - + // XML: "&#x10FFFF;" hex, min=1, max=6 Transliterator.registerFactory("Hex-Any/XML", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any/XML", new char[] { 3, 1, 16, 1, 6, '&', '#', 'x', ';', @@ -92,6 +96,7 @@ class UnescapeTransliterator extends Transliterator { // XML10: "&1114111;" dec, min=1, max=7 (not really "Hex-Any") Transliterator.registerFactory("Hex-Any/XML10", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any/XML10", new char[] { 2, 1, 10, 1, 7, '&', '#', ';', @@ -102,6 +107,7 @@ class UnescapeTransliterator extends Transliterator { // Perl: "\\x{263A}" hex, min=1, max=6 Transliterator.registerFactory("Hex-Any/Perl", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any/Perl", new char[] { 3, 1, 16, 1, 6, '\\', 'x', '{', '}', @@ -112,6 +118,7 @@ class UnescapeTransliterator extends Transliterator { // All: Java, C, Perl, XML, XML10, Unicode Transliterator.registerFactory("Hex-Any", new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnescapeTransliterator("Hex-Any", new char[] { 2, 0, 16, 4, 6, 'U', '+', // Unicode @@ -137,6 +144,7 @@ class UnescapeTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}.
*/ + @Override protected void handleTransliterate(Replaceable text, Position pos, boolean isIncremental) { int start = pos.start; diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/UnicodeNameTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/UnicodeNameTransliterator.java index 4236638a536..6a7d16efcdc 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/UnicodeNameTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/UnicodeNameTransliterator.java @@ -25,6 +25,7 @@ class UnicodeNameTransliterator extends Transliterator { */ static void register() { Transliterator.registerFactory(_ID, new Transliterator.Factory() { + @Override public Transliterator getInstance(String ID) { return new UnicodeNameTransliterator(null); } @@ -41,20 +42,21 @@ class UnicodeNameTransliterator extends Transliterator { /** * Implements {@link Transliterator#handleTransliterate}. */ + @Override protected void handleTransliterate(Replaceable text, Position offsets, boolean isIncremental) { int cursor = offsets.start; int limit = offsets.limit; - + StringBuilder str = new StringBuilder(); str.append(OPEN_DELIM); int len; String name; - + while (cursor < limit) { int c = text.char32At(cursor); if ((name=UCharacter.getExtendedName(c)) != null) { - + str.setLength(OPEN_DELIM_LEN); str.append(name).append(CLOSE_DELIM);