diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index f7073fec31f..81b6e0f68ab 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -798,7 +798,7 @@ _getKeywords(const char *localeID, } keywordsLen += keywordList[i].keywordLen + 1; if(valuesToo) { - if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { + if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) { uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); } keywordsLen += keywordList[i].valueLen; diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp index b0647e97a2a..2d6a9213c3d 100644 --- a/icu4c/source/common/uloc_tag.cpp +++ b/icu4c/source/common/uloc_tag.cpp @@ -12,11 +12,13 @@ #include "unicode/putil.h" #include "unicode/uloc.h" #include "ustr_imp.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "putilimp.h" #include "uinvchar.h" #include "ulocimp.h" +#include "uvector.h" #include "uassert.h" @@ -172,6 +174,46 @@ static const char* ultag_getGrandfathered(const ULanguageTag* langtag); #endif +namespace { + +// Helper class to memory manage CharString objects. +// Only ever stack-allocated, does not need to inherit UMemory. 
+class CharStringPool { +public: + CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {} + ~CharStringPool() = default; + + CharStringPool(const CharStringPool&) = delete; + CharStringPool& operator=(const CharStringPool&) = delete; + + icu::CharString* create() { + if (U_FAILURE(status)) { + return nullptr; + } + icu::CharString* const obj = new icu::CharString; + if (obj == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + pool.addElement(obj, status); + if (U_FAILURE(status)) { + delete obj; + return nullptr; + } + return obj; + } + +private: + static void U_CALLCONV deleter(void* obj) { + delete static_cast(obj); + } + + UErrorCode status; + icu::UVector pool; +}; + +} // namespace + /* * ------------------------------------------------- * @@ -900,7 +942,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac static int32_t _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; int32_t attrBufLength = 0; UEnumeration *keywordEnum = NULL; @@ -920,22 +961,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac AttributeListEntry *firstAttr = NULL; AttributeListEntry *attr; char *attrValue; - char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - char *pExtBuf = extBuf; - int32_t extBufCapacity = sizeof(extBuf); + CharStringPool extBufPool; const char *bcpKey=nullptr, *bcpValue=nullptr; UErrorCode tmpStatus = U_ZERO_ERROR; int32_t keylen; UBool isBcpUExt; while (TRUE) { + icu::CharString buf; key = uenum_next(keywordEnum, NULL, status); if (key == NULL) { break; } - len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); - /* buf must be null-terminated */ - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + char* buffer; + int32_t 
resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY; + + for (;;) { + buffer = buf.getAppendBuffer( + /*minCapacity=*/resultCapacity, + /*desiredCapacityHint=*/resultCapacity, + resultCapacity, + tmpStatus); + + if (U_FAILURE(tmpStatus)) { + break; + } + + len = uloc_getKeywordValue( + localeID, key, buffer, resultCapacity, &tmpStatus); + + if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + resultCapacity = len; + tmpStatus = U_ZERO_ERROR; + } + + if (U_FAILURE(tmpStatus)) { + if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; break; @@ -945,6 +1012,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac continue; } + buf.append(buffer, len, tmpStatus); + if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. + } + keylen = (int32_t)uprv_strlen(key); isBcpUExt = (keylen > 1); @@ -1007,7 +1079,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } /* we've checked buf is null-terminated above */ - bcpValue = uloc_toUnicodeLocaleType(key, buf); + bcpValue = uloc_toUnicodeLocaleType(key, buf.data()); if (bcpValue == NULL) { if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; @@ -1015,33 +1087,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } continue; } - if (bcpValue == buf) { - /* + if (bcpValue == buf.data()) { + /* When uloc_toUnicodeLocaleType(key, buf) returns the input value as is, the value is well-formed, but has no known mapping. 
This implementation normalizes the - the value to lower case + value to lower case */ - int32_t bcpValueLen = static_cast(uprv_strlen(bcpValue)); - if (bcpValueLen < extBufCapacity) { - uprv_strcpy(pExtBuf, bcpValue); - T_CString_toLowerCase(pExtBuf); - - bcpValue = pExtBuf; - - pExtBuf += (bcpValueLen + 1); - extBufCapacity -= (bcpValueLen + 1); - } else { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; + icu::CharString* extBuf = extBufPool.create(); + if (extBuf == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; } + int32_t bcpValueLen = static_cast(uprv_strlen(bcpValue)); + int32_t resultCapacity; + char* pExtBuf = extBuf->getAppendBuffer( + /*minCapacity=*/bcpValueLen, + /*desiredCapacityHint=*/bcpValueLen, + resultCapacity, + tmpStatus); + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } + + uprv_strcpy(pExtBuf, bcpValue); + T_CString_toLowerCase(pExtBuf); + + extBuf->append(pExtBuf, bcpValueLen, tmpStatus); + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } + + bcpValue = extBuf->data(); } } else { if (*key == PRIVATEUSE) { - if (!_isPrivateuseValueSubtags(buf, len)) { + if (!_isPrivateuseValueSubtags(buf.data(), len)) { if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; break; @@ -1049,7 +1132,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac continue; } } else { - if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { + if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) { if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; break; @@ -1058,20 +1141,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } } bcpKey = key; - if ((len + 1) < extBufCapacity) { - uprv_memcpy(pExtBuf, buf, len); - bcpValue = pExtBuf; - - pExtBuf += len; - - *pExtBuf = 0; - pExtBuf++; - - extBufCapacity -= (len + 1); - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; + icu::CharString* extBuf = 
extBufPool.create(); + if (extBuf == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; break; } + extBuf->append(buf.data(), len, tmpStatus); + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } + bcpValue = extBuf->data(); } /* create ExtensionListEntry */ @@ -2337,31 +2417,66 @@ uloc_toLanguageTag(const char* localeID, int32_t langtagCapacity, UBool strict, UErrorCode* status) { - /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ - char canonical[256]; - int32_t reslen = 0; + icu::CharString canonical; + int32_t reslen; UErrorCode tmpStatus = U_ZERO_ERROR; UBool hadPosix = FALSE; const char* pKeywordStart; /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ - canonical[0] = 0; - if (uprv_strlen(localeID) > 0) { - uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); - if (tmpStatus != U_ZERO_ERROR) { + int32_t resultCapacity = uprv_strlen(localeID); + if (resultCapacity > 0) { + char* buffer; + + for (;;) { + buffer = canonical.getAppendBuffer( + /*minCapacity=*/resultCapacity, + /*desiredCapacityHint=*/resultCapacity, + resultCapacity, + tmpStatus); + + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return 0; + } + + reslen = + uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); + + if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + resultCapacity = reslen; + tmpStatus = U_ZERO_ERROR; + } + + if (U_FAILURE(tmpStatus)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } + + canonical.append(buffer, reslen, tmpStatus); + if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. 
+ } + + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return 0; + } } + reslen = 0; + /* For handling special case - private use only tag */ - pKeywordStart = locale_getKeywordsStart(canonical); - if (pKeywordStart == canonical) { + pKeywordStart = locale_getKeywordsStart(canonical.data()); + if (pKeywordStart == canonical.data()) { UEnumeration *kwdEnum; int kwdCnt = 0; UBool done = FALSE; - kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); + kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus); if (kwdEnum != NULL) { kwdCnt = uenum_count(kwdEnum, &tmpStatus); if (kwdCnt == 1) { @@ -2399,12 +2514,12 @@ uloc_toLanguageTag(const char* localeID, } } - reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); - reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); - reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); - reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); + reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status); + reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); + reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); + reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); + reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); + reslen += 
_appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); return reslen; } diff --git a/icu4c/source/i18n/rbt.h b/icu4c/source/i18n/rbt.h index b998c694c23..671149f66ef 100644 --- a/icu4c/source/i18n/rbt.h +++ b/icu4c/source/i18n/rbt.h @@ -29,262 +29,10 @@ class TransliterationRuleData; /** * RuleBasedTransliterator is a transliterator - * that reads a set of rules in order to determine how to perform - * translations. Rule sets are stored in resource bundles indexed by - * name. Rules within a rule set are separated by semicolons (';'). - * To include a literal semicolon, prefix it with a backslash ('\'). - * Whitespace, as defined by Character.isWhitespace(), - * is ignored. If the first non-blank character on a line is '#', - * the entire line is ignored as a comment.

- * - *

Each set of rules consists of two groups, one forward, and one - * reverse. This is a convention that is not enforced; rules for one - * direction may be omitted, with the result that translations in - * that direction will not modify the source text. In addition, - * bidirectional forward-reverse rules may be specified for - * symmetrical transformations.

- * - *

Rule syntax

- * - *

Rule statements take one of the following forms:

- * - *
- *
$alefmadda=\u0622;
- *
Variable definition. The name on the - * left is assigned the text on the right. In this example, - * after this statement, instances of the left hand name, - * "$alefmadda", will be replaced by - * the Unicode character U+0622. Variable names must begin - * with a letter and consist only of letters, digits, and - * underscores. Case is significant. Duplicate names cause - * an exception to be thrown, that is, variables cannot be - * redefined. The right hand side may contain well-formed - * text of any length, including no text at all ("$empty=;"). - * The right hand side may contain embedded UnicodeSet - * patterns, for example, "$softvowel=[eiyEIY]".
- *
 
- *
ai>$alefmadda;
- *
Forward translation rule. This rule - * states that the string on the left will be changed to the - * string on the right when performing forward - * transliteration.
- *
 
- *
ai<$alefmadda;
- *
Reverse translation rule. This rule - * states that the string on the right will be changed to - * the string on the left when performing reverse - * transliteration.
- *
- * - *
- *
ai<>$alefmadda;
- *
Bidirectional translation rule. This - * rule states that the string on the right will be changed - * to the string on the left when performing forward - * transliteration, and vice versa when performing reverse - * transliteration.
- *
- * - *

Translation rules consist of a match pattern and an output - * string. The match pattern consists of literal characters, - * optionally preceded by context, and optionally followed by - * context. Context characters, like literal pattern characters, - * must be matched in the text being transliterated. However, unlike - * literal pattern characters, they are not replaced by the output - * text. For example, the pattern "abc{def}" - * indicates the characters "def" must be - * preceded by "abc" for a successful match. - * If there is a successful match, "def" will - * be replaced, but not "abc". The final '}' - * is optional, so "abc{def" is equivalent to - * "abc{def}". Another example is "{123}456" - * (or "123}456") in which the literal - * pattern "123" must be followed by "456". - *

- * - *

The output string of a forward or reverse rule consists of - * characters to replace the literal pattern characters. If the - * output string contains the character '|', this is - * taken to indicate the location of the cursor after - * replacement. The cursor is the point in the text at which the - * next replacement, if any, will be applied. The cursor is usually - * placed within the replacement text; however, it can actually be - * placed into the precending or following context by using the - * special character '@'. Examples:

- * - *
- *

a {foo} z > | @ bar; # foo -> bar, move cursor - * before a
- * {foo} xyz > bar @@|; # foo -> bar, cursor between - * y and z

- *
- * - *

UnicodeSet

- * - *

UnicodeSet patterns may appear anywhere that - * makes sense. They may appear in variable definitions. - * Contrariwise, UnicodeSet patterns may themselves - * contain variable references, such as "$a=[a-z];$not_a=[^$a]", - * or "$range=a-z;$ll=[$range]".

- * - *

UnicodeSet patterns may also be embedded directly - * into rule strings. Thus, the following two rules are equivalent:

- * - *
- *

$vowel=[aeiou]; $vowel>'*'; # One way to do this
- * [aeiou]>'*'; - *                # - * Another way

- *
- * - *

See {@link UnicodeSet} for more documentation and examples.

- * - *

Segments

- * - *

Segments of the input string can be matched and copied to the - * output string. This makes certain sets of rules simpler and more - * general, and makes reordering possible. For example:

- * - *
- *

([a-z]) > $1 $1; - *           # - * double lowercase letters
- * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs

- *
- * - *

The segment of the input string to be copied is delimited by - * "(" and ")". Up to - * nine segments may be defined. Segments may not overlap. In the - * output string, "$1" through "$9" - * represent the input string segments, in left-to-right order of - * definition.

- * - *

Anchors

- * - *

Patterns can be anchored to the beginning or the end of the text. This is done with the - * special characters '^' and '$'. For example:

- * - *
- *

^ a   > 'BEG_A';   # match 'a' at start of text
- *   a   > 'A';       # match other instances - * of 'a'
- *   z $ > 'END_Z';   # match 'z' at end of text
- *   z   > 'Z';       # match other instances - * of 'z'

- *
- * - *

It is also possible to match the beginning or the end of the text using a UnicodeSet. - * This is done by including a virtual anchor character '$' at the end of the - * set pattern. Although this is usually the match chafacter for the end anchor, the set will - * match either the beginning or the end of the text, depending on its placement. For - * example:

- * - *
- *

$x = [a-z$];   # match 'a' through 'z' OR anchor
- * $x 1    > 2;   # match '1' after a-z or at the start
- *    3 $x > 4;   # match '3' before a-z or at the end

- *
- * - *

Example

- * - *

The following example rules illustrate many of the features of - * the rule language.

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Rule 1.abc{def}>x|y
Rule 2.xyz>r
Rule 3.yz>q
- * - *

Applying these rules to the string "adefabcdefz" - * yields the following results:

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
|adefabcdefzInitial state, no rules match. Advance - * cursor.
a|defabcdefzStill no match. Rule 1 does not match - * because the preceding context is not present.
ad|efabcdefzStill no match. Keep advancing until - * there is a match...
ade|fabcdefz...
adef|abcdefz...
adefa|bcdefz...
adefab|cdefz...
adefabc|defzRule 1 matches; replace "def" - * with "xy" and back up the cursor - * to before the 'y'.
adefabcx|yzAlthough "xyz" is - * present, rule 2 does not match because the cursor is - * before the 'y', not before the 'x'. - * Rule 3 does match. Replace "yz" - * with "q".
adefabcxq|The cursor is at the end; - * transliteration is complete.
- * - *

The order of rules is significant. If multiple rules may match - * at some point, the first matching rule is applied.

- * - *

Forward and reverse rules may have an empty output string. - * Otherwise, an empty left or right hand side of any statement is a - * syntax error.

- * - *

Single quotes are used to quote any character other than a - * digit or letter. To specify a single quote itself, inside or - * outside of quotes, use two single quotes in a row. For example, - * the rule "'>'>o''clock" changes the - * string ">" to the string "o'clock". - *

- * - *

Notes

- * - *

While a RuleBasedTransliterator is being built, it checks that - * the rules are added in proper order. For example, if the rule - * "a>x" is followed by the rule "ab>y", - * then the second rule will throw an exception. The reason is that - * the second rule can never be triggered, since the first rule - * always matches anything it matches. In other words, the first - * rule masks the second rule.

- * + * built from a set of rules as defined for + * Transliterator::createFromRules(). + * See the C++ class Transliterator documentation for the rule syntax. + * * @author Alan Liu * @internal Use transliterator factory methods instead since this class will be removed in that release. */ diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h index ebb9575a9f5..6b4888145f1 100644 --- a/icu4c/source/i18n/unicode/translit.h +++ b/icu4c/source/i18n/unicode/translit.h @@ -15,10 +15,10 @@ #include "unicode/utypes.h" /** - * \file + * \file * \brief C++ API: Tranforms text from one format to another. */ - + #if !UCONFIG_NO_TRANSLITERATION #include "unicode/uobject.h" @@ -31,7 +31,6 @@ U_NAMESPACE_BEGIN class UnicodeFilter; class UnicodeSet; -class CompoundTransliterator; class TransliteratorParser; class NormalizationTransliterator; class TransliteratorIDParser; @@ -97,18 +96,20 @@ class TransliteratorIDParser; * contents of the buffer may show text being modified as each new * character arrives. * - *

Consider the simple `RuleBasedTransliterator`: - * + *

Consider the simple rule-based Transliterator: + *

  *     th>{theta}
  *     t>{tau}
+ * 
* * When the user types 't', nothing will happen, since the * transliterator is waiting to see if the next character is 'h'. To * remedy this, we introduce the notion of a cursor, marked by a '|' * in the output string: - * + *
  *     t>|{tau}
  *     {tau}h>{theta}
+ * 
* * Now when the user types 't', tau appears, and if the next character * is 'h', the tau changes to a theta. This is accomplished by @@ -130,7 +131,7 @@ class TransliteratorIDParser; * which the transliterator last stopped, either because it reached * the end, or because it required more characters to disambiguate * between possible inputs. The CURSOR can also be - * explicitly set by rules in a RuleBasedTransliterator. + * explicitly set by rules in a rule-based Transliterator. * Any characters before the CURSOR index are frozen; * future keyboard transliteration calls within this input sequence * will not change them. New text is inserted at the @@ -232,6 +233,255 @@ class TransliteratorIDParser; * if the performance of these methods can be improved over the * performance obtained by the default implementations in this class. * + *

Rule syntax + * + *

A set of rules determines how to perform translations. + * Rules within a rule set are separated by semicolons (';'). + * To include a literal semicolon, prefix it with a backslash ('\'). + * Unicode Pattern_White_Space is ignored. + * If the first non-blank character on a line is '#', + * the entire line is ignored as a comment. + * + *

Each set of rules consists of two groups, one forward, and one + * reverse. This is a convention that is not enforced; rules for one + * direction may be omitted, with the result that translations in + * that direction will not modify the source text. In addition, + * bidirectional forward-reverse rules may be specified for + * symmetrical transformations. + * + *

Note: Another description of the Transliterator rule syntax is available in + * section + * Transform Rules Syntax of UTS #35: Unicode LDML. + * The rules are shown there using arrow symbols ← and → and ↔. + * ICU supports both those and the equivalent ASCII symbols < and > and <>. + * + *

Rule statements take one of the following forms: + * + *

+ *
$alefmadda=\\u0622;
+ *
Variable definition. The name on the + * left is assigned the text on the right. In this example, + * after this statement, instances of the left hand name, + * "$alefmadda", will be replaced by + * the Unicode character U+0622. Variable names must begin + * with a letter and consist only of letters, digits, and + * underscores. Case is significant. Duplicate names cause + * an exception to be thrown, that is, variables cannot be + * redefined. The right hand side may contain well-formed + * text of any length, including no text at all ("$empty=;"). + * The right hand side may contain embedded UnicodeSet + * patterns, for example, "$softvowel=[eiyEIY]".
+ *
ai>$alefmadda;
+ *
Forward translation rule. This rule + * states that the string on the left will be changed to the + * string on the right when performing forward + * transliteration.
+ *
ai<$alefmadda;
+ *
Reverse translation rule. This rule + * states that the string on the right will be changed to + * the string on the left when performing reverse + * transliteration.
+ *
+ * + *
+ *
ai<>$alefmadda;
+ *
Bidirectional translation rule. This + * rule states that the string on the right will be changed + * to the string on the left when performing forward + * transliteration, and vice versa when performing reverse + * transliteration.
+ *
+ * + *

Translation rules consist of a match pattern and an output + * string. The match pattern consists of literal characters, + * optionally preceded by context, and optionally followed by + * context. Context characters, like literal pattern characters, + * must be matched in the text being transliterated. However, unlike + * literal pattern characters, they are not replaced by the output + * text. For example, the pattern "abc{def}" + * indicates the characters "def" must be + * preceded by "abc" for a successful match. + * If there is a successful match, "def" will + * be replaced, but not "abc". The final '}' + * is optional, so "abc{def" is equivalent to + * "abc{def}". Another example is "{123}456" + * (or "123}456") in which the literal + * pattern "123" must be followed by "456". + * + *

The output string of a forward or reverse rule consists of + * characters to replace the literal pattern characters. If the + * output string contains the character '|', this is + * taken to indicate the location of the cursor after + * replacement. The cursor is the point in the text at which the + * next replacement, if any, will be applied. The cursor is usually + * placed within the replacement text; however, it can actually be + * placed into the preceding or following context by using the + * special character '@'. Examples: + * + *

+ *     a {foo} z > | @ bar; # foo -> bar, move cursor before a
+ *     {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
+ * 
+ * + *

UnicodeSet + * + *

UnicodeSet patterns may appear anywhere that + * makes sense. They may appear in variable definitions. + * Contrariwise, UnicodeSet patterns may themselves + * contain variable references, such as "$a=[a-z];$not_a=[^$a]", + * or "$range=a-z;$ll=[$range]". + * + *

UnicodeSet patterns may also be embedded directly + * into rule strings. Thus, the following two rules are equivalent: + * + *

+ *     $vowel=[aeiou]; $vowel>'*'; # One way to do this
+ *     [aeiou]>'*'; # Another way
+ * 
+ * + *

See {@link UnicodeSet} for more documentation and examples. + * + *

Segments + * + *

Segments of the input string can be matched and copied to the + * output string. This makes certain sets of rules simpler and more + * general, and makes reordering possible. For example: + * + *

+ *     ([a-z]) > $1 $1; # double lowercase letters
+ *     ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
+ * 
+ * + *

The segment of the input string to be copied is delimited by + * "(" and ")". Up to + * nine segments may be defined. Segments may not overlap. In the + * output string, "$1" through "$9" + * represent the input string segments, in left-to-right order of + * definition. + * + *

Anchors + * + *

Patterns can be anchored to the beginning or the end of the text. This is done with the + * special characters '^' and '$'. For example: + * + *

+ *   ^ a   > 'BEG_A';   # match 'a' at start of text
+ *     a   > 'A'; # match other instances of 'a'
+ *     z $ > 'END_Z';   # match 'z' at end of text
+ *     z   > 'Z';       # match other instances of 'z'
+ * 
+ * + *

It is also possible to match the beginning or the end of the text using a UnicodeSet. + * This is done by including a virtual anchor character '$' at the end of the + * set pattern. Although this is usually the match character for the end anchor, the set will + * match either the beginning or the end of the text, depending on its placement. For + * example: + * + *

+ *   $x = [a-z$];   # match 'a' through 'z' OR anchor
+ *   $x 1    > 2;   # match '1' after a-z or at the start
+ *      3 $x > 4;   # match '3' before a-z or at the end
+ * 
+ * + *

Example + * + *

The following example rules illustrate many of the features of + * the rule language. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Rule 1.abc{def}>x|y
Rule 2.xyz>r
Rule 3.yz>q
+ * + *

Applying these rules to the string "adefabcdefz" + * yields the following results: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
|adefabcdefzInitial state, no rules match. Advance + * cursor.
a|defabcdefzStill no match. Rule 1 does not match + * because the preceding context is not present.
ad|efabcdefzStill no match. Keep advancing until + * there is a match...
ade|fabcdefz...
adef|abcdefz...
adefa|bcdefz...
adefab|cdefz...
adefabc|defzRule 1 matches; replace "def" + * with "xy" and back up the cursor + * to before the 'y'.
adefabcx|yzAlthough "xyz" is + * present, rule 2 does not match because the cursor is + * before the 'y', not before the 'x'. + * Rule 3 does match. Replace "yz" + * with "q".
adefabcxq|The cursor is at the end; + * transliteration is complete.
+ * + *

The order of rules is significant. If multiple rules may match + * at some point, the first matching rule is applied. + * + *

Forward and reverse rules may have an empty output string. + * Otherwise, an empty left or right hand side of any statement is a + * syntax error. + * + *

Single quotes are used to quote any character other than a + * digit or letter. To specify a single quote itself, inside or + * outside of quotes, use two single quotes in a row. For example, + * the rule "'>'>o''clock" changes the + * string ">" to the string "o'clock". + * + *

Notes + * + *

While a Transliterator is being built from rules, it checks that + * the rules are added in proper order. For example, if the rule + * "a>x" is followed by the rule "ab>y", + * then the second rule will throw an exception. The reason is that + * the second rule can never be triggered, since the first rule + * always matches anything it matches. In other words, the first + * rule masks the second rule. + * * @author Alan Liu * @stable ICU 2.0 */ @@ -627,7 +877,7 @@ public: /** * Transliterate a substring of text, as specified by index, taking filters * into account. This method is for subclasses that need to delegate to - * another transliterator, such as CompoundTransliterator. + * another transliterator. * @param text the text to be transliterated * @param index the position indices * @param incremental if TRUE, then assume more characters may be inserted @@ -841,17 +1091,19 @@ public: /** * Returns a Transliterator object constructed from - * the given rule string. This will be a RuleBasedTransliterator, + * the given rule string. This will be a rule-based Transliterator, * if the rule string contains only rules, or a - * CompoundTransliterator, if it contains ID blocks, or a - * NullTransliterator, if it contains ID blocks which parse as + * compound Transliterator, if it contains ID blocks, or a + * null Transliterator, if it contains ID blocks which parse as * empty for the given direction. + * * @param ID the id for the transliterator. * @param rules rules, separated by ';' * @param dir either FORWARD or REVERSE. - * @param parseError Struct to recieve information on position + * @param parseError Struct to receive information on position * of error if an error is encountered * @param status Output param set to success/failure code. 
+ * @return a newly created Transliterator * @stable ICU 2.0 */ static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID, diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c index 4454c67274d..1d1805196f2 100644 --- a/icu4c/source/test/cintltst/cloctst.c +++ b/icu4c/source/test/cintltst/cloctst.c @@ -226,6 +226,7 @@ void addLocaleTest(TestNode** root) TESTCASE(TestKeywordVariants); TESTCASE(TestKeywordVariantParsing); TESTCASE(TestCanonicalization); + TESTCASE(TestCanonicalizationBuffer); TESTCASE(TestKeywordSet); TESTCASE(TestKeywordSetError); TESTCASE(TestDisplayKeywords); @@ -2251,6 +2252,42 @@ static void TestCanonicalization(void) } } +static void TestCanonicalizationBuffer(void) +{ + UErrorCode status = U_ZERO_ERROR; + char buffer[256]; + + // ULOC_FULLNAME_CAPACITY == 157 (uloc.h) + static const char name[] = + "zh@x" + "=foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz" + "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz" + "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz" + "-foo-barz" + ; + static const size_t len = sizeof name - 1; // Without NUL terminator. 
+ + int32_t reslen = uloc_canonicalize(name, buffer, len, &status); + + if (U_FAILURE(status)) { + log_err("FAIL: uloc_canonicalize(%s) => %s, expected !U_FAILURE()\n", + name, u_errorName(status)); + return; + } + + if (reslen != len) { + log_err("FAIL: uloc_canonicalize(%s) => \"%i\", expected \"%u\"\n", + name, reslen, len); + return; + } + + if (uprv_strncmp(name, buffer, len) != 0) { + log_err("FAIL: uloc_canonicalize(%s) => \"%.*s\", expected \"%s\"\n", + name, reslen, buffer, name); + return; + } +} + static void TestDisplayKeywords(void) { int32_t i; diff --git a/icu4c/source/test/cintltst/cloctst.h b/icu4c/source/test/cintltst/cloctst.h index be1896a0c3f..a2ce892ec23 100644 --- a/icu4c/source/test/cintltst/cloctst.h +++ b/icu4c/source/test/cintltst/cloctst.h @@ -84,6 +84,7 @@ static void TestDisplayNames(void); static void doTestDisplayNames(const char* inLocale, int32_t compareIndex); static void TestCanonicalization(void); + static void TestCanonicalizationBuffer(void); static void TestDisplayKeywords(void); diff --git a/icu4c/source/test/intltest/cpdtrtst.h b/icu4c/source/test/intltest/cpdtrtst.h index e723619ad36..1733f1a6e42 100644 --- a/icu4c/source/test/intltest/cpdtrtst.h +++ b/icu4c/source/test/intltest/cpdtrtst.h @@ -20,6 +20,7 @@ #if !UCONFIG_NO_TRANSLITERATION #include "unicode/translit.h" +#include "cpdtrans.h" #include "intltest.h" /** diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp index d3fc4e286c0..e375c0c5a55 100644 --- a/icu4c/source/test/intltest/loctest.cpp +++ b/icu4c/source/test/intltest/loctest.cpp @@ -252,6 +252,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c TESTCASE_AUTO(TestToLanguageTag); TESTCASE_AUTO(TestMoveAssign); TESTCASE_AUTO(TestMoveCtor); + TESTCASE_AUTO(TestBug13417VeryLongLanguageTag); TESTCASE_AUTO_END; } @@ -3125,3 +3126,23 @@ void LocaleTest::TestMoveCtor() { assertEquals("variant", l7.getVariant(), l8.getVariant()); 
assertEquals("bogus", l7.isBogus(), l8.isBogus()); } + +void LocaleTest::TestBug13417VeryLongLanguageTag() { + IcuTestErrorCode status(*this, "TestBug13417VeryLongLanguageTag()"); + + static const char tag[] = + "zh-x" + "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz" + "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz" + "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz" + "-foo-bar-baz-fxx" + ; + + Locale l = Locale::forLanguageTag(tag, status); + status.errIfFailureAndReset("\"%s\"", tag); + assertTrue("!l.isBogus()", !l.isBogus()); + + std::string result = l.toLanguageTag(status); + status.errIfFailureAndReset("\"%s\"", l.getName()); + assertEquals("equals", tag, result.c_str()); +} diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h index d165cae8932..2a83be51a05 100644 --- a/icu4c/source/test/intltest/loctest.h +++ b/icu4c/source/test/intltest/loctest.h @@ -124,6 +124,8 @@ public: void TestMoveAssign(); void TestMoveCtor(); + void TestBug13417VeryLongLanguageTag(); + private: void _checklocs(const char* label, const char* req, diff --git a/icu4j/build.xml b/icu4j/build.xml index 2ceb623ad4a..a939d12724a 100644 --- a/icu4j/build.xml +++ b/icu4j/build.xml @@ -1729,6 +1729,32 @@ + + + + + + + + + + + + + + + + + + + + + computeMaxExpansions(CollationData data) { - Map maxExpansions = new HashMap(); + Map maxExpansions = new HashMap<>(); MaxExpSink sink = new MaxExpSink(maxExpansions); new ContractionsAndExpansions(null, null, sink, true).forData(data); return maxExpansions; @@ -692,11 +692,9 @@ public final class CollationElementIterator /** * Mock implementation of hashCode(). This implementation always returns a constant * value. When Java assertion is enabled, this method triggers an assertion failure. - * @internal - * @deprecated This API is ICU internal only. 
+ * @stable ICU 2.8 */ @Override - @Deprecated public int hashCode() { assert false : "hashCode not designed"; return 42; diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java index 79889c4296f..fdbbf36f370 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java @@ -329,7 +329,7 @@ public abstract class Collator implements Comparator, Freezable, Freezable, Freezable values = new LinkedList(); + LinkedList values = new LinkedList<>(); boolean hasDefault = false; @Override diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java index ea597a7edf8..29a75a9e029 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java @@ -112,6 +112,8 @@ public final class CharacterProperties { * @return the property as a set * @see UProperty * @see UCharacter#hasBinaryProperty + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. */ public static final UnicodeSet getBinaryPropertySet(int property) { if (property < 0 || UProperty.BINARY_LIMIT <= property) { @@ -141,6 +143,8 @@ public final class CharacterProperties { * @return the property as a map * @see UProperty * @see UCharacter#getIntPropertyValue + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
*/ public static final CodePointMap getIntPropertyMap(int property) { if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java index fa0322bffed..d5cccc49622 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java @@ -175,6 +175,7 @@ public abstract class NumberRangeFormatter { * * @return An {@link UnlocalizedNumberRangeFormatter}, to be used for chaining. * @draft ICU 63 + * @provisional This API might change or be removed in a future release. */ public static UnlocalizedNumberRangeFormatter with() { return BASE; @@ -188,6 +189,7 @@ public abstract class NumberRangeFormatter { * The locale from which to load formats and symbols for number range formatting. * @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining. * @draft ICU 63 + * @provisional This API might change or be removed in a future release. */ public static LocalizedNumberRangeFormatter withLocale(Locale locale) { return BASE.locale(locale); @@ -201,9 +203,15 @@ public abstract class NumberRangeFormatter { * The locale from which to load formats and symbols for number range formatting. * @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining. * @draft ICU 63 + * @provisional This API might change or be removed in a future release. 
*/ public static LocalizedNumberRangeFormatter withLocale(ULocale locale) { return BASE.locale(locale); } + /** + * Private constructor - this class is not designed for instantiation + */ + private NumberRangeFormatter() { + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java b/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java index 974bd7cdb02..375b535b90e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java @@ -408,10 +408,10 @@ public abstract class Precision implements Cloneable { } /** - * @internal - * @deprecated This API is ICU internal only. + * {@inheritDoc} + * @draft ICU 62 + * @provisional This API might change or be removed in a future release. */ - @Deprecated @Override public Object clone() { try { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java b/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java index bd0c723b859..0f2f0e7d21a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java @@ -93,10 +93,9 @@ public class ScientificNotation extends Notation implements Cloneable { } /** - * @internal - * @deprecated This API is ICU internal only. + * @draft ICU 60 + * @provisional This API might change or be removed in a future release. 
*/ - @Deprecated @Override public Object clone() { try { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java index afc0c2ec4ea..25c0e1c2e4a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java @@ -202,7 +202,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable { //other.pluralRules = pluralRules; // clone content //other.pluralCountToCurrencyUnitPattern = pluralCountToCurrencyUnitPattern; - other.pluralCountToCurrencyUnitPattern = new HashMap(); + other.pluralCountToCurrencyUnitPattern = new HashMap<>(); for (String pluralCount : pluralCountToCurrencyUnitPattern.keySet()) { String currencyPattern = pluralCountToCurrencyUnitPattern.get(pluralCount); other.pluralCountToCurrencyUnitPattern.put(pluralCount, currencyPattern); @@ -231,11 +231,9 @@ public class CurrencyPluralInfo implements Cloneable, Serializable { /** * Override hashCode * - * @internal - * @deprecated This API is ICU internal only. 
+ * @stable ICU 4.2 */ @Override - @Deprecated public int hashCode() { return pluralCountToCurrencyUnitPattern.hashCode() ^ pluralRules.hashCode() @@ -283,7 +281,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable { } private void setupCurrencyPluralPattern(ULocale uloc) { - pluralCountToCurrencyUnitPattern = new HashMap(); + pluralCountToCurrencyUnitPattern = new HashMap<>(); String numberStylePattern = NumberFormat.getPattern(uloc, NumberFormat.NUMBERSTYLE); // Split the number style pattern into pos and neg if applicable diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java index baa79b09695..a72da77a473 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java @@ -273,10 +273,8 @@ public class DateIntervalInfo implements Cloneable, Freezable, /** * {@inheritDoc} - * @internal - * @deprecated This API is ICU internal only. + * @stable ICU 4.0 */ - @Deprecated @Override public String toString() { return "{first=«" + fIntervalPatternFirstPart + "», second=«" + fIntervalPatternSecondPart + "», reversed:" + fFirstDateInPtnIsLaterDate + "}"; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java index b97d54f0079..89b46478744 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java @@ -821,7 +821,8 @@ public final class Edits { /** * A string representation of the current edit represented by the iterator for debugging. You * should not depend on the contents of the return string; it may change over time. - * @internal + * @return a string representation of the object. 
+ * @stable ICU 59 */ @Override public String toString() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java index 175d92e8d40..df13519c306 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java @@ -126,9 +126,9 @@ public class MeasureFormat extends UFormat { private final transient LocalizedNumberFormatter numberFormatter; - private static final SimpleCache localeToNumericDurationFormatters = new SimpleCache(); + private static final SimpleCache localeToNumericDurationFormatters = new SimpleCache<>(); - private static final Map hmsTo012 = new HashMap(); + private static final Map hmsTo012 = new HashMap<>(); static { hmsTo012.put(MeasureUnit.HOUR, 0); @@ -486,7 +486,7 @@ public class MeasureFormat extends UFormat { * Two MeasureFormats, a and b, are equal if and only if they have the same formatWidth, locale, and * equal number formats. * - * @stable ICU 53 + * @stable ICU 3.0 */ @Override public final boolean equals(Object other) { @@ -506,7 +506,7 @@ public class MeasureFormat extends UFormat { /** * {@inheritDoc} * - * @stable ICU 53 + * @stable ICU 3.0 */ @Override public final int hashCode() { @@ -997,7 +997,7 @@ public class MeasureFormat extends UFormat { this.formatWidth = width; this.numberFormat = numberFormat; this.subClass = subClass; - this.keyValues = new HashMap(); + this.keyValues = new HashMap<>(); } // Must have public constructor, to enable Externalizable @@ -1070,7 +1070,7 @@ public class MeasureFormat extends UFormat { return values[ordinal]; } - private static final Map localeIdToRangeFormat = new ConcurrentHashMap(); + private static final Map localeIdToRangeFormat = new ConcurrentHashMap<>(); /** * Return a formatter (compiled SimpleFormatter pattern) for a range, such as "{0}–{1}". 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java index d384c89426d..2a6ab6f9f45 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java @@ -1084,7 +1084,7 @@ public class PluralRules implements Serializable { SampleType sampleType2; boolean bounded2 = true; boolean haveBound = false; - Set samples2 = new LinkedHashSet(); + Set samples2 = new LinkedHashSet<>(); if (source.startsWith("integer")) { sampleType2 = SampleType.INTEGER; @@ -1215,7 +1215,7 @@ public class PluralRules implements Serializable { static final UnicodeSet BREAK_AND_KEEP = new UnicodeSet('!', '!', '%', '%', ',', ',', '.', '.', '=', '=').freeze(); static String[] split(String source) { int last = -1; - List result = new ArrayList(); + List result = new ArrayList<>(); for (int i = 0; i < source.length(); ++i) { char ch = source.charAt(i); if (BREAK_AND_IGNORE.contains(ch)) { @@ -1334,7 +1334,7 @@ public class PluralRules implements Serializable { t = nextToken(tokens, x++, condition); } - List valueList = new ArrayList(); + List valueList = new ArrayList<>(); // the token t is always one item ahead while (true) { @@ -1756,10 +1756,9 @@ public class PluralRules implements Serializable { } /** - * @internal - * @deprecated This API is ICU internal only. 
+ * {@inheritDoc} + * @stable ICU 3.8 */ - @Deprecated @Override public int hashCode() { return keyword.hashCode() ^ constraint.hashCode(); @@ -1773,7 +1772,7 @@ public class PluralRules implements Serializable { private static class RuleList implements Serializable { private boolean hasExplicitBoundingInfo = false; private static final long serialVersionUID = 1; - private final List rules = new ArrayList(); + private final List rules = new ArrayList<>(); public RuleList addRule(Rule nextRule) { String keyword = nextRule.getKeyword(); @@ -1821,7 +1820,7 @@ public class PluralRules implements Serializable { } public Set getKeywords() { - Set result = new LinkedHashSet(); + Set result = new LinkedHashSet<>(); for (Rule rule : rules) { result.add(rule.getKeyword()); } @@ -2020,10 +2019,9 @@ public class PluralRules implements Serializable { } /** - * @internal - * @deprecated This API is ICU internal only. + * {@inheritDoc} + * @stable ICU 3.8 */ - @Deprecated @Override public int hashCode() { return rules.hashCode(); @@ -2175,7 +2173,7 @@ public class PluralRules implements Serializable { if (!keywords.contains(keyword)) { return null; } - Set result = new TreeSet(); + Set result = new TreeSet<>(); if (rules.hasExplicitBoundingInfo) { FixedDecimalSamples samples = rules.getDecimalSamples(keyword, sampleType); @@ -2420,7 +2418,7 @@ public class PluralRules implements Serializable { // Compute if the quick test is insufficient. 
- HashSet subtractedSet = new HashSet(values); + HashSet subtractedSet = new HashSet<>(values); for (Double explicit : explicits) { subtractedSet.remove(explicit - offset); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java index 719fefb0771..dbab3142162 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java @@ -941,13 +941,10 @@ public class RuleBasedNumberFormat extends NumberFormat { } /** - * Mock implementation of hashCode(). This implementation always returns a constant - * value. When Java assertion is enabled, this method triggers an assertion failure. - * @internal - * @deprecated This API is ICU internal only. + * {@inheritDoc} + * @stable ICU 2.0 */ @Override - @Deprecated public int hashCode() { return super.hashCode(); } @@ -1731,7 +1728,7 @@ public class RuleBasedNumberFormat extends NumberFormat { // our rule list is an array of the appropriate size ruleSets = new NFRuleSet[numRuleSets]; - ruleSetsMap = new HashMap(numRuleSets * 2 + 1); + ruleSetsMap = new HashMap<>(numRuleSets * 2 + 1); defaultRuleSet = null; // Used to count the number of public rule sets @@ -1844,7 +1841,7 @@ public class RuleBasedNumberFormat extends NumberFormat { if (localizations != null) { publicRuleSetNames = localizations[0].clone(); - Map m = new HashMap(); + Map m = new HashMap<>(); for (int i = 1; i < localizations.length; ++i) { String[] data = localizations[i]; String loc = data[0]; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java index 53ab8b0d80c..ca1015d7d13 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java @@ -506,7 +506,7 @@ public class SpoofChecker { SpoofData 
fSpoofData; final UnicodeSet fAllowedCharsSet = new UnicodeSet(0, 0x10ffff); // The UnicodeSet of allowed characters. // for this Spoof Checker. Defaults to all chars. - final Set fAllowedLocales = new LinkedHashSet(); // The list of allowed locales. + final Set fAllowedLocales = new LinkedHashSet<>(); // The list of allowed locales. private RestrictionLevel fRestrictionLevel; /** @@ -567,7 +567,7 @@ public class SpoofChecker { result.fSpoofData = this.fSpoofData; result.fAllowedCharsSet = (UnicodeSet) (this.fAllowedCharsSet.clone()); result.fAllowedCharsSet.freeze(); - result.fAllowedLocales = new HashSet(this.fAllowedLocales); + result.fAllowedLocales = new HashSet<>(this.fAllowedLocales); result.fRestrictionLevel = this.fRestrictionLevel; return result; } @@ -734,7 +734,7 @@ public class SpoofChecker { * @stable ICU 54 */ public Builder setAllowedJavaLocales(Set locales) { - HashSet ulocales = new HashSet(locales.size()); + HashSet ulocales = new HashSet<>(locales.size()); for (Locale locale : locales) { ulocales.add(ULocale.forLocale(locale)); } @@ -848,10 +848,10 @@ public class SpoofChecker { private int fLineNum; ConfusabledataBuilder() { - fTable = new Hashtable(); + fTable = new Hashtable<>(); fKeySet = new UnicodeSet(); - fKeyVec = new ArrayList(); - fValueVec = new ArrayList(); + fKeyVec = new ArrayList<>(); + fValueVec = new ArrayList<>(); stringPool = new SPUStringPool(); } @@ -1093,8 +1093,8 @@ public class SpoofChecker { // combination of a uhash and a Vector. 
private static class SPUStringPool { public SPUStringPool() { - fVec = new Vector(); - fHash = new Hashtable(); + fVec = new Vector<>(); + fHash = new Hashtable<>(); } public int size() { @@ -1179,7 +1179,7 @@ public class SpoofChecker { * @stable ICU 54 */ public Set getAllowedJavaLocales() { - HashSet locales = new HashSet(fAllowedLocales.size()); + HashSet locales = new HashSet<>(fAllowedLocales.size()); for (ULocale uloc : fAllowedLocales) { locales.add(uloc.toLocale()); } @@ -1535,7 +1535,7 @@ public class SpoofChecker { * @param other * the SpoofChecker being compared with. * @return true if the two SpoofCheckers are equal. - * @stable ICU 58 + * @stable ICU 4.6 */ @Override public boolean equals(Object other) { @@ -1565,7 +1565,7 @@ public class SpoofChecker { /** * Overrides {@link Object#hashCode()}. - * @stable ICU 58 + * @stable ICU 4.6 */ @Override public int hashCode() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java index 1a73c862098..2b2a91124ad 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java @@ -143,11 +143,10 @@ public class StringPrepParseException extends ParseException { /** * Mock implementation of hashCode(). This implementation always returns a constant * value. When Java assertion is enabled, this method triggers an assertion failure. - * @internal - * @deprecated This API is ICU internal only. + * @return a hash code value for this object. 
+ * @stable ICU 2.8 */ @Override - @Deprecated public int hashCode() { assert false : "hashCode not designed"; return 42; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java index 70a974ea85f..08c7dc4e18e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java @@ -343,7 +343,7 @@ public class TimeUnitFormat extends MeasureFormat { format = NumberFormat.getNumberInstance(locale); } pluralRules = PluralRules.forLocale(locale); - timeUnitToCountToPatterns = new HashMap>(); + timeUnitToCountToPatterns = new HashMap<>(); Set pluralKeywords = pluralRules.getKeywords(); setup("units/duration", timeUnitToCountToPatterns, FULL_NAME, pluralKeywords); setup("unitsShort/duration", timeUnitToCountToPatterns, ABBREVIATED_NAME, pluralKeywords); @@ -400,7 +400,7 @@ public class TimeUnitFormat extends MeasureFormat { Map countToPatterns = timeUnitToCountToPatterns.get(timeUnit); if (countToPatterns == null) { - countToPatterns = new TreeMap(); + countToPatterns = new TreeMap<>(); timeUnitToCountToPatterns.put(timeUnit, countToPatterns); } @@ -467,7 +467,7 @@ public class TimeUnitFormat extends MeasureFormat { final TimeUnit timeUnit = timeUnits[i]; Map countToPatterns = timeUnitToCountToPatterns.get(timeUnit); if (countToPatterns == null) { - countToPatterns = new TreeMap(); + countToPatterns = new TreeMap<>(); timeUnitToCountToPatterns.put(timeUnit, countToPatterns); } for (String pluralCount : keywords) { @@ -556,8 +556,7 @@ public class TimeUnitFormat extends MeasureFormat { // MeasureFormat /** - * @internal - * @deprecated This API is ICU internal only. 
+ * @deprecated ICU 53 see {@link MeasureFormat} */ @Deprecated @Override diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java index ba1fd42a24c..e39fdd07cfe 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java @@ -26,7 +26,7 @@ import com.ibm.icu.impl.Utility; public class ByteArrayWrapper implements Comparable { // public data member ------------------------------------------------ - + /** * Internal byte array. * @stable ICU 2.8 @@ -34,16 +34,16 @@ public class ByteArrayWrapper implements Comparable public byte[] bytes; /** - * Size of the internal byte array used. - * Different from bytes.length, size will be <= bytes.length. + * Size of the internal byte array used. + * Different from bytes.length, size will be <= bytes.length. * Semantics of size is similar to java.util.Vector.size(). * @stable ICU 2.8 */ public int size; - + // public constructor ------------------------------------------------ - /** + /** * Construct a new ByteArrayWrapper with no data. * @stable ICU 2.8 */ @@ -103,15 +103,15 @@ public class ByteArrayWrapper implements Comparable // public methods ---------------------------------------------------- /** - * Ensure that the internal byte array is at least of length capacity. - * If the byte array is null or its length is less than capacity, a new - * byte array of length capacity will be allocated. - * The contents of the array (between 0 and size) remain unchanged. + * Ensure that the internal byte array is at least of length capacity. + * If the byte array is null or its length is less than capacity, a new + * byte array of length capacity will be allocated. + * The contents of the array (between 0 and size) remain unchanged. * @param capacity minimum length of internal byte array. 
* @return this ByteArrayWrapper * @stable ICU 3.2 */ - public ByteArrayWrapper ensureCapacity(int capacity) + public ByteArrayWrapper ensureCapacity(int capacity) { if (bytes == null || bytes.length < capacity) { byte[] newbytes = new byte[capacity]; @@ -122,11 +122,11 @@ public class ByteArrayWrapper implements Comparable } return this; } - + /** - * Set the internal byte array from offset 0 to (limit - start) with the - * contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new - * byte array of length (limit - start) will be allocated. + * Set the internal byte array from offset 0 to (limit - start) with the + * contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new + * byte array of length (limit - start) will be allocated. * This resets the size of the internal byte array to (limit - start). * @param src source byte array to copy from * @param start start offset of src to copy from @@ -134,15 +134,15 @@ public class ByteArrayWrapper implements Comparable * @return this ByteArrayWrapper * @stable ICU 3.2 */ - public final ByteArrayWrapper set(byte[] src, int start, int limit) + public final ByteArrayWrapper set(byte[] src, int start, int limit) { size = 0; append(src, start, limit); return this; } - + /* - public final ByteArrayWrapper get(byte[] target, int start, int limit) + public final ByteArrayWrapper get(byte[] target, int start, int limit) { int len = limit - start; if (len > size) throw new IllegalArgumentException("limit too long"); @@ -152,7 +152,7 @@ public class ByteArrayWrapper implements Comparable */ /** - * Appends the internal byte array from offset size with the + * Appends the internal byte array from offset size with the * contents of src from offset start to limit. This increases the size of * the internal byte array to (size + limit - start). 
* @param src source byte array to copy from @@ -161,7 +161,7 @@ public class ByteArrayWrapper implements Comparable * @return this ByteArrayWrapper * @stable ICU 3.2 */ - public final ByteArrayWrapper append(byte[] src, int start, int limit) + public final ByteArrayWrapper append(byte[] src, int start, int limit) { int len = limit - start; ensureCapacity(size + len); @@ -171,7 +171,7 @@ public class ByteArrayWrapper implements Comparable } /* - public final ByteArrayWrapper append(ByteArrayWrapper other) + public final ByteArrayWrapper append(ByteArrayWrapper other) { return append(other.bytes, 0, other.size); } @@ -190,13 +190,14 @@ public class ByteArrayWrapper implements Comparable size = 0; return result; } - + // Boilerplate ---------------------------------------------------- - + /** * Returns string value for debugging - * @stable ICU 3.2 + * @stable ICU 2.8 */ + @Override public String toString() { StringBuilder result = new StringBuilder(); for (int i = 0; i < size; ++i) { @@ -210,8 +211,9 @@ public class ByteArrayWrapper implements Comparable * Return true if the bytes in each wrapper are equal. * @param other the object to compare to. * @return true if the two objects are equal. - * @stable ICU 3.2 + * @stable ICU 2.8 */ + @Override public boolean equals(Object other) { if (this == other) return true; if (other == null) return false; @@ -231,8 +233,9 @@ public class ByteArrayWrapper implements Comparable /** * Return the hashcode. * @return the hashcode. - * @stable ICU 3.2 + * @stable ICU 2.8 */ + @Override public int hashCode() { int result = bytes.length; for (int i = 0; i < size; ++i) { @@ -249,6 +252,7 @@ public class ByteArrayWrapper implements Comparable * @throws ClassCastException if the other object is not a ByteArrayWrapper * @stable ICU 4.4 */ + @Override public int compareTo(ByteArrayWrapper other) { if (this == other) return 0; int minSize = size < other.size ? 
size : other.size; @@ -259,11 +263,11 @@ public class ByteArrayWrapper implements Comparable } return size - other.size; } - + // private methods ----------------------------------------------------- - + /** - * Copies the contents of src byte array from offset srcoff to the + * Copies the contents of src byte array from offset srcoff to the * target of tgt byte array at the offset tgtoff. * @param src source byte array to copy from * @param srcoff start offset of src to copy from @@ -271,15 +275,15 @@ public class ByteArrayWrapper implements Comparable * @param tgtoff start offset of tgt to copy to * @param length size of contents to copy */ - private static final void copyBytes(byte[] src, int srcoff, byte[] tgt, + private static final void copyBytes(byte[] src, int srcoff, byte[] tgt, int tgtoff, int length) { if (length < 64) { for (int i = srcoff, n = tgtoff; -- length >= 0; ++ i, ++ n) { tgt[n] = src[i]; } - } + } else { System.arraycopy(src, srcoff, tgt, tgtoff, length); } - } + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java b/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java index ff0ba2957b8..8ecf7c32dbe 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java @@ -17,35 +17,35 @@ import com.ibm.icu.lang.UCharacter; * @stable ICU 2.0 */ public class CaseInsensitiveString { - + private String string; private int hash = 0; - + private String folded = null; - + private static String foldCase(String foldee) { return UCharacter.foldCase(foldee, true); } - + private void getFolded() { if (folded == null) { folded = foldCase(string); } } - + /** * Constructs an CaseInsentiveString object from the given string - * @param s The string to construct this object from + * @param s The string to construct this object from * @stable ICU 2.0 */ public CaseInsensitiveString(String s) { string = s; } /** - * 
returns the underlying string + * returns the underlying string * @return String * @stable ICU 2.0 */ @@ -53,10 +53,11 @@ public class CaseInsensitiveString { return string; } /** - * Compare the object with this - * @param o Object to compare this object with + * Compare the object with this + * @param o Object to compare this object with * @stable ICU 2.0 */ + @Override public boolean equals(Object o) { if (o == null) { return false; @@ -72,26 +73,29 @@ public class CaseInsensitiveString { } return false; } - + /** * Returns the hashCode of this object * @return int hashcode * @stable ICU 2.0 */ + @Override public int hashCode() { getFolded(); - + if (hash == 0) { hash = folded.hashCode(); } - + return hash; } - + /** * Overrides superclass method - * @stable ICU 3.6 + * @return a string representation of the object. + * @stable ICU 2.0 */ + @Override public String toString() { return string; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java b/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java index ffc60a3434b..7277053bd8a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java @@ -316,6 +316,15 @@ public abstract class CodePointMap implements Iterable { public final int getValue() { return value; } } + /** + * Protected no-args constructor. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + protected CodePointMap() { + } + /** * Returns the value for a code point as stored in the map, with range checking. * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF. 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java b/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java index c6351b4edf1..caf027103bf 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java @@ -351,31 +351,43 @@ public class JapaneseCalendar extends GregorianCalendar { /** * @stable ICU 2.8 */ - static public final int CURRENT_ERA = ERA_RULES.getCurrentEraIndex(); + static public final int CURRENT_ERA; /** * Constant for the era starting on Sept. 8, 1868 AD. * @stable ICU 2.8 */ - static public final int MEIJI = 232; + static public final int MEIJI; /** * Constant for the era starting on July 30, 1912 AD. * @stable ICU 2.8 */ - static public final int TAISHO = 233; + static public final int TAISHO; /** * Constant for the era starting on Dec. 25, 1926 AD. * @stable ICU 2.8 */ - static public final int SHOWA = 234; + static public final int SHOWA; /** * Constant for the era starting on Jan. 7, 1989 AD. * @stable ICU 2.8 */ - static public final int HEISEI = 235; + static public final int HEISEI; + + // These era constants are initialized in a static initializer block + // to prevent javac from inlining their values into consumer code. + // This preserves binary compatibility across versions even + // if these values change. + static { + MEIJI = 232; + TAISHO = 233; + SHOWA = 234; + HEISEI = 235; + CURRENT_ERA = ERA_RULES.getCurrentEraIndex(); + } /** * Override GregorianCalendar.
We should really handle YEAR_WOY and diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java b/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java index a4dedc72d09..5fb8e0bac1b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java @@ -44,7 +44,7 @@ public class MeasureUnit implements Serializable { // All access to the cache or cacheIsPopulated flag must be synchronized on class MeasureUnit, // i.e. from synchronized static methods. Beware of non-static methods. private static final Map> cache - = new HashMap>(); + = new HashMap<>(); private static boolean cacheIsPopulated = false; /** @@ -95,7 +95,7 @@ public class MeasureUnit implements Serializable { /** * {@inheritDoc} * - * @stable ICU 53 + * @stable ICU 3.0 */ @Override public int hashCode() { @@ -105,7 +105,7 @@ public class MeasureUnit implements Serializable { /** * {@inheritDoc} * - * @stable ICU 53 + * @stable ICU 3.0 */ @Override public boolean equals(Object rhs) { @@ -122,7 +122,7 @@ public class MeasureUnit implements Serializable { /** * {@inheritDoc} * - * @stable ICU 53 + * @stable ICU 3.0 */ @Override public String toString() { @@ -152,7 +152,7 @@ public class MeasureUnit implements Serializable { // flexibility for implementation. // Use CollectionSet instead of HashSet for better performance. return units == null ? 
Collections.emptySet() - : Collections.unmodifiableSet(new CollectionSet(units.values())); + : Collections.unmodifiableSet(new CollectionSet<>(units.values())); } /** @@ -161,8 +161,8 @@ public class MeasureUnit implements Serializable { * @stable ICU 53 */ public synchronized static Set getAvailable() { - Set result = new HashSet(); - for (String type : new HashSet(MeasureUnit.getAvailableTypes())) { + Set result = new HashSet<>(); + for (String type : new HashSet<>(MeasureUnit.getAvailableTypes())) { for (MeasureUnit unit : MeasureUnit.getAvailable(type)) { result.add(unit); } @@ -348,7 +348,7 @@ public class MeasureUnit implements Serializable { protected synchronized static MeasureUnit addUnit(String type, String unitName, Factory factory) { Map tmp = cache.get(type); if (tmp == null) { - cache.put(type, tmp = new HashMap()); + cache.put(type, tmp = new HashMap<>()); } else { // "intern" the type by setting to first item's type. type = tmp.entrySet().iterator().next().getValue().type; @@ -1184,7 +1184,7 @@ public class MeasureUnit implements Serializable { public static final MeasureUnit TEASPOON = MeasureUnit.internalGetInstance("volume", "teaspoon"); private static HashMap, MeasureUnit>unitPerUnitToSingleUnit = - new HashMap, MeasureUnit>(); + new HashMap<>(); static { unitPerUnitToSingleUnit.put(Pair.of(MeasureUnit.LITER, MeasureUnit.KILOMETER), MeasureUnit.LITER_PER_KILOMETER); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java index 94dc82292af..09f47ff99f3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java @@ -612,7 +612,7 @@ public class SimpleTimeZone extends BasicTimeZone { /** * Returns a string representation of this object. 
* @return a string representation of this object - * @stable ICU 3.6 + * @stable ICU 2.0 */ @Override public String toString() { @@ -1140,7 +1140,7 @@ public class SimpleTimeZone extends BasicTimeZone { /** * Overrides equals. * @return true if obj is a SimpleTimeZone equivalent to this - * @stable ICU 3.6 + * @stable ICU 2.0 */ @Override public boolean equals(Object obj){ @@ -1180,7 +1180,8 @@ public class SimpleTimeZone extends BasicTimeZone { /** * Overrides hashCode. - * @stable ICU 3.6 + * @return a hash code value for this object. + * @stable ICU 2.0 */ @Override public int hashCode(){ @@ -1208,7 +1209,7 @@ public class SimpleTimeZone extends BasicTimeZone { /** * Overrides clone. - * @stable ICU 3.6 + * @stable ICU 2.0 */ @Override public Object clone() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java index c7a2e8d545b..cea2babe5c7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java @@ -1052,7 +1052,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezabletrue if this object is the same as the obj argument; false otherwise. + * @stable ICU 2.0 */ @Override public boolean equals(Object obj){ @@ -1063,7 +1064,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable { /** * This is for compatibility with Locale-- in actuality, since ULocale is * immutable, there is no reason to clone it, so this API returns 'this'. - * @stable ICU 3.0 + * @stable ICU 2.8 */ @Override public Object clone() { @@ -677,7 +677,8 @@ public final class ULocale implements Serializable, Comparable { /** * Returns the hashCode. - * @stable ICU 3.0 + * @return a hash code value for this object. 
+ * @stable ICU 2.8 */ @Override public int hashCode() { @@ -691,7 +692,7 @@ public final class ULocale implements Serializable, Comparable { * function identically might not compare equal. * * @return true if this Locale is equal to the specified object. - * @stable ICU 3.0 + * @stable ICU 2.8 */ @Override public boolean equals(Object obj) { @@ -1071,7 +1072,8 @@ public final class ULocale implements Serializable, Comparable { /** * Returns a string representation of this object. - * @stable ICU 3.0 + * @return a string representation of the object. + * @stable ICU 2.8 */ @Override public String toString() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java index e57a1737da8..18c10eacf42 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java @@ -482,7 +482,7 @@ public final class VersionInfo implements Comparable * * @return the hash code value for this set. * @see java.lang.Object#hashCode() - * @stable ICU 58 + * @stable ICU 2.6 */ @Override public int hashCode() { @@ -527,7 +527,7 @@ public final class VersionInfo implements Comparable /** * Map of singletons */ - private static final ConcurrentHashMap MAP_ = new ConcurrentHashMap(); + private static final ConcurrentHashMap MAP_ = new ConcurrentHashMap<>(); /** * Last byte mask */ diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java index 97a51fdd2f2..be3beb6fdbd 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java @@ -13,259 +13,9 @@ import java.util.Map; /** * RuleBasedTransliterator is a transliterator - * that reads a set of rules in order to determine how to perform - * translations. 
Rule sets are stored in resource bundles indexed by - * name. Rules within a rule set are separated by semicolons (';'). - * To include a literal semicolon, prefix it with a backslash ('\'). - * Unicode Pattern_White_Space is ignored. - * If the first non-blank character on a line is '#', - * the entire line is ignored as a comment. - * - *

Each set of rules consists of two groups, one forward, and one - * reverse. This is a convention that is not enforced; rules for one - * direction may be omitted, with the result that translations in - * that direction will not modify the source text. In addition, - * bidirectional forward-reverse rules may be specified for - * symmetrical transformations. - * - *

Rule syntax - * - *

Rule statements take one of the following forms: - * - *

- *
$alefmadda=\u0622;
- *
Variable definition. The name on the - * left is assigned the text on the right. In this example, - * after this statement, instances of the left hand name, - * "$alefmadda", will be replaced by - * the Unicode character U+0622. Variable names must begin - * with a letter and consist only of letters, digits, and - * underscores. Case is significant. Duplicate names cause - * an exception to be thrown, that is, variables cannot be - * redefined. The right hand side may contain well-formed - * text of any length, including no text at all ("$empty=;"). - * The right hand side may contain embedded UnicodeSet - * patterns, for example, "$softvowel=[eiyEIY]".
- *
 
- *
ai>$alefmadda;
- *
Forward translation rule. This rule - * states that the string on the left will be changed to the - * string on the right when performing forward - * transliteration.
- *
 
- *
ai<$alefmadda;
- *
Reverse translation rule. This rule - * states that the string on the right will be changed to - * the string on the left when performing reverse - * transliteration.
- *
- * - *
- *
ai<>$alefmadda;
- *
Bidirectional translation rule. This - * rule states that the string on the right will be changed - * to the string on the left when performing forward - * transliteration, and vice versa when performing reverse - * transliteration.
- *
- * - *

Translation rules consist of a match pattern and an output - * string. The match pattern consists of literal characters, - * optionally preceded by context, and optionally followed by - * context. Context characters, like literal pattern characters, - * must be matched in the text being transliterated. However, unlike - * literal pattern characters, they are not replaced by the output - * text. For example, the pattern "abc{def}" - * indicates the characters "def" must be - * preceded by "abc" for a successful match. - * If there is a successful match, "def" will - * be replaced, but not "abc". The final '}' - * is optional, so "abc{def" is equivalent to - * "abc{def}". Another example is "{123}456" - * (or "123}456") in which the literal - * pattern "123" must be followed by "456". - * - *

The output string of a forward or reverse rule consists of - * characters to replace the literal pattern characters. If the - * output string contains the character '|', this is - * taken to indicate the location of the cursor after - * replacement. The cursor is the point in the text at which the - * next replacement, if any, will be applied. The cursor is usually - * placed within the replacement text; however, it can actually be - * placed into the precending or following context by using the - * special character '@'. Examples: - * - *

- *

a {foo} z > | @ bar; # foo -> bar, move cursor - * before a
- * {foo} xyz > bar @@|; # foo -> bar, cursor between - * y and z
- *

- * - *

UnicodeSet - * - *

UnicodeSet patterns may appear anywhere that - * makes sense. They may appear in variable definitions. - * Contrariwise, UnicodeSet patterns may themselves - * contain variable references, such as "$a=[a-z];$not_a=[^$a]", - * or "$range=a-z;$ll=[$range]". - * - *

UnicodeSet patterns may also be embedded directly - * into rule strings. Thus, the following two rules are equivalent: - * - *

- *

$vowel=[aeiou]; $vowel>'*'; # One way to do this
- * [aeiou]>'*'; - *                # - * Another way
- *

- * - *

See {@link UnicodeSet} for more documentation and examples. - * - *

Segments - * - *

Segments of the input string can be matched and copied to the - * output string. This makes certain sets of rules simpler and more - * general, and makes reordering possible. For example: - * - *

- *

([a-z]) > $1 $1; - *           # - * double lowercase letters
- * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
- *

- * - *

The segment of the input string to be copied is delimited by - * "(" and ")". Up to - * nine segments may be defined. Segments may not overlap. In the - * output string, "$1" through "$9" - * represent the input string segments, in left-to-right order of - * definition. - * - *

Anchors - * - *

Patterns can be anchored to the beginning or the end of the text. This is done with the - * special characters '^' and '$'. For example: - * - *

- *

^ a   > 'BEG_A';   # match 'a' at start of text
- *   a   > 'A';       # match other instances - * of 'a'
- *   z $ > 'END_Z';   # match 'z' at end of text
- *   z   > 'Z';       # match other instances - * of 'z'
- *

- * - *

It is also possible to match the beginning or the end of the text using a UnicodeSet. - * This is done by including a virtual anchor character '$' at the end of the - * set pattern. Although this is usually the match chafacter for the end anchor, the set will - * match either the beginning or the end of the text, depending on its placement. For - * example: - * - *

- *

$x = [a-z$];   # match 'a' through 'z' OR anchor
- * $x 1    > 2;   # match '1' after a-z or at the start
- *    3 $x > 4;   # match '3' before a-z or at the end
- *

- * - *

Example - * - *

The following example rules illustrate many of the features of - * the rule language. - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Rule 1.abc{def}>x|y
Rule 2.xyz>r
Rule 3.yz>q
- * - *

Applying these rules to the string "adefabcdefz" - * yields the following results: - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
|adefabcdefzInitial state, no rules match. Advance - * cursor.
a|defabcdefzStill no match. Rule 1 does not match - * because the preceding context is not present.
ad|efabcdefzStill no match. Keep advancing until - * there is a match...
ade|fabcdefz...
adef|abcdefz...
adefa|bcdefz...
adefab|cdefz...
adefabc|defzRule 1 matches; replace "def" - * with "xy" and back up the cursor - * to before the 'y'.
adefabcx|yzAlthough "xyz" is - * present, rule 2 does not match because the cursor is - * before the 'y', not before the 'x'. - * Rule 3 does match. Replace "yz" - * with "q".
adefabcxq|The cursor is at the end; - * transliteration is complete.
- * - *

The order of rules is significant. If multiple rules may match - * at some point, the first matching rule is applied. - * - *

Forward and reverse rules may have an empty output string. - * Otherwise, an empty left or right hand side of any statement is a - * syntax error. - * - *

Single quotes are used to quote any character other than a - * digit or letter. To specify a single quote itself, inside or - * outside of quotes, use two single quotes in a row. For example, - * the rule "'>'>o''clock" changes the - * string ">" to the string "o'clock". - * - *

Notes - * - *

While a RuleBasedTransliterator is being built, it checks that - * the rules are added in proper order. For example, if the rule - * "a>x" is followed by the rule "ab>y", - * then the second rule will throw an exception. The reason is that - * the second rule can never be triggered, since the first rule - * always matches anything it matches. In other words, the first - * rule masks the second rule. + * built from a set of rules as defined for + * {@link Transliterator#createFromRules(String, String, int)}. + * See the class {@link Transliterator} documentation for the rule syntax. * * @author Alan Liu * @internal @@ -369,7 +119,7 @@ public class RuleBasedTransliterator extends Transliterator { static class Data { public Data() { - variableNames = new HashMap(); + variableNames = new HashMap<>(); ruleSet = new TransliterationRuleSet(); } @@ -487,5 +237,3 @@ public class RuleBasedTransliterator extends Transliterator { return new RuleBasedTransliterator(getID(), data, filter); } } - - diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java index 3d7a7e75316..01be8a96dff 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java @@ -83,7 +83,7 @@ import com.ibm.icu.util.UResourceBundle; * modified as each new character arrives. * *

- * Consider the simple RuleBasedTransliterator: + * Consider the simple rule-based Transliterator: * *

* th>{theta}
@@ -110,8 +110,8 @@ import com.ibm.icu.util.UResourceBundle; * that the transliterator will look at. It is advanced as text becomes committed (but it is not the committed index; * that's the cursor). The cursor index, described above, marks the point at which the * transliterator last stopped, either because it reached the end, or because it required more characters to - * disambiguate between possible inputs. The cursor can also be explicitly set by rules in a - * RuleBasedTransliterator. Any characters before the cursor index are frozen; future keyboard + * disambiguate between possible inputs. The cursor can also be explicitly set by rules. + * Any characters before the cursor index are frozen; future keyboard * transliteration calls within this input sequence will not change them. New text is inserted at the limit * index, which marks the end of the substring that the transliterator looks at. * @@ -222,13 +222,262 @@ import com.ibm.icu.util.UResourceBundle; * transliterate() method taking a String and StringBuffer if the performance of * these methods can be improved over the performance obtained by the default implementations in this class. * + *

Rule syntax + * + *

A set of rules determines how to perform translations. + * Rules within a rule set are separated by semicolons (';'). + * To include a literal semicolon, prefix it with a backslash ('\'). + * Unicode Pattern_White_Space is ignored. + * If the first non-blank character on a line is '#', + * the entire line is ignored as a comment. + * + *

Each set of rules consists of two groups, one forward, and one + * reverse. This is a convention that is not enforced; rules for one + * direction may be omitted, with the result that translations in + * that direction will not modify the source text. In addition, + * bidirectional forward-reverse rules may be specified for + * symmetrical transformations. + * + *

Note: Another description of the Transliterator rule syntax is available in + * section + * Transform Rules Syntax of UTS #35: Unicode LDML. + * The rules are shown there using arrow symbols ← and → and ↔. + * ICU supports both those and the equivalent ASCII symbols < and > and <>. + * + *

Rule statements take one of the following forms: + * + *

+ *
$alefmadda=\\u0622;
+ *
Variable definition. The name on the + * left is assigned the text on the right. In this example, + * after this statement, instances of the left hand name, + * "$alefmadda", will be replaced by + * the Unicode character U+0622. Variable names must begin + * with a letter and consist only of letters, digits, and + * underscores. Case is significant. Duplicate names cause + * an exception to be thrown, that is, variables cannot be + * redefined. The right hand side may contain well-formed + * text of any length, including no text at all ("$empty=;"). + * The right hand side may contain embedded UnicodeSet + * patterns, for example, "$softvowel=[eiyEIY]".
+ *
ai>$alefmadda;
+ *
Forward translation rule. This rule + * states that the string on the left will be changed to the + * string on the right when performing forward + * transliteration.
+ *
ai<$alefmadda;
+ *
Reverse translation rule. This rule + * states that the string on the right will be changed to + * the string on the left when performing reverse + * transliteration.
+ *
+ * + *
+ *
ai<>$alefmadda;
+ *
Bidirectional translation rule. This + * rule states that the string on the left will be changed + * to the string on the right when performing forward + * transliteration, and vice versa when performing reverse + * transliteration.
+ *
+ * + *

Translation rules consist of a match pattern and an output + * string. The match pattern consists of literal characters, + * optionally preceded by context, and optionally followed by + * context. Context characters, like literal pattern characters, + * must be matched in the text being transliterated. However, unlike + * literal pattern characters, they are not replaced by the output + * text. For example, the pattern "abc{def}" + * indicates the characters "def" must be + * preceded by "abc" for a successful match. + * If there is a successful match, "def" will + * be replaced, but not "abc". The final '}' + * is optional, so "abc{def" is equivalent to + * "abc{def}". Another example is "{123}456" + * (or "123}456") in which the literal + * pattern "123" must be followed by "456". + * + *

The output string of a forward or reverse rule consists of + * characters to replace the literal pattern characters. If the + * output string contains the character '|', this is + * taken to indicate the location of the cursor after + * replacement. The cursor is the point in the text at which the + * next replacement, if any, will be applied. The cursor is usually + * placed within the replacement text; however, it can actually be + * placed into the preceding or following context by using the + * special character '@'. Examples: + * + *

+ *     a {foo} z > | @ bar; # foo -> bar, move cursor before a
+ *     {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
+ * 
+ * + *

UnicodeSet + * + *

UnicodeSet patterns may appear anywhere that + * makes sense. They may appear in variable definitions. + * Contrariwise, UnicodeSet patterns may themselves + * contain variable references, such as "$a=[a-z];$not_a=[^$a]", + * or "$range=a-z;$ll=[$range]". + * + *

UnicodeSet patterns may also be embedded directly + * into rule strings. Thus, the following two rules are equivalent: + * + *

+ *     $vowel=[aeiou]; $vowel>'*'; # One way to do this
+ *     [aeiou]>'*'; # Another way
+ * 
+ * + *

See {@link UnicodeSet} for more documentation and examples. + * + *

Segments + * + *

Segments of the input string can be matched and copied to the + * output string. This makes certain sets of rules simpler and more + * general, and makes reordering possible. For example: + * + *

+ *     ([a-z]) > $1 $1; # double lowercase letters
+ *     ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
+ * 
+ * + *

The segment of the input string to be copied is delimited by + * "(" and ")". Up to + * nine segments may be defined. Segments may not overlap. In the + * output string, "$1" through "$9" + * represent the input string segments, in left-to-right order of + * definition. + * + *

Anchors + * + *

Patterns can be anchored to the beginning or the end of the text. This is done with the + * special characters '^' and '$'. For example: + * + *

+ *   ^ a   > 'BEG_A';   # match 'a' at start of text
+ *     a   > 'A'; # match other instances of 'a'
+ *     z $ > 'END_Z';   # match 'z' at end of text
+ *     z   > 'Z';       # match other instances of 'z'
+ * 
+ * + *

It is also possible to match the beginning or the end of the text using a UnicodeSet. + * This is done by including a virtual anchor character '$' at the end of the + * set pattern. Although this is usually the match character for the end anchor, the set will + * match either the beginning or the end of the text, depending on its placement. For + * example: + * + *

+ *   $x = [a-z$];   # match 'a' through 'z' OR anchor
+ *   $x 1    > 2;   # match '1' after a-z or at the start
+ *      3 $x > 4;   # match '3' before a-z or at the end
+ * 
+ * + *

Example + * + *

The following example rules illustrate many of the features of + * the rule language. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Rule 1.abc{def}>x|y
Rule 2.xyz>r
Rule 3.yz>q
+ * + *

Applying these rules to the string "adefabcdefz" + * yields the following results: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
|adefabcdefzInitial state, no rules match. Advance + * cursor.
a|defabcdefzStill no match. Rule 1 does not match + * because the preceding context is not present.
ad|efabcdefzStill no match. Keep advancing until + * there is a match...
ade|fabcdefz...
adef|abcdefz...
adefa|bcdefz...
adefab|cdefz...
adefabc|defzRule 1 matches; replace "def" + * with "xy" and back up the cursor + * to before the 'y'.
adefabcx|yzAlthough "xyz" is + * present, rule 2 does not match because the cursor is + * before the 'y', not before the 'x'. + * Rule 3 does match. Replace "yz" + * with "q".
adefabcxq|The cursor is at the end; + * transliteration is complete.
+ * + *

The order of rules is significant. If multiple rules may match + * at some point, the first matching rule is applied. + * + *

Forward and reverse rules may have an empty output string. + * Otherwise, an empty left or right hand side of any statement is a + * syntax error. + * + *

Single quotes are used to quote any character other than a + * digit or letter. To specify a single quote itself, inside or + * outside of quotes, use two single quotes in a row. For example, + * the rule "'>'>o''clock" changes the + * string ">" to the string "o'clock". + * + *

Notes + * + *

While a Transliterator is being built from rules, it checks that + * the rules are added in proper order. For example, if the rule + * "a>x" is followed by the rule "ab>y", + * then the second rule will throw an exception. The reason is that + * the second rule can never be triggered, since the first rule + * always matches anything it matches. In other words, the first + * rule masks the second rule. + * * @author Alan Liu * @stable ICU 2.0 */ public abstract class Transliterator implements StringTransform { /** * Direction constant indicating the forward direction in a transliterator, - * e.g., the forward rules of a RuleBasedTransliterator. An "A-B" + * e.g., the forward rules of a rule-based Transliterator. An "A-B" * transliterator transliterates A to B when operating in the forward * direction, and B to A when operating in the reverse direction. * @stable ICU 2.0 @@ -237,7 +486,7 @@ public abstract class Transliterator implements StringTransform { /** * Direction constant indicating the reverse direction in a transliterator, - * e.g., the reverse rules of a RuleBasedTransliterator. An "A-B" + * e.g., the reverse rules of a rule-based Transliterator. An "A-B" * transliterator transliterates A to B when operating in the forward * direction, and B to A when operating in the reverse direction. * @stable ICU 2.0 @@ -358,7 +607,7 @@ public abstract class Transliterator implements StringTransform { /** * Returns true if this Position is equal to the given object. - * @stable ICU 2.6 + * @stable ICU 2.0 */ @Override public boolean equals(Object obj) { @@ -373,7 +622,8 @@ public abstract class Transliterator implements StringTransform { } /** - * @draft ICU 63 + * {@inheritDoc} + * @stable ICU 2.0 */ @Override public int hashCode() { @@ -382,7 +632,8 @@ public abstract class Transliterator implements StringTransform { /** * Returns a string representation of this Position. - * @stable ICU 2.6 + * @return a string representation of the object. 
+ * @stable ICU 2.0 */ @Override public String toString() { @@ -1100,7 +1351,7 @@ public abstract class Transliterator implements StringTransform { /** * Transliterate a substring of text, as specified by index, taking filters * into account. This method is for subclasses that need to delegate to - * another transliterator, such as CompoundTransliterator. + * another transliterator. * @param text the text to be transliterated * @param index the position indices * @param incremental if TRUE, then assume more characters may be inserted @@ -1343,7 +1594,7 @@ public abstract class Transliterator implements StringTransform { public static Transliterator getInstance(String ID, int dir) { StringBuffer canonID = new StringBuffer(); - List list = new ArrayList(); + List list = new ArrayList<>(); UnicodeSet[] globalFilter = new UnicodeSet[1]; if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) { throw new IllegalArgumentException("Invalid ID " + ID); @@ -1398,11 +1649,17 @@ public abstract class Transliterator implements StringTransform { /** * Returns a Transliterator object constructed from - * the given rule string. This will be a RuleBasedTransliterator, + * the given rule string. This will be a rule-based Transliterator, * if the rule string contains only rules, or a - * CompoundTransliterator, if it contains ID blocks, or a - * NullTransliterator, if it contains ID blocks which parse as + * compound Transliterator, if it contains ID blocks, or a + * null Transliterator, if it contains ID blocks which parse as * empty for the given direction. + * + * @param ID the id for the transliterator. + * @param rules rules, separated by ';' + * @param dir either FORWARD or REVERSE. 
+ * @return a newly created Transliterator + * @throws IllegalArgumentException if there is a problem with the ID or the rules * @stable ICU 2.0 */ public static final Transliterator createFromRules(String ID, String rules, int dir) { @@ -1435,7 +1692,7 @@ public abstract class Transliterator implements StringTransform { } } else { - List transliterators = new ArrayList(); + List transliterators = new ArrayList<>(); int passNumber = 1; int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size()); diff --git a/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/APIStatusConsistencyChecker.java b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/APIStatusConsistencyChecker.java new file mode 100644 index 00000000000..d6d850a5981 --- /dev/null +++ b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/APIStatusConsistencyChecker.java @@ -0,0 +1,124 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License +package com.ibm.icu.dev.tool.docs; + +import java.io.File; +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; + +/** + * Checks if API status of equals/hashCode is same with its containing class. 
+ *
+ * <p>Command line arguments:
+ * <ol>
+ *   <li>path of the API signature file (required)</li>
+ *   <li>semicolon-separated list of package-qualified class names to skip (optional)</li>
+ * </ol>
+ * The process exits with status 1 on a usage error; otherwise the exit status is the
+ * number of inconsistencies reported by {@link #checkConsistency()}.
+ *
+ * @author Yoshito
+ */
+public class APIStatusConsistencyChecker {
+    /**
+     * Command line entry point.
+     *
+     * @param args args[0] is the API signature file path; args[1] (optional) is a
+     *             semicolon-separated list of classes to be skipped
+     */
+    public static void main(String[] args) {
+        // Stop on a usage error instead of falling through: without this, a missing
+        // argument reaches args[0] below and fails with ArrayIndexOutOfBoundsException.
+        if (args.length < 1) {
+            System.err.println("Missing API signature file path.");
+            System.exit(1);
+        } else if (args.length > 2) {
+            System.err.println("Too many command arguments");
+            System.exit(1);
+        }
+
+        List<String> skipClasses = Collections.emptyList();
+        if (args.length == 2) {
+            String[] classes = args[1].split(";");
+            skipClasses = Arrays.asList(classes);
+        }
+
+        // Load the ICU4J API signature file
+        Set<APIInfo> apiInfoSet = APIData.read(new File(args[0]), true).getAPIInfoSet();
+        APIStatusConsistencyChecker checker = new APIStatusConsistencyChecker(apiInfoSet, skipClasses, new PrintWriter(System.err, true));
+        checker.checkConsistency();
+        System.exit(checker.errCount);
+    }
+
+    // Number of problems reported so far by checkConsistency()
+    private int errCount = 0;
+    private Set<APIInfo> apiInfoSet;
+    private PrintWriter pw;
+    private List<String> skipClasses;
+
+    /**
+     * @param apiInfoSet  the API entries to check
+     * @param skipClasses package-qualified names of classes whose methods are not checked
+     * @param pw          destination for error messages
+     */
+    public APIStatusConsistencyChecker(Set<APIInfo> apiInfoSet, List<String> skipClasses, PrintWriter pw) {
+        this.apiInfoSet = apiInfoSet;
+        this.skipClasses = skipClasses;
+        this.pw = pw;
+    }
+
+    /**
+     * @return the number of errors counted by {@link #checkConsistency()} so far
+     */
+    public int errorCount() {
+        return errCount;
+    }
+
+    // Methods that should have the same API status as their containing class,
+    // given as {method name, signature}
+    static final String[][] METHODS = {
+        //{"", ""},
+        {"equals", "boolean(java.lang.Object)"},
+        {"hashCode", "int()"},
+        {"toString", "java.lang.String()"},
+        {"clone", "java.lang.Object()"},
+    };
+
+    /**
+     * Compares the status value ({@code APIInfo.STA}) and status version of every method
+     * listed in {@link #METHODS} with those of its containing class. Each mismatch, and
+     * each method whose containing class cannot be found, is printed to the writer given
+     * to the constructor and added to the error count.
+     */
+    public void checkConsistency() {
+        Map<String, APIInfo> classMap = new TreeMap<>();
+        // Build a map of APIInfo for classes, indexed by class name
+        for (APIInfo api : apiInfoSet) {
+            if (!api.isPublic() && !api.isProtected()) {
+                continue;
+            }
+            if (!api.isClass() && !api.isEnum()) {
+                continue;
+            }
+            String fullClassName = api.getPackageName() + "." + api.getName();
+            classMap.put(fullClassName, api);
+        }
+
+        // Walk through methods
+        for (APIInfo api : apiInfoSet) {
+            if (!api.isMethod()) {
+                continue;
+            }
+
+            String fullClassName = api.getPackageName() + "." + api.getClassName();
+            if (skipClasses.contains(fullClassName)) {
+                continue;
+            }
+
+            boolean checkWithClass = false;
+            String methodName = api.getName();
+            String methodSig = api.getSignature();
+
+            for (String[] method : METHODS) {
+                if (method[0].equals(methodName) && method[1].equals(methodSig)) {
+                    checkWithClass = true;
+                    break;
+                }
+            }
+
+            if (!checkWithClass) {
+                continue;
+            }
+
+            // Check if this method has the same API status as the containing class
+            APIInfo clsApi = classMap.get(fullClassName);
+            if (clsApi == null) {
+                pw.println("## Error ## Class " + fullClassName + " is not found.");
+                errCount++;
+                // Nothing to compare against; skipping avoids dereferencing null below.
+                continue;
+            }
+
+            int methodStatus = api.getVal(APIInfo.STA);
+            String methodVer = api.getStatusVersion();
+            int classStatus = clsApi.getVal(APIInfo.STA);
+            String classVer = clsApi.getStatusVersion();
+
+            if (methodStatus != classStatus || !Objects.equals(methodVer, classVer)) {
+                pw.println("## Error ## " + methodName + " in " + fullClassName);
+                errCount++;
+            }
+        }
+    }
+}
diff --git a/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java index 61abf722360..46aca4589de 100644 --- a/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java +++ b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java @@ -56,7 +56,7 @@ public class DeprecatedAPIChecker { public void checkDeprecated() { // Gather API class/enum names and its names that can be // used for Class.forName() - Map apiClassNameMap = new TreeMap(); + Map apiClassNameMap = new TreeMap<>(); for (APIInfo api : apiInfoSet) { if (!api.isPublic() && !api.isProtected()) { continue; @@ -133,6 +133,18 @@ public class DeprecatedAPIChecker { } List paramNames = getParamNames(ctor); + +
Class declClass = cls.getDeclaringClass(); + if (declClass != null && !Modifier.isStatic(cls.getModifiers())) { + // This is non-static inner class's constructor. + // javac automatically injects instance of declaring class + // as the first param of the constructor, but ICU's API + // signature is based on javadoc and it generates signature + // without the implicit parameter. + assert paramNames.get(0).equals(declClass.getName()); + paramNames.remove(0); + } + api = findConstructorInfo(apiInfoSet, clsName, paramNames); if (api == null) { @@ -351,7 +363,7 @@ public class DeprecatedAPIChecker { throw new IllegalArgumentException(api.toString() + " is not a constructor or a method."); } - List nameList = new ArrayList(); + List nameList = new ArrayList<>(); String signature = api.getSignature(); int start = signature.indexOf('('); int end = signature.indexOf(')'); @@ -410,7 +422,7 @@ public class DeprecatedAPIChecker { private static char[] PRIMITIVE_SIGNATURES = { 'B', 'S', 'I', 'J', 'F', 'D', 'Z', 'C' }; private static List toTypeNameList(Type[] types) { - List nameList = new ArrayList(); + List nameList = new ArrayList<>(); for (Type t : types) { StringBuilder s = new StringBuilder();