diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp
index f7073fec31f..81b6e0f68ab 100644
--- a/icu4c/source/common/uloc.cpp
+++ b/icu4c/source/common/uloc.cpp
@@ -798,7 +798,7 @@ _getKeywords(const char *localeID,
}
keywordsLen += keywordList[i].keywordLen + 1;
if(valuesToo) {
- if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
+ if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
}
keywordsLen += keywordList[i].valueLen;
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index b0647e97a2a..2d6a9213c3d 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -12,11 +12,13 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
+#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
+#include "uvector.h"
#include "uassert.h"
@@ -172,6 +174,46 @@ static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
+namespace {
+
+// Helper class to memory manage CharString objects.
+// Only ever stack-allocated, does not need to inherit UMemory.
+//
+// Every CharString handed out by create() is registered in a UVector that
+// was constructed with deleter() as its element deleter, so callers must
+// not delete the returned pointers themselves.
+// NOTE(review): the registered objects are presumably released when the
+// UVector member is destroyed together with the pool -- confirm in uvector.h.
+class CharStringPool {
+public:
+    // pool's constructor receives status, so status is initialized first.
+    CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
+    ~CharStringPool() = default;
+
+    // The pool tracks raw pointers; copying is disabled.
+    CharStringPool(const CharStringPool&) = delete;
+    CharStringPool& operator=(const CharStringPool&) = delete;
+
+    // Allocates a new, empty CharString and registers it with the pool.
+    // Returns nullptr if the allocation or the registration fails. The
+    // failure is recorded in status, so every later call returns nullptr
+    // as well.
+    icu::CharString* create() {
+        if (U_FAILURE(status)) {
+            return nullptr;
+        }
+        icu::CharString* const obj = new icu::CharString;
+        if (obj == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return nullptr;
+        }
+        pool.addElement(obj, status);
+        if (U_FAILURE(status)) {
+            // Registration failed: release the object here instead.
+            delete obj;
+            return nullptr;
+        }
+        return obj;
+    }
+
+private:
+    // Element deleter handed to the UVector; obj is a CharString created
+    // by create().
+    static void U_CALLCONV deleter(void* obj) {
+        delete static_cast<icu::CharString*>(obj);
+    }
+
+    // Keep status declared before pool: members are initialized in
+    // declaration order and pool's constructor takes status by argument.
+    UErrorCode status;
+    icu::UVector pool;
+};
+
+} // namespace
+
/*
* -------------------------------------------------
*
@@ -900,7 +942,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
static int32_t
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
- char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
int32_t attrBufLength = 0;
UEnumeration *keywordEnum = NULL;
@@ -920,22 +961,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
AttributeListEntry *firstAttr = NULL;
AttributeListEntry *attr;
char *attrValue;
- char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
- char *pExtBuf = extBuf;
- int32_t extBufCapacity = sizeof(extBuf);
+ CharStringPool extBufPool;
const char *bcpKey=nullptr, *bcpValue=nullptr;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
UBool isBcpUExt;
while (TRUE) {
+ icu::CharString buf;
key = uenum_next(keywordEnum, NULL, status);
if (key == NULL) {
break;
}
- len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
- /* buf must be null-terminated */
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ char* buffer;
+ int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
+
+ for (;;) {
+ buffer = buf.getAppendBuffer(
+ /*minCapacity=*/resultCapacity,
+ /*desiredCapacityHint=*/resultCapacity,
+ resultCapacity,
+ tmpStatus);
+
+ if (U_FAILURE(tmpStatus)) {
+ break;
+ }
+
+ len = uloc_getKeywordValue(
+ localeID, key, buffer, resultCapacity, &tmpStatus);
+
+ if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+
+ resultCapacity = len;
+ tmpStatus = U_ZERO_ERROR;
+ }
+
+ if (U_FAILURE(tmpStatus)) {
+ if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -945,6 +1012,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
+ buf.append(buffer, len, tmpStatus);
+ if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
+ }
+
keylen = (int32_t)uprv_strlen(key);
isBcpUExt = (keylen > 1);
@@ -1007,7 +1079,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
/* we've checked buf is null-terminated above */
- bcpValue = uloc_toUnicodeLocaleType(key, buf);
+ bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -1015,33 +1087,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
continue;
}
- if (bcpValue == buf) {
- /*
+ if (bcpValue == buf.data()) {
+ /*
When uloc_toUnicodeLocaleType(key, buf) returns the
input value as is, the value is well-formed, but has
no known mapping. This implementation normalizes the
- the value to lower case
+ value to lower case
*/
- int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
- if (bcpValueLen < extBufCapacity) {
- uprv_strcpy(pExtBuf, bcpValue);
- T_CString_toLowerCase(pExtBuf);
-
- bcpValue = pExtBuf;
-
- pExtBuf += (bcpValueLen + 1);
- extBufCapacity -= (bcpValueLen + 1);
- } else {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- }
- continue;
+ icu::CharString* extBuf = extBufPool.create();
+ if (extBuf == nullptr) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ break;
}
+ int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
+ int32_t resultCapacity;
+ char* pExtBuf = extBuf->getAppendBuffer(
+ /*minCapacity=*/bcpValueLen,
+ /*desiredCapacityHint=*/bcpValueLen,
+ resultCapacity,
+ tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ break;
+ }
+
+ // NOTE(review): uprv_strcpy() also writes the terminating NUL, i.e.
+ // bcpValueLen + 1 bytes, while only bcpValueLen was requested as
+ // minCapacity -- confirm that getAppendBuffer() reserves the extra byte.
+ uprv_strcpy(pExtBuf, bcpValue);
+ T_CString_toLowerCase(pExtBuf);
+
+ extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ break;
+ }
+
+ bcpValue = extBuf->data();
}
} else {
if (*key == PRIVATEUSE) {
- if (!_isPrivateuseValueSubtags(buf, len)) {
+ if (!_isPrivateuseValueSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1049,7 +1132,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
} else {
- if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
+ if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1058,20 +1141,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
bcpKey = key;
- if ((len + 1) < extBufCapacity) {
- uprv_memcpy(pExtBuf, buf, len);
- bcpValue = pExtBuf;
-
- pExtBuf += len;
-
- *pExtBuf = 0;
- pExtBuf++;
-
- extBufCapacity -= (len + 1);
- } else {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
+ icu::CharString* extBuf = extBufPool.create();
+ if (extBuf == nullptr) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
break;
}
+ extBuf->append(buf.data(), len, tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ break;
+ }
+ bcpValue = extBuf->data();
}
/* create ExtensionListEntry */
@@ -2337,31 +2417,66 @@ uloc_toLanguageTag(const char* localeID,
int32_t langtagCapacity,
UBool strict,
UErrorCode* status) {
- /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
- char canonical[256];
- int32_t reslen = 0;
+ icu::CharString canonical;
+ int32_t reslen;
UErrorCode tmpStatus = U_ZERO_ERROR;
UBool hadPosix = FALSE;
const char* pKeywordStart;
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
- canonical[0] = 0;
- if (uprv_strlen(localeID) > 0) {
- uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
- if (tmpStatus != U_ZERO_ERROR) {
+ int32_t resultCapacity = uprv_strlen(localeID);
+ if (resultCapacity > 0) {
+ char* buffer;
+
+ for (;;) {
+ buffer = canonical.getAppendBuffer(
+ /*minCapacity=*/resultCapacity,
+ /*desiredCapacityHint=*/resultCapacity,
+ resultCapacity,
+ tmpStatus);
+
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ return 0;
+ }
+
+ reslen =
+ uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
+
+ if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+
+ resultCapacity = reslen;
+ tmpStatus = U_ZERO_ERROR;
+ }
+
+ if (U_FAILURE(tmpStatus)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
+
+ canonical.append(buffer, reslen, tmpStatus);
+ if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
+ }
+
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ return 0;
+ }
}
+ reslen = 0;
+
/* For handling special case - private use only tag */
- pKeywordStart = locale_getKeywordsStart(canonical);
- if (pKeywordStart == canonical) {
+ pKeywordStart = locale_getKeywordsStart(canonical.data());
+ if (pKeywordStart == canonical.data()) {
UEnumeration *kwdEnum;
int kwdCnt = 0;
UBool done = FALSE;
- kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
+ kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
if (kwdEnum != NULL) {
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
if (kwdCnt == 1) {
@@ -2399,12 +2514,12 @@ uloc_toLanguageTag(const char* localeID,
}
}
- reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
- reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
- reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
- reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
- reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
- reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
+ reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
+ reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
+ reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
+ reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
+ reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
+ reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
return reslen;
}
diff --git a/icu4c/source/i18n/rbt.h b/icu4c/source/i18n/rbt.h
index b998c694c23..671149f66ef 100644
--- a/icu4c/source/i18n/rbt.h
+++ b/icu4c/source/i18n/rbt.h
@@ -29,262 +29,10 @@ class TransliterationRuleData;
/**
* RuleBasedTransliterator
is a transliterator
- * that reads a set of rules in order to determine how to perform
- * translations. Rule sets are stored in resource bundles indexed by
- * name. Rules within a rule set are separated by semicolons (';').
- * To include a literal semicolon, prefix it with a backslash ('\').
- * Whitespace, as defined by Character.isWhitespace()
,
- * is ignored. If the first non-blank character on a line is '#',
- * the entire line is ignored as a comment.
- *
- * Each set of rules consists of two groups, one forward, and one
- * reverse. This is a convention that is not enforced; rules for one
- * direction may be omitted, with the result that translations in
- * that direction will not modify the source text. In addition,
- * bidirectional forward-reverse rules may be specified for
- * symmetrical transformations.
- *
- * Rule syntax
- *
- * Rule statements take one of the following forms:
- *
- *
- * $alefmadda=\u0622;
- * Variable definition. The name on the
- * left is assigned the text on the right. In this example,
- * after this statement, instances of the left hand name,
- * "$alefmadda
", will be replaced by
- * the Unicode character U+0622. Variable names must begin
- * with a letter and consist only of letters, digits, and
- * underscores. Case is significant. Duplicate names cause
- * an exception to be thrown, that is, variables cannot be
- * redefined. The right hand side may contain well-formed
- * text of any length, including no text at all ("$empty=;
").
- * The right hand side may contain embedded UnicodeSet
- * patterns, for example, "$softvowel=[eiyEIY]
".
- *
- * ai>$alefmadda;
- * Forward translation rule. This rule
- * states that the string on the left will be changed to the
- * string on the right when performing forward
- * transliteration.
- *
- * ai<$alefmadda;
- * Reverse translation rule. This rule
- * states that the string on the right will be changed to
- * the string on the left when performing reverse
- * transliteration.
- *
- *
- *
- * ai<>$alefmadda;
- * Bidirectional translation rule. This
- * rule states that the string on the right will be changed
- * to the string on the left when performing forward
- * transliteration, and vice versa when performing reverse
- * transliteration.
- *
- *
- * Translation rules consist of a match pattern and an output
- * string . The match pattern consists of literal characters,
- * optionally preceded by context, and optionally followed by
- * context. Context characters, like literal pattern characters,
- * must be matched in the text being transliterated. However, unlike
- * literal pattern characters, they are not replaced by the output
- * text. For example, the pattern "abc{def}
"
- * indicates the characters "def
" must be
- * preceded by "abc
" for a successful match.
- * If there is a successful match, "def
" will
- * be replaced, but not "abc
". The final '}
'
- * is optional, so "abc{def
" is equivalent to
- * "abc{def}
". Another example is "{123}456
"
- * (or "123}456
") in which the literal
- * pattern "123
" must be followed by "456
".
- *
- *
- * The output string of a forward or reverse rule consists of
- * characters to replace the literal pattern characters. If the
- * output string contains the character '|
', this is
- * taken to indicate the location of the cursor after
- * replacement. The cursor is the point in the text at which the
- * next replacement, if any, will be applied. The cursor is usually
- * placed within the replacement text; however, it can actually be
- * placed into the precending or following context by using the
- * special character '@
'. Examples:
- *
- *
- * a {foo} z > | @ bar; # foo -> bar, move cursor
- * before a
- * {foo} xyz > bar @@|; # foo -> bar, cursor between
- * y and z
- *
- *
- * UnicodeSet
- *
- * UnicodeSet
patterns may appear anywhere that
- * makes sense. They may appear in variable definitions.
- * Contrariwise, UnicodeSet
patterns may themselves
- * contain variable references, such as "$a=[a-z];$not_a=[^$a]
",
- * or "$range=a-z;$ll=[$range]
".
- *
- * UnicodeSet
patterns may also be embedded directly
- * into rule strings. Thus, the following two rules are equivalent:
- *
- *
- * $vowel=[aeiou]; $vowel>'*'; # One way to do this
- * [aeiou]>'*';
- * #
- * Another way
- *
- *
- * See {@link UnicodeSet} for more documentation and examples.
- *
- * Segments
- *
- * Segments of the input string can be matched and copied to the
- * output string. This makes certain sets of rules simpler and more
- * general, and makes reordering possible. For example:
- *
- *
- * ([a-z]) > $1 $1;
- * #
- * double lowercase letters
- * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
- *
- *
- * The segment of the input string to be copied is delimited by
- * "(
" and ")
". Up to
- * nine segments may be defined. Segments may not overlap. In the
- * output string, "$1
" through "$9
"
- * represent the input string segments, in left-to-right order of
- * definition.
- *
- * Anchors
- *
- * Patterns can be anchored to the beginning or the end of the text. This is done with the
- * special characters '^
' and '$
'. For example:
- *
- *
- * ^ a > 'BEG_A'; # match 'a' at start of text
- * a > 'A'; # match other instances
- * of 'a'
- * z $ > 'END_Z'; # match 'z' at end of text
- * z > 'Z'; # match other instances
- * of 'z'
- *
- *
- * It is also possible to match the beginning or the end of the text using a UnicodeSet
.
- * This is done by including a virtual anchor character '$
' at the end of the
- * set pattern. Although this is usually the match chafacter for the end anchor, the set will
- * match either the beginning or the end of the text, depending on its placement. For
- * example:
- *
- *
- * $x = [a-z$]; # match 'a' through 'z' OR anchor
- * $x 1 > 2; # match '1' after a-z or at the start
- * 3 $x > 4; # match '3' before a-z or at the end
- *
- *
- * Example
- *
- * The following example rules illustrate many of the features of
- * the rule language.
- *
- *
- *
- * Rule 1.
- * abc{def}>x|y
- *
- *
- * Rule 2.
- * xyz>r
- *
- *
- * Rule 3.
- * yz>q
- *
- *
- *
- * Applying these rules to the string "adefabcdefz
"
- * yields the following results:
- *
- *
- *
- * |adefabcdefz
- * Initial state, no rules match. Advance
- * cursor.
- *
- *
- * a|defabcdefz
- * Still no match. Rule 1 does not match
- * because the preceding context is not present.
- *
- *
- * ad|efabcdefz
- * Still no match. Keep advancing until
- * there is a match...
- *
- *
- * ade|fabcdefz
- * ...
- *
- *
- * adef|abcdefz
- * ...
- *
- *
- * adefa|bcdefz
- * ...
- *
- *
- * adefab|cdefz
- * ...
- *
- *
- * adefabc|defz
- * Rule 1 matches; replace "def
"
- * with "xy
" and back up the cursor
- * to before the 'y
'.
- *
- *
- * adefabcx|yz
- * Although "xyz
" is
- * present, rule 2 does not match because the cursor is
- * before the 'y
', not before the 'x
'.
- * Rule 3 does match. Replace "yz
"
- * with "q
".
- *
- *
- * adefabcxq|
- * The cursor is at the end;
- * transliteration is complete.
- *
- *
- *
- * The order of rules is significant. If multiple rules may match
- * at some point, the first matching rule is applied.
- *
- * Forward and reverse rules may have an empty output string.
- * Otherwise, an empty left or right hand side of any statement is a
- * syntax error.
- *
- * Single quotes are used to quote any character other than a
- * digit or letter. To specify a single quote itself, inside or
- * outside of quotes, use two single quotes in a row. For example,
- * the rule "'>'>o''clock
" changes the
- * string ">
" to the string "o'clock
".
- *
- *
- * Notes
- *
- * While a RuleBasedTransliterator is being built, it checks that
- * the rules are added in proper order. For example, if the rule
- * "a>x" is followed by the rule "ab>y",
- * then the second rule will throw an exception. The reason is that
- * the second rule can never be triggered, since the first rule
- * always matches anything it matches. In other words, the first
- * rule masks the second rule.
- *
+ * built from a set of rules as defined for
+ * Transliterator::createFromRules().
+ * See the C++ class Transliterator documentation for the rule syntax.
+ *
* @author Alan Liu
* @internal Use transliterator factory methods instead since this class will be removed in that release.
*/
diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h
index ebb9575a9f5..6b4888145f1 100644
--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@@ -15,10 +15,10 @@
#include "unicode/utypes.h"
/**
- * \file
+ * \file
* \brief C++ API: Tranforms text from one format to another.
*/
-
+
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/uobject.h"
@@ -31,7 +31,6 @@ U_NAMESPACE_BEGIN
class UnicodeFilter;
class UnicodeSet;
-class CompoundTransliterator;
class TransliteratorParser;
class NormalizationTransliterator;
class TransliteratorIDParser;
@@ -97,18 +96,20 @@ class TransliteratorIDParser;
* contents of the buffer may show text being modified as each new
* character arrives.
*
- * Consider the simple `RuleBasedTransliterator`:
- *
+ *
Consider the simple rule-based Transliterator:
+ *
* th>{theta}
* t>{tau}
+ *
*
* When the user types 't', nothing will happen, since the
* transliterator is waiting to see if the next character is 'h'. To
* remedy this, we introduce the notion of a cursor, marked by a '|'
* in the output string:
- *
+ *
* t>|{tau}
* {tau}h>{theta}
+ *
*
* Now when the user types 't', tau appears, and if the next character
* is 'h', the tau changes to a theta. This is accomplished by
@@ -130,7 +131,7 @@ class TransliteratorIDParser;
* which the transliterator last stopped, either because it reached
* the end, or because it required more characters to disambiguate
* between possible inputs. The CURSOR
can also be
- * explicitly set by rules in a RuleBasedTransliterator
.
+ * explicitly set by rules in a rule-based Transliterator.
* Any characters before the CURSOR
index are frozen;
* future keyboard transliteration calls within this input sequence
* will not change them. New text is inserted at the
@@ -232,6 +233,255 @@ class TransliteratorIDParser;
* if the performance of these methods can be improved over the
* performance obtained by the default implementations in this class.
*
+ * Rule syntax
+ *
+ *
A set of rules determines how to perform translations.
+ * Rules within a rule set are separated by semicolons (';').
+ * To include a literal semicolon, prefix it with a backslash ('\').
+ * Unicode Pattern_White_Space is ignored.
+ * If the first non-blank character on a line is '#',
+ * the entire line is ignored as a comment.
+ *
+ *
Each set of rules consists of two groups, one forward, and one
+ * reverse. This is a convention that is not enforced; rules for one
+ * direction may be omitted, with the result that translations in
+ * that direction will not modify the source text. In addition,
+ * bidirectional forward-reverse rules may be specified for
+ * symmetrical transformations.
+ *
+ *
Note: Another description of the Transliterator rule syntax is available in
+ * section
+ * Transform Rules Syntax of UTS #35: Unicode LDML .
+ * The rules are shown there using arrow symbols ← and → and ↔.
+ * ICU supports both those and the equivalent ASCII symbols < and > and <>.
+ *
+ *
Rule statements take one of the following forms:
+ *
+ *
+ * $alefmadda=\\u0622;
+ * Variable definition. The name on the
+ * left is assigned the text on the right. In this example,
+ * after this statement, instances of the left hand name,
+ * "$alefmadda
", will be replaced by
+ * the Unicode character U+0622. Variable names must begin
+ * with a letter and consist only of letters, digits, and
+ * underscores. Case is significant. Duplicate names cause
+ * an exception to be thrown, that is, variables cannot be
+ * redefined. The right hand side may contain well-formed
+ * text of any length, including no text at all ("$empty=;
").
+ * The right hand side may contain embedded UnicodeSet
+ * patterns, for example, "$softvowel=[eiyEIY]
".
+ * ai>$alefmadda;
+ * Forward translation rule. This rule
+ * states that the string on the left will be changed to the
+ * string on the right when performing forward
+ * transliteration.
+ * ai<$alefmadda;
+ * Reverse translation rule. This rule
+ * states that the string on the right will be changed to
+ * the string on the left when performing reverse
+ * transliteration.
+ *
+ *
+ *
+ * ai<>$alefmadda;
+ * Bidirectional translation rule. This
+ * rule states that the string on the right will be changed
+ * to the string on the left when performing forward
+ * transliteration, and vice versa when performing reverse
+ * transliteration.
+ *
+ *
+ * Translation rules consist of a match pattern and an output
+ * string . The match pattern consists of literal characters,
+ * optionally preceded by context, and optionally followed by
+ * context. Context characters, like literal pattern characters,
+ * must be matched in the text being transliterated. However, unlike
+ * literal pattern characters, they are not replaced by the output
+ * text. For example, the pattern "abc{def}
"
+ * indicates the characters "def
" must be
+ * preceded by "abc
" for a successful match.
+ * If there is a successful match, "def
" will
+ * be replaced, but not "abc
". The final '}
'
+ * is optional, so "abc{def
" is equivalent to
+ * "abc{def}
". Another example is "{123}456
"
+ * (or "123}456
") in which the literal
+ * pattern "123
" must be followed by "456
".
+ *
+ *
The output string of a forward or reverse rule consists of
+ * characters to replace the literal pattern characters. If the
+ * output string contains the character '|
', this is
+ * taken to indicate the location of the cursor after
+ * replacement. The cursor is the point in the text at which the
+ * next replacement, if any, will be applied. The cursor is usually
+ * placed within the replacement text; however, it can actually be
+ * placed into the preceding or following context by using the
+ * special character '@'. Examples:
+ *
+ *
+ * a {foo} z > | @ bar; # foo -> bar, move cursor before a
+ * {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
+ *
+ *
+ * UnicodeSet
+ *
+ *
UnicodeSet
patterns may appear anywhere that
+ * makes sense. They may appear in variable definitions.
+ * Contrariwise, UnicodeSet
patterns may themselves
+ * contain variable references, such as "$a=[a-z];$not_a=[^$a]
",
+ * or "$range=a-z;$ll=[$range]
".
+ *
+ *
UnicodeSet
patterns may also be embedded directly
+ * into rule strings. Thus, the following two rules are equivalent:
+ *
+ *
+ * $vowel=[aeiou]; $vowel>'*'; # One way to do this
+ * [aeiou]>'*'; # Another way
+ *
+ *
+ * See {@link UnicodeSet} for more documentation and examples.
+ *
+ *
Segments
+ *
+ *
Segments of the input string can be matched and copied to the
+ * output string. This makes certain sets of rules simpler and more
+ * general, and makes reordering possible. For example:
+ *
+ *
+ * ([a-z]) > $1 $1; # double lowercase letters
+ * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
+ *
+ *
+ * The segment of the input string to be copied is delimited by
+ * "(
" and ")
". Up to
+ * nine segments may be defined. Segments may not overlap. In the
+ * output string, "$1
" through "$9
"
+ * represent the input string segments, in left-to-right order of
+ * definition.
+ *
+ *
Anchors
+ *
+ *
Patterns can be anchored to the beginning or the end of the text. This is done with the
+ * special characters '^
' and '$
'. For example:
+ *
+ *
+ * ^ a > 'BEG_A'; # match 'a' at start of text
+ * a > 'A'; # match other instances of 'a'
+ * z $ > 'END_Z'; # match 'z' at end of text
+ * z > 'Z'; # match other instances of 'z'
+ *
+ *
+ * It is also possible to match the beginning or the end of the text using a UnicodeSet
.
+ * This is done by including a virtual anchor character '$
' at the end of the
+ * set pattern. Although this is usually the match character for the end anchor, the set will
+ * match either the beginning or the end of the text, depending on its placement. For
+ * example:
+ *
+ *
+ * $x = [a-z$]; # match 'a' through 'z' OR anchor
+ * $x 1 > 2; # match '1' after a-z or at the start
+ * 3 $x > 4; # match '3' before a-z or at the end
+ *
+ *
+ * Example
+ *
+ *
The following example rules illustrate many of the features of
+ * the rule language.
+ *
+ *
+ *
+ * Rule 1.
+ * abc{def}>x|y
+ *
+ *
+ * Rule 2.
+ * xyz>r
+ *
+ *
+ * Rule 3.
+ * yz>q
+ *
+ *
+ *
+ * Applying these rules to the string "adefabcdefz
"
+ * yields the following results:
+ *
+ *
+ *
+ * |adefabcdefz
+ * Initial state, no rules match. Advance
+ * cursor.
+ *
+ *
+ * a|defabcdefz
+ * Still no match. Rule 1 does not match
+ * because the preceding context is not present.
+ *
+ *
+ * ad|efabcdefz
+ * Still no match. Keep advancing until
+ * there is a match...
+ *
+ *
+ * ade|fabcdefz
+ * ...
+ *
+ *
+ * adef|abcdefz
+ * ...
+ *
+ *
+ * adefa|bcdefz
+ * ...
+ *
+ *
+ * adefab|cdefz
+ * ...
+ *
+ *
+ * adefabc|defz
+ * Rule 1 matches; replace "def
"
+ * with "xy
" and back up the cursor
+ * to before the 'y
'.
+ *
+ *
+ * adefabcx|yz
+ * Although "xyz
" is
+ * present, rule 2 does not match because the cursor is
+ * before the 'y
', not before the 'x
'.
+ * Rule 3 does match. Replace "yz
"
+ * with "q
".
+ *
+ *
+ * adefabcxq|
+ * The cursor is at the end;
+ * transliteration is complete.
+ *
+ *
+ *
+ * The order of rules is significant. If multiple rules may match
+ * at some point, the first matching rule is applied.
+ *
+ *
Forward and reverse rules may have an empty output string.
+ * Otherwise, an empty left or right hand side of any statement is a
+ * syntax error.
+ *
+ *
Single quotes are used to quote any character other than a
+ * digit or letter. To specify a single quote itself, inside or
+ * outside of quotes, use two single quotes in a row. For example,
+ * the rule "'>'>o''clock
" changes the
+ * string ">
" to the string "o'clock
".
+ *
+ *
Notes
+ *
+ *
While a Transliterator is being built from rules, it checks that
+ * the rules are added in proper order. For example, if the rule
+ * "a>x" is followed by the rule "ab>y",
+ * then the second rule will throw an exception. The reason is that
+ * the second rule can never be triggered, since the first rule
+ * always matches anything it matches. In other words, the first
+ * rule masks the second rule.
+ *
* @author Alan Liu
* @stable ICU 2.0
*/
@@ -627,7 +877,7 @@ public:
/**
* Transliterate a substring of text, as specified by index, taking filters
* into account. This method is for subclasses that need to delegate to
- * another transliterator, such as CompoundTransliterator.
+ * another transliterator.
* @param text the text to be transliterated
* @param index the position indices
* @param incremental if TRUE, then assume more characters may be inserted
@@ -841,17 +1091,19 @@ public:
/**
* Returns a Transliterator
object constructed from
- * the given rule string. This will be a RuleBasedTransliterator,
+ * the given rule string. This will be a rule-based Transliterator,
* if the rule string contains only rules, or a
- * CompoundTransliterator, if it contains ID blocks, or a
- * NullTransliterator, if it contains ID blocks which parse as
+ * compound Transliterator, if it contains ID blocks, or a
+ * null Transliterator, if it contains ID blocks which parse as
* empty for the given direction.
+ *
* @param ID the id for the transliterator.
* @param rules rules, separated by ';'
* @param dir either FORWARD or REVERSE.
- * @param parseError Struct to recieve information on position
+ * @param parseError Struct to receive information on position
* of error if an error is encountered
* @param status Output param set to success/failure code.
+ * @return a newly created Transliterator
* @stable ICU 2.0
*/
static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 4454c67274d..1d1805196f2 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -226,6 +226,7 @@ void addLocaleTest(TestNode** root)
TESTCASE(TestKeywordVariants);
TESTCASE(TestKeywordVariantParsing);
TESTCASE(TestCanonicalization);
+ TESTCASE(TestCanonicalizationBuffer);
TESTCASE(TestKeywordSet);
TESTCASE(TestKeywordSetError);
TESTCASE(TestDisplayKeywords);
@@ -2251,6 +2252,42 @@ static void TestCanonicalization(void)
}
}
+/*
+ * Calls uloc_canonicalize() on a long locale ID with a buffer capacity equal
+ * to the length of the input, so a result of the same length leaves no room
+ * for a terminating NUL. The call must not fail (warnings are accepted), and
+ * the result must have the same length and the same bytes as the input.
+ */
+static void TestCanonicalizationBuffer(void)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    char buffer[256];
+
+    // ULOC_FULLNAME_CAPACITY == 157 (uloc.h)
+    static const char name[] =
+        "zh@x"
+        "=foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
+        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
+        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
+        "-foo-barz"
+    ;
+    /*
+     * Length without the NUL terminator. Kept as int32_t because it is used
+     * as the int32_t capacity argument, compared with the int32_t result and
+     * printed through a varargs logger with an int conversion.
+     */
+    static const int32_t len = (int32_t)(sizeof name - 1);
+
+    int32_t reslen = uloc_canonicalize(name, buffer, len, &status);
+
+    if (U_FAILURE(status)) {
+        log_err("FAIL: uloc_canonicalize(%s) => %s, expected !U_FAILURE()\n",
+                name, u_errorName(status));
+        return;
+    }
+
+    if (reslen != len) {
+        log_err("FAIL: uloc_canonicalize(%s) => \"%i\", expected \"%i\"\n",
+                name, reslen, len);
+        return;
+    }
+
+    /* buffer is not NUL-terminated here, so compare exactly len bytes. */
+    if (uprv_strncmp(name, buffer, len) != 0) {
+        log_err("FAIL: uloc_canonicalize(%s) => \"%.*s\", expected \"%s\"\n",
+                name, reslen, buffer, name);
+        return;
+    }
+}
+
static void TestDisplayKeywords(void)
{
int32_t i;
diff --git a/icu4c/source/test/cintltst/cloctst.h b/icu4c/source/test/cintltst/cloctst.h
index be1896a0c3f..a2ce892ec23 100644
--- a/icu4c/source/test/cintltst/cloctst.h
+++ b/icu4c/source/test/cintltst/cloctst.h
@@ -84,6 +84,7 @@ static void TestDisplayNames(void);
static void doTestDisplayNames(const char* inLocale, int32_t compareIndex);
static void TestCanonicalization(void);
+ static void TestCanonicalizationBuffer(void);
static void TestDisplayKeywords(void);
diff --git a/icu4c/source/test/intltest/cpdtrtst.h b/icu4c/source/test/intltest/cpdtrtst.h
index e723619ad36..1733f1a6e42 100644
--- a/icu4c/source/test/intltest/cpdtrtst.h
+++ b/icu4c/source/test/intltest/cpdtrtst.h
@@ -20,6 +20,7 @@
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/translit.h"
+#include "cpdtrans.h"
#include "intltest.h"
/**
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index d3fc4e286c0..e375c0c5a55 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -252,6 +252,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestToLanguageTag);
TESTCASE_AUTO(TestMoveAssign);
TESTCASE_AUTO(TestMoveCtor);
+ TESTCASE_AUTO(TestBug13417VeryLongLanguageTag);
TESTCASE_AUTO_END;
}
@@ -3125,3 +3126,23 @@ void LocaleTest::TestMoveCtor() {
assertEquals("variant", l7.getVariant(), l8.getVariant());
assertEquals("bogus", l7.isBogus(), l8.isBogus());
}
+
+void LocaleTest::TestBug13417VeryLongLanguageTag() {
+    IcuTestErrorCode status(*this, "TestBug13417VeryLongLanguageTag()");
+    // Round-trip check: a very long tag must survive forLanguageTag() followed by toLanguageTag() unchanged.
+    static const char tag[] =
+        "zh-x"
+        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
+        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
+        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
+        "-foo-bar-baz-fxx"
+    ;
+
+    Locale l = Locale::forLanguageTag(tag, status);
+    status.errIfFailureAndReset("\"%s\"", tag);
+    assertTrue("!l.isBogus()", !l.isBogus());
+    // The result type is a template parameter that cannot be deduced from the call, so name it explicitly.
+    std::string result = l.toLanguageTag<std::string>(status);
+    status.errIfFailureAndReset("\"%s\"", l.getName());
+    assertEquals("equals", tag, result.c_str());
+}
diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h
index d165cae8932..2a83be51a05 100644
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@@ -124,6 +124,8 @@ public:
void TestMoveAssign();
void TestMoveCtor();
+ void TestBug13417VeryLongLanguageTag();
+
private:
void _checklocs(const char* label,
const char* req,
diff --git a/icu4j/build.xml b/icu4j/build.xml
index 2ceb623ad4a..a939d12724a 100644
--- a/icu4j/build.xml
+++ b/icu4j/build.xml
@@ -1729,6 +1729,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
computeMaxExpansions(CollationData data) {
- Map maxExpansions = new HashMap();
+ Map maxExpansions = new HashMap<>();
MaxExpSink sink = new MaxExpSink(maxExpansions);
new ContractionsAndExpansions(null, null, sink, true).forData(data);
return maxExpansions;
@@ -692,11 +692,9 @@ public final class CollationElementIterator
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
- * @internal
- * @deprecated This API is ICU internal only.
+ * @stable ICU 2.8
*/
@Override
- @Deprecated
public int hashCode() {
assert false : "hashCode not designed";
return 42;
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java
index 79889c4296f..fdbbf36f370 100644
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java
@@ -329,7 +329,7 @@ public abstract class Collator implements Comparator, Freezable, Freezable, Freezable values = new LinkedList();
+ LinkedList values = new LinkedList<>();
boolean hasDefault = false;
@Override
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java
index ea597a7edf8..29a75a9e029 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/CharacterProperties.java
@@ -112,6 +112,8 @@ public final class CharacterProperties {
* @return the property as a set
* @see UProperty
* @see UCharacter#hasBinaryProperty
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
*/
public static final UnicodeSet getBinaryPropertySet(int property) {
if (property < 0 || UProperty.BINARY_LIMIT <= property) {
@@ -141,6 +143,8 @@ public final class CharacterProperties {
* @return the property as a map
* @see UProperty
* @see UCharacter#getIntPropertyValue
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
*/
public static final CodePointMap getIntPropertyMap(int property) {
if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java
index fa0322bffed..d5cccc49622 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java
@@ -175,6 +175,7 @@ public abstract class NumberRangeFormatter {
*
* @return An {@link UnlocalizedNumberRangeFormatter}, to be used for chaining.
* @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
*/
public static UnlocalizedNumberRangeFormatter with() {
return BASE;
@@ -188,6 +189,7 @@ public abstract class NumberRangeFormatter {
* The locale from which to load formats and symbols for number range formatting.
* @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining.
* @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
*/
public static LocalizedNumberRangeFormatter withLocale(Locale locale) {
return BASE.locale(locale);
@@ -201,9 +203,15 @@ public abstract class NumberRangeFormatter {
* The locale from which to load formats and symbols for number range formatting.
* @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining.
* @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
*/
public static LocalizedNumberRangeFormatter withLocale(ULocale locale) {
return BASE.locale(locale);
}
+ /**
+ * Private constructor - this class is not designed for instantiation
+ */
+ private NumberRangeFormatter() {
+ }
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java b/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java
index 974bd7cdb02..375b535b90e 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/Precision.java
@@ -408,10 +408,10 @@ public abstract class Precision implements Cloneable {
}
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * {@inheritDoc}
+ * @draft ICU 62
+ * @provisional This API might change or be removed in a future release.
*/
- @Deprecated
@Override
public Object clone() {
try {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java b/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java
index bd0c723b859..0f2f0e7d21a 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/ScientificNotation.java
@@ -93,10 +93,9 @@ public class ScientificNotation extends Notation implements Cloneable {
}
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * @draft ICU 60
+ * @provisional This API might change or be removed in a future release.
*/
- @Deprecated
@Override
public Object clone() {
try {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java
index afc0c2ec4ea..25c0e1c2e4a 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CurrencyPluralInfo.java
@@ -202,7 +202,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
//other.pluralRules = pluralRules;
// clone content
//other.pluralCountToCurrencyUnitPattern = pluralCountToCurrencyUnitPattern;
- other.pluralCountToCurrencyUnitPattern = new HashMap();
+ other.pluralCountToCurrencyUnitPattern = new HashMap<>();
for (String pluralCount : pluralCountToCurrencyUnitPattern.keySet()) {
String currencyPattern = pluralCountToCurrencyUnitPattern.get(pluralCount);
other.pluralCountToCurrencyUnitPattern.put(pluralCount, currencyPattern);
@@ -231,11 +231,9 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
/**
* Override hashCode
*
- * @internal
- * @deprecated This API is ICU internal only.
+ * @stable ICU 4.2
*/
@Override
- @Deprecated
public int hashCode() {
return pluralCountToCurrencyUnitPattern.hashCode()
^ pluralRules.hashCode()
@@ -283,7 +281,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
}
private void setupCurrencyPluralPattern(ULocale uloc) {
- pluralCountToCurrencyUnitPattern = new HashMap();
+ pluralCountToCurrencyUnitPattern = new HashMap<>();
String numberStylePattern = NumberFormat.getPattern(uloc, NumberFormat.NUMBERSTYLE);
// Split the number style pattern into pos and neg if applicable
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java
index baa79b09695..a72da77a473 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java
@@ -273,10 +273,8 @@ public class DateIntervalInfo implements Cloneable, Freezable,
/**
* {@inheritDoc}
- * @internal
- * @deprecated This API is ICU internal only.
+ * @stable ICU 4.0
*/
- @Deprecated
@Override
public String toString() {
return "{first=«" + fIntervalPatternFirstPart + "», second=«" + fIntervalPatternSecondPart + "», reversed:" + fFirstDateInPtnIsLaterDate + "}";
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
index b97d54f0079..89b46478744 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
@@ -821,7 +821,8 @@ public final class Edits {
/**
* A string representation of the current edit represented by the iterator for debugging. You
* should not depend on the contents of the return string; it may change over time.
- * @internal
+ * @return a string representation of the object.
+ * @stable ICU 59
*/
@Override
public String toString() {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java
index 175d92e8d40..df13519c306 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java
@@ -126,9 +126,9 @@ public class MeasureFormat extends UFormat {
private final transient LocalizedNumberFormatter numberFormatter;
- private static final SimpleCache localeToNumericDurationFormatters = new SimpleCache();
+ private static final SimpleCache localeToNumericDurationFormatters = new SimpleCache<>();
- private static final Map hmsTo012 = new HashMap();
+ private static final Map hmsTo012 = new HashMap<>();
static {
hmsTo012.put(MeasureUnit.HOUR, 0);
@@ -486,7 +486,7 @@ public class MeasureFormat extends UFormat {
* Two MeasureFormats, a and b, are equal if and only if they have the same formatWidth, locale, and
* equal number formats.
*
- * @stable ICU 53
+ * @stable ICU 3.0
*/
@Override
public final boolean equals(Object other) {
@@ -506,7 +506,7 @@ public class MeasureFormat extends UFormat {
/**
* {@inheritDoc}
*
- * @stable ICU 53
+ * @stable ICU 3.0
*/
@Override
public final int hashCode() {
@@ -997,7 +997,7 @@ public class MeasureFormat extends UFormat {
this.formatWidth = width;
this.numberFormat = numberFormat;
this.subClass = subClass;
- this.keyValues = new HashMap();
+ this.keyValues = new HashMap<>();
}
// Must have public constructor, to enable Externalizable
@@ -1070,7 +1070,7 @@ public class MeasureFormat extends UFormat {
return values[ordinal];
}
- private static final Map localeIdToRangeFormat = new ConcurrentHashMap();
+ private static final Map localeIdToRangeFormat = new ConcurrentHashMap<>();
/**
* Return a formatter (compiled SimpleFormatter pattern) for a range, such as "{0}–{1}".
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java
index d384c89426d..2a6ab6f9f45 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralRules.java
@@ -1084,7 +1084,7 @@ public class PluralRules implements Serializable {
SampleType sampleType2;
boolean bounded2 = true;
boolean haveBound = false;
- Set samples2 = new LinkedHashSet();
+ Set samples2 = new LinkedHashSet<>();
if (source.startsWith("integer")) {
sampleType2 = SampleType.INTEGER;
@@ -1215,7 +1215,7 @@ public class PluralRules implements Serializable {
static final UnicodeSet BREAK_AND_KEEP = new UnicodeSet('!', '!', '%', '%', ',', ',', '.', '.', '=', '=').freeze();
static String[] split(String source) {
int last = -1;
- List result = new ArrayList();
+ List result = new ArrayList<>();
for (int i = 0; i < source.length(); ++i) {
char ch = source.charAt(i);
if (BREAK_AND_IGNORE.contains(ch)) {
@@ -1334,7 +1334,7 @@ public class PluralRules implements Serializable {
t = nextToken(tokens, x++, condition);
}
- List valueList = new ArrayList();
+ List valueList = new ArrayList<>();
// the token t is always one item ahead
while (true) {
@@ -1756,10 +1756,9 @@ public class PluralRules implements Serializable {
}
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * {@inheritDoc}
+ * @stable ICU 3.8
*/
- @Deprecated
@Override
public int hashCode() {
return keyword.hashCode() ^ constraint.hashCode();
@@ -1773,7 +1772,7 @@ public class PluralRules implements Serializable {
private static class RuleList implements Serializable {
private boolean hasExplicitBoundingInfo = false;
private static final long serialVersionUID = 1;
- private final List rules = new ArrayList();
+ private final List rules = new ArrayList<>();
public RuleList addRule(Rule nextRule) {
String keyword = nextRule.getKeyword();
@@ -1821,7 +1820,7 @@ public class PluralRules implements Serializable {
}
public Set getKeywords() {
- Set result = new LinkedHashSet();
+ Set result = new LinkedHashSet<>();
for (Rule rule : rules) {
result.add(rule.getKeyword());
}
@@ -2020,10 +2019,9 @@ public class PluralRules implements Serializable {
}
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * {@inheritDoc}
+ * @stable ICU 3.8
*/
- @Deprecated
@Override
public int hashCode() {
return rules.hashCode();
@@ -2175,7 +2173,7 @@ public class PluralRules implements Serializable {
if (!keywords.contains(keyword)) {
return null;
}
- Set result = new TreeSet();
+ Set result = new TreeSet<>();
if (rules.hasExplicitBoundingInfo) {
FixedDecimalSamples samples = rules.getDecimalSamples(keyword, sampleType);
@@ -2420,7 +2418,7 @@ public class PluralRules implements Serializable {
// Compute if the quick test is insufficient.
- HashSet subtractedSet = new HashSet(values);
+ HashSet subtractedSet = new HashSet<>(values);
for (Double explicit : explicits) {
subtractedSet.remove(explicit - offset);
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
index 719fefb0771..dbab3142162 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
@@ -941,13 +941,10 @@ public class RuleBasedNumberFormat extends NumberFormat {
}
/**
- * Mock implementation of hashCode(). This implementation always returns a constant
- * value. When Java assertion is enabled, this method triggers an assertion failure.
- * @internal
- * @deprecated This API is ICU internal only.
+ * {@inheritDoc}
+ * @stable ICU 2.0
*/
@Override
- @Deprecated
public int hashCode() {
return super.hashCode();
}
@@ -1731,7 +1728,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
// our rule list is an array of the appropriate size
ruleSets = new NFRuleSet[numRuleSets];
- ruleSetsMap = new HashMap(numRuleSets * 2 + 1);
+ ruleSetsMap = new HashMap<>(numRuleSets * 2 + 1);
defaultRuleSet = null;
// Used to count the number of public rule sets
@@ -1844,7 +1841,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
if (localizations != null) {
publicRuleSetNames = localizations[0].clone();
- Map m = new HashMap();
+ Map m = new HashMap<>();
for (int i = 1; i < localizations.length; ++i) {
String[] data = localizations[i];
String loc = data[0];
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
index 53ab8b0d80c..ca1015d7d13 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
@@ -506,7 +506,7 @@ public class SpoofChecker {
SpoofData fSpoofData;
final UnicodeSet fAllowedCharsSet = new UnicodeSet(0, 0x10ffff); // The UnicodeSet of allowed characters.
// for this Spoof Checker. Defaults to all chars.
- final Set fAllowedLocales = new LinkedHashSet(); // The list of allowed locales.
+ final Set fAllowedLocales = new LinkedHashSet<>(); // The list of allowed locales.
private RestrictionLevel fRestrictionLevel;
/**
@@ -567,7 +567,7 @@ public class SpoofChecker {
result.fSpoofData = this.fSpoofData;
result.fAllowedCharsSet = (UnicodeSet) (this.fAllowedCharsSet.clone());
result.fAllowedCharsSet.freeze();
- result.fAllowedLocales = new HashSet(this.fAllowedLocales);
+ result.fAllowedLocales = new HashSet<>(this.fAllowedLocales);
result.fRestrictionLevel = this.fRestrictionLevel;
return result;
}
@@ -734,7 +734,7 @@ public class SpoofChecker {
* @stable ICU 54
*/
public Builder setAllowedJavaLocales(Set locales) {
- HashSet ulocales = new HashSet(locales.size());
+ HashSet ulocales = new HashSet<>(locales.size());
for (Locale locale : locales) {
ulocales.add(ULocale.forLocale(locale));
}
@@ -848,10 +848,10 @@ public class SpoofChecker {
private int fLineNum;
ConfusabledataBuilder() {
- fTable = new Hashtable();
+ fTable = new Hashtable<>();
fKeySet = new UnicodeSet();
- fKeyVec = new ArrayList();
- fValueVec = new ArrayList();
+ fKeyVec = new ArrayList<>();
+ fValueVec = new ArrayList<>();
stringPool = new SPUStringPool();
}
@@ -1093,8 +1093,8 @@ public class SpoofChecker {
// combination of a uhash and a Vector.
private static class SPUStringPool {
public SPUStringPool() {
- fVec = new Vector();
- fHash = new Hashtable();
+ fVec = new Vector<>();
+ fHash = new Hashtable<>();
}
public int size() {
@@ -1179,7 +1179,7 @@ public class SpoofChecker {
* @stable ICU 54
*/
public Set getAllowedJavaLocales() {
- HashSet locales = new HashSet(fAllowedLocales.size());
+ HashSet locales = new HashSet<>(fAllowedLocales.size());
for (ULocale uloc : fAllowedLocales) {
locales.add(uloc.toLocale());
}
@@ -1535,7 +1535,7 @@ public class SpoofChecker {
* @param other
* the SpoofChecker being compared with.
* @return true if the two SpoofCheckers are equal.
- * @stable ICU 58
+ * @stable ICU 4.6
*/
@Override
public boolean equals(Object other) {
@@ -1565,7 +1565,7 @@ public class SpoofChecker {
/**
* Overrides {@link Object#hashCode()}.
- * @stable ICU 58
+ * @stable ICU 4.6
*/
@Override
public int hashCode() {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java
index 1a73c862098..2b2a91124ad 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrepParseException.java
@@ -143,11 +143,10 @@ public class StringPrepParseException extends ParseException {
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
- * @internal
- * @deprecated This API is ICU internal only.
+ * @return a hash code value for this object.
+ * @stable ICU 2.8
*/
@Override
- @Deprecated
public int hashCode() {
assert false : "hashCode not designed";
return 42;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java
index 70a974ea85f..08c7dc4e18e 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java
@@ -343,7 +343,7 @@ public class TimeUnitFormat extends MeasureFormat {
format = NumberFormat.getNumberInstance(locale);
}
pluralRules = PluralRules.forLocale(locale);
- timeUnitToCountToPatterns = new HashMap>();
+ timeUnitToCountToPatterns = new HashMap<>();
Set pluralKeywords = pluralRules.getKeywords();
setup("units/duration", timeUnitToCountToPatterns, FULL_NAME, pluralKeywords);
setup("unitsShort/duration", timeUnitToCountToPatterns, ABBREVIATED_NAME, pluralKeywords);
@@ -400,7 +400,7 @@ public class TimeUnitFormat extends MeasureFormat {
Map countToPatterns = timeUnitToCountToPatterns.get(timeUnit);
if (countToPatterns == null) {
- countToPatterns = new TreeMap();
+ countToPatterns = new TreeMap<>();
timeUnitToCountToPatterns.put(timeUnit, countToPatterns);
}
@@ -467,7 +467,7 @@ public class TimeUnitFormat extends MeasureFormat {
final TimeUnit timeUnit = timeUnits[i];
Map countToPatterns = timeUnitToCountToPatterns.get(timeUnit);
if (countToPatterns == null) {
- countToPatterns = new TreeMap();
+ countToPatterns = new TreeMap<>();
timeUnitToCountToPatterns.put(timeUnit, countToPatterns);
}
for (String pluralCount : keywords) {
@@ -556,8 +556,7 @@ public class TimeUnitFormat extends MeasureFormat {
// MeasureFormat
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * @deprecated ICU 53 see {@link MeasureFormat}
*/
@Deprecated
@Override
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java
index ba1fd42a24c..e39fdd07cfe 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ByteArrayWrapper.java
@@ -26,7 +26,7 @@ import com.ibm.icu.impl.Utility;
public class ByteArrayWrapper implements Comparable
{
// public data member ------------------------------------------------
-
+
/**
* Internal byte array.
* @stable ICU 2.8
@@ -34,16 +34,16 @@ public class ByteArrayWrapper implements Comparable
public byte[] bytes;
/**
- * Size of the internal byte array used.
- * Different from bytes.length, size will be <= bytes.length.
+ * Size of the internal byte array used.
+ * Different from bytes.length, size will be <= bytes.length.
* Semantics of size is similar to java.util.Vector.size().
* @stable ICU 2.8
*/
public int size;
-
+
// public constructor ------------------------------------------------
- /**
+ /**
* Construct a new ByteArrayWrapper with no data.
* @stable ICU 2.8
*/
@@ -103,15 +103,15 @@ public class ByteArrayWrapper implements Comparable
// public methods ----------------------------------------------------
/**
- * Ensure that the internal byte array is at least of length capacity.
- * If the byte array is null or its length is less than capacity, a new
- * byte array of length capacity will be allocated.
- * The contents of the array (between 0 and size) remain unchanged.
+ * Ensure that the internal byte array is at least of length capacity.
+ * If the byte array is null or its length is less than capacity, a new
+ * byte array of length capacity will be allocated.
+ * The contents of the array (between 0 and size) remain unchanged.
* @param capacity minimum length of internal byte array.
* @return this ByteArrayWrapper
* @stable ICU 3.2
*/
- public ByteArrayWrapper ensureCapacity(int capacity)
+ public ByteArrayWrapper ensureCapacity(int capacity)
{
if (bytes == null || bytes.length < capacity) {
byte[] newbytes = new byte[capacity];
@@ -122,11 +122,11 @@ public class ByteArrayWrapper implements Comparable
}
return this;
}
-
+
/**
- * Set the internal byte array from offset 0 to (limit - start) with the
- * contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new
- * byte array of length (limit - start) will be allocated.
+ * Set the internal byte array from offset 0 to (limit - start) with the
+ * contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new
+ * byte array of length (limit - start) will be allocated.
* This resets the size of the internal byte array to (limit - start).
* @param src source byte array to copy from
* @param start start offset of src to copy from
@@ -134,15 +134,15 @@ public class ByteArrayWrapper implements Comparable
* @return this ByteArrayWrapper
* @stable ICU 3.2
*/
- public final ByteArrayWrapper set(byte[] src, int start, int limit)
+ public final ByteArrayWrapper set(byte[] src, int start, int limit)
{
size = 0;
append(src, start, limit);
return this;
}
-
+
/*
- public final ByteArrayWrapper get(byte[] target, int start, int limit)
+ public final ByteArrayWrapper get(byte[] target, int start, int limit)
{
int len = limit - start;
if (len > size) throw new IllegalArgumentException("limit too long");
@@ -152,7 +152,7 @@ public class ByteArrayWrapper implements Comparable
*/
/**
- * Appends the internal byte array from offset size with the
+ * Appends the internal byte array from offset size with the
* contents of src from offset start to limit. This increases the size of
* the internal byte array to (size + limit - start).
* @param src source byte array to copy from
@@ -161,7 +161,7 @@ public class ByteArrayWrapper implements Comparable
* @return this ByteArrayWrapper
* @stable ICU 3.2
*/
- public final ByteArrayWrapper append(byte[] src, int start, int limit)
+ public final ByteArrayWrapper append(byte[] src, int start, int limit)
{
int len = limit - start;
ensureCapacity(size + len);
@@ -171,7 +171,7 @@ public class ByteArrayWrapper implements Comparable
}
/*
- public final ByteArrayWrapper append(ByteArrayWrapper other)
+ public final ByteArrayWrapper append(ByteArrayWrapper other)
{
return append(other.bytes, 0, other.size);
}
@@ -190,13 +190,14 @@ public class ByteArrayWrapper implements Comparable
size = 0;
return result;
}
-
+
// Boilerplate ----------------------------------------------------
-
+
/**
* Returns string value for debugging
- * @stable ICU 3.2
+ * @stable ICU 2.8
*/
+ @Override
public String toString() {
StringBuilder result = new StringBuilder();
for (int i = 0; i < size; ++i) {
@@ -210,8 +211,9 @@ public class ByteArrayWrapper implements Comparable
* Return true if the bytes in each wrapper are equal.
* @param other the object to compare to.
* @return true if the two objects are equal.
- * @stable ICU 3.2
+ * @stable ICU 2.8
*/
+ @Override
public boolean equals(Object other) {
if (this == other) return true;
if (other == null) return false;
@@ -231,8 +233,9 @@ public class ByteArrayWrapper implements Comparable
/**
* Return the hashcode.
* @return the hashcode.
- * @stable ICU 3.2
+ * @stable ICU 2.8
*/
+ @Override
public int hashCode() {
int result = bytes.length;
for (int i = 0; i < size; ++i) {
@@ -249,6 +252,7 @@ public class ByteArrayWrapper implements Comparable
* @throws ClassCastException if the other object is not a ByteArrayWrapper
* @stable ICU 4.4
*/
+ @Override
public int compareTo(ByteArrayWrapper other) {
if (this == other) return 0;
int minSize = size < other.size ? size : other.size;
@@ -259,11 +263,11 @@ public class ByteArrayWrapper implements Comparable
}
return size - other.size;
}
-
+
// private methods -----------------------------------------------------
-
+
/**
- * Copies the contents of src byte array from offset srcoff to the
+ * Copies the contents of src byte array from offset srcoff to the
* target of tgt byte array at the offset tgtoff.
* @param src source byte array to copy from
* @param srcoff start offset of src to copy from
@@ -271,15 +275,15 @@ public class ByteArrayWrapper implements Comparable
* @param tgtoff start offset of tgt to copy to
* @param length size of contents to copy
*/
- private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
+ private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
int tgtoff, int length) {
if (length < 64) {
for (int i = srcoff, n = tgtoff; -- length >= 0; ++ i, ++ n) {
tgt[n] = src[i];
}
- }
+ }
else {
System.arraycopy(src, srcoff, tgt, tgtoff, length);
}
- }
+ }
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java b/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java
index ff0ba2957b8..8ecf7c32dbe 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/CaseInsensitiveString.java
@@ -17,35 +17,35 @@ import com.ibm.icu.lang.UCharacter;
* @stable ICU 2.0
*/
public class CaseInsensitiveString {
-
+
private String string;
private int hash = 0;
-
+
private String folded = null;
-
+
private static String foldCase(String foldee)
{
return UCharacter.foldCase(foldee, true);
}
-
+
private void getFolded()
{
if (folded == null) {
folded = foldCase(string);
}
}
-
+
/**
* Constructs an CaseInsentiveString object from the given string
- * @param s The string to construct this object from
+ * @param s The string to construct this object from
* @stable ICU 2.0
*/
public CaseInsensitiveString(String s) {
string = s;
}
/**
- * returns the underlying string
+ * returns the underlying string
* @return String
* @stable ICU 2.0
*/
@@ -53,10 +53,11 @@ public class CaseInsensitiveString {
return string;
}
/**
- * Compare the object with this
- * @param o Object to compare this object with
+ * Compare the object with this
+ * @param o Object to compare this object with
* @stable ICU 2.0
*/
+ @Override
public boolean equals(Object o) {
if (o == null) {
return false;
@@ -72,26 +73,29 @@ public class CaseInsensitiveString {
}
return false;
}
-
+
/**
* Returns the hashCode of this object
* @return int hashcode
* @stable ICU 2.0
*/
+ @Override
public int hashCode() {
getFolded();
-
+
if (hash == 0) {
hash = folded.hashCode();
}
-
+
return hash;
}
-
+
/**
* Overrides superclass method
- * @stable ICU 3.6
+ * @return a string representation of the object.
+ * @stable ICU 2.0
*/
+ @Override
public String toString() {
return string;
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java b/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java
index ffc60a3434b..7277053bd8a 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/CodePointMap.java
@@ -316,6 +316,15 @@ public abstract class CodePointMap implements Iterable {
public final int getValue() { return value; }
}
+ /**
+ * Protected no-args constructor.
+ *
+ * @draft ICU 63
+ * @provisional This API might change or be removed in a future release.
+ */
+ protected CodePointMap() {
+ }
+
/**
* Returns the value for a code point as stored in the map, with range checking.
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java b/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java
index c6351b4edf1..caf027103bf 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/JapaneseCalendar.java
@@ -351,31 +351,43 @@ public class JapaneseCalendar extends GregorianCalendar {
/**
* @stable ICU 2.8
*/
- static public final int CURRENT_ERA = ERA_RULES.getCurrentEraIndex();
+ static public final int CURRENT_ERA;
/**
* Constant for the era starting on Sept. 8, 1868 AD.
* @stable ICU 2.8
*/
- static public final int MEIJI = 232;
+ static public final int MEIJI;
/**
* Constant for the era starting on July 30, 1912 AD.
* @stable ICU 2.8
*/
- static public final int TAISHO = 233;
+ static public final int TAISHO;
/**
* Constant for the era starting on Dec. 25, 1926 AD.
* @stable ICU 2.8
*/
- static public final int SHOWA = 234;
+ static public final int SHOWA;
/**
* Constant for the era starting on Jan. 7, 1989 AD.
* @stable ICU 2.8
*/
- static public final int HEISEI = 235;
+ static public final int HEISEI;
+
+ // These era constants are deliberately initialized in a static initializer
+ // block to prevent javac from inlining their values into consumer code.
+ // By doing so, we can keep better binary compatibility across versions even
+ // if these values are changed.
+ static {
+ MEIJI = 232;
+ TAISHO = 233;
+ SHOWA = 234;
+ HEISEI = 235;
+ CURRENT_ERA = ERA_RULES.getCurrentEraIndex();
+ }
/**
* Override GregorianCalendar. We should really handle YEAR_WOY and
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java b/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java
index a4dedc72d09..5fb8e0bac1b 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/MeasureUnit.java
@@ -44,7 +44,7 @@ public class MeasureUnit implements Serializable {
// All access to the cache or cacheIsPopulated flag must be synchronized on class MeasureUnit,
// i.e. from synchronized static methods. Beware of non-static methods.
private static final Map> cache
- = new HashMap>();
+ = new HashMap<>();
private static boolean cacheIsPopulated = false;
/**
@@ -95,7 +95,7 @@ public class MeasureUnit implements Serializable {
/**
* {@inheritDoc}
*
- * @stable ICU 53
+ * @stable ICU 3.0
*/
@Override
public int hashCode() {
@@ -105,7 +105,7 @@ public class MeasureUnit implements Serializable {
/**
* {@inheritDoc}
*
- * @stable ICU 53
+ * @stable ICU 3.0
*/
@Override
public boolean equals(Object rhs) {
@@ -122,7 +122,7 @@ public class MeasureUnit implements Serializable {
/**
* {@inheritDoc}
*
- * @stable ICU 53
+ * @stable ICU 3.0
*/
@Override
public String toString() {
@@ -152,7 +152,7 @@ public class MeasureUnit implements Serializable {
// flexibility for implementation.
// Use CollectionSet instead of HashSet for better performance.
return units == null ? Collections.emptySet()
- : Collections.unmodifiableSet(new CollectionSet(units.values()));
+ : Collections.unmodifiableSet(new CollectionSet<>(units.values()));
}
/**
@@ -161,8 +161,8 @@ public class MeasureUnit implements Serializable {
* @stable ICU 53
*/
public synchronized static Set getAvailable() {
- Set result = new HashSet();
- for (String type : new HashSet(MeasureUnit.getAvailableTypes())) {
+ Set result = new HashSet<>();
+ for (String type : new HashSet<>(MeasureUnit.getAvailableTypes())) {
for (MeasureUnit unit : MeasureUnit.getAvailable(type)) {
result.add(unit);
}
@@ -348,7 +348,7 @@ public class MeasureUnit implements Serializable {
protected synchronized static MeasureUnit addUnit(String type, String unitName, Factory factory) {
Map tmp = cache.get(type);
if (tmp == null) {
- cache.put(type, tmp = new HashMap());
+ cache.put(type, tmp = new HashMap<>());
} else {
// "intern" the type by setting to first item's type.
type = tmp.entrySet().iterator().next().getValue().type;
@@ -1184,7 +1184,7 @@ public class MeasureUnit implements Serializable {
public static final MeasureUnit TEASPOON = MeasureUnit.internalGetInstance("volume", "teaspoon");
private static HashMap, MeasureUnit>unitPerUnitToSingleUnit =
- new HashMap, MeasureUnit>();
+ new HashMap<>();
static {
unitPerUnitToSingleUnit.put(Pair.of(MeasureUnit.LITER, MeasureUnit.KILOMETER), MeasureUnit.LITER_PER_KILOMETER);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java
index 94dc82292af..09f47ff99f3 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/SimpleTimeZone.java
@@ -612,7 +612,7 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Returns a string representation of this object.
* @return a string representation of this object
- * @stable ICU 3.6
+ * @stable ICU 2.0
*/
@Override
public String toString() {
@@ -1140,7 +1140,7 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Overrides equals.
* @return true if obj is a SimpleTimeZone equivalent to this
- * @stable ICU 3.6
+ * @stable ICU 2.0
*/
@Override
public boolean equals(Object obj){
@@ -1180,7 +1180,8 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Overrides hashCode.
- * @stable ICU 3.6
+ * @return a hash code value for this object.
+ * @stable ICU 2.0
*/
@Override
public int hashCode(){
@@ -1208,7 +1209,7 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Overrides clone.
- * @stable ICU 3.6
+ * @stable ICU 2.0
*/
@Override
public Object clone() {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java
index c7a2e8d545b..cea2babe5c7 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/TimeZone.java
@@ -1052,7 +1052,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezabletrue if this object is the same as the obj argument; false
otherwise.
+ * @stable ICU 2.0
*/
@Override
public boolean equals(Object obj){
@@ -1063,7 +1064,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable {
/**
* This is for compatibility with Locale-- in actuality, since ULocale is
* immutable, there is no reason to clone it, so this API returns 'this'.
- * @stable ICU 3.0
+ * @stable ICU 2.8
*/
@Override
public Object clone() {
@@ -677,7 +677,8 @@ public final class ULocale implements Serializable, Comparable {
/**
* Returns the hashCode.
- * @stable ICU 3.0
+ * @return a hash code value for this object.
+ * @stable ICU 2.8
*/
@Override
public int hashCode() {
@@ -691,7 +692,7 @@ public final class ULocale implements Serializable, Comparable {
* function identically might not compare equal.
*
* @return true if this Locale is equal to the specified object.
- * @stable ICU 3.0
+ * @stable ICU 2.8
*/
@Override
public boolean equals(Object obj) {
@@ -1071,7 +1072,8 @@ public final class ULocale implements Serializable, Comparable {
/**
* Returns a string representation of this object.
- * @stable ICU 3.0
+ * @return a string representation of the object.
+ * @stable ICU 2.8
*/
@Override
public String toString() {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java b/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java
index e57a1737da8..18c10eacf42 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/VersionInfo.java
@@ -482,7 +482,7 @@ public final class VersionInfo implements Comparable
*
* @return the hash code value for this set.
* @see java.lang.Object#hashCode()
- * @stable ICU 58
+ * @stable ICU 2.6
*/
@Override
public int hashCode() {
@@ -527,7 +527,7 @@ public final class VersionInfo implements Comparable
/**
* Map of singletons
*/
- private static final ConcurrentHashMap MAP_ = new ConcurrentHashMap();
+ private static final ConcurrentHashMap MAP_ = new ConcurrentHashMap<>();
/**
* Last byte mask
*/
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java
index 97a51fdd2f2..be3beb6fdbd 100644
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/RuleBasedTransliterator.java
@@ -13,259 +13,9 @@ import java.util.Map;
/**
* RuleBasedTransliterator
is a transliterator
- * that reads a set of rules in order to determine how to perform
- * translations. Rule sets are stored in resource bundles indexed by
- * name. Rules within a rule set are separated by semicolons (';').
- * To include a literal semicolon, prefix it with a backslash ('\').
- * Unicode Pattern_White_Space is ignored.
- * If the first non-blank character on a line is '#',
- * the entire line is ignored as a comment.
- *
- * Each set of rules consists of two groups, one forward, and one
- * reverse. This is a convention that is not enforced; rules for one
- * direction may be omitted, with the result that translations in
- * that direction will not modify the source text. In addition,
- * bidirectional forward-reverse rules may be specified for
- * symmetrical transformations.
- *
- *
Rule syntax
- *
- *
Rule statements take one of the following forms:
- *
- *
- * $alefmadda=\u0622;
- * Variable definition. The name on the
- * left is assigned the text on the right. In this example,
- * after this statement, instances of the left hand name,
- * "$alefmadda
", will be replaced by
- * the Unicode character U+0622. Variable names must begin
- * with a letter and consist only of letters, digits, and
- * underscores. Case is significant. Duplicate names cause
- * an exception to be thrown, that is, variables cannot be
- * redefined. The right hand side may contain well-formed
- * text of any length, including no text at all ("$empty=;
").
- * The right hand side may contain embedded UnicodeSet
- * patterns, for example, "$softvowel=[eiyEIY]
".
- *
- * ai>$alefmadda;
- * Forward translation rule. This rule
- * states that the string on the left will be changed to the
- * string on the right when performing forward
- * transliteration.
- *
- * ai<$alefmadda;
- * Reverse translation rule. This rule
- * states that the string on the right will be changed to
- * the string on the left when performing reverse
- * transliteration.
- *
- *
- *
- * ai<>$alefmadda;
- * Bidirectional translation rule. This
- * rule states that the string on the right will be changed
- * to the string on the left when performing forward
- * transliteration, and vice versa when performing reverse
- * transliteration.
- *
- *
- * Translation rules consist of a match pattern and an output
- * string . The match pattern consists of literal characters,
- * optionally preceded by context, and optionally followed by
- * context. Context characters, like literal pattern characters,
- * must be matched in the text being transliterated. However, unlike
- * literal pattern characters, they are not replaced by the output
- * text. For example, the pattern "abc{def}
"
- * indicates the characters "def
" must be
- * preceded by "abc
" for a successful match.
- * If there is a successful match, "def
" will
- * be replaced, but not "abc
". The final '}
'
- * is optional, so "abc{def
" is equivalent to
- * "abc{def}
". Another example is "{123}456
"
- * (or "123}456
") in which the literal
- * pattern "123
" must be followed by "456
".
- *
- *
The output string of a forward or reverse rule consists of
- * characters to replace the literal pattern characters. If the
- * output string contains the character '|
', this is
- * taken to indicate the location of the cursor after
- * replacement. The cursor is the point in the text at which the
- * next replacement, if any, will be applied. The cursor is usually
- * placed within the replacement text; however, it can actually be
- * placed into the precending or following context by using the
- * special character '@
'. Examples:
- *
- *
- * a {foo} z > | @ bar; # foo -> bar, move cursor
- * before a
- * {foo} xyz > bar @@|; # foo -> bar, cursor between
- * y and z
- *
- *
- * UnicodeSet
- *
- *
UnicodeSet
patterns may appear anywhere that
- * makes sense. They may appear in variable definitions.
- * Contrariwise, UnicodeSet
patterns may themselves
- * contain variable references, such as "$a=[a-z];$not_a=[^$a]
",
- * or "$range=a-z;$ll=[$range]
".
- *
- *
UnicodeSet
patterns may also be embedded directly
- * into rule strings. Thus, the following two rules are equivalent:
- *
- *
- * $vowel=[aeiou]; $vowel>'*'; # One way to do this
- * [aeiou]>'*';
- * #
- * Another way
- *
- *
- * See {@link UnicodeSet} for more documentation and examples.
- *
- *
Segments
- *
- *
Segments of the input string can be matched and copied to the
- * output string. This makes certain sets of rules simpler and more
- * general, and makes reordering possible. For example:
- *
- *
- * ([a-z]) > $1 $1;
- * #
- * double lowercase letters
- * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
- *
- *
- * The segment of the input string to be copied is delimited by
- * "(
" and ")
". Up to
- * nine segments may be defined. Segments may not overlap. In the
- * output string, "$1
" through "$9
"
- * represent the input string segments, in left-to-right order of
- * definition.
- *
- *
Anchors
- *
- *
Patterns can be anchored to the beginning or the end of the text. This is done with the
- * special characters '^
' and '$
'. For example:
- *
- *
- * ^ a > 'BEG_A'; # match 'a' at start of text
- * a > 'A'; # match other instances
- * of 'a'
- * z $ > 'END_Z'; # match 'z' at end of text
- * z > 'Z'; # match other instances
- * of 'z'
- *
- *
- * It is also possible to match the beginning or the end of the text using a UnicodeSet
.
- * This is done by including a virtual anchor character '$
' at the end of the
- * set pattern. Although this is usually the match chafacter for the end anchor, the set will
- * match either the beginning or the end of the text, depending on its placement. For
- * example:
- *
- *
- * $x = [a-z$]; # match 'a' through 'z' OR anchor
- * $x 1 > 2; # match '1' after a-z or at the start
- * 3 $x > 4; # match '3' before a-z or at the end
- *
- *
- * Example
- *
- *
The following example rules illustrate many of the features of
- * the rule language.
- *
- *
- *
- * Rule 1.
- * abc{def}>x|y
- *
- *
- * Rule 2.
- * xyz>r
- *
- *
- * Rule 3.
- * yz>q
- *
- *
- *
- * Applying these rules to the string "adefabcdefz
"
- * yields the following results:
- *
- *
- *
- * |adefabcdefz
- * Initial state, no rules match. Advance
- * cursor.
- *
- *
- * a|defabcdefz
- * Still no match. Rule 1 does not match
- * because the preceding context is not present.
- *
- *
- * ad|efabcdefz
- * Still no match. Keep advancing until
- * there is a match...
- *
- *
- * ade|fabcdefz
- * ...
- *
- *
- * adef|abcdefz
- * ...
- *
- *
- * adefa|bcdefz
- * ...
- *
- *
- * adefab|cdefz
- * ...
- *
- *
- * adefabc|defz
- * Rule 1 matches; replace "def
"
- * with "xy
" and back up the cursor
- * to before the 'y
'.
- *
- *
- * adefabcx|yz
- * Although "xyz
" is
- * present, rule 2 does not match because the cursor is
- * before the 'y
', not before the 'x
'.
- * Rule 3 does match. Replace "yz
"
- * with "q
".
- *
- *
- * adefabcxq|
- * The cursor is at the end;
- * transliteration is complete.
- *
- *
- *
- * The order of rules is significant. If multiple rules may match
- * at some point, the first matching rule is applied.
- *
- *
Forward and reverse rules may have an empty output string.
- * Otherwise, an empty left or right hand side of any statement is a
- * syntax error.
- *
- *
Single quotes are used to quote any character other than a
- * digit or letter. To specify a single quote itself, inside or
- * outside of quotes, use two single quotes in a row. For example,
- * the rule "'>'>o''clock
" changes the
- * string ">
" to the string "o'clock
".
- *
- *
Notes
- *
- *
While a RuleBasedTransliterator is being built, it checks that
- * the rules are added in proper order. For example, if the rule
- * "a>x" is followed by the rule "ab>y",
- * then the second rule will throw an exception. The reason is that
- * the second rule can never be triggered, since the first rule
- * always matches anything it matches. In other words, the first
- * rule masks the second rule.
+ * built from a set of rules as defined for
+ * {@link Transliterator#createFromRules(String, String, int)}.
+ * See the class {@link Transliterator} documentation for the rule syntax.
*
* @author Alan Liu
* @internal
@@ -369,7 +119,7 @@ public class RuleBasedTransliterator extends Transliterator {
static class Data {
public Data() {
- variableNames = new HashMap();
+ variableNames = new HashMap<>();
ruleSet = new TransliterationRuleSet();
}
@@ -487,5 +237,3 @@ public class RuleBasedTransliterator extends Transliterator {
return new RuleBasedTransliterator(getID(), data, filter);
}
}
-
-
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java
index 3d7a7e75316..01be8a96dff 100644
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/Transliterator.java
@@ -83,7 +83,7 @@ import com.ibm.icu.util.UResourceBundle;
* modified as each new character arrives.
*
*
- * Consider the simple RuleBasedTransliterator
:
+ * Consider the simple rule-based Transliterator:
*
*
* th>{theta}
@@ -110,8 +110,8 @@ import com.ibm.icu.util.UResourceBundle;
* that the transliterator will look at. It is advanced as text becomes committed (but it is not the committed index;
* that's the cursor
). The cursor
index, described above, marks the point at which the
* transliterator last stopped, either because it reached the end, or because it required more characters to
- * disambiguate between possible inputs. The cursor
can also be explicitly set by rules in a
- * RuleBasedTransliterator
. Any characters before the cursor
index are frozen; future keyboard
+ * disambiguate between possible inputs. The cursor
can also be explicitly set by rules.
+ * Any characters before the cursor
index are frozen; future keyboard
* transliteration calls within this input sequence will not change them. New text is inserted at the limit
* index, which marks the end of the substring that the transliterator looks at.
*
@@ -222,13 +222,262 @@ import com.ibm.icu.util.UResourceBundle;
* transliterate()
method taking a String
and StringBuffer
if the performance of
* these methods can be improved over the performance obtained by the default implementations in this class.
*
+ * Rule syntax
+ *
+ *
A set of rules determines how to perform translations.
+ * Rules within a rule set are separated by semicolons (';').
+ * To include a literal semicolon, prefix it with a backslash ('\').
+ * Unicode Pattern_White_Space is ignored.
+ * If the first non-blank character on a line is '#',
+ * the entire line is ignored as a comment.
+ *
+ *
Each set of rules consists of two groups, one forward, and one
+ * reverse. This is a convention that is not enforced; rules for one
+ * direction may be omitted, with the result that translations in
+ * that direction will not modify the source text. In addition,
+ * bidirectional forward-reverse rules may be specified for
+ * symmetrical transformations.
+ *
+ *
Note: Another description of the Transliterator rule syntax is available in
+ * section
+ * Transform Rules Syntax of UTS #35: Unicode LDML .
+ * The rules are shown there using arrow symbols ← and → and ↔.
+ * ICU supports both those and the equivalent ASCII symbols < and > and <>.
+ *
+ *
Rule statements take one of the following forms:
+ *
+ *
+ * $alefmadda=\\u0622;
+ * Variable definition. The name on the
+ * left is assigned the text on the right. In this example,
+ * after this statement, instances of the left hand name,
+ * "$alefmadda
", will be replaced by
+ * the Unicode character U+0622. Variable names must begin
+ * with a letter and consist only of letters, digits, and
+ * underscores. Case is significant. Duplicate names cause
+ * an exception to be thrown, that is, variables cannot be
+ * redefined. The right hand side may contain well-formed
+ * text of any length, including no text at all ("$empty=;
").
+ * The right hand side may contain embedded UnicodeSet
+ * patterns, for example, "$softvowel=[eiyEIY]
".
+ * ai>$alefmadda;
+ * Forward translation rule. This rule
+ * states that the string on the left will be changed to the
+ * string on the right when performing forward
+ * transliteration.
+ * ai<$alefmadda;
+ * Reverse translation rule. This rule
+ * states that the string on the right will be changed to
+ * the string on the left when performing reverse
+ * transliteration.
+ *
+ *
+ *
+ * ai<>$alefmadda;
+ * Bidirectional translation rule. This
+ * rule states that the string on the right will be changed
+ * to the string on the left when performing forward
+ * transliteration, and vice versa when performing reverse
+ * transliteration.
+ *
+ *
+ * Translation rules consist of a match pattern and an output
+ * string . The match pattern consists of literal characters,
+ * optionally preceded by context, and optionally followed by
+ * context. Context characters, like literal pattern characters,
+ * must be matched in the text being transliterated. However, unlike
+ * literal pattern characters, they are not replaced by the output
+ * text. For example, the pattern "abc{def}
"
+ * indicates the characters "def
" must be
+ * preceded by "abc
" for a successful match.
+ * If there is a successful match, "def
" will
+ * be replaced, but not "abc
". The final '}
'
+ * is optional, so "abc{def
" is equivalent to
+ * "abc{def}
". Another example is "{123}456
"
+ * (or "123}456
") in which the literal
+ * pattern "123
" must be followed by "456
".
+ *
+ *
The output string of a forward or reverse rule consists of
+ * characters to replace the literal pattern characters. If the
+ * output string contains the character '|
', this is
+ * taken to indicate the location of the cursor after
+ * replacement. The cursor is the point in the text at which the
+ * next replacement, if any, will be applied. The cursor is usually
+ * placed within the replacement text; however, it can actually be
+ * placed into the preceding or following context by using the
+ * special character '@'. Examples:
+ *
+ *
+ * a {foo} z > | @ bar; # foo -> bar, move cursor before a
+ * {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
+ *
+ *
+ * UnicodeSet
+ *
+ *
UnicodeSet
patterns may appear anywhere that
+ * makes sense. They may appear in variable definitions.
+ * Contrariwise, UnicodeSet
patterns may themselves
+ * contain variable references, such as "$a=[a-z];$not_a=[^$a]
",
+ * or "$range=a-z;$ll=[$range]
".
+ *
+ *
UnicodeSet
patterns may also be embedded directly
+ * into rule strings. Thus, the following two rules are equivalent:
+ *
+ *
+ * $vowel=[aeiou]; $vowel>'*'; # One way to do this
+ * [aeiou]>'*'; # Another way
+ *
+ *
+ * See {@link UnicodeSet} for more documentation and examples.
+ *
+ *
Segments
+ *
+ *
Segments of the input string can be matched and copied to the
+ * output string. This makes certain sets of rules simpler and more
+ * general, and makes reordering possible. For example:
+ *
+ *
+ * ([a-z]) > $1 $1; # double lowercase letters
+ * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
+ *
+ *
+ * The segment of the input string to be copied is delimited by
+ * "(
" and ")
". Up to
+ * nine segments may be defined. Segments may not overlap. In the
+ * output string, "$1
" through "$9
"
+ * represent the input string segments, in left-to-right order of
+ * definition.
+ *
+ *
Anchors
+ *
+ *
Patterns can be anchored to the beginning or the end of the text. This is done with the
+ * special characters '^
' and '$
'. For example:
+ *
+ *
+ * ^ a > 'BEG_A'; # match 'a' at start of text
+ * a > 'A'; # match other instances of 'a'
+ * z $ > 'END_Z'; # match 'z' at end of text
+ * z > 'Z'; # match other instances of 'z'
+ *
+ *
+ * It is also possible to match the beginning or the end of the text using a UnicodeSet
.
+ * This is done by including a virtual anchor character '$
' at the end of the
+ * set pattern. Although this is usually the match character for the end anchor, the set will
+ * match either the beginning or the end of the text, depending on its placement. For
+ * example:
+ *
+ *
+ * $x = [a-z$]; # match 'a' through 'z' OR anchor
+ * $x 1 > 2; # match '1' after a-z or at the start
+ * 3 $x > 4; # match '3' before a-z or at the end
+ *
+ *
+ * Example
+ *
+ *
The following example rules illustrate many of the features of
+ * the rule language.
+ *
+ *
+ *
+ * Rule 1.
+ * abc{def}>x|y
+ *
+ *
+ * Rule 2.
+ * xyz>r
+ *
+ *
+ * Rule 3.
+ * yz>q
+ *
+ *
+ *
+ * Applying these rules to the string "adefabcdefz
"
+ * yields the following results:
+ *
+ *
+ *
+ * |adefabcdefz
+ * Initial state, no rules match. Advance
+ * cursor.
+ *
+ *
+ * a|defabcdefz
+ * Still no match. Rule 1 does not match
+ * because the preceding context is not present.
+ *
+ *
+ * ad|efabcdefz
+ * Still no match. Keep advancing until
+ * there is a match...
+ *
+ *
+ * ade|fabcdefz
+ * ...
+ *
+ *
+ * adef|abcdefz
+ * ...
+ *
+ *
+ * adefa|bcdefz
+ * ...
+ *
+ *
+ * adefab|cdefz
+ * ...
+ *
+ *
+ * adefabc|defz
+ * Rule 1 matches; replace "def
"
+ * with "xy
" and back up the cursor
+ * to before the 'y
'.
+ *
+ *
+ * adefabcx|yz
+ * Although "xyz
" is
+ * present, rule 2 does not match because the cursor is
+ * before the 'y
', not before the 'x
'.
+ * Rule 3 does match. Replace "yz
"
+ * with "q
".
+ *
+ *
+ * adefabcxq|
+ * The cursor is at the end;
+ * transliteration is complete.
+ *
+ *
+ *
+ * The order of rules is significant. If multiple rules may match
+ * at some point, the first matching rule is applied.
+ *
+ *
Forward and reverse rules may have an empty output string.
+ * Otherwise, an empty left or right hand side of any statement is a
+ * syntax error.
+ *
+ *
Single quotes are used to quote any character other than a
+ * digit or letter. To specify a single quote itself, inside or
+ * outside of quotes, use two single quotes in a row. For example,
+ * the rule "'>'>o''clock
" changes the
+ * string ">
" to the string "o'clock
".
+ *
+ *
Notes
+ *
+ *
While a Transliterator is being built from rules, it checks that
+ * the rules are added in proper order. For example, if the rule
+ * "a>x" is followed by the rule "ab>y",
+ * then the second rule will throw an exception. The reason is that
+ * the second rule can never be triggered, since the first rule
+ * always matches anything it matches. In other words, the first
+ * rule masks the second rule.
+ *
* @author Alan Liu
* @stable ICU 2.0
*/
public abstract class Transliterator implements StringTransform {
/**
* Direction constant indicating the forward direction in a transliterator,
- * e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
+ * e.g., the forward rules of a rule-based Transliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @stable ICU 2.0
@@ -237,7 +486,7 @@ public abstract class Transliterator implements StringTransform {
/**
* Direction constant indicating the reverse direction in a transliterator,
- * e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
+ * e.g., the reverse rules of a rule-based Transliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @stable ICU 2.0
@@ -358,7 +607,7 @@ public abstract class Transliterator implements StringTransform {
/**
* Returns true if this Position is equal to the given object.
- * @stable ICU 2.6
+ * @stable ICU 2.0
*/
@Override
public boolean equals(Object obj) {
@@ -373,7 +622,8 @@ public abstract class Transliterator implements StringTransform {
}
/**
- * @draft ICU 63
+ * {@inheritDoc}
+ * @stable ICU 2.0
*/
@Override
public int hashCode() {
@@ -382,7 +632,8 @@ public abstract class Transliterator implements StringTransform {
/**
* Returns a string representation of this Position.
- * @stable ICU 2.6
+ * @return a string representation of the object.
+ * @stable ICU 2.0
*/
@Override
public String toString() {
@@ -1100,7 +1351,7 @@ public abstract class Transliterator implements StringTransform {
/**
* Transliterate a substring of text, as specified by index, taking filters
* into account. This method is for subclasses that need to delegate to
- * another transliterator, such as CompoundTransliterator.
+ * another transliterator.
* @param text the text to be transliterated
* @param index the position indices
* @param incremental if TRUE, then assume more characters may be inserted
@@ -1343,7 +1594,7 @@ public abstract class Transliterator implements StringTransform {
public static Transliterator getInstance(String ID,
int dir) {
StringBuffer canonID = new StringBuffer();
- List list = new ArrayList();
+ List list = new ArrayList<>();
UnicodeSet[] globalFilter = new UnicodeSet[1];
if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) {
throw new IllegalArgumentException("Invalid ID " + ID);
@@ -1398,11 +1649,17 @@ public abstract class Transliterator implements StringTransform {
/**
* Returns a Transliterator
object constructed from
- * the given rule string. This will be a RuleBasedTransliterator,
+ * the given rule string. This will be a rule-based Transliterator,
* if the rule string contains only rules, or a
- * CompoundTransliterator, if it contains ID blocks, or a
- * NullTransliterator, if it contains ID blocks which parse as
+ * compound Transliterator, if it contains ID blocks, or a
+ * null Transliterator, if it contains ID blocks which parse as
* empty for the given direction.
+ *
+ * @param ID the id for the transliterator.
+ * @param rules rules, separated by ';'
+ * @param dir either FORWARD or REVERSE.
+ * @return a newly created Transliterator
+ * @throws IllegalArgumentException if there is a problem with the ID or the rules
* @stable ICU 2.0
*/
public static final Transliterator createFromRules(String ID, String rules, int dir) {
@@ -1435,7 +1692,7 @@ public abstract class Transliterator implements StringTransform {
}
}
else {
- List transliterators = new ArrayList();
+ List transliterators = new ArrayList<>();
int passNumber = 1;
int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size());
diff --git a/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/APIStatusConsistencyChecker.java b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/APIStatusConsistencyChecker.java
new file mode 100644
index 00000000000..d6d850a5981
--- /dev/null
+++ b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/APIStatusConsistencyChecker.java
@@ -0,0 +1,162 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.tool.docs;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+
+/**
+ * Checks that the API status (and status version) of the common
+ * {@code Object} method overrides listed in {@link #METHODS} (equals,
+ * hashCode, toString and clone) is the same as that of the containing class.
+ *
+ * @author Yoshito
+ */
+public class APIStatusConsistencyChecker {
+    // Largest exit status used by main(). On POSIX systems only the low 8 bits
+    // of the status reach the parent process, so larger values would wrap.
+    private static final int MAX_EXIT_STATUS = 255;
+
+    /**
+     * Command line entry point. The process exit status is the number of
+     * errors found, capped at 255; 0 means no errors.
+     *
+     * @param args args[0] is the API signature file path; args[1] is an
+     *             optional list of class names to be skipped, separated
+     *             by semicolons
+     */
+    public static void main(String[] args) {
+        // args[0]  API signature file path
+        // args[1]  (Optional) List of classes to be skipped, separated by semicolon
+        if (args.length < 1) {
+            System.err.println("Missing API signature file path.");
+            // Stop here: reading args[0] below would otherwise throw.
+            System.exit(1);
+        } else if (args.length > 2) {
+            // Reported but not fatal; the skip list is only read when exactly
+            // two arguments are given.
+            System.err.println("Too many command arguments");
+        }
+
+        List<String> skipClasses = Collections.emptyList();
+        if (args.length == 2) {
+            String[] classes = args[1].split(";");
+            skipClasses = Arrays.asList(classes);
+        }
+
+        // Load the ICU4J API signature file
+        Set<APIInfo> apiInfoSet = APIData.read(new File(args[0]), true).getAPIInfoSet();
+        APIStatusConsistencyChecker checker = new APIStatusConsistencyChecker(apiInfoSet, skipClasses, new PrintWriter(System.err, true));
+        checker.checkConsistency();
+        // Cap the status so that an error count that is a multiple of 256
+        // cannot be mistaken for success.
+        System.exit(Math.min(checker.errCount, MAX_EXIT_STATUS));
+    }
+
+    private int errCount = 0;
+    private Set<APIInfo> apiInfoSet;
+    private PrintWriter pw;
+    private List<String> skipClasses;
+
+    /**
+     * @param apiInfoSet  the API entries to check
+     * @param skipClasses full names (package + "." + class) of classes whose methods are not checked
+     * @param pw          writer that receives the error messages
+     */
+    public APIStatusConsistencyChecker(Set<APIInfo> apiInfoSet, List<String> skipClasses, PrintWriter pw) {
+        this.apiInfoSet = apiInfoSet;
+        this.skipClasses = skipClasses;
+        this.pw = pw;
+    }
+
+    /**
+     * @return the number of errors counted by {@link #checkConsistency()}
+     */
+    public int errorCount() {
+        return errCount;
+    }
+
+    // Methods that should have same API status with a containing class
+    static final String[][] METHODS = {
+        // {method name, method signature}
+        {"equals", "boolean(java.lang.Object)"},
+        {"hashCode", "int()"},
+        {"toString", "java.lang.String()"},
+        {"clone", "java.lang.Object()"},
+    };
+
+    /**
+     * Compares every method that matches an entry of {@link #METHODS} with
+     * its containing class. A method whose API status or status version
+     * differs from the class, or whose class is not found among the public
+     * or protected classes and enums, is reported to the writer and counted
+     * as an error. Methods of skipped classes are ignored.
+     */
+    public void checkConsistency() {
+        Map<String, APIInfo> classMap = new TreeMap<>();
+        // Build a map of APIInfo for classes, indexed by class name
+        for (APIInfo api : apiInfoSet) {
+            if (!api.isPublic() && !api.isProtected()) {
+                continue;
+            }
+            if (!api.isClass() && !api.isEnum()) {
+                continue;
+            }
+            String fullClassName = api.getPackageName() + "." + api.getName();
+            classMap.put(fullClassName, api);
+        }
+
+        // Walk through methods
+        for (APIInfo api : apiInfoSet) {
+            if (!api.isMethod()) {
+                continue;
+            }
+
+            String fullClassName = api.getPackageName() + "." + api.getClassName();
+            if (skipClasses.contains(fullClassName)) {
+                continue;
+            }
+
+            boolean checkWithClass = false;
+            String methodName = api.getName();
+            String methodSig = api.getSignature();
+
+            for (String[] method : METHODS) {
+                if (method[0].equals(methodName) && method[1].equals(methodSig)) {
+                    checkWithClass = true;
+                    break;
+                }
+            }
+
+            if (!checkWithClass) {
+                continue;
+            }
+
+            // Check if this method has same API status with the containing class
+            APIInfo clsApi = classMap.get(fullClassName);
+            if (clsApi == null) {
+                pw.println("## Error ## Class " + fullClassName + " is not found.");
+                errCount++;
+                // No class entry to compare with; go on to the next method.
+                continue;
+            }
+
+            int methodStatus = api.getVal(APIInfo.STA);
+            String methodVer = api.getStatusVersion();
+            int classStatus = clsApi.getVal(APIInfo.STA);
+            String classVer = clsApi.getStatusVersion();
+
+            if (methodStatus != classStatus || !Objects.equals(methodVer, classVer)) {
+                pw.println("## Error ## " + methodName + " in " + fullClassName);
+                errCount++;
+            }
+        }
+    }
+}
diff --git a/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java
index 61abf722360..46aca4589de 100644
--- a/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java
+++ b/icu4j/tools/build/src/com/ibm/icu/dev/tool/docs/DeprecatedAPIChecker.java
@@ -56,7 +56,7 @@ public class DeprecatedAPIChecker {
public void checkDeprecated() {
// Gather API class/enum names and its names that can be
// used for Class.forName()
- Map apiClassNameMap = new TreeMap();
+ Map apiClassNameMap = new TreeMap<>();
for (APIInfo api : apiInfoSet) {
if (!api.isPublic() && !api.isProtected()) {
continue;
@@ -133,6 +133,18 @@ public class DeprecatedAPIChecker {
}
List paramNames = getParamNames(ctor);
+
+ Class<?> declClass = cls.getDeclaringClass();
+ if (declClass != null && !Modifier.isStatic(cls.getModifiers())) {
+ // This is non-static inner class's constructor.
+ // javac automatically injects instance of declaring class
+ // as the first param of the constructor, but ICU's API
+ // signature is based on javadoc and it generates signature
+ // without the implicit parameter.
+ assert paramNames.get(0).equals(declClass.getName());
+ paramNames.remove(0);
+ }
+
api = findConstructorInfo(apiInfoSet, clsName, paramNames);
if (api == null) {
@@ -351,7 +363,7 @@ public class DeprecatedAPIChecker {
throw new IllegalArgumentException(api.toString() + " is not a constructor or a method.");
}
- List nameList = new ArrayList();
+ List nameList = new ArrayList<>();
String signature = api.getSignature();
int start = signature.indexOf('(');
int end = signature.indexOf(')');
@@ -410,7 +422,7 @@ public class DeprecatedAPIChecker {
private static char[] PRIMITIVE_SIGNATURES = { 'B', 'S', 'I', 'J', 'F', 'D', 'Z', 'C' };
private static List toTypeNameList(Type[] types) {
- List nameList = new ArrayList();
+ List nameList = new ArrayList<>();
for (Type t : types) {
StringBuilder s = new StringBuilder();