ICU-20119 Merge github.com:unicode-org/icu into icu63_1

This commit is contained in:
Norbert Runge 2018-09-20 09:03:58 -07:00 committed by Shane Carr
commit c28e2510c4
No known key found for this signature in database
GPG key ID: FCED3B24AAB18B5C
38 changed files with 1113 additions and 736 deletions

View file

@ -798,7 +798,7 @@ _getKeywords(const char *localeID,
}
keywordsLen += keywordList[i].keywordLen + 1;
if(valuesToo) {
if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
}
keywordsLen += keywordList[i].valueLen;

View file

@ -12,11 +12,13 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uvector.h"
#include "uassert.h"
@ -172,6 +174,46 @@ static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
namespace {
// Helper class to memory manage CharString objects.
// Only ever stack-allocated, does not need to inherit UMemory.
class CharStringPool {
public:
CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
~CharStringPool() = default;
CharStringPool(const CharStringPool&) = delete;
CharStringPool& operator=(const CharStringPool&) = delete;
icu::CharString* create() {
if (U_FAILURE(status)) {
return nullptr;
}
icu::CharString* const obj = new icu::CharString;
if (obj == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
pool.addElement(obj, status);
if (U_FAILURE(status)) {
delete obj;
return nullptr;
}
return obj;
}
private:
static void U_CALLCONV deleter(void* obj) {
delete static_cast<icu::CharString*>(obj);
}
UErrorCode status;
icu::UVector pool;
};
} // namespace
/*
* -------------------------------------------------
*
@ -900,7 +942,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
static int32_t
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
int32_t attrBufLength = 0;
UEnumeration *keywordEnum = NULL;
@ -920,22 +961,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
AttributeListEntry *firstAttr = NULL;
AttributeListEntry *attr;
char *attrValue;
char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char *pExtBuf = extBuf;
int32_t extBufCapacity = sizeof(extBuf);
CharStringPool extBufPool;
const char *bcpKey=nullptr, *bcpValue=nullptr;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
UBool isBcpUExt;
while (TRUE) {
icu::CharString buf;
key = uenum_next(keywordEnum, NULL, status);
if (key == NULL) {
break;
}
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
/* buf must be null-terminated */
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
char* buffer;
int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
for (;;) {
buffer = buf.getAppendBuffer(
/*minCapacity=*/resultCapacity,
/*desiredCapacityHint=*/resultCapacity,
resultCapacity,
tmpStatus);
if (U_FAILURE(tmpStatus)) {
break;
}
len = uloc_getKeywordValue(
localeID, key, buffer, resultCapacity, &tmpStatus);
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
break;
}
resultCapacity = len;
tmpStatus = U_ZERO_ERROR;
}
if (U_FAILURE(tmpStatus)) {
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -945,6 +1012,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
buf.append(buffer, len, tmpStatus);
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
}
keylen = (int32_t)uprv_strlen(key);
isBcpUExt = (keylen > 1);
@ -1007,7 +1079,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
/* we've checked buf is null-terminated above */
bcpValue = uloc_toUnicodeLocaleType(key, buf);
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@ -1015,33 +1087,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
continue;
}
if (bcpValue == buf) {
/*
if (bcpValue == buf.data()) {
/*
When uloc_toUnicodeLocaleType(key, buf) returns the
input value as is, the value is well-formed, but has
no known mapping. This implementation normalizes the
the value to lower case
value to lower case
*/
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
if (bcpValueLen < extBufCapacity) {
uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf);
bcpValue = pExtBuf;
pExtBuf += (bcpValueLen + 1);
extBufCapacity -= (bcpValueLen + 1);
} else {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
icu::CharString* extBuf = extBufPool.create();
if (extBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
int32_t resultCapacity;
char* pExtBuf = extBuf->getAppendBuffer(
/*minCapacity=*/bcpValueLen,
/*desiredCapacityHint=*/bcpValueLen,
resultCapacity,
tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf);
extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
bcpValue = extBuf->data();
}
} else {
if (*key == PRIVATEUSE) {
if (!_isPrivateuseValueSubtags(buf, len)) {
if (!_isPrivateuseValueSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -1049,7 +1132,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
} else {
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -1058,20 +1141,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
bcpKey = key;
if ((len + 1) < extBufCapacity) {
uprv_memcpy(pExtBuf, buf, len);
bcpValue = pExtBuf;
pExtBuf += len;
*pExtBuf = 0;
pExtBuf++;
extBufCapacity -= (len + 1);
} else {
*status = U_ILLEGAL_ARGUMENT_ERROR;
icu::CharString* extBuf = extBufPool.create();
if (extBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
extBuf->append(buf.data(), len, tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
bcpValue = extBuf->data();
}
/* create ExtensionListEntry */
@ -2337,31 +2417,66 @@ uloc_toLanguageTag(const char* localeID,
int32_t langtagCapacity,
UBool strict,
UErrorCode* status) {
/* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
char canonical[256];
int32_t reslen = 0;
icu::CharString canonical;
int32_t reslen;
UErrorCode tmpStatus = U_ZERO_ERROR;
UBool hadPosix = FALSE;
const char* pKeywordStart;
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
canonical[0] = 0;
if (uprv_strlen(localeID) > 0) {
uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
if (tmpStatus != U_ZERO_ERROR) {
int32_t resultCapacity = uprv_strlen(localeID);
if (resultCapacity > 0) {
char* buffer;
for (;;) {
buffer = canonical.getAppendBuffer(
/*minCapacity=*/resultCapacity,
/*desiredCapacityHint=*/resultCapacity,
resultCapacity,
tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return 0;
}
reslen =
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
break;
}
resultCapacity = reslen;
tmpStatus = U_ZERO_ERROR;
}
if (U_FAILURE(tmpStatus)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
canonical.append(buffer, reslen, tmpStatus);
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
}
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return 0;
}
}
reslen = 0;
/* For handling special case - private use only tag */
pKeywordStart = locale_getKeywordsStart(canonical);
if (pKeywordStart == canonical) {
pKeywordStart = locale_getKeywordsStart(canonical.data());
if (pKeywordStart == canonical.data()) {
UEnumeration *kwdEnum;
int kwdCnt = 0;
UBool done = FALSE;
kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
if (kwdEnum != NULL) {
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
if (kwdCnt == 1) {
@ -2399,12 +2514,12 @@ uloc_toLanguageTag(const char* localeID,
}
}
reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
return reslen;
}

View file

@ -29,262 +29,10 @@ class TransliterationRuleData;
/**
* <code>RuleBasedTransliterator</code> is a transliterator
* that reads a set of rules in order to determine how to perform
* translations. Rule sets are stored in resource bundles indexed by
* name. Rules within a rule set are separated by semicolons (';').
* To include a literal semicolon, prefix it with a backslash ('\').
* Whitespace, as defined by <code>Character.isWhitespace()</code>,
* is ignored. If the first non-blank character on a line is '#',
* the entire line is ignored as a comment. </p>
*
* <p>Each set of rules consists of two groups, one forward, and one
* reverse. This is a convention that is not enforced; rules for one
* direction may be omitted, with the result that translations in
* that direction will not modify the source text. In addition,
* bidirectional forward-reverse rules may be specified for
* symmetrical transformations.</p>
*
* <p><b>Rule syntax</b> </p>
*
* <p>Rule statements take one of the following forms: </p>
*
* <dl>
* <dt><code>$alefmadda=\u0622;</code></dt>
* <dd><strong>Variable definition.</strong> The name on the
* left is assigned the text on the right. In this example,
* after this statement, instances of the left hand name,
* &quot;<code>$alefmadda</code>&quot;, will be replaced by
* the Unicode character U+0622. Variable names must begin
* with a letter and consist only of letters, digits, and
* underscores. Case is significant. Duplicate names cause
* an exception to be thrown, that is, variables cannot be
* redefined. The right hand side may contain well-formed
* text of any length, including no text at all (&quot;<code>$empty=;</code>&quot;).
* The right hand side may contain embedded <code>UnicodeSet</code>
* patterns, for example, &quot;<code>$softvowel=[eiyEIY]</code>&quot;.</dd>
* <dd>&nbsp;</dd>
* <dt><code>ai&gt;$alefmadda;</code></dt>
* <dd><strong>Forward translation rule.</strong> This rule
* states that the string on the left will be changed to the
* string on the right when performing forward
* transliteration.</dd>
* <dt>&nbsp;</dt>
* <dt><code>ai<$alefmadda;</code></dt>
* <dd><strong>Reverse translation rule.</strong> This rule
* states that the string on the right will be changed to
* the string on the left when performing reverse
* transliteration.</dd>
* </dl>
*
* <dl>
* <dt><code>ai<>$alefmadda;</code></dt>
* <dd><strong>Bidirectional translation rule.</strong> This
* rule states that the string on the right will be changed
* to the string on the left when performing forward
* transliteration, and vice versa when performing reverse
* transliteration.</dd>
* </dl>
*
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
* string</em>. The match pattern consists of literal characters,
* optionally preceded by context, and optionally followed by
* context. Context characters, like literal pattern characters,
* must be matched in the text being transliterated. However, unlike
* literal pattern characters, they are not replaced by the output
* text. For example, the pattern &quot;<code>abc{def}</code>&quot;
* indicates the characters &quot;<code>def</code>&quot; must be
* preceded by &quot;<code>abc</code>&quot; for a successful match.
* If there is a successful match, &quot;<code>def</code>&quot; will
* be replaced, but not &quot;<code>abc</code>&quot;. The final '<code>}</code>'
* is optional, so &quot;<code>abc{def</code>&quot; is equivalent to
* &quot;<code>abc{def}</code>&quot;. Another example is &quot;<code>{123}456</code>&quot;
* (or &quot;<code>123}456</code>&quot;) in which the literal
* pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
* </p>
*
* <p>The output string of a forward or reverse rule consists of
* characters to replace the literal pattern characters. If the
* output string contains the character '<code>|</code>', this is
* taken to indicate the location of the <em>cursor</em> after
* replacement. The cursor is the point in the text at which the
* next replacement, if any, will be applied. The cursor is usually
* placed within the replacement text; however, it can actually be
* placed into the precending or following context by using the
* special character '<code>@</code>'. Examples:</p>
*
* <blockquote>
* <p><code>a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor
* before a<br>
* {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between
* y and z</code></p>
* </blockquote>
*
* <p><b>UnicodeSet</b></p>
*
* <p><code>UnicodeSet</code> patterns may appear anywhere that
* makes sense. They may appear in variable definitions.
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
* contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
* or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.</p>
*
* <p><code>UnicodeSet</code> patterns may also be embedded directly
* into rule strings. Thus, the following two rules are equivalent:</p>
*
* <blockquote>
* <p><code>$vowel=[aeiou]; $vowel&gt;'*'; # One way to do this<br>
* [aeiou]&gt;'*';
* &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
* Another way</code></p>
* </blockquote>
*
* <p>See {@link UnicodeSet} for more documentation and examples.</p>
*
* <p><b>Segments</b></p>
*
* <p>Segments of the input string can be matched and copied to the
* output string. This makes certain sets of rules simpler and more
* general, and makes reordering possible. For example:</p>
*
* <blockquote>
* <p><code>([a-z]) &gt; $1 $1;
* &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
* double lowercase letters<br>
* ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs</code></p>
* </blockquote>
*
* <p>The segment of the input string to be copied is delimited by
* &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
* nine segments may be defined. Segments may not overlap. In the
* output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
* represent the input string segments, in left-to-right order of
* definition.</p>
*
* <p><b>Anchors</b></p>
*
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
* special characters '<code>^</code>' and '<code>$</code>'. For example:</p>
*
* <blockquote>
* <p><code>^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text<br>
* &nbsp; a&nbsp;&nbsp; &gt; 'A';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
* of 'a'<br>
* &nbsp; z $ &gt; 'END_Z'; &nbsp;&nbsp;# match 'z' at end of text<br>
* &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
* of 'z'</code></p>
* </blockquote>
*
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
* set pattern. Although this is usually the match chafacter for the end anchor, the set will
* match either the beginning or the end of the text, depending on its placement. For
* example:</p>
*
* <blockquote>
* <p><code>$x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor<br>
* $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start<br>
* &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end</code></p>
* </blockquote>
*
* <p><b>Example</b> </p>
*
* <p>The following example rules illustrate many of the features of
* the rule language. </p>
*
* <table border="0" cellpadding="4">
* <tr>
* <td valign="top">Rule 1.</td>
* <td valign="top" nowrap><code>abc{def}&gt;x|y</code></td>
* </tr>
* <tr>
* <td valign="top">Rule 2.</td>
* <td valign="top" nowrap><code>xyz&gt;r</code></td>
* </tr>
* <tr>
* <td valign="top">Rule 3.</td>
* <td valign="top" nowrap><code>yz&gt;q</code></td>
* </tr>
* </table>
*
* <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
* yields the following results: </p>
*
* <table border="0" cellpadding="4">
* <tr>
* <td valign="top" nowrap><code>|adefabcdefz</code></td>
* <td valign="top">Initial state, no rules match. Advance
* cursor.</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>a|defabcdefz</code></td>
* <td valign="top">Still no match. Rule 1 does not match
* because the preceding context is not present.</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>ad|efabcdefz</code></td>
* <td valign="top">Still no match. Keep advancing until
* there is a match...</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>ade|fabcdefz</code></td>
* <td valign="top">...</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>adef|abcdefz</code></td>
* <td valign="top">...</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>adefa|bcdefz</code></td>
* <td valign="top">...</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>adefab|cdefz</code></td>
* <td valign="top">...</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>adefabc|defz</code></td>
* <td valign="top">Rule 1 matches; replace &quot;<code>def</code>&quot;
* with &quot;<code>xy</code>&quot; and back up the cursor
* to before the '<code>y</code>'.</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>adefabcx|yz</code></td>
* <td valign="top">Although &quot;<code>xyz</code>&quot; is
* present, rule 2 does not match because the cursor is
* before the '<code>y</code>', not before the '<code>x</code>'.
* Rule 3 does match. Replace &quot;<code>yz</code>&quot;
* with &quot;<code>q</code>&quot;.</td>
* </tr>
* <tr>
* <td valign="top" nowrap><code>adefabcxq|</code></td>
* <td valign="top">The cursor is at the end;
* transliteration is complete.</td>
* </tr>
* </table>
*
* <p>The order of rules is significant. If multiple rules may match
* at some point, the first matching rule is applied. </p>
*
* <p>Forward and reverse rules may have an empty output string.
* Otherwise, an empty left or right hand side of any statement is a
* syntax error. </p>
*
* <p>Single quotes are used to quote any character other than a
* digit or letter. To specify a single quote itself, inside or
* outside of quotes, use two single quotes in a row. For example,
* the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
* string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
* </p>
*
* <p><b>Notes</b> </p>
*
* <p>While a RuleBasedTransliterator is being built, it checks that
* the rules are added in proper order. For example, if the rule
* &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
* then the second rule will throw an exception. The reason is that
* the second rule can never be triggered, since the first rule
* always matches anything it matches. In other words, the first
* rule <em>masks</em> the second rule. </p>
*
* built from a set of rules as defined for
* Transliterator::createFromRules().
* See the C++ class Transliterator documentation for the rule syntax.
*
* @author Alan Liu
* @internal Use transliterator factory methods instead since this class will be removed in that release.
*/

View file

@ -15,10 +15,10 @@
#include "unicode/utypes.h"
/**
* \file
* \file
* \brief C++ API: Transforms text from one format to another.
*/
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/uobject.h"
@ -31,7 +31,6 @@ U_NAMESPACE_BEGIN
class UnicodeFilter;
class UnicodeSet;
class CompoundTransliterator;
class TransliteratorParser;
class NormalizationTransliterator;
class TransliteratorIDParser;
@ -97,18 +96,20 @@ class TransliteratorIDParser;
* contents of the buffer may show text being modified as each new
* character arrives.
*
* <p>Consider the simple `RuleBasedTransliterator`:
*
* <p>Consider the simple rule-based Transliterator:
* <pre>
* th>{theta}
* t>{tau}
* </pre>
*
* When the user types 't', nothing will happen, since the
* transliterator is waiting to see if the next character is 'h'. To
* remedy this, we introduce the notion of a cursor, marked by a '|'
* in the output string:
*
* <pre>
* t>|{tau}
* {tau}h>{theta}
* </pre>
*
* Now when the user types 't', tau appears, and if the next character
* is 'h', the tau changes to a theta. This is accomplished by
@ -130,7 +131,7 @@ class TransliteratorIDParser;
* which the transliterator last stopped, either because it reached
* the end, or because it required more characters to disambiguate
* between possible inputs. The <code>CURSOR</code> can also be
* explicitly set by rules in a <code>RuleBasedTransliterator</code>.
* explicitly set by rules in a rule-based Transliterator.
* Any characters before the <code>CURSOR</code> index are frozen;
* future keyboard transliteration calls within this input sequence
* will not change them. New text is inserted at the
@ -232,6 +233,255 @@ class TransliteratorIDParser;
* if the performance of these methods can be improved over the
* performance obtained by the default implementations in this class.
*
* <p><b>Rule syntax</b>
*
* <p>A set of rules determines how to perform translations.
* Rules within a rule set are separated by semicolons (';').
* To include a literal semicolon, prefix it with a backslash ('\').
* Unicode Pattern_White_Space is ignored.
* If the first non-blank character on a line is '#',
* the entire line is ignored as a comment.
*
* <p>Each set of rules consists of two groups, one forward, and one
* reverse. This is a convention that is not enforced; rules for one
* direction may be omitted, with the result that translations in
* that direction will not modify the source text. In addition,
* bidirectional forward-reverse rules may be specified for
* symmetrical transformations.
*
* <p>Note: Another description of the Transliterator rule syntax is available in
* <a href="https://www.unicode.org/reports/tr35/tr35-general.html#Transform_Rules_Syntax">section
* Transform Rules Syntax of UTS #35: Unicode LDML</a>.
* The rules are shown there using arrow symbols ← and → and ↔.
* ICU supports both those and the equivalent ASCII symbols &lt; and &gt; and &lt;&gt;.
*
* <p>Rule statements take one of the following forms:
*
* <dl>
* <dt><code>$alefmadda=\\u0622;</code></dt>
* <dd><strong>Variable definition.</strong> The name on the
* left is assigned the text on the right. In this example,
* after this statement, instances of the left hand name,
* &quot;<code>$alefmadda</code>&quot;, will be replaced by
* the Unicode character U+0622. Variable names must begin
* with a letter and consist only of letters, digits, and
* underscores. Case is significant. Duplicate names cause
* an exception to be thrown, that is, variables cannot be
* redefined. The right hand side may contain well-formed
* text of any length, including no text at all (&quot;<code>$empty=;</code>&quot;).
* The right hand side may contain embedded <code>UnicodeSet</code>
* patterns, for example, &quot;<code>$softvowel=[eiyEIY]</code>&quot;.</dd>
* <dt><code>ai&gt;$alefmadda;</code></dt>
* <dd><strong>Forward translation rule.</strong> This rule
* states that the string on the left will be changed to the
* string on the right when performing forward
* transliteration.</dd>
* <dt><code>ai&lt;$alefmadda;</code></dt>
* <dd><strong>Reverse translation rule.</strong> This rule
* states that the string on the right will be changed to
* the string on the left when performing reverse
* transliteration.</dd>
* </dl>
*
* <dl>
* <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
* <dd><strong>Bidirectional translation rule.</strong> This
* rule states that the string on the right will be changed
* to the string on the left when performing forward
* transliteration, and vice versa when performing reverse
* transliteration.</dd>
* </dl>
*
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
* string</em>. The match pattern consists of literal characters,
* optionally preceded by context, and optionally followed by
* context. Context characters, like literal pattern characters,
* must be matched in the text being transliterated. However, unlike
* literal pattern characters, they are not replaced by the output
* text. For example, the pattern &quot;<code>abc{def}</code>&quot;
* indicates the characters &quot;<code>def</code>&quot; must be
* preceded by &quot;<code>abc</code>&quot; for a successful match.
* If there is a successful match, &quot;<code>def</code>&quot; will
* be replaced, but not &quot;<code>abc</code>&quot;. The final '<code>}</code>'
* is optional, so &quot;<code>abc{def</code>&quot; is equivalent to
* &quot;<code>abc{def}</code>&quot;. Another example is &quot;<code>{123}456</code>&quot;
* (or &quot;<code>123}456</code>&quot;) in which the literal
* pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
*
* <p>The output string of a forward or reverse rule consists of
* characters to replace the literal pattern characters. If the
* output string contains the character '<code>|</code>', this is
* taken to indicate the location of the <em>cursor</em> after
* replacement. The cursor is the point in the text at which the
* next replacement, if any, will be applied. The cursor is usually
* placed within the replacement text; however, it can actually be
* placed into the preceding or following context by using the
* special character '@'. Examples:
*
* <pre>
* a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor before a
* {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between y and z
* </pre>
*
* <p><b>UnicodeSet</b>
*
* <p><code>UnicodeSet</code> patterns may appear anywhere that
* makes sense. They may appear in variable definitions.
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
* contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
* or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.
*
* <p><code>UnicodeSet</code> patterns may also be embedded directly
* into rule strings. Thus, the following two rules are equivalent:
*
* <pre>
* $vowel=[aeiou]; $vowel&gt;'*'; # One way to do this
* [aeiou]&gt;'*'; # Another way
* </pre>
*
* <p>See {@link UnicodeSet} for more documentation and examples.
*
* <p><b>Segments</b>
*
* <p>Segments of the input string can be matched and copied to the
* output string. This makes certain sets of rules simpler and more
* general, and makes reordering possible. For example:
*
* <pre>
* ([a-z]) &gt; $1 $1; # double lowercase letters
* ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs
* </pre>
*
* <p>The segment of the input string to be copied is delimited by
* &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
* nine segments may be defined. Segments may not overlap. In the
* output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
* represent the input string segments, in left-to-right order of
* definition.
*
* <p><b>Anchors</b>
*
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
* special characters '<code>^</code>' and '<code>$</code>'. For example:
*
* <pre>
* ^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text
* &nbsp; a&nbsp;&nbsp; &gt; 'A'; # match other instances of 'a'
* &nbsp; z $ &gt; 'END_Z'; &nbsp;&nbsp;# match 'z' at end of text
* &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances of 'z'
* </pre>
*
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
* set pattern. Although this is usually the match character for the end anchor, the set will
* match either the beginning or the end of the text, depending on its placement. For
* example:
*
* <pre>
* $x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor
* $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start
* &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end
* </pre>
*
* <p><b>Example</b>
*
* <p>The following example rules illustrate many of the features of
* the rule language.
*
* <table border="0" cellpadding="4">
* <tr>
* <td style="vertical-align: top;">Rule 1.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>abc{def}&gt;x|y</code></td>
* </tr>
* <tr>
* <td style="vertical-align: top;">Rule 2.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>xyz&gt;r</code></td>
* </tr>
* <tr>
* <td style="vertical-align: top;">Rule 3.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>yz&gt;q</code></td>
* </tr>
* </table>
*
* <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
* yields the following results:
*
* <table border="0" cellpadding="4">
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>|adefabcdefz</code></td>
* <td style="vertical-align: top;">Initial state, no rules match. Advance
* cursor.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>a|defabcdefz</code></td>
* <td style="vertical-align: top;">Still no match. Rule 1 does not match
* because the preceding context is not present.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>ad|efabcdefz</code></td>
* <td style="vertical-align: top;">Still no match. Keep advancing until
* there is a match...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>ade|fabcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adef|abcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefa|bcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefab|cdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabc|defz</code></td>
* <td style="vertical-align: top;">Rule 1 matches; replace &quot;<code>def</code>&quot;
* with &quot;<code>xy</code>&quot; and back up the cursor
* to before the '<code>y</code>'.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcx|yz</code></td>
* <td style="vertical-align: top;">Although &quot;<code>xyz</code>&quot; is
* present, rule 2 does not match because the cursor is
* before the '<code>y</code>', not before the '<code>x</code>'.
* Rule 3 does match. Replace &quot;<code>yz</code>&quot;
* with &quot;<code>q</code>&quot;.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcxq|</code></td>
* <td style="vertical-align: top;">The cursor is at the end;
* transliteration is complete.</td>
* </tr>
* </table>
*
* <p>The order of rules is significant. If multiple rules may match
* at some point, the first matching rule is applied.
*
* <p>Forward and reverse rules may have an empty output string.
* Otherwise, an empty left or right hand side of any statement is a
* syntax error.
*
* <p>Single quotes are used to quote any character other than a
* digit or letter. To specify a single quote itself, inside or
* outside of quotes, use two single quotes in a row. For example,
* the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
* string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
*
* <p><b>Notes</b>
*
* <p>While a Transliterator is being built from rules, it checks that
* the rules are added in proper order. For example, if the rule
* &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
* then the second rule will throw an exception. The reason is that
* the second rule can never be triggered, since the first rule
* always matches anything it matches. In other words, the first
* rule <em>masks</em> the second rule.
*
* @author Alan Liu
* @stable ICU 2.0
*/
@ -627,7 +877,7 @@ public:
/**
* Transliterate a substring of text, as specified by index, taking filters
* into account. This method is for subclasses that need to delegate to
* another transliterator, such as CompoundTransliterator.
* another transliterator.
* @param text the text to be transliterated
* @param index the position indices
* @param incremental if TRUE, then assume more characters may be inserted
@ -841,17 +1091,19 @@ public:
/**
* Returns a <code>Transliterator</code> object constructed from
* the given rule string. This will be a RuleBasedTransliterator,
* the given rule string. This will be a rule-based Transliterator,
* if the rule string contains only rules, or a
* CompoundTransliterator, if it contains ID blocks, or a
* NullTransliterator, if it contains ID blocks which parse as
* compound Transliterator, if it contains ID blocks, or a
* null Transliterator, if it contains ID blocks which parse as
* empty for the given direction.
*
* @param ID the id for the transliterator.
* @param rules rules, separated by ';'
* @param dir either FORWARD or REVERSE.
* @param parseError Struct to recieve information on position
* @param parseError Struct to receive information on position
* of error if an error is encountered
* @param status Output param set to success/failure code.
* @return a newly created Transliterator
* @stable ICU 2.0
*/
static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,

View file

@ -226,6 +226,7 @@ void addLocaleTest(TestNode** root)
TESTCASE(TestKeywordVariants);
TESTCASE(TestKeywordVariantParsing);
TESTCASE(TestCanonicalization);
TESTCASE(TestCanonicalizationBuffer);
TESTCASE(TestKeywordSet);
TESTCASE(TestKeywordSetError);
TESTCASE(TestDisplayKeywords);
@ -2251,6 +2252,42 @@ static void TestCanonicalization(void)
}
}
/*
 * Checks uloc_canonicalize() when the destination capacity is exactly the
 * length of the expected result, i.e. with no room for a NUL terminator.
 * The call must not report a failure status, must return the full length,
 * and must have written the complete, unchanged locale ID into the buffer.
 */
static void TestCanonicalizationBuffer(void)
{
    UErrorCode status = U_ZERO_ERROR;
    char buffer[256];

    // ULOC_FULLNAME_CAPACITY == 157 (uloc.h)
    static const char name[] =
        "zh@x"
        "=foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
        "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
        "-foo-barz"
    ;
    // Length without the NUL terminator. Kept as int32_t so that it matches
    // the capacity parameter and the return value of uloc_canonicalize(),
    // avoiding a signed/unsigned comparison and a size_t/%u varargs mismatch.
    static const int32_t len = (int32_t)(sizeof name - 1);

    // Deliberately pass len (not sizeof buffer) as the capacity: exact fit.
    int32_t reslen = uloc_canonicalize(name, buffer, len, &status);

    if (U_FAILURE(status)) {
        log_err("FAIL: uloc_canonicalize(%s) => %s, expected !U_FAILURE()\n",
                name, u_errorName(status));
        return;
    }

    if (reslen != len) {
        log_err("FAIL: uloc_canonicalize(%s) => \"%i\", expected \"%i\"\n",
                name, reslen, len);
        return;
    }

    // The buffer is not necessarily NUL-terminated here, so compare and print
    // with an explicit length.
    if (uprv_strncmp(name, buffer, (size_t)len) != 0) {
        log_err("FAIL: uloc_canonicalize(%s) => \"%.*s\", expected \"%s\"\n",
                name, reslen, buffer, name);
        return;
    }
}
static void TestDisplayKeywords(void)
{
int32_t i;

View file

@ -84,6 +84,7 @@ static void TestDisplayNames(void);
static void doTestDisplayNames(const char* inLocale, int32_t compareIndex);
static void TestCanonicalization(void);
static void TestCanonicalizationBuffer(void);
static void TestDisplayKeywords(void);

View file

@ -20,6 +20,7 @@
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/translit.h"
#include "cpdtrans.h"
#include "intltest.h"
/**

View file

@ -252,6 +252,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestToLanguageTag);
TESTCASE_AUTO(TestMoveAssign);
TESTCASE_AUTO(TestMoveCtor);
TESTCASE_AUTO(TestBug13417VeryLongLanguageTag);
TESTCASE_AUTO_END;
}
@ -3125,3 +3126,23 @@ void LocaleTest::TestMoveCtor() {
assertEquals("variant", l7.getVariant(), l8.getVariant());
assertEquals("bogus", l7.isBogus(), l8.isBogus());
}
void LocaleTest::TestBug13417VeryLongLanguageTag() {
IcuTestErrorCode status(*this, "TestBug13417VeryLongLanguageTag()");
static const char tag[] =
"zh-x"
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
"-foo-bar-baz-fxx"
;
Locale l = Locale::forLanguageTag(tag, status);
status.errIfFailureAndReset("\"%s\"", tag);
assertTrue("!l.isBogus()", !l.isBogus());
std::string result = l.toLanguageTag<std::string>(status);
status.errIfFailureAndReset("\"%s\"", l.getName());
assertEquals("equals", tag, result.c_str());
}

View file

@ -124,6 +124,8 @@ public:
void TestMoveAssign();
void TestMoveCtor();
void TestBug13417VeryLongLanguageTag();
private:
void _checklocs(const char* label,
const char* req,

View file

@ -1729,6 +1729,32 @@
</java>
</target>
<target name="checkAPIStatusConsistency" depends="info, build-tools, gatherapi"
description="Check consistency between API class status and methods overriding java.lang.Object">
<!--
Runs APIStatusConsistencyChecker with two arguments: the gzipped API report
file under ${out.dir}, and the value of checkAPIStatusConsistency.skip.classes.
The java task is configured with failonerror="true".

If you need classes excluded from this check, define the following property in build-local.properties.
e.g. checkAPIStatusConsistency.skip.classes=com.ibm.icu.text.Normalizer;com.ibm.icu.util.ULocale
The property is given an empty value below.
-->
<property name="checkAPIStatusConsistency.skip.classes" value=""/>
<java classname="com.ibm.icu.dev.tool.docs.APIStatusConsistencyChecker"
failonerror="true">
<arg value="${out.dir}/icu4j${api.report.version}.api3.gz" />
<arg value="${checkAPIStatusConsistency.skip.classes}" />
<classpath>
<pathelement location="${icu4j.build-tools.jar}"/>
<pathelement location="${icu4j.core.jar}"/>
<pathelement location="${icu4j.collate.jar}"/>
<pathelement location="${icu4j.charset.jar}"/>
<pathelement location="${icu4j.currdata.jar}"/>
<pathelement location="${icu4j.langdata.jar}"/>
<pathelement location="${icu4j.regiondata.jar}"/>
<pathelement location="${icu4j.translit.jar}"/>
</classpath>
</java>
</target>
<target name="checkAPIStatus" depends="checkAPIStatusConsistency, checkDeprecated"/>
<target name="draftAPIs" depends="info, gatherapi" description="Run API collector tool and generate draft API report in html">
<java classname="com.ibm.icu.dev.tool.docs.CollectAPI"
classpath="${icu4j.build-tools.jar}"

View file

@ -624,7 +624,7 @@ public final class CollationElementIterator
}
static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
Map<Integer, Integer> maxExpansions = new HashMap<Integer, Integer>();
Map<Integer, Integer> maxExpansions = new HashMap<>();
MaxExpSink sink = new MaxExpSink(maxExpansions);
new ContractionsAndExpansions(null, null, sink, true).forData(data);
return maxExpansions;
@ -692,11 +692,9 @@ public final class CollationElementIterator
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
* @internal
* @deprecated This API is ICU internal only.
* @stable ICU 2.8
*/
@Override
@Deprecated
public int hashCode() {
assert false : "hashCode not designed";
return 42;

View file

@ -329,7 +329,7 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
* Subclasses should override this implementation.
*
* @return a hash code value.
* @stable ICU 58
* @stable ICU 2.8
*/
@Override
public int hashCode() {
@ -477,7 +477,7 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
/**
* Clones the collator.
* @stable ICU 2.6
* @stable ICU 2.8
* @return a clone of this collator.
*/
@Override
@ -1016,7 +1016,7 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
}
private static final class KeywordsSink extends UResource.Sink {
LinkedList<String> values = new LinkedList<String>();
LinkedList<String> values = new LinkedList<>();
boolean hasDefault = false;
@Override

View file

@ -112,6 +112,8 @@ public final class CharacterProperties {
* @return the property as a set
* @see UProperty
* @see UCharacter#hasBinaryProperty
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeSet getBinaryPropertySet(int property) {
if (property < 0 || UProperty.BINARY_LIMIT <= property) {
@ -141,6 +143,8 @@ public final class CharacterProperties {
* @return the property as a map
* @see UProperty
* @see UCharacter#getIntPropertyValue
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
public static final CodePointMap getIntPropertyMap(int property) {
if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) {

View file

@ -175,6 +175,7 @@ public abstract class NumberRangeFormatter {
*
* @return An {@link UnlocalizedNumberRangeFormatter}, to be used for chaining.
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
public static UnlocalizedNumberRangeFormatter with() {
return BASE;
@ -188,6 +189,7 @@ public abstract class NumberRangeFormatter {
* The locale from which to load formats and symbols for number range formatting.
* @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining.
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
public static LocalizedNumberRangeFormatter withLocale(Locale locale) {
return BASE.locale(locale);
@ -201,9 +203,15 @@ public abstract class NumberRangeFormatter {
* The locale from which to load formats and symbols for number range formatting.
* @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining.
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
public static LocalizedNumberRangeFormatter withLocale(ULocale locale) {
return BASE.locale(locale);
}
/**
* Private constructor - this class is not designed for instantiation
*/
private NumberRangeFormatter() {
}
}

View file

@ -408,10 +408,10 @@ public abstract class Precision implements Cloneable {
}
/**
* @internal
* @deprecated This API is ICU internal only.
* {@inheritDoc}
* @draft ICU 62
* @provisional This API might change or be removed in a future release.
*/
@Deprecated
@Override
public Object clone() {
try {

View file

@ -93,10 +93,9 @@ public class ScientificNotation extends Notation implements Cloneable {
}
/**
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
*/
@Deprecated
@Override
public Object clone() {
try {

View file

@ -202,7 +202,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
//other.pluralRules = pluralRules;
// clone content
//other.pluralCountToCurrencyUnitPattern = pluralCountToCurrencyUnitPattern;
other.pluralCountToCurrencyUnitPattern = new HashMap<String, String>();
other.pluralCountToCurrencyUnitPattern = new HashMap<>();
for (String pluralCount : pluralCountToCurrencyUnitPattern.keySet()) {
String currencyPattern = pluralCountToCurrencyUnitPattern.get(pluralCount);
other.pluralCountToCurrencyUnitPattern.put(pluralCount, currencyPattern);
@ -231,11 +231,9 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
/**
* Override hashCode
*
* @internal
* @deprecated This API is ICU internal only.
* @stable ICU 4.2
*/
@Override
@Deprecated
public int hashCode() {
return pluralCountToCurrencyUnitPattern.hashCode()
^ pluralRules.hashCode()
@ -283,7 +281,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
}
private void setupCurrencyPluralPattern(ULocale uloc) {
pluralCountToCurrencyUnitPattern = new HashMap<String, String>();
pluralCountToCurrencyUnitPattern = new HashMap<>();
String numberStylePattern = NumberFormat.getPattern(uloc, NumberFormat.NUMBERSTYLE);
// Split the number style pattern into pos and neg if applicable

View file

@ -273,10 +273,8 @@ public class DateIntervalInfo implements Cloneable, Freezable<DateIntervalInfo>,
/**
* {@inheritDoc}
* @internal
* @deprecated This API is ICU internal only.
* @stable ICU 4.0
*/
@Deprecated
@Override
public String toString() {
return "{first=«" + fIntervalPatternFirstPart + "», second=«" + fIntervalPatternSecondPart + "», reversed:" + fFirstDateInPtnIsLaterDate + "}";

View file

@ -821,7 +821,8 @@ public final class Edits {
/**
* A string representation of the current edit represented by the iterator for debugging. You
* should not depend on the contents of the return string; it may change over time.
* @internal
* @return a string representation of the object.
* @stable ICU 59
*/
@Override
public String toString() {

View file

@ -126,9 +126,9 @@ public class MeasureFormat extends UFormat {
private final transient LocalizedNumberFormatter numberFormatter;
private static final SimpleCache<ULocale, NumericFormatters> localeToNumericDurationFormatters = new SimpleCache<ULocale, NumericFormatters>();
private static final SimpleCache<ULocale, NumericFormatters> localeToNumericDurationFormatters = new SimpleCache<>();
private static final Map<MeasureUnit, Integer> hmsTo012 = new HashMap<MeasureUnit, Integer>();
private static final Map<MeasureUnit, Integer> hmsTo012 = new HashMap<>();
static {
hmsTo012.put(MeasureUnit.HOUR, 0);
@ -486,7 +486,7 @@ public class MeasureFormat extends UFormat {
* Two MeasureFormats, a and b, are equal if and only if they have the same formatWidth, locale, and
* equal number formats.
*
* @stable ICU 53
* @stable ICU 3.0
*/
@Override
public final boolean equals(Object other) {
@ -506,7 +506,7 @@ public class MeasureFormat extends UFormat {
/**
* {@inheritDoc}
*
* @stable ICU 53
* @stable ICU 3.0
*/
@Override
public final int hashCode() {
@ -997,7 +997,7 @@ public class MeasureFormat extends UFormat {
this.formatWidth = width;
this.numberFormat = numberFormat;
this.subClass = subClass;
this.keyValues = new HashMap<Object, Object>();
this.keyValues = new HashMap<>();
}
// Must have public constructor, to enable Externalizable
@ -1070,7 +1070,7 @@ public class MeasureFormat extends UFormat {
return values[ordinal];
}
private static final Map<ULocale, String> localeIdToRangeFormat = new ConcurrentHashMap<ULocale, String>();
private static final Map<ULocale, String> localeIdToRangeFormat = new ConcurrentHashMap<>();
/**
* Return a formatter (compiled SimpleFormatter pattern) for a range, such as "{0}{1}".

View file

@ -1084,7 +1084,7 @@ public class PluralRules implements Serializable {
SampleType sampleType2;
boolean bounded2 = true;
boolean haveBound = false;
Set<FixedDecimalRange> samples2 = new LinkedHashSet<FixedDecimalRange>();
Set<FixedDecimalRange> samples2 = new LinkedHashSet<>();
if (source.startsWith("integer")) {
sampleType2 = SampleType.INTEGER;
@ -1215,7 +1215,7 @@ public class PluralRules implements Serializable {
static final UnicodeSet BREAK_AND_KEEP = new UnicodeSet('!', '!', '%', '%', ',', ',', '.', '.', '=', '=').freeze();
static String[] split(String source) {
int last = -1;
List<String> result = new ArrayList<String>();
List<String> result = new ArrayList<>();
for (int i = 0; i < source.length(); ++i) {
char ch = source.charAt(i);
if (BREAK_AND_IGNORE.contains(ch)) {
@ -1334,7 +1334,7 @@ public class PluralRules implements Serializable {
t = nextToken(tokens, x++, condition);
}
List<Long> valueList = new ArrayList<Long>();
List<Long> valueList = new ArrayList<>();
// the token t is always one item ahead
while (true) {
@ -1756,10 +1756,9 @@ public class PluralRules implements Serializable {
}
/**
* @internal
* @deprecated This API is ICU internal only.
* {@inheritDoc}
* @stable ICU 3.8
*/
@Deprecated
@Override
public int hashCode() {
return keyword.hashCode() ^ constraint.hashCode();
@ -1773,7 +1772,7 @@ public class PluralRules implements Serializable {
private static class RuleList implements Serializable {
private boolean hasExplicitBoundingInfo = false;
private static final long serialVersionUID = 1;
private final List<Rule> rules = new ArrayList<Rule>();
private final List<Rule> rules = new ArrayList<>();
public RuleList addRule(Rule nextRule) {
String keyword = nextRule.getKeyword();
@ -1821,7 +1820,7 @@ public class PluralRules implements Serializable {
}
public Set<String> getKeywords() {
Set<String> result = new LinkedHashSet<String>();
Set<String> result = new LinkedHashSet<>();
for (Rule rule : rules) {
result.add(rule.getKeyword());
}
@ -2020,10 +2019,9 @@ public class PluralRules implements Serializable {
}
/**
* @internal
* @deprecated This API is ICU internal only.
* {@inheritDoc}
* @stable ICU 3.8
*/
@Deprecated
@Override
public int hashCode() {
return rules.hashCode();
@ -2175,7 +2173,7 @@ public class PluralRules implements Serializable {
if (!keywords.contains(keyword)) {
return null;
}
Set<Double> result = new TreeSet<Double>();
Set<Double> result = new TreeSet<>();
if (rules.hasExplicitBoundingInfo) {
FixedDecimalSamples samples = rules.getDecimalSamples(keyword, sampleType);
@ -2420,7 +2418,7 @@ public class PluralRules implements Serializable {
// Compute if the quick test is insufficient.
HashSet<Double> subtractedSet = new HashSet<Double>(values);
HashSet<Double> subtractedSet = new HashSet<>(values);
for (Double explicit : explicits) {
subtractedSet.remove(explicit - offset);
}

View file

@ -941,13 +941,10 @@ public class RuleBasedNumberFormat extends NumberFormat {
}
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
* @internal
* @deprecated This API is ICU internal only.
* {@inheritDoc}
* @stable ICU 2.0
*/
@Override
@Deprecated
public int hashCode() {
return super.hashCode();
}
@ -1731,7 +1728,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
// our rule list is an array of the appropriate size
ruleSets = new NFRuleSet[numRuleSets];
ruleSetsMap = new HashMap<String, NFRuleSet>(numRuleSets * 2 + 1);
ruleSetsMap = new HashMap<>(numRuleSets * 2 + 1);
defaultRuleSet = null;
// Used to count the number of public rule sets
@ -1844,7 +1841,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
if (localizations != null) {
publicRuleSetNames = localizations[0].clone();
Map<String, String[]> m = new HashMap<String, String[]>();
Map<String, String[]> m = new HashMap<>();
for (int i = 1; i < localizations.length; ++i) {
String[] data = localizations[i];
String loc = data[0];

View file

@ -506,7 +506,7 @@ public class SpoofChecker {
SpoofData fSpoofData;
final UnicodeSet fAllowedCharsSet = new UnicodeSet(0, 0x10ffff); // The UnicodeSet of allowed characters.
// for this Spoof Checker. Defaults to all chars.
final Set<ULocale> fAllowedLocales = new LinkedHashSet<ULocale>(); // The list of allowed locales.
final Set<ULocale> fAllowedLocales = new LinkedHashSet<>(); // The list of allowed locales.
private RestrictionLevel fRestrictionLevel;
/**
@ -567,7 +567,7 @@ public class SpoofChecker {
result.fSpoofData = this.fSpoofData;
result.fAllowedCharsSet = (UnicodeSet) (this.fAllowedCharsSet.clone());
result.fAllowedCharsSet.freeze();
result.fAllowedLocales = new HashSet<ULocale>(this.fAllowedLocales);
result.fAllowedLocales = new HashSet<>(this.fAllowedLocales);
result.fRestrictionLevel = this.fRestrictionLevel;
return result;
}
@ -734,7 +734,7 @@ public class SpoofChecker {
* @stable ICU 54
*/
public Builder setAllowedJavaLocales(Set<Locale> locales) {
HashSet<ULocale> ulocales = new HashSet<ULocale>(locales.size());
HashSet<ULocale> ulocales = new HashSet<>(locales.size());
for (Locale locale : locales) {
ulocales.add(ULocale.forLocale(locale));
}
@ -848,10 +848,10 @@ public class SpoofChecker {
private int fLineNum;
ConfusabledataBuilder() {
fTable = new Hashtable<Integer, SPUString>();
fTable = new Hashtable<>();
fKeySet = new UnicodeSet();
fKeyVec = new ArrayList<Integer>();
fValueVec = new ArrayList<Integer>();
fKeyVec = new ArrayList<>();
fValueVec = new ArrayList<>();
stringPool = new SPUStringPool();
}
@ -1093,8 +1093,8 @@ public class SpoofChecker {
// combination of a uhash and a Vector.
private static class SPUStringPool {
public SPUStringPool() {
fVec = new Vector<SPUString>();
fHash = new Hashtable<String, SPUString>();
fVec = new Vector<>();
fHash = new Hashtable<>();
}
public int size() {
@ -1179,7 +1179,7 @@ public class SpoofChecker {
* @stable ICU 54
*/
public Set<Locale> getAllowedJavaLocales() {
HashSet<Locale> locales = new HashSet<Locale>(fAllowedLocales.size());
HashSet<Locale> locales = new HashSet<>(fAllowedLocales.size());
for (ULocale uloc : fAllowedLocales) {
locales.add(uloc.toLocale());
}
@ -1535,7 +1535,7 @@ public class SpoofChecker {
* @param other
* the SpoofChecker being compared with.
* @return true if the two SpoofCheckers are equal.
* @stable ICU 58
* @stable ICU 4.6
*/
@Override
public boolean equals(Object other) {
@ -1565,7 +1565,7 @@ public class SpoofChecker {
/**
* Overrides {@link Object#hashCode()}.
* @stable ICU 58
* @stable ICU 4.6
*/
@Override
public int hashCode() {

View file

@ -143,11 +143,10 @@ public class StringPrepParseException extends ParseException {
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
* @internal
* @deprecated This API is ICU internal only.
* @return a hash code value for this object.
* @stable ICU 2.8
*/
@Override
@Deprecated
public int hashCode() {
assert false : "hashCode not designed";
return 42;

View file

@ -343,7 +343,7 @@ public class TimeUnitFormat extends MeasureFormat {
format = NumberFormat.getNumberInstance(locale);
}
pluralRules = PluralRules.forLocale(locale);
timeUnitToCountToPatterns = new HashMap<TimeUnit, Map<String, Object[]>>();
timeUnitToCountToPatterns = new HashMap<>();
Set<String> pluralKeywords = pluralRules.getKeywords();
setup("units/duration", timeUnitToCountToPatterns, FULL_NAME, pluralKeywords);
setup("unitsShort/duration", timeUnitToCountToPatterns, ABBREVIATED_NAME, pluralKeywords);
@ -400,7 +400,7 @@ public class TimeUnitFormat extends MeasureFormat {
Map<String, Object[]> countToPatterns = timeUnitToCountToPatterns.get(timeUnit);
if (countToPatterns == null) {
countToPatterns = new TreeMap<String, Object[]>();
countToPatterns = new TreeMap<>();
timeUnitToCountToPatterns.put(timeUnit, countToPatterns);
}
@ -467,7 +467,7 @@ public class TimeUnitFormat extends MeasureFormat {
final TimeUnit timeUnit = timeUnits[i];
Map<String, Object[]> countToPatterns = timeUnitToCountToPatterns.get(timeUnit);
if (countToPatterns == null) {
countToPatterns = new TreeMap<String, Object[]>();
countToPatterns = new TreeMap<>();
timeUnitToCountToPatterns.put(timeUnit, countToPatterns);
}
for (String pluralCount : keywords) {
@ -556,8 +556,7 @@ public class TimeUnitFormat extends MeasureFormat {
// MeasureFormat
/**
* @internal
* @deprecated This API is ICU internal only.
* @deprecated ICU 53 see {@link MeasureFormat}
*/
@Deprecated
@Override

View file

@ -26,7 +26,7 @@ import com.ibm.icu.impl.Utility;
public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
{
// public data member ------------------------------------------------
/**
* Internal byte array.
* @stable ICU 2.8
@ -34,16 +34,16 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
public byte[] bytes;
/**
* Size of the internal byte array used.
* Different from bytes.length, size will be &lt;= bytes.length.
* Size of the internal byte array used.
* Different from bytes.length, size will be &lt;= bytes.length.
* Semantics of size is similar to java.util.Vector.size().
* @stable ICU 2.8
*/
public int size;
// public constructor ------------------------------------------------
/**
/**
* Construct a new ByteArrayWrapper with no data.
* @stable ICU 2.8
*/
@ -103,15 +103,15 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
// public methods ----------------------------------------------------
/**
* Ensure that the internal byte array is at least of length capacity.
* If the byte array is null or its length is less than capacity, a new
* byte array of length capacity will be allocated.
* The contents of the array (between 0 and size) remain unchanged.
* Ensure that the internal byte array is at least of length capacity.
* If the byte array is null or its length is less than capacity, a new
* byte array of length capacity will be allocated.
* The contents of the array (between 0 and size) remain unchanged.
* @param capacity minimum length of internal byte array.
* @return this ByteArrayWrapper
* @stable ICU 3.2
*/
public ByteArrayWrapper ensureCapacity(int capacity)
public ByteArrayWrapper ensureCapacity(int capacity)
{
if (bytes == null || bytes.length < capacity) {
byte[] newbytes = new byte[capacity];
@ -122,11 +122,11 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
}
return this;
}
/**
* Set the internal byte array from offset 0 to (limit - start) with the
* contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new
* byte array of length (limit - start) will be allocated.
* Set the internal byte array from offset 0 to (limit - start) with the
* contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new
* byte array of length (limit - start) will be allocated.
* This resets the size of the internal byte array to (limit - start).
* @param src source byte array to copy from
* @param start start offset of src to copy from
@ -134,15 +134,15 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
* @return this ByteArrayWrapper
* @stable ICU 3.2
*/
public final ByteArrayWrapper set(byte[] src, int start, int limit)
public final ByteArrayWrapper set(byte[] src, int start, int limit)
{
size = 0;
append(src, start, limit);
return this;
}
/*
public final ByteArrayWrapper get(byte[] target, int start, int limit)
public final ByteArrayWrapper get(byte[] target, int start, int limit)
{
int len = limit - start;
if (len > size) throw new IllegalArgumentException("limit too long");
@ -152,7 +152,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
*/
/**
* Appends the internal byte array from offset size with the
* Appends the internal byte array from offset size with the
* contents of src from offset start to limit. This increases the size of
* the internal byte array to (size + limit - start).
* @param src source byte array to copy from
@ -161,7 +161,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
* @return this ByteArrayWrapper
* @stable ICU 3.2
*/
public final ByteArrayWrapper append(byte[] src, int start, int limit)
public final ByteArrayWrapper append(byte[] src, int start, int limit)
{
int len = limit - start;
ensureCapacity(size + len);
@ -171,7 +171,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
}
/*
public final ByteArrayWrapper append(ByteArrayWrapper other)
public final ByteArrayWrapper append(ByteArrayWrapper other)
{
return append(other.bytes, 0, other.size);
}
@ -190,13 +190,14 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
size = 0;
return result;
}
// Boilerplate ----------------------------------------------------
/**
* Returns string value for debugging
* @stable ICU 3.2
* @stable ICU 2.8
*/
@Override
public String toString() {
StringBuilder result = new StringBuilder();
for (int i = 0; i < size; ++i) {
@ -210,8 +211,9 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
* Return true if the bytes in each wrapper are equal.
* @param other the object to compare to.
* @return true if the two objects are equal.
* @stable ICU 3.2
* @stable ICU 2.8
*/
@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other == null) return false;
@ -231,8 +233,9 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
/**
* Return the hashcode.
* @return the hashcode.
* @stable ICU 3.2
* @stable ICU 2.8
*/
@Override
public int hashCode() {
int result = bytes.length;
for (int i = 0; i < size; ++i) {
@ -249,6 +252,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
* @throws ClassCastException if the other object is not a ByteArrayWrapper
* @stable ICU 4.4
*/
@Override
public int compareTo(ByteArrayWrapper other) {
if (this == other) return 0;
int minSize = size < other.size ? size : other.size;
@ -259,11 +263,11 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
}
return size - other.size;
}
// private methods -----------------------------------------------------
/**
* Copies the contents of src byte array from offset srcoff to the
* Copies the contents of src byte array from offset srcoff to the
* target of tgt byte array at the offset tgtoff.
* @param src source byte array to copy from
* @param srcoff start offset of src to copy from
@ -271,15 +275,15 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
* @param tgtoff start offset of tgt to copy to
* @param length size of contents to copy
*/
private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
int tgtoff, int length) {
if (length < 64) {
for (int i = srcoff, n = tgtoff; -- length >= 0; ++ i, ++ n) {
tgt[n] = src[i];
}
}
}
else {
System.arraycopy(src, srcoff, tgt, tgtoff, length);
}
}
}
}

View file

@ -17,35 +17,35 @@ import com.ibm.icu.lang.UCharacter;
* @stable ICU 2.0
*/
public class CaseInsensitiveString {
private String string;
private int hash = 0;
private String folded = null;
private static String foldCase(String foldee)
{
return UCharacter.foldCase(foldee, true);
}
private void getFolded()
{
if (folded == null) {
folded = foldCase(string);
}
}
/**
* Constructs a CaseInsensitiveString object from the given string
* @param s The string to construct this object from
* @param s The string to construct this object from
* @stable ICU 2.0
*/
public CaseInsensitiveString(String s) {
string = s;
}
/**
* returns the underlying string
* returns the underlying string
* @return String
* @stable ICU 2.0
*/
@ -53,10 +53,11 @@ public class CaseInsensitiveString {
return string;
}
/**
* Compare the object with this
* @param o Object to compare this object with
* Compare the object with this
* @param o Object to compare this object with
* @stable ICU 2.0
*/
@Override
public boolean equals(Object o) {
if (o == null) {
return false;
@ -72,26 +73,29 @@ public class CaseInsensitiveString {
}
return false;
}
/**
* Returns the hashCode of this object
* @return int hashcode
* @stable ICU 2.0
*/
@Override
public int hashCode() {
getFolded();
if (hash == 0) {
hash = folded.hashCode();
}
return hash;
}
/**
* Overrides superclass method
* @stable ICU 3.6
* @return a string representation of the object.
* @stable ICU 2.0
*/
@Override
public String toString() {
return string;
}

View file

@ -316,6 +316,15 @@ public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
public final int getValue() { return value; }
}
/**
* Protected no-args constructor.
*
* @draft ICU 63
* @provisional This API might change or be removed in a future release.
*/
protected CodePointMap() {
}
/**
* Returns the value for a code point as stored in the map, with range checking.
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.

View file

@ -351,31 +351,43 @@ public class JapaneseCalendar extends GregorianCalendar {
/**
* @stable ICU 2.8
*/
static public final int CURRENT_ERA = ERA_RULES.getCurrentEraIndex();
static public final int CURRENT_ERA;
/**
* Constant for the era starting on Sept. 8, 1868 AD.
* @stable ICU 2.8
*/
static public final int MEIJI = 232;
static public final int MEIJI;
/**
* Constant for the era starting on July 30, 1912 AD.
* @stable ICU 2.8
*/
static public final int TAISHO = 233;
static public final int TAISHO;
/**
* Constant for the era starting on Dec. 25, 1926 AD.
* @stable ICU 2.8
*/
static public final int SHOWA = 234;
static public final int SHOWA;
/**
* Constant for the era starting on Jan. 7, 1989 AD.
* @stable ICU 2.8
*/
static public final int HEISEI = 235;
static public final int HEISEI;
// These era constants are deliberately assigned in a static initializer
// block to prevent javac from inlining their values into consumer code.
// This preserves binary compatibility across versions even if
// these values change.
static {
MEIJI = 232;
TAISHO = 233;
SHOWA = 234;
HEISEI = 235;
CURRENT_ERA = ERA_RULES.getCurrentEraIndex();
}
/**
* Override GregorianCalendar. We should really handle YEAR_WOY and

View file

@ -44,7 +44,7 @@ public class MeasureUnit implements Serializable {
// All access to the cache or cacheIsPopulated flag must be synchronized on class MeasureUnit,
// i.e. from synchronized static methods. Beware of non-static methods.
private static final Map<String, Map<String,MeasureUnit>> cache
= new HashMap<String, Map<String,MeasureUnit>>();
= new HashMap<>();
private static boolean cacheIsPopulated = false;
/**
@ -95,7 +95,7 @@ public class MeasureUnit implements Serializable {
/**
* {@inheritDoc}
*
* @stable ICU 53
* @stable ICU 3.0
*/
@Override
public int hashCode() {
@ -105,7 +105,7 @@ public class MeasureUnit implements Serializable {
/**
* {@inheritDoc}
*
* @stable ICU 53
* @stable ICU 3.0
*/
@Override
public boolean equals(Object rhs) {
@ -122,7 +122,7 @@ public class MeasureUnit implements Serializable {
/**
* {@inheritDoc}
*
* @stable ICU 53
* @stable ICU 3.0
*/
@Override
public String toString() {
@ -152,7 +152,7 @@ public class MeasureUnit implements Serializable {
// flexibility for implementation.
// Use CollectionSet instead of HashSet for better performance.
return units == null ? Collections.<MeasureUnit>emptySet()
: Collections.unmodifiableSet(new CollectionSet<MeasureUnit>(units.values()));
: Collections.unmodifiableSet(new CollectionSet<>(units.values()));
}
/**
@ -161,8 +161,8 @@ public class MeasureUnit implements Serializable {
* @stable ICU 53
*/
public synchronized static Set<MeasureUnit> getAvailable() {
Set<MeasureUnit> result = new HashSet<MeasureUnit>();
for (String type : new HashSet<String>(MeasureUnit.getAvailableTypes())) {
Set<MeasureUnit> result = new HashSet<>();
for (String type : new HashSet<>(MeasureUnit.getAvailableTypes())) {
for (MeasureUnit unit : MeasureUnit.getAvailable(type)) {
result.add(unit);
}
@ -348,7 +348,7 @@ public class MeasureUnit implements Serializable {
protected synchronized static MeasureUnit addUnit(String type, String unitName, Factory factory) {
Map<String, MeasureUnit> tmp = cache.get(type);
if (tmp == null) {
cache.put(type, tmp = new HashMap<String, MeasureUnit>());
cache.put(type, tmp = new HashMap<>());
} else {
// "intern" the type by setting to first item's type.
type = tmp.entrySet().iterator().next().getValue().type;
@ -1184,7 +1184,7 @@ public class MeasureUnit implements Serializable {
public static final MeasureUnit TEASPOON = MeasureUnit.internalGetInstance("volume", "teaspoon");
private static HashMap<Pair<MeasureUnit, MeasureUnit>, MeasureUnit>unitPerUnitToSingleUnit =
new HashMap<Pair<MeasureUnit, MeasureUnit>, MeasureUnit>();
new HashMap<>();
static {
unitPerUnitToSingleUnit.put(Pair.<MeasureUnit, MeasureUnit>of(MeasureUnit.LITER, MeasureUnit.KILOMETER), MeasureUnit.LITER_PER_KILOMETER);

View file

@ -612,7 +612,7 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Returns a string representation of this object.
* @return a string representation of this object
* @stable ICU 3.6
* @stable ICU 2.0
*/
@Override
public String toString() {
@ -1140,7 +1140,7 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Overrides equals.
* @return true if obj is a SimpleTimeZone equivalent to this
* @stable ICU 3.6
* @stable ICU 2.0
*/
@Override
public boolean equals(Object obj){
@ -1180,7 +1180,8 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Overrides hashCode.
* @stable ICU 3.6
* @return a hash code value for this object.
* @stable ICU 2.0
*/
@Override
public int hashCode(){
@ -1208,7 +1209,7 @@ public class SimpleTimeZone extends BasicTimeZone {
/**
* Overrides clone.
* @stable ICU 3.6
* @stable ICU 2.0
*/
@Override
public Object clone() {

View file

@ -1052,7 +1052,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable<Tim
/**
* Overrides equals.
* @stable ICU 3.6
* @return <code>true</code> if this object is the same as the obj argument; <code>false</code> otherwise.
* @stable ICU 2.0
*/
@Override
public boolean equals(Object obj){
@ -1063,7 +1064,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable<Tim
/**
* Overrides hashCode.
* @stable ICU 3.6
* @return a hash code value for this object.
* @stable ICU 2.0
*/
@Override
public int hashCode(){

View file

@ -668,7 +668,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
/**
* This is for compatibility with Locale-- in actuality, since ULocale is
* immutable, there is no reason to clone it, so this API returns 'this'.
* @stable ICU 3.0
* @stable ICU 2.8
*/
@Override
public Object clone() {
@ -677,7 +677,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
/**
* Returns the hashCode.
* @stable ICU 3.0
* @return a hash code value for this object.
* @stable ICU 2.8
*/
@Override
public int hashCode() {
@ -691,7 +692,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
* function identically might not compare equal.
*
* @return true if this Locale is equal to the specified object.
* @stable ICU 3.0
* @stable ICU 2.8
*/
@Override
public boolean equals(Object obj) {
@ -1071,7 +1072,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
/**
* Returns a string representation of this object.
* @stable ICU 3.0
* @return a string representation of the object.
* @stable ICU 2.8
*/
@Override
public String toString() {

View file

@ -482,7 +482,7 @@ public final class VersionInfo implements Comparable<VersionInfo>
*
* @return the hash code value for this set.
* @see java.lang.Object#hashCode()
* @stable ICU 58
* @stable ICU 2.6
*/
@Override
public int hashCode() {
@ -527,7 +527,7 @@ public final class VersionInfo implements Comparable<VersionInfo>
/**
* Map of singletons
*/
private static final ConcurrentHashMap<Integer, VersionInfo> MAP_ = new ConcurrentHashMap<Integer, VersionInfo>();
private static final ConcurrentHashMap<Integer, VersionInfo> MAP_ = new ConcurrentHashMap<>();
/**
* Last byte mask
*/

View file

@ -13,259 +13,9 @@ import java.util.Map;
/**
* <code>RuleBasedTransliterator</code> is a transliterator
* that reads a set of rules in order to determine how to perform
* translations. Rule sets are stored in resource bundles indexed by
* name. Rules within a rule set are separated by semicolons (';').
* To include a literal semicolon, prefix it with a backslash ('\').
* Unicode Pattern_White_Space is ignored.
* If the first non-blank character on a line is '#',
* the entire line is ignored as a comment.
*
* <p>Each set of rules consists of two groups, one forward, and one
* reverse. This is a convention that is not enforced; rules for one
* direction may be omitted, with the result that translations in
* that direction will not modify the source text. In addition,
* bidirectional forward-reverse rules may be specified for
* symmetrical transformations.
*
* <p><b>Rule syntax</b>
*
* <p>Rule statements take one of the following forms:
*
* <dl>
* <dt><code>$alefmadda=\u0622;</code></dt>
* <dd><strong>Variable definition.</strong> The name on the
* left is assigned the text on the right. In this example,
* after this statement, instances of the left hand name,
* &quot;<code>$alefmadda</code>&quot;, will be replaced by
* the Unicode character U+0622. Variable names must begin
* with a letter and consist only of letters, digits, and
* underscores. Case is significant. Duplicate names cause
* an exception to be thrown, that is, variables cannot be
* redefined. The right hand side may contain well-formed
* text of any length, including no text at all (&quot;<code>$empty=;</code>&quot;).
* The right hand side may contain embedded <code>UnicodeSet</code>
* patterns, for example, &quot;<code>$softvowel=[eiyEIY]</code>&quot;.</dd>
* <dd>&nbsp;</dd>
* <dt><code>ai&gt;$alefmadda;</code></dt>
* <dd><strong>Forward translation rule.</strong> This rule
* states that the string on the left will be changed to the
* string on the right when performing forward
* transliteration.</dd>
* <dt>&nbsp;</dt>
* <dt><code>ai&lt;$alefmadda;</code></dt>
* <dd><strong>Reverse translation rule.</strong> This rule
* states that the string on the right will be changed to
* the string on the left when performing reverse
* transliteration.</dd>
* </dl>
*
* <dl>
* <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
* <dd><strong>Bidirectional translation rule.</strong> This
* rule states that the string on the right will be changed
* to the string on the left when performing forward
* transliteration, and vice versa when performing reverse
* transliteration.</dd>
* </dl>
*
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
* string</em>. The match pattern consists of literal characters,
* optionally preceded by context, and optionally followed by
* context. Context characters, like literal pattern characters,
* must be matched in the text being transliterated. However, unlike
* literal pattern characters, they are not replaced by the output
* text. For example, the pattern &quot;<code>abc{def}</code>&quot;
* indicates the characters &quot;<code>def</code>&quot; must be
* preceded by &quot;<code>abc</code>&quot; for a successful match.
* If there is a successful match, &quot;<code>def</code>&quot; will
* be replaced, but not &quot;<code>abc</code>&quot;. The final '<code>}</code>'
* is optional, so &quot;<code>abc{def</code>&quot; is equivalent to
* &quot;<code>abc{def}</code>&quot;. Another example is &quot;<code>{123}456</code>&quot;
* (or &quot;<code>123}456</code>&quot;) in which the literal
* pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
*
* <p>The output string of a forward or reverse rule consists of
* characters to replace the literal pattern characters. If the
* output string contains the character '<code>|</code>', this is
* taken to indicate the location of the <em>cursor</em> after
* replacement. The cursor is the point in the text at which the
* next replacement, if any, will be applied. The cursor is usually
* placed within the replacement text; however, it can actually be
* placed into the preceding or following context by using the
* special character '<code>@</code>'. Examples:
*
* <blockquote>
* <p><code>a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor
* before a<br>
* {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between
* y and z</code>
* </blockquote>
*
* <p><b>UnicodeSet</b>
*
* <p><code>UnicodeSet</code> patterns may appear anywhere that
* makes sense. They may appear in variable definitions.
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
* contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
* or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.
*
* <p><code>UnicodeSet</code> patterns may also be embedded directly
* into rule strings. Thus, the following two rules are equivalent:
*
* <blockquote>
* <p><code>$vowel=[aeiou]; $vowel&gt;'*'; # One way to do this<br>
* [aeiou]&gt;'*';
* &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
* Another way</code>
* </blockquote>
*
* <p>See {@link UnicodeSet} for more documentation and examples.
*
* <p><b>Segments</b>
*
* <p>Segments of the input string can be matched and copied to the
* output string. This makes certain sets of rules simpler and more
* general, and makes reordering possible. For example:
*
* <blockquote>
* <p><code>([a-z]) &gt; $1 $1;
* &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
* double lowercase letters<br>
* ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs</code>
* </blockquote>
*
* <p>The segment of the input string to be copied is delimited by
* &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
* nine segments may be defined. Segments may not overlap. In the
* output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
* represent the input string segments, in left-to-right order of
* definition.
*
* <p><b>Anchors</b>
*
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
* special characters '<code>^</code>' and '<code>$</code>'. For example:
*
* <blockquote>
* <p><code>^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text<br>
* &nbsp; a&nbsp;&nbsp; &gt; 'A';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
* of 'a'<br>
* &nbsp; z $ &gt; 'END_Z'; &nbsp;&nbsp;# match 'z' at end of text<br>
* &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances
* of 'z'</code>
* </blockquote>
*
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
* set pattern. Although this is usually the match character for the end anchor, the set will
* match either the beginning or the end of the text, depending on its placement. For
* example:
*
* <blockquote>
* <p><code>$x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor<br>
* $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start<br>
* &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end</code>
* </blockquote>
*
* <p><b>Example</b>
*
* <p>The following example rules illustrate many of the features of
* the rule language.
*
* <table border="0" cellpadding="4">
* <tr>
* <td style="vertical-align: top;">Rule 1.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>abc{def}&gt;x|y</code></td>
* </tr>
* <tr>
* <td style="vertical-align: top;">Rule 2.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>xyz&gt;r</code></td>
* </tr>
* <tr>
* <td style="vertical-align: top;">Rule 3.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>yz&gt;q</code></td>
* </tr>
* </table>
*
* <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
* yields the following results:
*
* <table border="0" cellpadding="4">
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>|adefabcdefz</code></td>
* <td style="vertical-align: top;">Initial state, no rules match. Advance
* cursor.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>a|defabcdefz</code></td>
* <td style="vertical-align: top;">Still no match. Rule 1 does not match
* because the preceding context is not present.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>ad|efabcdefz</code></td>
* <td style="vertical-align: top;">Still no match. Keep advancing until
* there is a match...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>ade|fabcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adef|abcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefa|bcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefab|cdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabc|defz</code></td>
* <td style="vertical-align: top;">Rule 1 matches; replace &quot;<code>def</code>&quot;
* with &quot;<code>xy</code>&quot; and back up the cursor
* to before the '<code>y</code>'.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcx|yz</code></td>
* <td style="vertical-align: top;">Although &quot;<code>xyz</code>&quot; is
* present, rule 2 does not match because the cursor is
* before the '<code>y</code>', not before the '<code>x</code>'.
* Rule 3 does match. Replace &quot;<code>yz</code>&quot;
* with &quot;<code>q</code>&quot;.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcxq|</code></td>
* <td style="vertical-align: top;">The cursor is at the end;
* transliteration is complete.</td>
* </tr>
* </table>
*
* <p>The order of rules is significant. If multiple rules may match
* at some point, the first matching rule is applied.
*
* <p>Forward and reverse rules may have an empty output string.
* Otherwise, an empty left or right hand side of any statement is a
* syntax error.
*
* <p>Single quotes are used to quote any character other than a
* digit or letter. To specify a single quote itself, inside or
* outside of quotes, use two single quotes in a row. For example,
* the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
* string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
*
* <p><b>Notes</b>
*
* <p>While a RuleBasedTransliterator is being built, it checks that
* the rules are added in proper order. For example, if the rule
* &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
* then the second rule will throw an exception. The reason is that
* the second rule can never be triggered, since the first rule
* always matches anything it matches. In other words, the first
* rule <em>masks</em> the second rule.
* built from a set of rules as defined for
* {@link Transliterator#createFromRules(String, String, int)}.
* See the class {@link Transliterator} documentation for the rule syntax.
*
* @author Alan Liu
* @internal
@ -369,7 +119,7 @@ public class RuleBasedTransliterator extends Transliterator {
static class Data {
public Data() {
variableNames = new HashMap<String, char[]>();
variableNames = new HashMap<>();
ruleSet = new TransliterationRuleSet();
}
@ -487,5 +237,3 @@ public class RuleBasedTransliterator extends Transliterator {
return new RuleBasedTransliterator(getID(), data, filter);
}
}

View file

@ -83,7 +83,7 @@ import com.ibm.icu.util.UResourceBundle;
* modified as each new character arrives.
*
* <p>
* Consider the simple <code>RuleBasedTransliterator</code>:
* Consider the simple rule-based Transliterator:
*
* <blockquote><code>
* th&gt;{theta}<br>
@ -110,8 +110,8 @@ import com.ibm.icu.util.UResourceBundle;
* that the transliterator will look at. It is advanced as text becomes committed (but it is not the committed index;
* that's the <code>cursor</code>). The <code>cursor</code> index, described above, marks the point at which the
* transliterator last stopped, either because it reached the end, or because it required more characters to
* disambiguate between possible inputs. The <code>cursor</code> can also be explicitly set by rules in a
* <code>RuleBasedTransliterator</code>. Any characters before the <code>cursor</code> index are frozen; future keyboard
* disambiguate between possible inputs. The <code>cursor</code> can also be explicitly set by rules.
* Any characters before the <code>cursor</code> index are frozen; future keyboard
* transliteration calls within this input sequence will not change them. New text is inserted at the <code>limit</code>
* index, which marks the end of the substring that the transliterator looks at.
*
@ -222,13 +222,262 @@ import com.ibm.icu.util.UResourceBundle;
* <code>transliterate()</code> method taking a <code>String</code> and <code>StringBuffer</code> if the performance of
* these methods can be improved over the performance obtained by the default implementations in this class.
*
* <p><b>Rule syntax</b>
*
* <p>A set of rules determines how to perform translations.
* Rules within a rule set are separated by semicolons (';').
* To include a literal semicolon, prefix it with a backslash ('\').
* Unicode Pattern_White_Space is ignored.
* If the first non-blank character on a line is '#',
* the entire line is ignored as a comment.
*
* <p>Each set of rules consists of two groups, one forward, and one
* reverse. This is a convention that is not enforced; rules for one
* direction may be omitted, with the result that translations in
* that direction will not modify the source text. In addition,
* bidirectional forward-reverse rules may be specified for
* symmetrical transformations.
*
* <p>Note: Another description of the Transliterator rule syntax is available in
* <a href="https://www.unicode.org/reports/tr35/tr35-general.html#Transform_Rules_Syntax">section
* Transform Rules Syntax of UTS #35: Unicode LDML</a>.
* The rules are shown there using arrow symbols ← and → and ↔.
* ICU supports both those and the equivalent ASCII symbols &lt; and &gt; and &lt;&gt;.
*
* <p>Rule statements take one of the following forms:
*
* <dl>
* <dt><code>$alefmadda=\\u0622;</code></dt>
* <dd><strong>Variable definition.</strong> The name on the
* left is assigned the text on the right. In this example,
* after this statement, instances of the left hand name,
* &quot;<code>$alefmadda</code>&quot;, will be replaced by
* the Unicode character U+0622. Variable names must begin
* with a letter and consist only of letters, digits, and
* underscores. Case is significant. Duplicate names cause
* an exception to be thrown, that is, variables cannot be
* redefined. The right hand side may contain well-formed
* text of any length, including no text at all (&quot;<code>$empty=;</code>&quot;).
* The right hand side may contain embedded <code>UnicodeSet</code>
* patterns, for example, &quot;<code>$softvowel=[eiyEIY]</code>&quot;.</dd>
* <dt><code>ai&gt;$alefmadda;</code></dt>
* <dd><strong>Forward translation rule.</strong> This rule
* states that the string on the left will be changed to the
* string on the right when performing forward
* transliteration.</dd>
* <dt><code>ai&lt;$alefmadda;</code></dt>
* <dd><strong>Reverse translation rule.</strong> This rule
* states that the string on the right will be changed to
* the string on the left when performing reverse
* transliteration.</dd>
* </dl>
*
* <dl>
* <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
* <dd><strong>Bidirectional translation rule.</strong> This
* rule states that the string on the right will be changed
* to the string on the left when performing forward
* transliteration, and vice versa when performing reverse
* transliteration.</dd>
* </dl>
*
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
* string</em>. The match pattern consists of literal characters,
* optionally preceded by context, and optionally followed by
* context. Context characters, like literal pattern characters,
* must be matched in the text being transliterated. However, unlike
* literal pattern characters, they are not replaced by the output
* text. For example, the pattern &quot;<code>abc{def}</code>&quot;
* indicates the characters &quot;<code>def</code>&quot; must be
* preceded by &quot;<code>abc</code>&quot; for a successful match.
* If there is a successful match, &quot;<code>def</code>&quot; will
* be replaced, but not &quot;<code>abc</code>&quot;. The final '<code>}</code>'
* is optional, so &quot;<code>abc{def</code>&quot; is equivalent to
* &quot;<code>abc{def}</code>&quot;. Another example is &quot;<code>{123}456</code>&quot;
* (or &quot;<code>123}456</code>&quot;) in which the literal
* pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
*
* <p>The output string of a forward or reverse rule consists of
* characters to replace the literal pattern characters. If the
* output string contains the character '<code>|</code>', this is
* taken to indicate the location of the <em>cursor</em> after
* replacement. The cursor is the point in the text at which the
* next replacement, if any, will be applied. The cursor is usually
* placed within the replacement text; however, it can actually be
* placed into the preceding or following context by using the
* special character '@'. Examples:
*
* <pre>
* a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor before a
* {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between y and z
* </pre>
*
* <p><b>UnicodeSet</b>
*
* <p><code>UnicodeSet</code> patterns may appear anywhere that
* makes sense. They may appear in variable definitions.
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
* contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
* or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.
*
* <p><code>UnicodeSet</code> patterns may also be embedded directly
* into rule strings. Thus, the following two rules are equivalent:
*
* <pre>
* $vowel=[aeiou]; $vowel&gt;'*'; # One way to do this
* [aeiou]&gt;'*'; # Another way
* </pre>
*
* <p>See {@link UnicodeSet} for more documentation and examples.
*
* <p><b>Segments</b>
*
* <p>Segments of the input string can be matched and copied to the
* output string. This makes certain sets of rules simpler and more
* general, and makes reordering possible. For example:
*
* <pre>
* ([a-z]) &gt; $1 $1; # double lowercase letters
* ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs
* </pre>
*
* <p>The segment of the input string to be copied is delimited by
* &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
* nine segments may be defined. Segments may not overlap. In the
* output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
* represent the input string segments, in left-to-right order of
* definition.
*
* <p><b>Anchors</b>
*
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
* special characters '<code>^</code>' and '<code>$</code>'. For example:
*
* <pre>
* ^ a&nbsp;&nbsp; &gt; 'BEG_A'; &nbsp;&nbsp;# match 'a' at start of text
* &nbsp; a&nbsp;&nbsp; &gt; 'A'; # match other instances of 'a'
* &nbsp; z $ &gt; 'END_Z'; &nbsp;&nbsp;# match 'z' at end of text
* &nbsp; z&nbsp;&nbsp; &gt; 'Z';&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # match other instances of 'z'
* </pre>
*
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
* set pattern. Although this is usually the match character for the end anchor, the set will
* match either the beginning or the end of the text, depending on its placement. For
* example:
*
* <pre>
* $x = [a-z$]; &nbsp;&nbsp;# match 'a' through 'z' OR anchor
* $x 1&nbsp;&nbsp;&nbsp; &gt; 2;&nbsp;&nbsp; # match '1' after a-z or at the start
* &nbsp;&nbsp; 3 $x &gt; 4; &nbsp;&nbsp;# match '3' before a-z or at the end
* </pre>
*
* <p><b>Example</b>
*
* <p>The following example rules illustrate many of the features of
* the rule language.
*
* <table border="0" cellpadding="4">
* <tr>
* <td style="vertical-align: top;">Rule 1.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>abc{def}&gt;x|y</code></td>
* </tr>
* <tr>
* <td style="vertical-align: top;">Rule 2.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>xyz&gt;r</code></td>
* </tr>
* <tr>
* <td style="vertical-align: top;">Rule 3.</td>
* <td style="vertical-align: top; white-space: nowrap;"><code>yz&gt;q</code></td>
* </tr>
* </table>
*
* <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
* yields the following results:
*
* <table border="0" cellpadding="4">
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>|adefabcdefz</code></td>
* <td style="vertical-align: top;">Initial state, no rules match. Advance
* cursor.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>a|defabcdefz</code></td>
* <td style="vertical-align: top;">Still no match. Rule 1 does not match
* because the preceding context is not present.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>ad|efabcdefz</code></td>
* <td style="vertical-align: top;">Still no match. Keep advancing until
* there is a match...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>ade|fabcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adef|abcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefa|bcdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefab|cdefz</code></td>
* <td style="vertical-align: top;">...</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabc|defz</code></td>
* <td style="vertical-align: top;">Rule 1 matches; replace &quot;<code>def</code>&quot;
* with &quot;<code>xy</code>&quot; and back up the cursor
* to before the '<code>y</code>'.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcx|yz</code></td>
* <td style="vertical-align: top;">Although &quot;<code>xyz</code>&quot; is
* present, rule 2 does not match because the cursor is
* before the '<code>y</code>', not before the '<code>x</code>'.
* Rule 3 does match. Replace &quot;<code>yz</code>&quot;
* with &quot;<code>q</code>&quot;.</td>
* </tr>
* <tr>
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcxq|</code></td>
* <td style="vertical-align: top;">The cursor is at the end;
* transliteration is complete.</td>
* </tr>
* </table>
*
* <p>The order of rules is significant. If multiple rules may match
* at some point, the first matching rule is applied.
*
* <p>Forward and reverse rules may have an empty output string.
* Otherwise, an empty left or right hand side of any statement is a
* syntax error.
*
* <p>Single quotes are used to quote any character other than a
* digit or letter. To specify a single quote itself, inside or
* outside of quotes, use two single quotes in a row. For example,
* the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
* string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
*
* <p><b>Notes</b>
*
* <p>While a Transliterator is being built from rules, it checks that
* the rules are added in proper order. For example, if the rule
* &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
* then the second rule will throw an exception. The reason is that
* the second rule can never be triggered, since the first rule
* always matches anything it matches. In other words, the first
* rule <em>masks</em> the second rule.
*
* @author Alan Liu
* @stable ICU 2.0
*/
public abstract class Transliterator implements StringTransform {
/**
* Direction constant indicating the forward direction in a transliterator,
* e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
* e.g., the forward rules of a rule-based Transliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @stable ICU 2.0
@ -237,7 +486,7 @@ public abstract class Transliterator implements StringTransform {
/**
* Direction constant indicating the reverse direction in a transliterator,
* e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
* e.g., the reverse rules of a rule-based Transliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
* @stable ICU 2.0
@ -358,7 +607,7 @@ public abstract class Transliterator implements StringTransform {
/**
* Returns true if this Position is equal to the given object.
* @stable ICU 2.6
* @stable ICU 2.0
*/
@Override
public boolean equals(Object obj) {
@ -373,7 +622,8 @@ public abstract class Transliterator implements StringTransform {
}
/**
* @draft ICU 63
* {@inheritDoc}
* @stable ICU 2.0
*/
@Override
public int hashCode() {
@ -382,7 +632,8 @@ public abstract class Transliterator implements StringTransform {
/**
* Returns a string representation of this Position.
* @stable ICU 2.6
* @return a string representation of the object.
* @stable ICU 2.0
*/
@Override
public String toString() {
@ -1100,7 +1351,7 @@ public abstract class Transliterator implements StringTransform {
/**
* Transliterate a substring of text, as specified by index, taking filters
* into account. This method is for subclasses that need to delegate to
* another transliterator, such as CompoundTransliterator.
* another transliterator.
* @param text the text to be transliterated
* @param index the position indices
* @param incremental if TRUE, then assume more characters may be inserted
@ -1343,7 +1594,7 @@ public abstract class Transliterator implements StringTransform {
public static Transliterator getInstance(String ID,
int dir) {
StringBuffer canonID = new StringBuffer();
List<SingleID> list = new ArrayList<SingleID>();
List<SingleID> list = new ArrayList<>();
UnicodeSet[] globalFilter = new UnicodeSet[1];
if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) {
throw new IllegalArgumentException("Invalid ID " + ID);
@ -1398,11 +1649,17 @@ public abstract class Transliterator implements StringTransform {
/**
* Returns a <code>Transliterator</code> object constructed from
* the given rule string. This will be a RuleBasedTransliterator,
* the given rule string. This will be a rule-based Transliterator,
* if the rule string contains only rules, or a
* CompoundTransliterator, if it contains ID blocks, or a
* NullTransliterator, if it contains ID blocks which parse as
* compound Transliterator, if it contains ID blocks, or a
* null Transliterator, if it contains ID blocks which parse as
* empty for the given direction.
*
* @param ID the id for the transliterator.
* @param rules rules, separated by ';'
* @param dir either FORWARD or REVERSE.
* @return a newly created Transliterator
* @throws IllegalArgumentException if there is a problem with the ID or the rules
* @stable ICU 2.0
*/
public static final Transliterator createFromRules(String ID, String rules, int dir) {
@ -1435,7 +1692,7 @@ public abstract class Transliterator implements StringTransform {
}
}
else {
List<Transliterator> transliterators = new ArrayList<Transliterator>();
List<Transliterator> transliterators = new ArrayList<>();
int passNumber = 1;
int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size());

View file

@ -0,0 +1,124 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.tool.docs;
import java.io.File;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
/**
 * Checks that the API status (and status version) of the common
 * {@code java.lang.Object} overrides listed in {@link #METHODS}
 * (equals, hashCode, toString, clone) is the same as that of the
 * class containing them.
 *
 * @author Yoshito
 */
public class APIStatusConsistencyChecker {
    /** Largest exit status that is passed through to the parent process unmodified on POSIX systems. */
    private static final int MAX_EXIT_STATUS = 255;

    /**
     * Command line entry point.
     *
     * <p>Arguments:
     * <ul>
     * <li>args[0] API signature file path</li>
     * <li>args[1] (Optional) List of classes to be skipped, separated by semicolon</li>
     * </ul>
     *
     * <p>The process exit status is the number of inconsistencies found
     * (capped at 255), or 1 when the command line arguments are invalid.
     *
     * @param args command line arguments as described above
     */
    public static void main(String[] args) {
        // Reject a bad command line up front. Falling through would either
        // fail on args[0] (no arguments) or silently ignore extra arguments.
        if (args.length < 1) {
            System.err.println("Missing API signature file path.");
            System.exit(1);
        } else if (args.length > 2) {
            System.err.println("Too many command arguments");
            System.exit(1);
        }

        List<String> skipClasses = Collections.emptyList();
        if (args.length == 2) {
            String[] classes = args[1].split(";");
            skipClasses = Arrays.asList(classes);
        }

        // Load the ICU4J API signature file
        Set<APIInfo> apiInfoSet = APIData.read(new File(args[0]), true).getAPIInfoSet();

        APIStatusConsistencyChecker checker =
                new APIStatusConsistencyChecker(apiInfoSet, skipClasses, new PrintWriter(System.err, true));
        checker.checkConsistency();

        // Only the low 8 bits of the exit status are visible to the parent
        // process on POSIX systems, so an unclamped count of 256 would be
        // reported as success.
        System.exit(Math.min(checker.errCount, MAX_EXIT_STATUS));
    }

    /** Number of problems reported so far by {@link #checkConsistency()}. */
    private int errCount = 0;
    /** API entries to be examined. */
    private final Set<APIInfo> apiInfoSet;
    /** Destination for error messages. */
    private final PrintWriter pw;
    /** Fully qualified names of classes whose methods are not checked. */
    private final List<String> skipClasses;

    /**
     * Creates a checker.
     *
     * @param apiInfoSet  the API entries to be examined
     * @param skipClasses fully qualified names of classes whose methods are not checked
     * @param pw          the writer that receives error messages
     */
    public APIStatusConsistencyChecker(Set<APIInfo> apiInfoSet, List<String> skipClasses, PrintWriter pw) {
        this.apiInfoSet = apiInfoSet;
        this.skipClasses = skipClasses;
        this.pw = pw;
    }

    /**
     * Returns the number of errors reported so far.
     *
     * @return the error count
     */
    public int errorCount() {
        return errCount;
    }

    // Methods that should have same API status with a containing class
    static final String[][] METHODS = {
        //{"<method name>", "<method signature in APIInfo data>"},
        {"equals", "boolean(java.lang.Object)"},
        {"hashCode", "int()"},
        {"toString", "java.lang.String()"},
        {"clone", "java.lang.Object()"},
    };

    /**
     * Compares the API status and status version of every method listed in
     * {@link #METHODS} with those of its containing class. Each mismatch,
     * and each method whose containing class cannot be found, is written
     * to the error writer and counted in {@link #errorCount()}.
     */
    public void checkConsistency() {
        Map<String, APIInfo> classMap = new TreeMap<>();

        // Build a map of APIInfo for classes, indexed by class name
        for (APIInfo api : apiInfoSet) {
            if (!api.isPublic() && !api.isProtected()) {
                continue;
            }
            if (!api.isClass() && !api.isEnum()) {
                continue;
            }
            String fullClassName = api.getPackageName() + "." + api.getName();
            classMap.put(fullClassName, api);
        }

        // Walk through methods
        for (APIInfo api : apiInfoSet) {
            if (!api.isMethod()) {
                continue;
            }

            String fullClassName = api.getPackageName() + "." + api.getClassName();
            if (skipClasses.contains(fullClassName)) {
                continue;
            }

            boolean checkWithClass = false;
            String methodName = api.getName();
            String methodSig = api.getSignature();

            for (String[] method : METHODS) {
                if (method[0].equals(methodName) && method[1].equals(methodSig)) {
                    checkWithClass = true;
                    break;
                }
            }

            if (!checkWithClass) {
                continue;
            }

            // Check if this method has same API status with the containing class
            APIInfo clsApi = classMap.get(fullClassName);
            if (clsApi == null) {
                pw.println("## Error ## Class " + fullClassName + " is not found.");
                errCount++;
                // Nothing to compare against; skip this method instead of
                // dereferencing the missing class entry below.
                continue;
            }

            int methodStatus = api.getVal(APIInfo.STA);
            String methodVer = api.getStatusVersion();
            int classStatus = clsApi.getVal(APIInfo.STA);
            String classVer = clsApi.getStatusVersion();

            if (methodStatus != classStatus || !Objects.equals(methodVer, classVer)) {
                pw.println("## Error ## " + methodName + " in " + fullClassName);
                errCount++;
            }
        }
    }
}

View file

@ -56,7 +56,7 @@ public class DeprecatedAPIChecker {
public void checkDeprecated() {
// Gather API class/enum names and its names that can be
// used for Class.forName()
Map<String, String> apiClassNameMap = new TreeMap<String, String>();
Map<String, String> apiClassNameMap = new TreeMap<>();
for (APIInfo api : apiInfoSet) {
if (!api.isPublic() && !api.isProtected()) {
continue;
@ -133,6 +133,18 @@ public class DeprecatedAPIChecker {
}
List<String> paramNames = getParamNames(ctor);
Class<?> declClass = cls.getDeclaringClass();
if (declClass != null && !Modifier.isStatic(cls.getModifiers())) {
// This is non-static inner class's constructor.
// javac automatically injects instance of declaring class
// as the first param of the constructor, but ICU's API
// signature is based on javadoc and it generates signature
// without the implicit parameter.
assert paramNames.get(0).equals(declClass.getName());
paramNames.remove(0);
}
api = findConstructorInfo(apiInfoSet, clsName, paramNames);
if (api == null) {
@ -351,7 +363,7 @@ public class DeprecatedAPIChecker {
throw new IllegalArgumentException(api.toString() + " is not a constructor or a method.");
}
List<String> nameList = new ArrayList<String>();
List<String> nameList = new ArrayList<>();
String signature = api.getSignature();
int start = signature.indexOf('(');
int end = signature.indexOf(')');
@ -410,7 +422,7 @@ public class DeprecatedAPIChecker {
private static char[] PRIMITIVE_SIGNATURES = { 'B', 'S', 'I', 'J', 'F', 'D', 'Z', 'C' };
private static List<String> toTypeNameList(Type[] types) {
List<String> nameList = new ArrayList<String>();
List<String> nameList = new ArrayList<>();
for (Type t : types) {
StringBuilder s = new StringBuilder();