mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-20119 Merge github.com:unicode-org/icu into icu63_1
This commit is contained in:
commit
c28e2510c4
38 changed files with 1113 additions and 736 deletions
|
@ -798,7 +798,7 @@ _getKeywords(const char *localeID,
|
|||
}
|
||||
keywordsLen += keywordList[i].keywordLen + 1;
|
||||
if(valuesToo) {
|
||||
if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
|
||||
if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
|
||||
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
|
||||
}
|
||||
keywordsLen += keywordList[i].valueLen;
|
||||
|
|
|
@ -12,11 +12,13 @@
|
|||
#include "unicode/putil.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "putilimp.h"
|
||||
#include "uinvchar.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uvector.h"
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
|
@ -172,6 +174,46 @@ static const char*
|
|||
ultag_getGrandfathered(const ULanguageTag* langtag);
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Helper class to memory manage CharString objects.
|
||||
// Only ever stack-allocated, does not need to inherit UMemory.
|
||||
class CharStringPool {
|
||||
public:
|
||||
CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
|
||||
~CharStringPool() = default;
|
||||
|
||||
CharStringPool(const CharStringPool&) = delete;
|
||||
CharStringPool& operator=(const CharStringPool&) = delete;
|
||||
|
||||
icu::CharString* create() {
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
icu::CharString* const obj = new icu::CharString;
|
||||
if (obj == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
pool.addElement(obj, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete obj;
|
||||
return nullptr;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
private:
|
||||
static void U_CALLCONV deleter(void* obj) {
|
||||
delete static_cast<icu::CharString*>(obj);
|
||||
}
|
||||
|
||||
UErrorCode status;
|
||||
icu::UVector pool;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/*
|
||||
* -------------------------------------------------
|
||||
*
|
||||
|
@ -900,7 +942,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
|
||||
static int32_t
|
||||
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
|
||||
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
||||
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
|
||||
int32_t attrBufLength = 0;
|
||||
UEnumeration *keywordEnum = NULL;
|
||||
|
@ -920,22 +961,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
AttributeListEntry *firstAttr = NULL;
|
||||
AttributeListEntry *attr;
|
||||
char *attrValue;
|
||||
char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
||||
char *pExtBuf = extBuf;
|
||||
int32_t extBufCapacity = sizeof(extBuf);
|
||||
CharStringPool extBufPool;
|
||||
const char *bcpKey=nullptr, *bcpValue=nullptr;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
int32_t keylen;
|
||||
UBool isBcpUExt;
|
||||
|
||||
while (TRUE) {
|
||||
icu::CharString buf;
|
||||
key = uenum_next(keywordEnum, NULL, status);
|
||||
if (key == NULL) {
|
||||
break;
|
||||
}
|
||||
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
|
||||
/* buf must be null-terminated */
|
||||
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
char* buffer;
|
||||
int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
|
||||
|
||||
for (;;) {
|
||||
buffer = buf.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
break;
|
||||
}
|
||||
|
||||
len = uloc_getKeywordValue(
|
||||
localeID, key, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = len;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
|
@ -945,6 +1012,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
continue;
|
||||
}
|
||||
|
||||
buf.append(buffer, len, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
keylen = (int32_t)uprv_strlen(key);
|
||||
isBcpUExt = (keylen > 1);
|
||||
|
||||
|
@ -1007,7 +1079,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
}
|
||||
|
||||
/* we've checked buf is null-terminated above */
|
||||
bcpValue = uloc_toUnicodeLocaleType(key, buf);
|
||||
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
|
||||
if (bcpValue == NULL) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
@ -1015,33 +1087,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
}
|
||||
continue;
|
||||
}
|
||||
if (bcpValue == buf) {
|
||||
/*
|
||||
if (bcpValue == buf.data()) {
|
||||
/*
|
||||
When uloc_toUnicodeLocaleType(key, buf) returns the
|
||||
input value as is, the value is well-formed, but has
|
||||
no known mapping. This implementation normalizes the
|
||||
the value to lower case
|
||||
value to lower case
|
||||
*/
|
||||
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
|
||||
if (bcpValueLen < extBufCapacity) {
|
||||
uprv_strcpy(pExtBuf, bcpValue);
|
||||
T_CString_toLowerCase(pExtBuf);
|
||||
|
||||
bcpValue = pExtBuf;
|
||||
|
||||
pExtBuf += (bcpValueLen + 1);
|
||||
extBufCapacity -= (bcpValueLen + 1);
|
||||
} else {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
icu::CharString* extBuf = extBufPool.create();
|
||||
if (extBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
|
||||
int32_t resultCapacity;
|
||||
char* pExtBuf = extBuf->getAppendBuffer(
|
||||
/*minCapacity=*/bcpValueLen,
|
||||
/*desiredCapacityHint=*/bcpValueLen,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
|
||||
uprv_strcpy(pExtBuf, bcpValue);
|
||||
T_CString_toLowerCase(pExtBuf);
|
||||
|
||||
extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
|
||||
bcpValue = extBuf->data();
|
||||
}
|
||||
} else {
|
||||
if (*key == PRIVATEUSE) {
|
||||
if (!_isPrivateuseValueSubtags(buf, len)) {
|
||||
if (!_isPrivateuseValueSubtags(buf.data(), len)) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
|
@ -1049,7 +1132,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
continue;
|
||||
}
|
||||
} else {
|
||||
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
|
||||
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
|
@ -1058,20 +1141,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
}
|
||||
}
|
||||
bcpKey = key;
|
||||
if ((len + 1) < extBufCapacity) {
|
||||
uprv_memcpy(pExtBuf, buf, len);
|
||||
bcpValue = pExtBuf;
|
||||
|
||||
pExtBuf += len;
|
||||
|
||||
*pExtBuf = 0;
|
||||
pExtBuf++;
|
||||
|
||||
extBufCapacity -= (len + 1);
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
icu::CharString* extBuf = extBufPool.create();
|
||||
if (extBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
extBuf->append(buf.data(), len, tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
bcpValue = extBuf->data();
|
||||
}
|
||||
|
||||
/* create ExtensionListEntry */
|
||||
|
@ -2337,31 +2417,66 @@ uloc_toLanguageTag(const char* localeID,
|
|||
int32_t langtagCapacity,
|
||||
UBool strict,
|
||||
UErrorCode* status) {
|
||||
/* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
|
||||
char canonical[256];
|
||||
int32_t reslen = 0;
|
||||
icu::CharString canonical;
|
||||
int32_t reslen;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
UBool hadPosix = FALSE;
|
||||
const char* pKeywordStart;
|
||||
|
||||
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
|
||||
canonical[0] = 0;
|
||||
if (uprv_strlen(localeID) > 0) {
|
||||
uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
|
||||
if (tmpStatus != U_ZERO_ERROR) {
|
||||
int32_t resultCapacity = uprv_strlen(localeID);
|
||||
if (resultCapacity > 0) {
|
||||
char* buffer;
|
||||
|
||||
for (;;) {
|
||||
buffer = canonical.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return 0;
|
||||
}
|
||||
|
||||
reslen =
|
||||
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = reslen;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
canonical.append(buffer, reslen, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
reslen = 0;
|
||||
|
||||
/* For handling special case - private use only tag */
|
||||
pKeywordStart = locale_getKeywordsStart(canonical);
|
||||
if (pKeywordStart == canonical) {
|
||||
pKeywordStart = locale_getKeywordsStart(canonical.data());
|
||||
if (pKeywordStart == canonical.data()) {
|
||||
UEnumeration *kwdEnum;
|
||||
int kwdCnt = 0;
|
||||
UBool done = FALSE;
|
||||
|
||||
kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
|
||||
kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
|
||||
if (kwdEnum != NULL) {
|
||||
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
|
||||
if (kwdCnt == 1) {
|
||||
|
@ -2399,12 +2514,12 @@ uloc_toLanguageTag(const char* localeID,
|
|||
}
|
||||
}
|
||||
|
||||
reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
|
||||
reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
|
||||
reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
|
||||
reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
|
||||
reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
|
||||
return reslen;
|
||||
}
|
||||
|
|
|
@ -29,262 +29,10 @@ class TransliterationRuleData;
|
|||
|
||||
/**
|
||||
* <code>RuleBasedTransliterator</code> is a transliterator
|
||||
* that reads a set of rules in order to determine how to perform
|
||||
* translations. Rule sets are stored in resource bundles indexed by
|
||||
* name. Rules within a rule set are separated by semicolons (';').
|
||||
* To include a literal semicolon, prefix it with a backslash ('\').
|
||||
* Whitespace, as defined by <code>Character.isWhitespace()</code>,
|
||||
* is ignored. If the first non-blank character on a line is '#',
|
||||
* the entire line is ignored as a comment. </p>
|
||||
*
|
||||
* <p>Each set of rules consists of two groups, one forward, and one
|
||||
* reverse. This is a convention that is not enforced; rules for one
|
||||
* direction may be omitted, with the result that translations in
|
||||
* that direction will not modify the source text. In addition,
|
||||
* bidirectional forward-reverse rules may be specified for
|
||||
* symmetrical transformations.</p>
|
||||
*
|
||||
* <p><b>Rule syntax</b> </p>
|
||||
*
|
||||
* <p>Rule statements take one of the following forms: </p>
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>$alefmadda=\u0622;</code></dt>
|
||||
* <dd><strong>Variable definition.</strong> The name on the
|
||||
* left is assigned the text on the right. In this example,
|
||||
* after this statement, instances of the left hand name,
|
||||
* "<code>$alefmadda</code>", will be replaced by
|
||||
* the Unicode character U+0622. Variable names must begin
|
||||
* with a letter and consist only of letters, digits, and
|
||||
* underscores. Case is significant. Duplicate names cause
|
||||
* an exception to be thrown, that is, variables cannot be
|
||||
* redefined. The right hand side may contain well-formed
|
||||
* text of any length, including no text at all ("<code>$empty=;</code>").
|
||||
* The right hand side may contain embedded <code>UnicodeSet</code>
|
||||
* patterns, for example, "<code>$softvowel=[eiyEIY]</code>".</dd>
|
||||
* <dd> </dd>
|
||||
* <dt><code>ai>$alefmadda;</code></dt>
|
||||
* <dd><strong>Forward translation rule.</strong> This rule
|
||||
* states that the string on the left will be changed to the
|
||||
* string on the right when performing forward
|
||||
* transliteration.</dd>
|
||||
* <dt> </dt>
|
||||
* <dt><code>ai<$alefmadda;</code></dt>
|
||||
* <dd><strong>Reverse translation rule.</strong> This rule
|
||||
* states that the string on the right will be changed to
|
||||
* the string on the left when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>ai<>$alefmadda;</code></dt>
|
||||
* <dd><strong>Bidirectional translation rule.</strong> This
|
||||
* rule states that the string on the right will be changed
|
||||
* to the string on the left when performing forward
|
||||
* transliteration, and vice versa when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
|
||||
* string</em>. The match pattern consists of literal characters,
|
||||
* optionally preceded by context, and optionally followed by
|
||||
* context. Context characters, like literal pattern characters,
|
||||
* must be matched in the text being transliterated. However, unlike
|
||||
* literal pattern characters, they are not replaced by the output
|
||||
* text. For example, the pattern "<code>abc{def}</code>"
|
||||
* indicates the characters "<code>def</code>" must be
|
||||
* preceded by "<code>abc</code>" for a successful match.
|
||||
* If there is a successful match, "<code>def</code>" will
|
||||
* be replaced, but not "<code>abc</code>". The final '<code>}</code>'
|
||||
* is optional, so "<code>abc{def</code>" is equivalent to
|
||||
* "<code>abc{def}</code>". Another example is "<code>{123}456</code>"
|
||||
* (or "<code>123}456</code>") in which the literal
|
||||
* pattern "<code>123</code>" must be followed by "<code>456</code>".
|
||||
* </p>
|
||||
*
|
||||
* <p>The output string of a forward or reverse rule consists of
|
||||
* characters to replace the literal pattern characters. If the
|
||||
* output string contains the character '<code>|</code>', this is
|
||||
* taken to indicate the location of the <em>cursor</em> after
|
||||
* replacement. The cursor is the point in the text at which the
|
||||
* next replacement, if any, will be applied. The cursor is usually
|
||||
* placed within the replacement text; however, it can actually be
|
||||
* placed into the preceding or following context by using the
|
||||
* special character '<code>@</code>'. Examples:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>a {foo} z > | @ bar; # foo -> bar, move cursor
|
||||
* before a<br>
|
||||
* {foo} xyz > bar @@|; # foo -> bar, cursor between
|
||||
* y and z</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p><b>UnicodeSet</b></p>
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may appear anywhere that
|
||||
* makes sense. They may appear in variable definitions.
|
||||
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
|
||||
* contain variable references, such as "<code>$a=[a-z];$not_a=[^$a]</code>",
|
||||
* or "<code>$range=a-z;$ll=[$range]</code>".</p>
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may also be embedded directly
|
||||
* into rule strings. Thus, the following two rules are equivalent:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>$vowel=[aeiou]; $vowel>'*'; # One way to do this<br>
|
||||
* [aeiou]>'*';
|
||||
* #
|
||||
* Another way</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>See {@link UnicodeSet} for more documentation and examples.</p>
|
||||
*
|
||||
* <p><b>Segments</b></p>
|
||||
*
|
||||
* <p>Segments of the input string can be matched and copied to the
|
||||
* output string. This makes certain sets of rules simpler and more
|
||||
* general, and makes reordering possible. For example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>([a-z]) > $1 $1;
|
||||
* #
|
||||
* double lowercase letters<br>
|
||||
* ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>The segment of the input string to be copied is delimited by
|
||||
* "<code>(</code>" and "<code>)</code>". Up to
|
||||
* nine segments may be defined. Segments may not overlap. In the
|
||||
* output string, "<code>$1</code>" through "<code>$9</code>"
|
||||
* represent the input string segments, in left-to-right order of
|
||||
* definition.</p>
|
||||
*
|
||||
* <p><b>Anchors</b></p>
|
||||
*
|
||||
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
|
||||
* special characters '<code>^</code>' and '<code>$</code>'. For example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>^ a > 'BEG_A'; # match 'a' at start of text<br>
|
||||
* a > 'A'; # match other instances
|
||||
* of 'a'<br>
|
||||
* z $ > 'END_Z'; # match 'z' at end of text<br>
|
||||
* z > 'Z'; # match other instances
|
||||
* of 'z'</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
|
||||
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
|
||||
* set pattern. Although this is usually the match character for the end anchor, the set will
|
||||
* match either the beginning or the end of the text, depending on its placement. For
|
||||
* example:</p>
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>$x = [a-z$]; # match 'a' through 'z' OR anchor<br>
|
||||
* $x 1 > 2; # match '1' after a-z or at the start<br>
|
||||
* 3 $x > 4; # match '3' before a-z or at the end</code></p>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p><b>Example</b> </p>
|
||||
*
|
||||
* <p>The following example rules illustrate many of the features of
|
||||
* the rule language. </p>
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td valign="top">Rule 1.</td>
|
||||
* <td valign="top" nowrap><code>abc{def}>x|y</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top">Rule 2.</td>
|
||||
* <td valign="top" nowrap><code>xyz>r</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top">Rule 3.</td>
|
||||
* <td valign="top" nowrap><code>yz>q</code></td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>Applying these rules to the string "<code>adefabcdefz</code>"
|
||||
* yields the following results: </p>
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>|adefabcdefz</code></td>
|
||||
* <td valign="top">Initial state, no rules match. Advance
|
||||
* cursor.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>a|defabcdefz</code></td>
|
||||
* <td valign="top">Still no match. Rule 1 does not match
|
||||
* because the preceding context is not present.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>ad|efabcdefz</code></td>
|
||||
* <td valign="top">Still no match. Keep advancing until
|
||||
* there is a match...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>ade|fabcdefz</code></td>
|
||||
* <td valign="top">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>adef|abcdefz</code></td>
|
||||
* <td valign="top">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>adefa|bcdefz</code></td>
|
||||
* <td valign="top">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>adefab|cdefz</code></td>
|
||||
* <td valign="top">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>adefabc|defz</code></td>
|
||||
* <td valign="top">Rule 1 matches; replace "<code>def</code>"
|
||||
* with "<code>xy</code>" and back up the cursor
|
||||
* to before the '<code>y</code>'.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>adefabcx|yz</code></td>
|
||||
* <td valign="top">Although "<code>xyz</code>" is
|
||||
* present, rule 2 does not match because the cursor is
|
||||
* before the '<code>y</code>', not before the '<code>x</code>'.
|
||||
* Rule 3 does match. Replace "<code>yz</code>"
|
||||
* with "<code>q</code>".</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td valign="top" nowrap><code>adefabcxq|</code></td>
|
||||
* <td valign="top">The cursor is at the end;
|
||||
* transliteration is complete.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>The order of rules is significant. If multiple rules may match
|
||||
* at some point, the first matching rule is applied. </p>
|
||||
*
|
||||
* <p>Forward and reverse rules may have an empty output string.
|
||||
* Otherwise, an empty left or right hand side of any statement is a
|
||||
* syntax error. </p>
|
||||
*
|
||||
* <p>Single quotes are used to quote any character other than a
|
||||
* digit or letter. To specify a single quote itself, inside or
|
||||
* outside of quotes, use two single quotes in a row. For example,
|
||||
* the rule "<code>'>'>o''clock</code>" changes the
|
||||
* string "<code>></code>" to the string "<code>o'clock</code>".
|
||||
* </p>
|
||||
*
|
||||
* <p><b>Notes</b> </p>
|
||||
*
|
||||
* <p>While a RuleBasedTransliterator is being built, it checks that
|
||||
* the rules are added in proper order. For example, if the rule
|
||||
* "a>x" is followed by the rule "ab>y",
|
||||
* then the second rule will throw an exception. The reason is that
|
||||
* the second rule can never be triggered, since the first rule
|
||||
* always matches anything it matches. In other words, the first
|
||||
* rule <em>masks</em> the second rule. </p>
|
||||
*
|
||||
* built from a set of rules as defined for
|
||||
* Transliterator::createFromRules().
|
||||
* See the C++ class Transliterator documentation for the rule syntax.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @internal Use transliterator factory methods instead since this class will be removed in that release.
|
||||
*/
|
||||
|
|
|
@ -15,10 +15,10 @@
|
|||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \file
|
||||
* \brief C++ API: Transforms text from one format to another.
|
||||
*/
|
||||
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
@ -31,7 +31,6 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
class UnicodeFilter;
|
||||
class UnicodeSet;
|
||||
class CompoundTransliterator;
|
||||
class TransliteratorParser;
|
||||
class NormalizationTransliterator;
|
||||
class TransliteratorIDParser;
|
||||
|
@ -97,18 +96,20 @@ class TransliteratorIDParser;
|
|||
* contents of the buffer may show text being modified as each new
|
||||
* character arrives.
|
||||
*
|
||||
* <p>Consider the simple `RuleBasedTransliterator`:
|
||||
*
|
||||
* <p>Consider the simple rule-based Transliterator:
|
||||
* <pre>
|
||||
* th>{theta}
|
||||
* t>{tau}
|
||||
* </pre>
|
||||
*
|
||||
* When the user types 't', nothing will happen, since the
|
||||
* transliterator is waiting to see if the next character is 'h'. To
|
||||
* remedy this, we introduce the notion of a cursor, marked by a '|'
|
||||
* in the output string:
|
||||
*
|
||||
* <pre>
|
||||
* t>|{tau}
|
||||
* {tau}h>{theta}
|
||||
* </pre>
|
||||
*
|
||||
* Now when the user types 't', tau appears, and if the next character
|
||||
* is 'h', the tau changes to a theta. This is accomplished by
|
||||
|
@ -130,7 +131,7 @@ class TransliteratorIDParser;
|
|||
* which the transliterator last stopped, either because it reached
|
||||
* the end, or because it required more characters to disambiguate
|
||||
* between possible inputs. The <code>CURSOR</code> can also be
|
||||
* explicitly set by rules in a <code>RuleBasedTransliterator</code>.
|
||||
* explicitly set by rules in a rule-based Transliterator.
|
||||
* Any characters before the <code>CURSOR</code> index are frozen;
|
||||
* future keyboard transliteration calls within this input sequence
|
||||
* will not change them. New text is inserted at the
|
||||
|
@ -232,6 +233,255 @@ class TransliteratorIDParser;
|
|||
* if the performance of these methods can be improved over the
|
||||
* performance obtained by the default implementations in this class.
|
||||
*
|
||||
* <p><b>Rule syntax</b>
|
||||
*
|
||||
* <p>A set of rules determines how to perform translations.
|
||||
* Rules within a rule set are separated by semicolons (';').
|
||||
* To include a literal semicolon, prefix it with a backslash ('\').
|
||||
* Unicode Pattern_White_Space is ignored.
|
||||
* If the first non-blank character on a line is '#',
|
||||
* the entire line is ignored as a comment.
|
||||
*
|
||||
* <p>Each set of rules consists of two groups, one forward, and one
|
||||
* reverse. This is a convention that is not enforced; rules for one
|
||||
* direction may be omitted, with the result that translations in
|
||||
* that direction will not modify the source text. In addition,
|
||||
* bidirectional forward-reverse rules may be specified for
|
||||
* symmetrical transformations.
|
||||
*
|
||||
* <p>Note: Another description of the Transliterator rule syntax is available in
|
||||
* <a href="https://www.unicode.org/reports/tr35/tr35-general.html#Transform_Rules_Syntax">section
|
||||
* Transform Rules Syntax of UTS #35: Unicode LDML</a>.
|
||||
* The rules are shown there using arrow symbols ← and → and ↔.
|
||||
* ICU supports both those and the equivalent ASCII symbols &lt; and &gt; and &lt;&gt;.
|
||||
*
|
||||
* <p>Rule statements take one of the following forms:
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>$alefmadda=\\u0622;</code></dt>
|
||||
* <dd><strong>Variable definition.</strong> The name on the
|
||||
* left is assigned the text on the right. In this example,
|
||||
* after this statement, instances of the left hand name,
|
||||
* "<code>$alefmadda</code>", will be replaced by
|
||||
* the Unicode character U+0622. Variable names must begin
|
||||
* with a letter and consist only of letters, digits, and
|
||||
* underscores. Case is significant. Duplicate names cause
|
||||
* an exception to be thrown, that is, variables cannot be
|
||||
* redefined. The right hand side may contain well-formed
|
||||
* text of any length, including no text at all ("<code>$empty=;</code>").
|
||||
* The right hand side may contain embedded <code>UnicodeSet</code>
|
||||
* patterns, for example, "<code>$softvowel=[eiyEIY]</code>".</dd>
|
||||
* <dt><code>ai>$alefmadda;</code></dt>
|
||||
* <dd><strong>Forward translation rule.</strong> This rule
|
||||
* states that the string on the left will be changed to the
|
||||
* string on the right when performing forward
|
||||
* transliteration.</dd>
|
||||
* <dt><code>ai&lt;$alefmadda;</code></dt>
|
||||
* <dd><strong>Reverse translation rule.</strong> This rule
|
||||
* states that the string on the right will be changed to
|
||||
* the string on the left when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
|
||||
* <dd><strong>Bidirectional translation rule.</strong> This
|
||||
* rule states that the string on the right will be changed
|
||||
* to the string on the left when performing forward
|
||||
* transliteration, and vice versa when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
|
||||
* string</em>. The match pattern consists of literal characters,
|
||||
* optionally preceded by context, and optionally followed by
|
||||
* context. Context characters, like literal pattern characters,
|
||||
* must be matched in the text being transliterated. However, unlike
|
||||
* literal pattern characters, they are not replaced by the output
|
||||
* text. For example, the pattern "<code>abc{def}</code>"
|
||||
* indicates the characters "<code>def</code>" must be
|
||||
* preceded by "<code>abc</code>" for a successful match.
|
||||
* If there is a successful match, "<code>def</code>" will
|
||||
* be replaced, but not "<code>abc</code>". The final '<code>}</code>'
|
||||
* is optional, so "<code>abc{def</code>" is equivalent to
|
||||
* "<code>abc{def}</code>". Another example is "<code>{123}456</code>"
|
||||
* (or "<code>123}456</code>") in which the literal
|
||||
* pattern "<code>123</code>" must be followed by "<code>456</code>".
|
||||
*
|
||||
* <p>The output string of a forward or reverse rule consists of
|
||||
* characters to replace the literal pattern characters. If the
|
||||
* output string contains the character '<code>|</code>', this is
|
||||
* taken to indicate the location of the <em>cursor</em> after
|
||||
* replacement. The cursor is the point in the text at which the
|
||||
* next replacement, if any, will be applied. The cursor is usually
|
||||
* placed within the replacement text; however, it can actually be
|
||||
* placed into the preceding or following context by using the
|
||||
* special character '@'. Examples:
|
||||
*
|
||||
* <pre>
|
||||
* a {foo} z > | @ bar; # foo -> bar, move cursor before a
|
||||
* {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
|
||||
* </pre>
|
||||
*
|
||||
* <p><b>UnicodeSet</b>
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may appear anywhere that
|
||||
* makes sense. They may appear in variable definitions.
|
||||
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
|
||||
* contain variable references, such as "<code>$a=[a-z];$not_a=[^$a]</code>",
|
||||
* or "<code>$range=a-z;$ll=[$range]</code>".
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may also be embedded directly
|
||||
* into rule strings. Thus, the following two rules are equivalent:
|
||||
*
|
||||
* <pre>
|
||||
* $vowel=[aeiou]; $vowel>'*'; # One way to do this
|
||||
* [aeiou]>'*'; # Another way
|
||||
* </pre>
|
||||
*
|
||||
* <p>See {@link UnicodeSet} for more documentation and examples.
|
||||
*
|
||||
* <p><b>Segments</b>
|
||||
*
|
||||
* <p>Segments of the input string can be matched and copied to the
|
||||
* output string. This makes certain sets of rules simpler and more
|
||||
* general, and makes reordering possible. For example:
|
||||
*
|
||||
* <pre>
|
||||
* ([a-z]) > $1 $1; # double lowercase letters
|
||||
* ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
|
||||
* </pre>
|
||||
*
|
||||
* <p>The segment of the input string to be copied is delimited by
|
||||
* "<code>(</code>" and "<code>)</code>". Up to
|
||||
* nine segments may be defined. Segments may not overlap. In the
|
||||
* output string, "<code>$1</code>" through "<code>$9</code>"
|
||||
* represent the input string segments, in left-to-right order of
|
||||
* definition.
|
||||
*
|
||||
* <p><b>Anchors</b>
|
||||
*
|
||||
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
|
||||
* special characters '<code>^</code>' and '<code>$</code>'. For example:
|
||||
*
|
||||
* <pre>
|
||||
* ^ a > 'BEG_A'; # match 'a' at start of text
|
||||
* a > 'A'; # match other instances of 'a'
|
||||
* z $ > 'END_Z'; # match 'z' at end of text
|
||||
* z > 'Z'; # match other instances of 'z'
|
||||
* </pre>
|
||||
*
|
||||
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
|
||||
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
|
||||
* set pattern. Although this is usually the match character for the end anchor, the set will
|
||||
* match either the beginning or the end of the text, depending on its placement. For
|
||||
* example:
|
||||
*
|
||||
* <pre>
|
||||
* $x = [a-z$]; # match 'a' through 'z' OR anchor
|
||||
* $x 1 > 2; # match '1' after a-z or at the start
|
||||
* 3 $x > 4; # match '3' before a-z or at the end
|
||||
* </pre>
|
||||
*
|
||||
* <p><b>Example</b>
|
||||
*
|
||||
* <p>The following example rules illustrate many of the features of
|
||||
* the rule language.
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 1.</td>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>abc{def}>x|y</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 2.</td>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>xyz>r</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 3.</td>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>yz>q</code></td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>Applying these rules to the string "<code>adefabcdefz</code>"
|
||||
* yields the following results:
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>|adefabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Initial state, no rules match. Advance
|
||||
* cursor.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>a|defabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Still no match. Rule 1 does not match
|
||||
* because the preceding context is not present.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>ad|efabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Still no match. Keep advancing until
|
||||
* there is a match...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>ade|fabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adef|abcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefa|bcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefab|cdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabc|defz</code></td>
|
||||
* <td style="vertical-align: top;">Rule 1 matches; replace "<code>def</code>"
|
||||
* with "<code>xy</code>" and back up the cursor
|
||||
* to before the '<code>y</code>'.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcx|yz</code></td>
|
||||
* <td style="vertical-align: top;">Although "<code>xyz</code>" is
|
||||
* present, rule 2 does not match because the cursor is
|
||||
* before the '<code>y</code>', not before the '<code>x</code>'.
|
||||
* Rule 3 does match. Replace "<code>yz</code>"
|
||||
* with "<code>q</code>".</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcxq|</code></td>
|
||||
* <td style="vertical-align: top;">The cursor is at the end;
|
||||
* transliteration is complete.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>The order of rules is significant. If multiple rules may match
|
||||
* at some point, the first matching rule is applied.
|
||||
*
|
||||
* <p>Forward and reverse rules may have an empty output string.
|
||||
* Otherwise, an empty left or right hand side of any statement is a
|
||||
* syntax error.
|
||||
*
|
||||
* <p>Single quotes are used to quote any character other than a
|
||||
* digit or letter. To specify a single quote itself, inside or
|
||||
* outside of quotes, use two single quotes in a row. For example,
|
||||
* the rule "<code>'>'>o''clock</code>" changes the
|
||||
* string "<code>></code>" to the string "<code>o'clock</code>".
|
||||
*
|
||||
* <p><b>Notes</b>
|
||||
*
|
||||
* <p>While a Transliterator is being built from rules, it checks that
|
||||
* the rules are added in proper order. For example, if the rule
|
||||
* "a>x" is followed by the rule "ab>y",
|
||||
* then the second rule will throw an exception. The reason is that
|
||||
* the second rule can never be triggered, since the first rule
|
||||
* always matches anything it matches. In other words, the first
|
||||
* rule <em>masks</em> the second rule.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -627,7 +877,7 @@ public:
|
|||
/**
|
||||
* Transliterate a substring of text, as specified by index, taking filters
|
||||
* into account. This method is for subclasses that need to delegate to
|
||||
* another transliterator, such as CompoundTransliterator.
|
||||
* another transliterator.
|
||||
* @param text the text to be transliterated
|
||||
* @param index the position indices
|
||||
* @param incremental if TRUE, then assume more characters may be inserted
|
||||
|
@ -841,17 +1091,19 @@ public:
|
|||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object constructed from
|
||||
* the given rule string. This will be a RuleBasedTransliterator,
|
||||
* the given rule string. This will be a rule-based Transliterator,
|
||||
* if the rule string contains only rules, or a
|
||||
* CompoundTransliterator, if it contains ID blocks, or a
|
||||
* NullTransliterator, if it contains ID blocks which parse as
|
||||
* compound Transliterator, if it contains ID blocks, or a
|
||||
* null Transliterator, if it contains ID blocks which parse as
|
||||
* empty for the given direction.
|
||||
*
|
||||
* @param ID the id for the transliterator.
|
||||
* @param rules rules, separated by ';'
|
||||
* @param dir either FORWARD or REVERSE.
|
||||
* @param parseError Struct to recieve information on position
|
||||
* @param parseError Struct to receive information on position
|
||||
* of error if an error is encountered
|
||||
* @param status Output param set to success/failure code.
|
||||
* @return a newly created Transliterator
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
|
||||
|
|
|
@ -226,6 +226,7 @@ void addLocaleTest(TestNode** root)
|
|||
TESTCASE(TestKeywordVariants);
|
||||
TESTCASE(TestKeywordVariantParsing);
|
||||
TESTCASE(TestCanonicalization);
|
||||
TESTCASE(TestCanonicalizationBuffer);
|
||||
TESTCASE(TestKeywordSet);
|
||||
TESTCASE(TestKeywordSetError);
|
||||
TESTCASE(TestDisplayKeywords);
|
||||
|
@ -2251,6 +2252,42 @@ static void TestCanonicalization(void)
|
|||
}
|
||||
}
|
||||
|
||||
static void TestCanonicalizationBuffer(void)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
char buffer[256];
|
||||
|
||||
// ULOC_FULLNAME_CAPACITY == 157 (uloc.h)
|
||||
static const char name[] =
|
||||
"zh@x"
|
||||
"=foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-barz"
|
||||
;
|
||||
static const size_t len = sizeof name - 1; // Without NUL terminator.
|
||||
|
||||
int32_t reslen = uloc_canonicalize(name, buffer, len, &status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("FAIL: uloc_canonicalize(%s) => %s, expected !U_FAILURE()\n",
|
||||
name, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
if (reslen != len) {
|
||||
log_err("FAIL: uloc_canonicalize(%s) => \"%i\", expected \"%u\"\n",
|
||||
name, reslen, len);
|
||||
return;
|
||||
}
|
||||
|
||||
if (uprv_strncmp(name, buffer, len) != 0) {
|
||||
log_err("FAIL: uloc_canonicalize(%s) => \"%.*s\", expected \"%s\"\n",
|
||||
name, reslen, buffer, name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void TestDisplayKeywords(void)
|
||||
{
|
||||
int32_t i;
|
||||
|
|
|
@ -84,6 +84,7 @@ static void TestDisplayNames(void);
|
|||
static void doTestDisplayNames(const char* inLocale, int32_t compareIndex);
|
||||
|
||||
static void TestCanonicalization(void);
|
||||
static void TestCanonicalizationBuffer(void);
|
||||
|
||||
static void TestDisplayKeywords(void);
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/translit.h"
|
||||
#include "cpdtrans.h"
|
||||
#include "intltest.h"
|
||||
|
||||
/**
|
||||
|
|
|
@ -252,6 +252,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
|||
TESTCASE_AUTO(TestToLanguageTag);
|
||||
TESTCASE_AUTO(TestMoveAssign);
|
||||
TESTCASE_AUTO(TestMoveCtor);
|
||||
TESTCASE_AUTO(TestBug13417VeryLongLanguageTag);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -3125,3 +3126,23 @@ void LocaleTest::TestMoveCtor() {
|
|||
assertEquals("variant", l7.getVariant(), l8.getVariant());
|
||||
assertEquals("bogus", l7.isBogus(), l8.isBogus());
|
||||
}
|
||||
|
||||
void LocaleTest::TestBug13417VeryLongLanguageTag() {
|
||||
IcuTestErrorCode status(*this, "TestBug13417VeryLongLanguageTag()");
|
||||
|
||||
static const char tag[] =
|
||||
"zh-x"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-fxx"
|
||||
;
|
||||
|
||||
Locale l = Locale::forLanguageTag(tag, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag);
|
||||
assertTrue("!l.isBogus()", !l.isBogus());
|
||||
|
||||
std::string result = l.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", l.getName());
|
||||
assertEquals("equals", tag, result.c_str());
|
||||
}
|
||||
|
|
|
@ -124,6 +124,8 @@ public:
|
|||
void TestMoveAssign();
|
||||
void TestMoveCtor();
|
||||
|
||||
void TestBug13417VeryLongLanguageTag();
|
||||
|
||||
private:
|
||||
void _checklocs(const char* label,
|
||||
const char* req,
|
||||
|
|
|
@ -1729,6 +1729,32 @@
|
|||
</java>
|
||||
</target>
|
||||
|
||||
<target name="checkAPIStatusConsistency" depends="info, build-tools, gatherapi"
|
||||
description="Check consistency between API class status and methods overriding java.lang.Object">
|
||||
<!--
|
||||
If you need classes excluded from this check, define the following property in build-local.properties.
|
||||
e.g. checkAPIStatusConsistency.skip.classes=com.ibm.icu.text.Normalizer;com.ibm.icu.util.ULocale
|
||||
-->
|
||||
<property name="checkAPIStatusConsistency.skip.classes" value=""/>
|
||||
<java classname="com.ibm.icu.dev.tool.docs.APIStatusConsistencyChecker"
|
||||
failonerror="true">
|
||||
<arg value="${out.dir}/icu4j${api.report.version}.api3.gz" />
|
||||
<arg value="${checkAPIStatusConsistency.skip.classes}" />
|
||||
<classpath>
|
||||
<pathelement location="${icu4j.build-tools.jar}"/>
|
||||
<pathelement location="${icu4j.core.jar}"/>
|
||||
<pathelement location="${icu4j.collate.jar}"/>
|
||||
<pathelement location="${icu4j.charset.jar}"/>
|
||||
<pathelement location="${icu4j.currdata.jar}"/>
|
||||
<pathelement location="${icu4j.langdata.jar}"/>
|
||||
<pathelement location="${icu4j.regiondata.jar}"/>
|
||||
<pathelement location="${icu4j.translit.jar}"/>
|
||||
</classpath>
|
||||
</java>
|
||||
</target>
|
||||
|
||||
<target name="checkAPIStatus" depends="checkAPIStatusConsistency, checkDeprecated"/>
|
||||
|
||||
<target name="draftAPIs" depends="info, gatherapi" description="Run API collector tool and generate draft API report in html">
|
||||
<java classname="com.ibm.icu.dev.tool.docs.CollectAPI"
|
||||
classpath="${icu4j.build-tools.jar}"
|
||||
|
|
|
@ -624,7 +624,7 @@ public final class CollationElementIterator
|
|||
}
|
||||
|
||||
static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
|
||||
Map<Integer, Integer> maxExpansions = new HashMap<Integer, Integer>();
|
||||
Map<Integer, Integer> maxExpansions = new HashMap<>();
|
||||
MaxExpSink sink = new MaxExpSink(maxExpansions);
|
||||
new ContractionsAndExpansions(null, null, sink, true).forData(data);
|
||||
return maxExpansions;
|
||||
|
@ -692,11 +692,9 @@ public final class CollationElementIterator
|
|||
/**
|
||||
* Mock implementation of hashCode(). This implementation always returns a constant
|
||||
* value. When Java assertion is enabled, this method triggers an assertion failure.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public int hashCode() {
|
||||
assert false : "hashCode not designed";
|
||||
return 42;
|
||||
|
|
|
@ -329,7 +329,7 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
|
|||
* Subclasses should override this implementation.
|
||||
*
|
||||
* @return a hash code value.
|
||||
* @stable ICU 58
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
@ -477,7 +477,7 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
|
|||
|
||||
/**
|
||||
* Clones the collator.
|
||||
* @stable ICU 2.6
|
||||
* @stable ICU 2.8
|
||||
* @return a clone of this collator.
|
||||
*/
|
||||
@Override
|
||||
|
@ -1016,7 +1016,7 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
|
|||
}
|
||||
|
||||
private static final class KeywordsSink extends UResource.Sink {
|
||||
LinkedList<String> values = new LinkedList<String>();
|
||||
LinkedList<String> values = new LinkedList<>();
|
||||
boolean hasDefault = false;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -112,6 +112,8 @@ public final class CharacterProperties {
|
|||
* @return the property as a set
|
||||
* @see UProperty
|
||||
* @see UCharacter#hasBinaryProperty
|
||||
* @draft ICU 63
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeSet getBinaryPropertySet(int property) {
|
||||
if (property < 0 || UProperty.BINARY_LIMIT <= property) {
|
||||
|
@ -141,6 +143,8 @@ public final class CharacterProperties {
|
|||
* @return the property as a map
|
||||
* @see UProperty
|
||||
* @see UCharacter#getIntPropertyValue
|
||||
* @draft ICU 63
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final CodePointMap getIntPropertyMap(int property) {
|
||||
if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) {
|
||||
|
|
|
@ -175,6 +175,7 @@ public abstract class NumberRangeFormatter {
|
|||
*
|
||||
* @return An {@link UnlocalizedNumberRangeFormatter}, to be used for chaining.
|
||||
* @draft ICU 63
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static UnlocalizedNumberRangeFormatter with() {
|
||||
return BASE;
|
||||
|
@ -188,6 +189,7 @@ public abstract class NumberRangeFormatter {
|
|||
* The locale from which to load formats and symbols for number range formatting.
|
||||
* @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining.
|
||||
* @draft ICU 63
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static LocalizedNumberRangeFormatter withLocale(Locale locale) {
|
||||
return BASE.locale(locale);
|
||||
|
@ -201,9 +203,15 @@ public abstract class NumberRangeFormatter {
|
|||
* The locale from which to load formats and symbols for number range formatting.
|
||||
* @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining.
|
||||
* @draft ICU 63
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static LocalizedNumberRangeFormatter withLocale(ULocale locale) {
|
||||
return BASE.locale(locale);
|
||||
}
|
||||
|
||||
/**
|
||||
* Private constructor - this class is not designed for instantiation
|
||||
*/
|
||||
private NumberRangeFormatter() {
|
||||
}
|
||||
}
|
||||
|
|
|
@ -408,10 +408,10 @@ public abstract class Precision implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* {@inheritDoc}
|
||||
* @draft ICU 62
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public Object clone() {
|
||||
try {
|
||||
|
|
|
@ -93,10 +93,9 @@ public class ScientificNotation extends Notation implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public Object clone() {
|
||||
try {
|
||||
|
|
|
@ -202,7 +202,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
|
|||
//other.pluralRules = pluralRules;
|
||||
// clone content
|
||||
//other.pluralCountToCurrencyUnitPattern = pluralCountToCurrencyUnitPattern;
|
||||
other.pluralCountToCurrencyUnitPattern = new HashMap<String, String>();
|
||||
other.pluralCountToCurrencyUnitPattern = new HashMap<>();
|
||||
for (String pluralCount : pluralCountToCurrencyUnitPattern.keySet()) {
|
||||
String currencyPattern = pluralCountToCurrencyUnitPattern.get(pluralCount);
|
||||
other.pluralCountToCurrencyUnitPattern.put(pluralCount, currencyPattern);
|
||||
|
@ -231,11 +231,9 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
|
|||
/**
|
||||
* Override hashCode
|
||||
*
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public int hashCode() {
|
||||
return pluralCountToCurrencyUnitPattern.hashCode()
|
||||
^ pluralRules.hashCode()
|
||||
|
@ -283,7 +281,7 @@ public class CurrencyPluralInfo implements Cloneable, Serializable {
|
|||
}
|
||||
|
||||
private void setupCurrencyPluralPattern(ULocale uloc) {
|
||||
pluralCountToCurrencyUnitPattern = new HashMap<String, String>();
|
||||
pluralCountToCurrencyUnitPattern = new HashMap<>();
|
||||
|
||||
String numberStylePattern = NumberFormat.getPattern(uloc, NumberFormat.NUMBERSTYLE);
|
||||
// Split the number style pattern into pos and neg if applicable
|
||||
|
|
|
@ -273,10 +273,8 @@ public class DateIntervalInfo implements Cloneable, Freezable<DateIntervalInfo>,
|
|||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public String toString() {
|
||||
return "{first=«" + fIntervalPatternFirstPart + "», second=«" + fIntervalPatternSecondPart + "», reversed:" + fFirstDateInPtnIsLaterDate + "}";
|
||||
|
|
|
@ -821,7 +821,8 @@ public final class Edits {
|
|||
/**
|
||||
* A string representation of the current edit represented by the iterator for debugging. You
|
||||
* should not depend on the contents of the return string; it may change over time.
|
||||
* @internal
|
||||
* @return a string representation of the object.
|
||||
* @stable ICU 59
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -126,9 +126,9 @@ public class MeasureFormat extends UFormat {
|
|||
|
||||
private final transient LocalizedNumberFormatter numberFormatter;
|
||||
|
||||
private static final SimpleCache<ULocale, NumericFormatters> localeToNumericDurationFormatters = new SimpleCache<ULocale, NumericFormatters>();
|
||||
private static final SimpleCache<ULocale, NumericFormatters> localeToNumericDurationFormatters = new SimpleCache<>();
|
||||
|
||||
private static final Map<MeasureUnit, Integer> hmsTo012 = new HashMap<MeasureUnit, Integer>();
|
||||
private static final Map<MeasureUnit, Integer> hmsTo012 = new HashMap<>();
|
||||
|
||||
static {
|
||||
hmsTo012.put(MeasureUnit.HOUR, 0);
|
||||
|
@ -486,7 +486,7 @@ public class MeasureFormat extends UFormat {
|
|||
* Two MeasureFormats, a and b, are equal if and only if they have the same formatWidth, locale, and
|
||||
* equal number formats.
|
||||
*
|
||||
* @stable ICU 53
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
@Override
|
||||
public final boolean equals(Object other) {
|
||||
|
@ -506,7 +506,7 @@ public class MeasureFormat extends UFormat {
|
|||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @stable ICU 53
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
|
@ -997,7 +997,7 @@ public class MeasureFormat extends UFormat {
|
|||
this.formatWidth = width;
|
||||
this.numberFormat = numberFormat;
|
||||
this.subClass = subClass;
|
||||
this.keyValues = new HashMap<Object, Object>();
|
||||
this.keyValues = new HashMap<>();
|
||||
}
|
||||
|
||||
// Must have public constructor, to enable Externalizable
|
||||
|
@ -1070,7 +1070,7 @@ public class MeasureFormat extends UFormat {
|
|||
return values[ordinal];
|
||||
}
|
||||
|
||||
private static final Map<ULocale, String> localeIdToRangeFormat = new ConcurrentHashMap<ULocale, String>();
|
||||
private static final Map<ULocale, String> localeIdToRangeFormat = new ConcurrentHashMap<>();
|
||||
|
||||
/**
|
||||
* Return a formatter (compiled SimpleFormatter pattern) for a range, such as "{0}–{1}".
|
||||
|
|
|
@ -1084,7 +1084,7 @@ public class PluralRules implements Serializable {
|
|||
SampleType sampleType2;
|
||||
boolean bounded2 = true;
|
||||
boolean haveBound = false;
|
||||
Set<FixedDecimalRange> samples2 = new LinkedHashSet<FixedDecimalRange>();
|
||||
Set<FixedDecimalRange> samples2 = new LinkedHashSet<>();
|
||||
|
||||
if (source.startsWith("integer")) {
|
||||
sampleType2 = SampleType.INTEGER;
|
||||
|
@ -1215,7 +1215,7 @@ public class PluralRules implements Serializable {
|
|||
static final UnicodeSet BREAK_AND_KEEP = new UnicodeSet('!', '!', '%', '%', ',', ',', '.', '.', '=', '=').freeze();
|
||||
static String[] split(String source) {
|
||||
int last = -1;
|
||||
List<String> result = new ArrayList<String>();
|
||||
List<String> result = new ArrayList<>();
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char ch = source.charAt(i);
|
||||
if (BREAK_AND_IGNORE.contains(ch)) {
|
||||
|
@ -1334,7 +1334,7 @@ public class PluralRules implements Serializable {
|
|||
t = nextToken(tokens, x++, condition);
|
||||
}
|
||||
|
||||
List<Long> valueList = new ArrayList<Long>();
|
||||
List<Long> valueList = new ArrayList<>();
|
||||
|
||||
// the token t is always one item ahead
|
||||
while (true) {
|
||||
|
@ -1756,10 +1756,9 @@ public class PluralRules implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return keyword.hashCode() ^ constraint.hashCode();
|
||||
|
@ -1773,7 +1772,7 @@ public class PluralRules implements Serializable {
|
|||
private static class RuleList implements Serializable {
|
||||
private boolean hasExplicitBoundingInfo = false;
|
||||
private static final long serialVersionUID = 1;
|
||||
private final List<Rule> rules = new ArrayList<Rule>();
|
||||
private final List<Rule> rules = new ArrayList<>();
|
||||
|
||||
public RuleList addRule(Rule nextRule) {
|
||||
String keyword = nextRule.getKeyword();
|
||||
|
@ -1821,7 +1820,7 @@ public class PluralRules implements Serializable {
|
|||
}
|
||||
|
||||
public Set<String> getKeywords() {
|
||||
Set<String> result = new LinkedHashSet<String>();
|
||||
Set<String> result = new LinkedHashSet<>();
|
||||
for (Rule rule : rules) {
|
||||
result.add(rule.getKeyword());
|
||||
}
|
||||
|
@ -2020,10 +2019,9 @@ public class PluralRules implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return rules.hashCode();
|
||||
|
@ -2175,7 +2173,7 @@ public class PluralRules implements Serializable {
|
|||
if (!keywords.contains(keyword)) {
|
||||
return null;
|
||||
}
|
||||
Set<Double> result = new TreeSet<Double>();
|
||||
Set<Double> result = new TreeSet<>();
|
||||
|
||||
if (rules.hasExplicitBoundingInfo) {
|
||||
FixedDecimalSamples samples = rules.getDecimalSamples(keyword, sampleType);
|
||||
|
@ -2420,7 +2418,7 @@ public class PluralRules implements Serializable {
|
|||
|
||||
// Compute if the quick test is insufficient.
|
||||
|
||||
HashSet<Double> subtractedSet = new HashSet<Double>(values);
|
||||
HashSet<Double> subtractedSet = new HashSet<>(values);
|
||||
for (Double explicit : explicits) {
|
||||
subtractedSet.remove(explicit - offset);
|
||||
}
|
||||
|
|
|
@ -941,13 +941,10 @@ public class RuleBasedNumberFormat extends NumberFormat {
|
|||
}
|
||||
|
||||
/**
|
||||
* Mock implementation of hashCode(). This implementation always returns a constant
|
||||
* value. When Java assertion is enabled, this method triggers an assertion failure.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
}
|
||||
|
@ -1731,7 +1728,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
|
|||
|
||||
// our rule list is an array of the appropriate size
|
||||
ruleSets = new NFRuleSet[numRuleSets];
|
||||
ruleSetsMap = new HashMap<String, NFRuleSet>(numRuleSets * 2 + 1);
|
||||
ruleSetsMap = new HashMap<>(numRuleSets * 2 + 1);
|
||||
defaultRuleSet = null;
|
||||
|
||||
// Used to count the number of public rule sets
|
||||
|
@ -1844,7 +1841,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
|
|||
if (localizations != null) {
|
||||
publicRuleSetNames = localizations[0].clone();
|
||||
|
||||
Map<String, String[]> m = new HashMap<String, String[]>();
|
||||
Map<String, String[]> m = new HashMap<>();
|
||||
for (int i = 1; i < localizations.length; ++i) {
|
||||
String[] data = localizations[i];
|
||||
String loc = data[0];
|
||||
|
|
|
@ -506,7 +506,7 @@ public class SpoofChecker {
|
|||
SpoofData fSpoofData;
|
||||
final UnicodeSet fAllowedCharsSet = new UnicodeSet(0, 0x10ffff); // The UnicodeSet of allowed characters.
|
||||
// for this Spoof Checker. Defaults to all chars.
|
||||
final Set<ULocale> fAllowedLocales = new LinkedHashSet<ULocale>(); // The list of allowed locales.
|
||||
final Set<ULocale> fAllowedLocales = new LinkedHashSet<>(); // The list of allowed locales.
|
||||
private RestrictionLevel fRestrictionLevel;
|
||||
|
||||
/**
|
||||
|
@ -567,7 +567,7 @@ public class SpoofChecker {
|
|||
result.fSpoofData = this.fSpoofData;
|
||||
result.fAllowedCharsSet = (UnicodeSet) (this.fAllowedCharsSet.clone());
|
||||
result.fAllowedCharsSet.freeze();
|
||||
result.fAllowedLocales = new HashSet<ULocale>(this.fAllowedLocales);
|
||||
result.fAllowedLocales = new HashSet<>(this.fAllowedLocales);
|
||||
result.fRestrictionLevel = this.fRestrictionLevel;
|
||||
return result;
|
||||
}
|
||||
|
@ -734,7 +734,7 @@ public class SpoofChecker {
|
|||
* @stable ICU 54
|
||||
*/
|
||||
public Builder setAllowedJavaLocales(Set<Locale> locales) {
|
||||
HashSet<ULocale> ulocales = new HashSet<ULocale>(locales.size());
|
||||
HashSet<ULocale> ulocales = new HashSet<>(locales.size());
|
||||
for (Locale locale : locales) {
|
||||
ulocales.add(ULocale.forLocale(locale));
|
||||
}
|
||||
|
@ -848,10 +848,10 @@ public class SpoofChecker {
|
|||
private int fLineNum;
|
||||
|
||||
ConfusabledataBuilder() {
|
||||
fTable = new Hashtable<Integer, SPUString>();
|
||||
fTable = new Hashtable<>();
|
||||
fKeySet = new UnicodeSet();
|
||||
fKeyVec = new ArrayList<Integer>();
|
||||
fValueVec = new ArrayList<Integer>();
|
||||
fKeyVec = new ArrayList<>();
|
||||
fValueVec = new ArrayList<>();
|
||||
stringPool = new SPUStringPool();
|
||||
}
|
||||
|
||||
|
@ -1093,8 +1093,8 @@ public class SpoofChecker {
|
|||
// combination of a uhash and a Vector.
|
||||
private static class SPUStringPool {
|
||||
public SPUStringPool() {
|
||||
fVec = new Vector<SPUString>();
|
||||
fHash = new Hashtable<String, SPUString>();
|
||||
fVec = new Vector<>();
|
||||
fHash = new Hashtable<>();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
|
@ -1179,7 +1179,7 @@ public class SpoofChecker {
|
|||
* @stable ICU 54
|
||||
*/
|
||||
public Set<Locale> getAllowedJavaLocales() {
|
||||
HashSet<Locale> locales = new HashSet<Locale>(fAllowedLocales.size());
|
||||
HashSet<Locale> locales = new HashSet<>(fAllowedLocales.size());
|
||||
for (ULocale uloc : fAllowedLocales) {
|
||||
locales.add(uloc.toLocale());
|
||||
}
|
||||
|
@ -1535,7 +1535,7 @@ public class SpoofChecker {
|
|||
* @param other
|
||||
* the SpoofChecker being compared with.
|
||||
* @return true if the two SpoofCheckers are equal.
|
||||
* @stable ICU 58
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
@ -1565,7 +1565,7 @@ public class SpoofChecker {
|
|||
|
||||
/**
|
||||
* Overrides {@link Object#hashCode()}.
|
||||
* @stable ICU 58
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
|
|
@ -143,11 +143,10 @@ public class StringPrepParseException extends ParseException {
|
|||
/**
|
||||
* Mock implementation of hashCode(). This implementation always returns a constant
|
||||
* value. When Java assertion is enabled, this method triggers an assertion failure.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @return a hash code value for this object.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public int hashCode() {
|
||||
assert false : "hashCode not designed";
|
||||
return 42;
|
||||
|
|
|
@ -343,7 +343,7 @@ public class TimeUnitFormat extends MeasureFormat {
|
|||
format = NumberFormat.getNumberInstance(locale);
|
||||
}
|
||||
pluralRules = PluralRules.forLocale(locale);
|
||||
timeUnitToCountToPatterns = new HashMap<TimeUnit, Map<String, Object[]>>();
|
||||
timeUnitToCountToPatterns = new HashMap<>();
|
||||
Set<String> pluralKeywords = pluralRules.getKeywords();
|
||||
setup("units/duration", timeUnitToCountToPatterns, FULL_NAME, pluralKeywords);
|
||||
setup("unitsShort/duration", timeUnitToCountToPatterns, ABBREVIATED_NAME, pluralKeywords);
|
||||
|
@ -400,7 +400,7 @@ public class TimeUnitFormat extends MeasureFormat {
|
|||
|
||||
Map<String, Object[]> countToPatterns = timeUnitToCountToPatterns.get(timeUnit);
|
||||
if (countToPatterns == null) {
|
||||
countToPatterns = new TreeMap<String, Object[]>();
|
||||
countToPatterns = new TreeMap<>();
|
||||
timeUnitToCountToPatterns.put(timeUnit, countToPatterns);
|
||||
}
|
||||
|
||||
|
@ -467,7 +467,7 @@ public class TimeUnitFormat extends MeasureFormat {
|
|||
final TimeUnit timeUnit = timeUnits[i];
|
||||
Map<String, Object[]> countToPatterns = timeUnitToCountToPatterns.get(timeUnit);
|
||||
if (countToPatterns == null) {
|
||||
countToPatterns = new TreeMap<String, Object[]>();
|
||||
countToPatterns = new TreeMap<>();
|
||||
timeUnitToCountToPatterns.put(timeUnit, countToPatterns);
|
||||
}
|
||||
for (String pluralCount : keywords) {
|
||||
|
@ -556,8 +556,7 @@ public class TimeUnitFormat extends MeasureFormat {
|
|||
// MeasureFormat
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @deprecated ICU 53 see {@link MeasureFormat}
|
||||
*/
|
||||
@Deprecated
|
||||
@Override
|
||||
|
|
|
@ -26,7 +26,7 @@ import com.ibm.icu.impl.Utility;
|
|||
public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
||||
{
|
||||
// public data member ------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* Internal byte array.
|
||||
* @stable ICU 2.8
|
||||
|
@ -34,16 +34,16 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
public byte[] bytes;
|
||||
|
||||
/**
|
||||
* Size of the internal byte array used.
|
||||
* Different from bytes.length, size will be <= bytes.length.
|
||||
* Size of the internal byte array used.
|
||||
* Different from bytes.length, size will be <= bytes.length.
|
||||
* Semantics of size is similar to java.util.Vector.size().
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public int size;
|
||||
|
||||
|
||||
// public constructor ------------------------------------------------
|
||||
|
||||
/**
|
||||
/**
|
||||
* Construct a new ByteArrayWrapper with no data.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
|
@ -103,15 +103,15 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
// public methods ----------------------------------------------------
|
||||
|
||||
/**
|
||||
* Ensure that the internal byte array is at least of length capacity.
|
||||
* If the byte array is null or its length is less than capacity, a new
|
||||
* byte array of length capacity will be allocated.
|
||||
* The contents of the array (between 0 and size) remain unchanged.
|
||||
* Ensure that the internal byte array is at least of length capacity.
|
||||
* If the byte array is null or its length is less than capacity, a new
|
||||
* byte array of length capacity will be allocated.
|
||||
* The contents of the array (between 0 and size) remain unchanged.
|
||||
* @param capacity minimum length of internal byte array.
|
||||
* @return this ByteArrayWrapper
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
public ByteArrayWrapper ensureCapacity(int capacity)
|
||||
public ByteArrayWrapper ensureCapacity(int capacity)
|
||||
{
|
||||
if (bytes == null || bytes.length < capacity) {
|
||||
byte[] newbytes = new byte[capacity];
|
||||
|
@ -122,11 +122,11 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set the internal byte array from offset 0 to (limit - start) with the
|
||||
* contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new
|
||||
* byte array of length (limit - start) will be allocated.
|
||||
* Set the internal byte array from offset 0 to (limit - start) with the
|
||||
* contents of src from offset start to limit. If the byte array is null or its length is less than capacity, a new
|
||||
* byte array of length (limit - start) will be allocated.
|
||||
* This resets the size of the internal byte array to (limit - start).
|
||||
* @param src source byte array to copy from
|
||||
* @param start start offset of src to copy from
|
||||
|
@ -134,15 +134,15 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
* @return this ByteArrayWrapper
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
public final ByteArrayWrapper set(byte[] src, int start, int limit)
|
||||
public final ByteArrayWrapper set(byte[] src, int start, int limit)
|
||||
{
|
||||
size = 0;
|
||||
append(src, start, limit);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
public final ByteArrayWrapper get(byte[] target, int start, int limit)
|
||||
public final ByteArrayWrapper get(byte[] target, int start, int limit)
|
||||
{
|
||||
int len = limit - start;
|
||||
if (len > size) throw new IllegalArgumentException("limit too long");
|
||||
|
@ -152,7 +152,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
*/
|
||||
|
||||
/**
|
||||
* Appends the internal byte array from offset size with the
|
||||
* Appends the internal byte array from offset size with the
|
||||
* contents of src from offset start to limit. This increases the size of
|
||||
* the internal byte array to (size + limit - start).
|
||||
* @param src source byte array to copy from
|
||||
|
@ -161,7 +161,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
* @return this ByteArrayWrapper
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
public final ByteArrayWrapper append(byte[] src, int start, int limit)
|
||||
public final ByteArrayWrapper append(byte[] src, int start, int limit)
|
||||
{
|
||||
int len = limit - start;
|
||||
ensureCapacity(size + len);
|
||||
|
@ -171,7 +171,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
}
|
||||
|
||||
/*
|
||||
public final ByteArrayWrapper append(ByteArrayWrapper other)
|
||||
public final ByteArrayWrapper append(ByteArrayWrapper other)
|
||||
{
|
||||
return append(other.bytes, 0, other.size);
|
||||
}
|
||||
|
@ -190,13 +190,14 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
size = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// Boilerplate ----------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* Returns string value for debugging
|
||||
* @stable ICU 3.2
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
|
@ -210,8 +211,9 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
* Return true if the bytes in each wrapper are equal.
|
||||
* @param other the object to compare to.
|
||||
* @return true if the two objects are equal.
|
||||
* @stable ICU 3.2
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) return true;
|
||||
if (other == null) return false;
|
||||
|
@ -231,8 +233,9 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
/**
|
||||
* Return the hashcode.
|
||||
* @return the hashcode.
|
||||
* @stable ICU 3.2
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = bytes.length;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
|
@ -249,6 +252,7 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
* @throws ClassCastException if the other object is not a ByteArrayWrapper
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(ByteArrayWrapper other) {
|
||||
if (this == other) return 0;
|
||||
int minSize = size < other.size ? size : other.size;
|
||||
|
@ -259,11 +263,11 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
}
|
||||
return size - other.size;
|
||||
}
|
||||
|
||||
|
||||
// private methods -----------------------------------------------------
|
||||
|
||||
|
||||
/**
|
||||
* Copies the contents of src byte array from offset srcoff to the
|
||||
* Copies the contents of src byte array from offset srcoff to the
|
||||
* target of tgt byte array at the offset tgtoff.
|
||||
* @param src source byte array to copy from
|
||||
* @param srcoff start offset of src to copy from
|
||||
|
@ -271,15 +275,15 @@ public class ByteArrayWrapper implements Comparable<ByteArrayWrapper>
|
|||
* @param tgtoff start offset of tgt to copy to
|
||||
* @param length size of contents to copy
|
||||
*/
|
||||
private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
|
||||
private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
|
||||
int tgtoff, int length) {
|
||||
if (length < 64) {
|
||||
for (int i = srcoff, n = tgtoff; -- length >= 0; ++ i, ++ n) {
|
||||
tgt[n] = src[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
System.arraycopy(src, srcoff, tgt, tgtoff, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,35 +17,35 @@ import com.ibm.icu.lang.UCharacter;
|
|||
* @stable ICU 2.0
|
||||
*/
|
||||
public class CaseInsensitiveString {
|
||||
|
||||
|
||||
private String string;
|
||||
|
||||
private int hash = 0;
|
||||
|
||||
|
||||
private String folded = null;
|
||||
|
||||
|
||||
private static String foldCase(String foldee)
|
||||
{
|
||||
return UCharacter.foldCase(foldee, true);
|
||||
}
|
||||
|
||||
|
||||
private void getFolded()
|
||||
{
|
||||
if (folded == null) {
|
||||
folded = foldCase(string);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructs an CaseInsentiveString object from the given string
|
||||
* @param s The string to construct this object from
|
||||
* @param s The string to construct this object from
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public CaseInsensitiveString(String s) {
|
||||
string = s;
|
||||
}
|
||||
/**
|
||||
* returns the underlying string
|
||||
* returns the underlying string
|
||||
* @return String
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -53,10 +53,11 @@ public class CaseInsensitiveString {
|
|||
return string;
|
||||
}
|
||||
/**
|
||||
* Compare the object with this
|
||||
* @param o Object to compare this object with
|
||||
* Compare the object with this
|
||||
* @param o Object to compare this object with
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == null) {
|
||||
return false;
|
||||
|
@ -72,26 +73,29 @@ public class CaseInsensitiveString {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the hashCode of this object
|
||||
* @return int hashcode
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
getFolded();
|
||||
|
||||
|
||||
if (hash == 0) {
|
||||
hash = folded.hashCode();
|
||||
}
|
||||
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Overrides superclass method
|
||||
* @stable ICU 3.6
|
||||
* @return a string representation of the object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return string;
|
||||
}
|
||||
|
|
|
@ -316,6 +316,15 @@ public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
|
|||
public final int getValue() { return value; }
|
||||
}
|
||||
|
||||
/**
|
||||
* Protected no-args constructor.
|
||||
*
|
||||
* @draft ICU 63
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
protected CodePointMap() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the map, with range checking.
|
||||
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
|
||||
|
|
|
@ -351,31 +351,43 @@ public class JapaneseCalendar extends GregorianCalendar {
|
|||
/**
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
static public final int CURRENT_ERA = ERA_RULES.getCurrentEraIndex();
|
||||
static public final int CURRENT_ERA;
|
||||
|
||||
/**
|
||||
* Constant for the era starting on Sept. 8, 1868 AD.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
static public final int MEIJI = 232;
|
||||
static public final int MEIJI;
|
||||
|
||||
/**
|
||||
* Constant for the era starting on July 30, 1912 AD.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
static public final int TAISHO = 233;
|
||||
static public final int TAISHO;
|
||||
|
||||
/**
|
||||
* Constant for the era starting on Dec. 25, 1926 AD.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
static public final int SHOWA = 234;
|
||||
static public final int SHOWA;
|
||||
|
||||
/**
|
||||
* Constant for the era starting on Jan. 7, 1989 AD.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
static public final int HEISEI = 235;
|
||||
static public final int HEISEI;
|
||||
|
||||
// We want to make these era constants initialized in a static initializer
|
||||
// block to prevent javac to inline these values in a consumer code.
|
||||
// By doing so, we can keep better binary compatibility across versions even
|
||||
// these values are changed.
|
||||
static {
|
||||
MEIJI = 232;
|
||||
TAISHO = 233;
|
||||
SHOWA = 234;
|
||||
HEISEI = 235;
|
||||
CURRENT_ERA = ERA_RULES.getCurrentEraIndex();
|
||||
}
|
||||
|
||||
/**
|
||||
* Override GregorianCalendar. We should really handle YEAR_WOY and
|
||||
|
|
|
@ -44,7 +44,7 @@ public class MeasureUnit implements Serializable {
|
|||
// All access to the cache or cacheIsPopulated flag must be synchronized on class MeasureUnit,
|
||||
// i.e. from synchronized static methods. Beware of non-static methods.
|
||||
private static final Map<String, Map<String,MeasureUnit>> cache
|
||||
= new HashMap<String, Map<String,MeasureUnit>>();
|
||||
= new HashMap<>();
|
||||
private static boolean cacheIsPopulated = false;
|
||||
|
||||
/**
|
||||
|
@ -95,7 +95,7 @@ public class MeasureUnit implements Serializable {
|
|||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @stable ICU 53
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
@ -105,7 +105,7 @@ public class MeasureUnit implements Serializable {
|
|||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @stable ICU 53
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object rhs) {
|
||||
|
@ -122,7 +122,7 @@ public class MeasureUnit implements Serializable {
|
|||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @stable ICU 53
|
||||
* @stable ICU 3.0
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -152,7 +152,7 @@ public class MeasureUnit implements Serializable {
|
|||
// flexibility for implementation.
|
||||
// Use CollectionSet instead of HashSet for better performance.
|
||||
return units == null ? Collections.<MeasureUnit>emptySet()
|
||||
: Collections.unmodifiableSet(new CollectionSet<MeasureUnit>(units.values()));
|
||||
: Collections.unmodifiableSet(new CollectionSet<>(units.values()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -161,8 +161,8 @@ public class MeasureUnit implements Serializable {
|
|||
* @stable ICU 53
|
||||
*/
|
||||
public synchronized static Set<MeasureUnit> getAvailable() {
|
||||
Set<MeasureUnit> result = new HashSet<MeasureUnit>();
|
||||
for (String type : new HashSet<String>(MeasureUnit.getAvailableTypes())) {
|
||||
Set<MeasureUnit> result = new HashSet<>();
|
||||
for (String type : new HashSet<>(MeasureUnit.getAvailableTypes())) {
|
||||
for (MeasureUnit unit : MeasureUnit.getAvailable(type)) {
|
||||
result.add(unit);
|
||||
}
|
||||
|
@ -348,7 +348,7 @@ public class MeasureUnit implements Serializable {
|
|||
protected synchronized static MeasureUnit addUnit(String type, String unitName, Factory factory) {
|
||||
Map<String, MeasureUnit> tmp = cache.get(type);
|
||||
if (tmp == null) {
|
||||
cache.put(type, tmp = new HashMap<String, MeasureUnit>());
|
||||
cache.put(type, tmp = new HashMap<>());
|
||||
} else {
|
||||
// "intern" the type by setting to first item's type.
|
||||
type = tmp.entrySet().iterator().next().getValue().type;
|
||||
|
@ -1184,7 +1184,7 @@ public class MeasureUnit implements Serializable {
|
|||
public static final MeasureUnit TEASPOON = MeasureUnit.internalGetInstance("volume", "teaspoon");
|
||||
|
||||
private static HashMap<Pair<MeasureUnit, MeasureUnit>, MeasureUnit>unitPerUnitToSingleUnit =
|
||||
new HashMap<Pair<MeasureUnit, MeasureUnit>, MeasureUnit>();
|
||||
new HashMap<>();
|
||||
|
||||
static {
|
||||
unitPerUnitToSingleUnit.put(Pair.<MeasureUnit, MeasureUnit>of(MeasureUnit.LITER, MeasureUnit.KILOMETER), MeasureUnit.LITER_PER_KILOMETER);
|
||||
|
|
|
@ -612,7 +612,7 @@ public class SimpleTimeZone extends BasicTimeZone {
|
|||
/**
|
||||
* Returns a string representation of this object.
|
||||
* @return a string representation of this object
|
||||
* @stable ICU 3.6
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -1140,7 +1140,7 @@ public class SimpleTimeZone extends BasicTimeZone {
|
|||
/**
|
||||
* Overrides equals.
|
||||
* @return true if obj is a SimpleTimeZone equivalent to this
|
||||
* @stable ICU 3.6
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj){
|
||||
|
@ -1180,7 +1180,8 @@ public class SimpleTimeZone extends BasicTimeZone {
|
|||
|
||||
/**
|
||||
* Overrides hashCode.
|
||||
* @stable ICU 3.6
|
||||
* @return a hash code value for this object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public int hashCode(){
|
||||
|
@ -1208,7 +1209,7 @@ public class SimpleTimeZone extends BasicTimeZone {
|
|||
|
||||
/**
|
||||
* Overrides clone.
|
||||
* @stable ICU 3.6
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public Object clone() {
|
||||
|
|
|
@ -1052,7 +1052,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable<Tim
|
|||
|
||||
/**
|
||||
* Overrides equals.
|
||||
* @stable ICU 3.6
|
||||
* @return <code>true</code> if this object is the same as the obj argument; <code>false</code> otherwise.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj){
|
||||
|
@ -1063,7 +1064,8 @@ abstract public class TimeZone implements Serializable, Cloneable, Freezable<Tim
|
|||
|
||||
/**
|
||||
* Overrides hashCode.
|
||||
* @stable ICU 3.6
|
||||
* @return a hash code value for this object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public int hashCode(){
|
||||
|
|
|
@ -668,7 +668,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
/**
|
||||
* This is for compatibility with Locale-- in actuality, since ULocale is
|
||||
* immutable, there is no reason to clone it, so this API returns 'this'.
|
||||
* @stable ICU 3.0
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public Object clone() {
|
||||
|
@ -677,7 +677,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
|
||||
/**
|
||||
* Returns the hashCode.
|
||||
* @stable ICU 3.0
|
||||
* @return a hash code value for this object.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
@ -691,7 +692,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
* function identically might not compare equal.
|
||||
*
|
||||
* @return true if this Locale is equal to the specified object.
|
||||
* @stable ICU 3.0
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
|
@ -1071,7 +1072,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
|
||||
/**
|
||||
* Returns a string representation of this object.
|
||||
* @stable ICU 3.0
|
||||
* @return a string representation of the object.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -482,7 +482,7 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
|||
*
|
||||
* @return the hash code value for this set.
|
||||
* @see java.lang.Object#hashCode()
|
||||
* @stable ICU 58
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
@ -527,7 +527,7 @@ public final class VersionInfo implements Comparable<VersionInfo>
|
|||
/**
|
||||
* Map of singletons
|
||||
*/
|
||||
private static final ConcurrentHashMap<Integer, VersionInfo> MAP_ = new ConcurrentHashMap<Integer, VersionInfo>();
|
||||
private static final ConcurrentHashMap<Integer, VersionInfo> MAP_ = new ConcurrentHashMap<>();
|
||||
/**
|
||||
* Last byte mask
|
||||
*/
|
||||
|
|
|
@ -13,259 +13,9 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* <code>RuleBasedTransliterator</code> is a transliterator
|
||||
* that reads a set of rules in order to determine how to perform
|
||||
* translations. Rule sets are stored in resource bundles indexed by
|
||||
* name. Rules within a rule set are separated by semicolons (';').
|
||||
* To include a literal semicolon, prefix it with a backslash ('\').
|
||||
* Unicode Pattern_White_Space is ignored.
|
||||
* If the first non-blank character on a line is '#',
|
||||
* the entire line is ignored as a comment.
|
||||
*
|
||||
* <p>Each set of rules consists of two groups, one forward, and one
|
||||
* reverse. This is a convention that is not enforced; rules for one
|
||||
* direction may be omitted, with the result that translations in
|
||||
* that direction will not modify the source text. In addition,
|
||||
* bidirectional forward-reverse rules may be specified for
|
||||
* symmetrical transformations.
|
||||
*
|
||||
* <p><b>Rule syntax</b>
|
||||
*
|
||||
* <p>Rule statements take one of the following forms:
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>$alefmadda=\u0622;</code></dt>
|
||||
* <dd><strong>Variable definition.</strong> The name on the
|
||||
* left is assigned the text on the right. In this example,
|
||||
* after this statement, instances of the left hand name,
|
||||
* "<code>$alefmadda</code>", will be replaced by
|
||||
* the Unicode character U+0622. Variable names must begin
|
||||
* with a letter and consist only of letters, digits, and
|
||||
* underscores. Case is significant. Duplicate names cause
|
||||
* an exception to be thrown, that is, variables cannot be
|
||||
* redefined. The right hand side may contain well-formed
|
||||
* text of any length, including no text at all ("<code>$empty=;</code>").
|
||||
* The right hand side may contain embedded <code>UnicodeSet</code>
|
||||
* patterns, for example, "<code>$softvowel=[eiyEIY]</code>".</dd>
|
||||
* <dd> </dd>
|
||||
* <dt><code>ai>$alefmadda;</code></dt>
|
||||
* <dd><strong>Forward translation rule.</strong> This rule
|
||||
* states that the string on the left will be changed to the
|
||||
* string on the right when performing forward
|
||||
* transliteration.</dd>
|
||||
* <dt> </dt>
|
||||
* <dt><code>ai<$alefmadda;</code></dt>
|
||||
* <dd><strong>Reverse translation rule.</strong> This rule
|
||||
* states that the string on the right will be changed to
|
||||
* the string on the left when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>ai<>$alefmadda;</code></dt>
|
||||
* <dd><strong>Bidirectional translation rule.</strong> This
|
||||
* rule states that the string on the right will be changed
|
||||
* to the string on the left when performing forward
|
||||
* transliteration, and vice versa when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
|
||||
* string</em>. The match pattern consists of literal characters,
|
||||
* optionally preceded by context, and optionally followed by
|
||||
* context. Context characters, like literal pattern characters,
|
||||
* must be matched in the text being transliterated. However, unlike
|
||||
* literal pattern characters, they are not replaced by the output
|
||||
* text. For example, the pattern "<code>abc{def}</code>"
|
||||
* indicates the characters "<code>def</code>" must be
|
||||
* preceded by "<code>abc</code>" for a successful match.
|
||||
* If there is a successful match, "<code>def</code>" will
|
||||
* be replaced, but not "<code>abc</code>". The final '<code>}</code>'
|
||||
* is optional, so "<code>abc{def</code>" is equivalent to
|
||||
* "<code>abc{def}</code>". Another example is "<code>{123}456</code>"
|
||||
* (or "<code>123}456</code>") in which the literal
|
||||
* pattern "<code>123</code>" must be followed by "<code>456</code>".
|
||||
*
|
||||
* <p>The output string of a forward or reverse rule consists of
|
||||
* characters to replace the literal pattern characters. If the
|
||||
* output string contains the character '<code>|</code>', this is
|
||||
* taken to indicate the location of the <em>cursor</em> after
|
||||
* replacement. The cursor is the point in the text at which the
|
||||
* next replacement, if any, will be applied. The cursor is usually
|
||||
* placed within the replacement text; however, it can actually be
|
||||
* placed into the precending or following context by using the
|
||||
* special character '<code>@</code>'. Examples:
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>a {foo} z > | @ bar; # foo -> bar, move cursor
|
||||
* before a<br>
|
||||
* {foo} xyz > bar @@|; # foo -> bar, cursor between
|
||||
* y and z</code>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p><b>UnicodeSet</b>
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may appear anywhere that
|
||||
* makes sense. They may appear in variable definitions.
|
||||
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
|
||||
* contain variable references, such as "<code>$a=[a-z];$not_a=[^$a]</code>",
|
||||
* or "<code>$range=a-z;$ll=[$range]</code>".
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may also be embedded directly
|
||||
* into rule strings. Thus, the following two rules are equivalent:
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>$vowel=[aeiou]; $vowel>'*'; # One way to do this<br>
|
||||
* [aeiou]>'*';
|
||||
* #
|
||||
* Another way</code>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>See {@link UnicodeSet} for more documentation and examples.
|
||||
*
|
||||
* <p><b>Segments</b>
|
||||
*
|
||||
* <p>Segments of the input string can be matched and copied to the
|
||||
* output string. This makes certain sets of rules simpler and more
|
||||
* general, and makes reordering possible. For example:
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>([a-z]) > $1 $1;
|
||||
* #
|
||||
* double lowercase letters<br>
|
||||
* ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs</code>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>The segment of the input string to be copied is delimited by
|
||||
* "<code>(</code>" and "<code>)</code>". Up to
|
||||
* nine segments may be defined. Segments may not overlap. In the
|
||||
* output string, "<code>$1</code>" through "<code>$9</code>"
|
||||
* represent the input string segments, in left-to-right order of
|
||||
* definition.
|
||||
*
|
||||
* <p><b>Anchors</b>
|
||||
*
|
||||
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
|
||||
* special characters '<code>^</code>' and '<code>$</code>'. For example:
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>^ a > 'BEG_A'; # match 'a' at start of text<br>
|
||||
* a > 'A'; # match other instances
|
||||
* of 'a'<br>
|
||||
* z $ > 'END_Z'; # match 'z' at end of text<br>
|
||||
* z > 'Z'; # match other instances
|
||||
* of 'z'</code>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
|
||||
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
|
||||
* set pattern. Although this is usually the match chafacter for the end anchor, the set will
|
||||
* match either the beginning or the end of the text, depending on its placement. For
|
||||
* example:
|
||||
*
|
||||
* <blockquote>
|
||||
* <p><code>$x = [a-z$]; # match 'a' through 'z' OR anchor<br>
|
||||
* $x 1 > 2; # match '1' after a-z or at the start<br>
|
||||
* 3 $x > 4; # match '3' before a-z or at the end</code>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p><b>Example</b>
|
||||
*
|
||||
* <p>The following example rules illustrate many of the features of
|
||||
* the rule language.
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 1.</td>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>abc{def}>x|y</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 2.</td>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>xyz>r</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 3.</td>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>yz>q</code></td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>Applying these rules to the string "<code>adefabcdefz</code>"
|
||||
* yields the following results:
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>|adefabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Initial state, no rules match. Advance
|
||||
* cursor.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>a|defabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Still no match. Rule 1 does not match
|
||||
* because the preceding context is not present.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>ad|efabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Still no match. Keep advancing until
|
||||
* there is a match...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>ade|fabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>adef|abcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>adefa|bcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>adefab|cdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>adefabc|defz</code></td>
|
||||
* <td style="vertical-align: top;">Rule 1 matches; replace "<code>def</code>"
|
||||
* with "<code>xy</code>" and back up the cursor
|
||||
* to before the '<code>y</code>'.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>adefabcx|yz</code></td>
|
||||
* <td style="vertical-align: top;">Although "<code>xyz</code>" is
|
||||
* present, rule 2 does not match because the cursor is
|
||||
* before the '<code>y</code>', not before the '<code>x</code>'.
|
||||
* Rule 3 does match. Replace "<code>yz</code>"
|
||||
* with "<code>q</code>".</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; write-space: nowrap;"><code>adefabcxq|</code></td>
|
||||
* <td style="vertical-align: top;">The cursor is at the end;
|
||||
* transliteration is complete.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>The order of rules is significant. If multiple rules may match
|
||||
* at some point, the first matching rule is applied.
|
||||
*
|
||||
* <p>Forward and reverse rules may have an empty output string.
|
||||
* Otherwise, an empty left or right hand side of any statement is a
|
||||
* syntax error.
|
||||
*
|
||||
* <p>Single quotes are used to quote any character other than a
|
||||
* digit or letter. To specify a single quote itself, inside or
|
||||
* outside of quotes, use two single quotes in a row. For example,
|
||||
* the rule "<code>'>'>o''clock</code>" changes the
|
||||
* string "<code>></code>" to the string "<code>o'clock</code>".
|
||||
*
|
||||
* <p><b>Notes</b>
|
||||
*
|
||||
* <p>While a RuleBasedTransliterator is being built, it checks that
|
||||
* the rules are added in proper order. For example, if the rule
|
||||
* "a>x" is followed by the rule "ab>y",
|
||||
* then the second rule will throw an exception. The reason is that
|
||||
* the second rule can never be triggered, since the first rule
|
||||
* always matches anything it matches. In other words, the first
|
||||
* rule <em>masks</em> the second rule.
|
||||
* built from a set of rules as defined for
|
||||
* {@link Transliterator#createFromRules(String, String, int)}.
|
||||
* See the class {@link Transliterator} documentation for the rule syntax.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @internal
|
||||
|
@ -369,7 +119,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
static class Data {
|
||||
public Data() {
|
||||
variableNames = new HashMap<String, char[]>();
|
||||
variableNames = new HashMap<>();
|
||||
ruleSet = new TransliterationRuleSet();
|
||||
}
|
||||
|
||||
|
@ -487,5 +237,3 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
return new RuleBasedTransliterator(getID(), data, filter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
* modified as each new character arrives.
|
||||
*
|
||||
* <p>
|
||||
* Consider the simple <code>RuleBasedTransliterator</code>:
|
||||
* Consider the simple rule-based Transliterator:
|
||||
*
|
||||
* <blockquote><code>
|
||||
* th>{theta}<br>
|
||||
|
@ -110,8 +110,8 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
* that the transliterator will look at. It is advanced as text becomes committed (but it is not the committed index;
|
||||
* that's the <code>cursor</code>). The <code>cursor</code> index, described above, marks the point at which the
|
||||
* transliterator last stopped, either because it reached the end, or because it required more characters to
|
||||
* disambiguate between possible inputs. The <code>cursor</code> can also be explicitly set by rules in a
|
||||
* <code>RuleBasedTransliterator</code>. Any characters before the <code>cursor</code> index are frozen; future keyboard
|
||||
* disambiguate between possible inputs. The <code>cursor</code> can also be explicitly set by rules.
|
||||
* Any characters before the <code>cursor</code> index are frozen; future keyboard
|
||||
* transliteration calls within this input sequence will not change them. New text is inserted at the <code>limit</code>
|
||||
* index, which marks the end of the substring that the transliterator looks at.
|
||||
*
|
||||
|
@ -222,13 +222,262 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
* <code>transliterate()</code> method taking a <code>String</code> and <code>StringBuffer</code> if the performance of
|
||||
* these methods can be improved over the performance obtained by the default implementations in this class.
|
||||
*
|
||||
* <p><b>Rule syntax</b>
|
||||
*
|
||||
* <p>A set of rules determines how to perform translations.
|
||||
* Rules within a rule set are separated by semicolons (';').
|
||||
* To include a literal semicolon, prefix it with a backslash ('\').
|
||||
* Unicode Pattern_White_Space is ignored.
|
||||
* If the first non-blank character on a line is '#',
|
||||
* the entire line is ignored as a comment.
|
||||
*
|
||||
* <p>Each set of rules consists of two groups, one forward, and one
|
||||
* reverse. This is a convention that is not enforced; rules for one
|
||||
* direction may be omitted, with the result that translations in
|
||||
* that direction will not modify the source text. In addition,
|
||||
* bidirectional forward-reverse rules may be specified for
|
||||
* symmetrical transformations.
|
||||
*
|
||||
* <p>Note: Another description of the Transliterator rule syntax is available in
|
||||
* <a href="https://www.unicode.org/reports/tr35/tr35-general.html#Transform_Rules_Syntax">section
|
||||
* Transform Rules Syntax of UTS #35: Unicode LDML</a>.
|
||||
* The rules are shown there using arrow symbols ← and → and ↔.
|
||||
* ICU supports both those and the equivalent ASCII symbols < and > and <>.
|
||||
*
|
||||
* <p>Rule statements take one of the following forms:
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>$alefmadda=\\u0622;</code></dt>
|
||||
* <dd><strong>Variable definition.</strong> The name on the
|
||||
* left is assigned the text on the right. In this example,
|
||||
* after this statement, instances of the left hand name,
|
||||
* "<code>$alefmadda</code>", will be replaced by
|
||||
* the Unicode character U+0622. Variable names must begin
|
||||
* with a letter and consist only of letters, digits, and
|
||||
* underscores. Case is significant. Duplicate names cause
|
||||
* an exception to be thrown, that is, variables cannot be
|
||||
* redefined. The right hand side may contain well-formed
|
||||
* text of any length, including no text at all ("<code>$empty=;</code>").
|
||||
* The right hand side may contain embedded <code>UnicodeSet</code>
|
||||
* patterns, for example, "<code>$softvowel=[eiyEIY]</code>".</dd>
|
||||
* <dt><code>ai>$alefmadda;</code></dt>
|
||||
* <dd><strong>Forward translation rule.</strong> This rule
|
||||
* states that the string on the left will be changed to the
|
||||
* string on the right when performing forward
|
||||
* transliteration.</dd>
|
||||
* <dt><code>ai<$alefmadda;</code></dt>
|
||||
* <dd><strong>Reverse translation rule.</strong> This rule
|
||||
* states that the string on the right will be changed to
|
||||
* the string on the left when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <dl>
|
||||
* <dt><code>ai<>$alefmadda;</code></dt>
|
||||
* <dd><strong>Bidirectional translation rule.</strong> This
|
||||
* rule states that the string on the right will be changed
|
||||
* to the string on the left when performing forward
|
||||
* transliteration, and vice versa when performing reverse
|
||||
* transliteration.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <p>Translation rules consist of a <em>match pattern</em> and an <em>output
|
||||
* string</em>. The match pattern consists of literal characters,
|
||||
* optionally preceded by context, and optionally followed by
|
||||
* context. Context characters, like literal pattern characters,
|
||||
* must be matched in the text being transliterated. However, unlike
|
||||
* literal pattern characters, they are not replaced by the output
|
||||
* text. For example, the pattern "<code>abc{def}</code>"
|
||||
* indicates the characters "<code>def</code>" must be
|
||||
* preceded by "<code>abc</code>" for a successful match.
|
||||
* If there is a successful match, "<code>def</code>" will
|
||||
* be replaced, but not "<code>abc</code>". The final '<code>}</code>'
|
||||
* is optional, so "<code>abc{def</code>" is equivalent to
|
||||
* "<code>abc{def}</code>". Another example is "<code>{123}456</code>"
|
||||
* (or "<code>123}456</code>") in which the literal
|
||||
* pattern "<code>123</code>" must be followed by "<code>456</code>".
|
||||
*
|
||||
* <p>The output string of a forward or reverse rule consists of
|
||||
* characters to replace the literal pattern characters. If the
|
||||
* output string contains the character '<code>|</code>', this is
|
||||
* taken to indicate the location of the <em>cursor</em> after
|
||||
* replacement. The cursor is the point in the text at which the
|
||||
* next replacement, if any, will be applied. The cursor is usually
|
||||
* placed within the replacement text; however, it can actually be
|
||||
* placed into the preceding or following context by using the
|
||||
* special character '@'. Examples:
|
||||
*
|
||||
* <pre>
|
||||
* a {foo} z > | @ bar; # foo -> bar, move cursor before a
|
||||
* {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
|
||||
* </pre>
|
||||
*
|
||||
* <p><b>UnicodeSet</b>
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may appear anywhere that
|
||||
* makes sense. They may appear in variable definitions.
|
||||
* Contrariwise, <code>UnicodeSet</code> patterns may themselves
|
||||
* contain variable references, such as "<code>$a=[a-z];$not_a=[^$a]</code>",
|
||||
* or "<code>$range=a-z;$ll=[$range]</code>".
|
||||
*
|
||||
* <p><code>UnicodeSet</code> patterns may also be embedded directly
|
||||
* into rule strings. Thus, the following two rules are equivalent:
|
||||
*
|
||||
* <pre>
|
||||
* $vowel=[aeiou]; $vowel>'*'; # One way to do this
|
||||
* [aeiou]>'*'; # Another way
|
||||
* </pre>
|
||||
*
|
||||
* <p>See {@link UnicodeSet} for more documentation and examples.
|
||||
*
|
||||
* <p><b>Segments</b>
|
||||
*
|
||||
* <p>Segments of the input string can be matched and copied to the
|
||||
* output string. This makes certain sets of rules simpler and more
|
||||
* general, and makes reordering possible. For example:
|
||||
*
|
||||
* <pre>
|
||||
* ([a-z]) > $1 $1; # double lowercase letters
|
||||
* ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
|
||||
* </pre>
|
||||
*
|
||||
* <p>The segment of the input string to be copied is delimited by
|
||||
* "<code>(</code>" and "<code>)</code>". Up to
|
||||
* nine segments may be defined. Segments may not overlap. In the
|
||||
* output string, "<code>$1</code>" through "<code>$9</code>"
|
||||
* represent the input string segments, in left-to-right order of
|
||||
* definition.
|
||||
*
|
||||
* <p><b>Anchors</b>
|
||||
*
|
||||
* <p>Patterns can be anchored to the beginning or the end of the text. This is done with the
|
||||
* special characters '<code>^</code>' and '<code>$</code>'. For example:
|
||||
*
|
||||
* <pre>
|
||||
* ^ a > 'BEG_A'; # match 'a' at start of text
|
||||
* a > 'A'; # match other instances of 'a'
|
||||
* z $ > 'END_Z'; # match 'z' at end of text
|
||||
* z > 'Z'; # match other instances of 'z'
|
||||
* </pre>
|
||||
*
|
||||
* <p>It is also possible to match the beginning or the end of the text using a <code>UnicodeSet</code>.
|
||||
* This is done by including a virtual anchor character '<code>$</code>' at the end of the
|
||||
* set pattern. Although this is usually the match character for the end anchor, the set will
|
||||
* match either the beginning or the end of the text, depending on its placement. For
|
||||
* example:
|
||||
*
|
||||
* <pre>
|
||||
* $x = [a-z$]; # match 'a' through 'z' OR anchor
|
||||
* $x 1 > 2; # match '1' after a-z or at the start
|
||||
* 3 $x > 4; # match '3' before a-z or at the end
|
||||
* </pre>
|
||||
*
|
||||
* <p><b>Example</b>
|
||||
*
|
||||
* <p>The following example rules illustrate many of the features of
|
||||
* the rule language.
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 1.</td>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>abc{def}>x|y</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 2.</td>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>xyz>r</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top;">Rule 3.</td>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>yz>q</code></td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>Applying these rules to the string "<code>adefabcdefz</code>"
|
||||
* yields the following results:
|
||||
*
|
||||
* <table border="0" cellpadding="4">
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>|adefabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Initial state, no rules match. Advance
|
||||
* cursor.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>a|defabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Still no match. Rule 1 does not match
|
||||
* because the preceding context is not present.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>ad|efabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">Still no match. Keep advancing until
|
||||
* there is a match...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>ade|fabcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adef|abcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefa|bcdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefab|cdefz</code></td>
|
||||
* <td style="vertical-align: top;">...</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabc|defz</code></td>
|
||||
* <td style="vertical-align: top;">Rule 1 matches; replace "<code>def</code>"
|
||||
* with "<code>xy</code>" and back up the cursor
|
||||
* to before the '<code>y</code>'.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcx|yz</code></td>
|
||||
* <td style="vertical-align: top;">Although "<code>xyz</code>" is
|
||||
* present, rule 2 does not match because the cursor is
|
||||
* before the '<code>y</code>', not before the '<code>x</code>'.
|
||||
* Rule 3 does match. Replace "<code>yz</code>"
|
||||
* with "<code>q</code>".</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td style="vertical-align: top; white-space: nowrap;"><code>adefabcxq|</code></td>
|
||||
* <td style="vertical-align: top;">The cursor is at the end;
|
||||
* transliteration is complete.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>The order of rules is significant. If multiple rules may match
|
||||
* at some point, the first matching rule is applied.
|
||||
*
|
||||
* <p>Forward and reverse rules may have an empty output string.
|
||||
* Otherwise, an empty left or right hand side of any statement is a
|
||||
* syntax error.
|
||||
*
|
||||
* <p>Single quotes are used to quote any character other than a
|
||||
* digit or letter. To specify a single quote itself, inside or
|
||||
* outside of quotes, use two single quotes in a row. For example,
|
||||
* the rule "<code>'>'>o''clock</code>" changes the
|
||||
* string "<code>></code>" to the string "<code>o'clock</code>".
|
||||
*
|
||||
* <p><b>Notes</b>
|
||||
*
|
||||
* <p>While a Transliterator is being built from rules, it checks that
|
||||
* the rules are added in proper order. For example, if the rule
|
||||
* "a>x" is followed by the rule "ab>y",
|
||||
* then the second rule will throw an exception. The reason is that
|
||||
* the second rule can never be triggered, since the first rule
|
||||
* always matches anything it matches. In other words, the first
|
||||
* rule <em>masks</em> the second rule.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public abstract class Transliterator implements StringTransform {
|
||||
/**
|
||||
* Direction constant indicating the forward direction in a transliterator,
|
||||
* e.g., the forward rules of a RuleBasedTransliterator. An "A-B"
|
||||
* e.g., the forward rules of a rule-based Transliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
* @stable ICU 2.0
|
||||
|
@ -237,7 +486,7 @@ public abstract class Transliterator implements StringTransform {
|
|||
|
||||
/**
|
||||
* Direction constant indicating the reverse direction in a transliterator,
|
||||
* e.g., the reverse rules of a RuleBasedTransliterator. An "A-B"
|
||||
* e.g., the reverse rules of a rule-based Transliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
* @stable ICU 2.0
|
||||
|
@ -358,7 +607,7 @@ public abstract class Transliterator implements StringTransform {
|
|||
|
||||
/**
|
||||
* Returns true if this Position is equal to the given object.
|
||||
* @stable ICU 2.6
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
|
@ -373,7 +622,8 @@ public abstract class Transliterator implements StringTransform {
|
|||
}
|
||||
|
||||
/**
|
||||
* @draft ICU 63
|
||||
* {@inheritDoc}
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
@ -382,7 +632,8 @@ public abstract class Transliterator implements StringTransform {
|
|||
|
||||
/**
|
||||
* Returns a string representation of this Position.
|
||||
* @stable ICU 2.6
|
||||
* @return a string representation of the object.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -1100,7 +1351,7 @@ public abstract class Transliterator implements StringTransform {
|
|||
/**
|
||||
* Transliterate a substring of text, as specified by index, taking filters
|
||||
* into account. This method is for subclasses that need to delegate to
|
||||
* another transliterator, such as CompoundTransliterator.
|
||||
* another transliterator.
|
||||
* @param text the text to be transliterated
|
||||
* @param index the position indices
|
||||
* @param incremental if TRUE, then assume more characters may be inserted
|
||||
|
@ -1343,7 +1594,7 @@ public abstract class Transliterator implements StringTransform {
|
|||
public static Transliterator getInstance(String ID,
|
||||
int dir) {
|
||||
StringBuffer canonID = new StringBuffer();
|
||||
List<SingleID> list = new ArrayList<SingleID>();
|
||||
List<SingleID> list = new ArrayList<>();
|
||||
UnicodeSet[] globalFilter = new UnicodeSet[1];
|
||||
if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) {
|
||||
throw new IllegalArgumentException("Invalid ID " + ID);
|
||||
|
@ -1398,11 +1649,17 @@ public abstract class Transliterator implements StringTransform {
|
|||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object constructed from
|
||||
* the given rule string. This will be a RuleBasedTransliterator,
|
||||
* the given rule string. This will be a rule-based Transliterator,
|
||||
* if the rule string contains only rules, or a
|
||||
* CompoundTransliterator, if it contains ID blocks, or a
|
||||
* NullTransliterator, if it contains ID blocks which parse as
|
||||
* compound Transliterator, if it contains ID blocks, or a
|
||||
* null Transliterator, if it contains ID blocks which parse as
|
||||
* empty for the given direction.
|
||||
*
|
||||
* @param ID the id for the transliterator.
|
||||
* @param rules rules, separated by ';'
|
||||
* @param dir either FORWARD or REVERSE.
|
||||
* @return a newly created Transliterator
|
||||
* @throws IllegalArgumentException if there is a problem with the ID or the rules
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public static final Transliterator createFromRules(String ID, String rules, int dir) {
|
||||
|
@ -1435,7 +1692,7 @@ public abstract class Transliterator implements StringTransform {
|
|||
}
|
||||
}
|
||||
else {
|
||||
List<Transliterator> transliterators = new ArrayList<Transliterator>();
|
||||
List<Transliterator> transliterators = new ArrayList<>();
|
||||
int passNumber = 1;
|
||||
|
||||
int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size());
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.tool.docs;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Checks whether the API status of equals/hashCode/toString/clone is the same as that of the containing class.
|
||||
*
|
||||
* @author Yoshito
|
||||
*/
|
||||
public class APIStatusConsistencyChecker {
|
||||
public static void main(String[] args) {
|
||||
// args[0] API signature file path
|
||||
// args[1] (Optional) List of classes to be skipped, separated by semicolon
|
||||
if (args.length < 1) {
|
||||
System.err.println("Missing API signature file path.");
|
||||
} else if (args.length > 2) {
|
||||
System.err.println("Too many command arguments");
|
||||
}
|
||||
|
||||
List<String> skipClasses = Collections.emptyList();
|
||||
if (args.length == 2) {
|
||||
String[] classes = args[1].split(";");
|
||||
skipClasses = Arrays.asList(classes);
|
||||
}
|
||||
|
||||
// Load the ICU4J API signature file
|
||||
Set<APIInfo> apiInfoSet = APIData.read(new File(args[0]), true).getAPIInfoSet();
|
||||
APIStatusConsistencyChecker checker = new APIStatusConsistencyChecker(apiInfoSet, skipClasses, new PrintWriter(System.err, true));
|
||||
checker.checkConsistency();
|
||||
System.exit(checker.errCount);
|
||||
}
|
||||
|
||||
private int errCount = 0;
|
||||
private Set<APIInfo> apiInfoSet;
|
||||
private PrintWriter pw;
|
||||
private List<String> skipClasses;
|
||||
|
||||
public APIStatusConsistencyChecker(Set<APIInfo> apiInfoSet, List<String> skipClasses, PrintWriter pw) {
|
||||
this.apiInfoSet = apiInfoSet;
|
||||
this.skipClasses = skipClasses;
|
||||
this.pw = pw;
|
||||
}
|
||||
|
||||
public int errorCount() {
|
||||
return errCount;
|
||||
}
|
||||
|
||||
// Methods that should have same API status with a containing class
|
||||
static final String[][] METHODS = {
|
||||
//{"<method name>", "<method signature in APIInfo data>"},
|
||||
{"equals", "boolean(java.lang.Object)"},
|
||||
{"hashCode", "int()"},
|
||||
{"toString", "java.lang.String()"},
|
||||
{"clone", "java.lang.Object()"},
|
||||
};
|
||||
|
||||
public void checkConsistency() {
|
||||
Map<String, APIInfo> classMap = new TreeMap<>();
|
||||
// Build a map of APIInfo for classes, indexed by class name
|
||||
for (APIInfo api : apiInfoSet) {
|
||||
if (!api.isPublic() && !api.isProtected()) {
|
||||
continue;
|
||||
}
|
||||
if (!api.isClass() && !api.isEnum()) {
|
||||
continue;
|
||||
}
|
||||
String fullClassName = api.getPackageName() + "." + api.getName();
|
||||
classMap.put(fullClassName, api);
|
||||
}
|
||||
|
||||
// Walk through methods
|
||||
for (APIInfo api : apiInfoSet) {
|
||||
if (!api.isMethod()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String fullClassName = api.getPackageName() + "." + api.getClassName();
|
||||
if (skipClasses.contains(fullClassName)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean checkWithClass = false;
|
||||
String methodName = api.getName();
|
||||
String methodSig = api.getSignature();
|
||||
|
||||
for (String[] method : METHODS) {
|
||||
if (method[0].equals(methodName) && method[1].equals(methodSig)) {
|
||||
checkWithClass = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!checkWithClass) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if this method has same API status with the containing class
|
||||
APIInfo clsApi = classMap.get(fullClassName);
|
||||
if (clsApi == null) {
|
||||
pw.println("## Error ## Class " + fullClassName + " is not found.");
|
||||
errCount++;
|
||||
}
|
||||
|
||||
int methodStatus = api.getVal(APIInfo.STA);
|
||||
String methodVer = api.getStatusVersion();
|
||||
int classStatus = clsApi.getVal(APIInfo.STA);
|
||||
String classVer = clsApi.getStatusVersion();
|
||||
|
||||
if (methodStatus != classStatus || !Objects.equals(methodVer, classVer)) {
|
||||
pw.println("## Error ## " + methodName + " in " + fullClassName);
|
||||
errCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -56,7 +56,7 @@ public class DeprecatedAPIChecker {
|
|||
public void checkDeprecated() {
|
||||
// Gather API class/enum names and its names that can be
|
||||
// used for Class.forName()
|
||||
Map<String, String> apiClassNameMap = new TreeMap<String, String>();
|
||||
Map<String, String> apiClassNameMap = new TreeMap<>();
|
||||
for (APIInfo api : apiInfoSet) {
|
||||
if (!api.isPublic() && !api.isProtected()) {
|
||||
continue;
|
||||
|
@ -133,6 +133,18 @@ public class DeprecatedAPIChecker {
|
|||
}
|
||||
|
||||
List<String> paramNames = getParamNames(ctor);
|
||||
|
||||
Class<?> declClass = cls.getDeclaringClass();
|
||||
if (declClass != null && !Modifier.isStatic(cls.getModifiers())) {
|
||||
// This is non-static inner class's constructor.
|
||||
// javac automatically injects instance of declaring class
|
||||
// as the first param of the constructor, but ICU's API
|
||||
// signature is based on javadoc and it generates signature
|
||||
// without the implicit parameter.
|
||||
assert paramNames.get(0).equals(declClass.getName());
|
||||
paramNames.remove(0);
|
||||
}
|
||||
|
||||
api = findConstructorInfo(apiInfoSet, clsName, paramNames);
|
||||
|
||||
if (api == null) {
|
||||
|
@ -351,7 +363,7 @@ public class DeprecatedAPIChecker {
|
|||
throw new IllegalArgumentException(api.toString() + " is not a constructor or a method.");
|
||||
}
|
||||
|
||||
List<String> nameList = new ArrayList<String>();
|
||||
List<String> nameList = new ArrayList<>();
|
||||
String signature = api.getSignature();
|
||||
int start = signature.indexOf('(');
|
||||
int end = signature.indexOf(')');
|
||||
|
@ -410,7 +422,7 @@ public class DeprecatedAPIChecker {
|
|||
private static char[] PRIMITIVE_SIGNATURES = { 'B', 'S', 'I', 'J', 'F', 'D', 'Z', 'C' };
|
||||
|
||||
private static List<String> toTypeNameList(Type[] types) {
|
||||
List<String> nameList = new ArrayList<String>();
|
||||
List<String> nameList = new ArrayList<>();
|
||||
|
||||
for (Type t : types) {
|
||||
StringBuilder s = new StringBuilder();
|
||||
|
|
Loading…
Add table
Reference in a new issue