mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-11 08:01:32 +00:00
ICU-20777 Merge the likelySubtags implementation
Change testdata/likelySubtags.txt to consider FAIL line ICU-20777 Fix Java Tests ICU-20777 Fix all issues ICU-20777 Incase timeout ICU-20777 ICU-20777 Skip Data Driven test
This commit is contained in:
parent
27181e36a6
commit
ffc449de62
24 changed files with 3958 additions and 1493 deletions
|
@ -603,7 +603,9 @@ cc_library(
|
|||
"locbased.cpp",
|
||||
"locid.cpp",
|
||||
"loclikely.cpp",
|
||||
"loclikelysubtags.cpp",
|
||||
"locmap.cpp",
|
||||
"lsr.cpp",
|
||||
"resbund.cpp",
|
||||
"resource.cpp",
|
||||
"uloc.cpp",
|
||||
|
|
|
@ -307,7 +307,7 @@ LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale
|
|||
if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
|
||||
return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2080,6 +2080,10 @@ Locale::addLikelySubtags(UErrorCode& status) {
|
|||
|
||||
void
|
||||
Locale::minimizeSubtags(UErrorCode& status) {
|
||||
Locale::minimizeSubtags(false, status);
|
||||
}
|
||||
void
|
||||
Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
@ -2087,7 +2091,7 @@ Locale::minimizeSubtags(UErrorCode& status) {
|
|||
CharString minimizedLocaleID;
|
||||
{
|
||||
CharStringByteSink sink(&minimizedLocaleID);
|
||||
ulocimp_minimizeSubtags(fullName, sink, &status);
|
||||
ulocimp_minimizeSubtags(fullName, sink, favorScript, &status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
|
|
|
@ -31,82 +31,10 @@
|
|||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "loclikelysubtags.h"
|
||||
#include "ulocimp.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/**
|
||||
* These are the canonical strings for unknown languages, scripts and regions.
|
||||
**/
|
||||
static const char* const unknownLanguage = "und";
|
||||
static const char* const unknownScript = "Zzzz";
|
||||
static const char* const unknownRegion = "ZZ";
|
||||
|
||||
/**
|
||||
* This function looks for the localeID in the likelySubtags resource.
|
||||
*
|
||||
* @param localeID The tag to find.
|
||||
* @param buffer A buffer to hold the matching entry
|
||||
* @param bufferLength The length of the output buffer
|
||||
* @return A pointer to "buffer" if found, or a null pointer if not.
|
||||
*/
|
||||
static const char* U_CALLCONV
|
||||
findLikelySubtags(const char* localeID,
|
||||
char* buffer,
|
||||
int32_t bufferLength,
|
||||
UErrorCode* err) {
|
||||
const char* result = nullptr;
|
||||
|
||||
if (!U_FAILURE(*err)) {
|
||||
int32_t resLen = 0;
|
||||
const char16_t* s = nullptr;
|
||||
UErrorCode tmpErr = U_ZERO_ERROR;
|
||||
icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr));
|
||||
if (U_SUCCESS(tmpErr)) {
|
||||
icu::CharString und;
|
||||
if (localeID != nullptr) {
|
||||
if (*localeID == '\0') {
|
||||
localeID = unknownLanguage;
|
||||
} else if (*localeID == '_') {
|
||||
und.append(unknownLanguage, *err);
|
||||
und.append(localeID, *err);
|
||||
if (U_FAILURE(*err)) {
|
||||
return nullptr;
|
||||
}
|
||||
localeID = und.data();
|
||||
}
|
||||
}
|
||||
s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
|
||||
|
||||
if (U_FAILURE(tmpErr)) {
|
||||
/*
|
||||
* If a resource is missing, it's not really an error, it's
|
||||
* just that we don't have any data for that particular locale ID.
|
||||
*/
|
||||
if (tmpErr != U_MISSING_RESOURCE_ERROR) {
|
||||
*err = tmpErr;
|
||||
}
|
||||
}
|
||||
else if (resLen >= bufferLength) {
|
||||
/* The buffer should never overflow. */
|
||||
*err = U_INTERNAL_PROGRAM_ERROR;
|
||||
}
|
||||
else {
|
||||
u_UCharsToChars(s, buffer, resLen + 1);
|
||||
if (resLen >= 3 &&
|
||||
uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
|
||||
(resLen == 3 || buffer[3] == '_')) {
|
||||
uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
|
||||
}
|
||||
result = buffer;
|
||||
}
|
||||
} else {
|
||||
*err = tmpErr;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a tag to a buffer, adding the separator if necessary. The buffer
|
||||
* must be large enough to contain the resulting tag plus any separator
|
||||
|
@ -360,57 +288,6 @@ error:
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a tag string from the supplied parameters. The lang, script and region
|
||||
* parameters may be nullptr pointers. If they are, their corresponding length parameters
|
||||
* must be less than or equal to 0. If the lang parameter is an empty string, the
|
||||
* default value for an unknown language is written to the output buffer.
|
||||
*
|
||||
* If the length of the new string exceeds the capacity of the output buffer,
|
||||
* the function copies as many bytes to the output buffer as it can, and returns
|
||||
* the error U_BUFFER_OVERFLOW_ERROR.
|
||||
*
|
||||
* If an illegal argument is provided, the function returns the error
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* @param lang The language tag to use.
|
||||
* @param langLength The length of the language tag.
|
||||
* @param script The script tag to use.
|
||||
* @param scriptLength The length of the script tag.
|
||||
* @param region The region tag to use.
|
||||
* @param regionLength The length of the region tag.
|
||||
* @param trailing Any trailing data to append to the new tag.
|
||||
* @param trailingLength The length of the trailing data.
|
||||
* @param sink The output sink receiving the tag string.
|
||||
* @param err A pointer to a UErrorCode for error reporting.
|
||||
**/
|
||||
static void U_CALLCONV
|
||||
createTagString(
|
||||
const char* lang,
|
||||
int32_t langLength,
|
||||
const char* script,
|
||||
int32_t scriptLength,
|
||||
const char* region,
|
||||
int32_t regionLength,
|
||||
const char* trailing,
|
||||
int32_t trailingLength,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* err)
|
||||
{
|
||||
createTagStringWithAlternates(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
trailing,
|
||||
trailingLength,
|
||||
nullptr,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the language, script, and region subtags from a tag string, and copy the
|
||||
* results into the corresponding output parameters. The buffers are null-terminated,
|
||||
|
@ -494,13 +371,6 @@ parseTagString(
|
|||
*scriptLength = subtagLength;
|
||||
|
||||
if (*scriptLength > 0) {
|
||||
if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
|
||||
/**
|
||||
* If the script part is the "unknown" script, then don't return it.
|
||||
**/
|
||||
*scriptLength = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move past any separator.
|
||||
*/
|
||||
|
@ -517,14 +387,7 @@ parseTagString(
|
|||
|
||||
*regionLength = subtagLength;
|
||||
|
||||
if (*regionLength > 0) {
|
||||
if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
|
||||
/**
|
||||
* If the region part is the "unknown" region, then don't return it.
|
||||
**/
|
||||
*regionLength = 0;
|
||||
}
|
||||
} else if (*position != 0 && *position != '@') {
|
||||
if (*regionLength <= 0 && *position != 0 && *position != '@') {
|
||||
/* back up over consumed trailing separator */
|
||||
--position;
|
||||
}
|
||||
|
@ -546,264 +409,6 @@ error:
|
|||
goto exit;
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
createLikelySubtagsString(
|
||||
const char* lang,
|
||||
int32_t langLength,
|
||||
const char* script,
|
||||
int32_t scriptLength,
|
||||
const char* region,
|
||||
int32_t regionLength,
|
||||
const char* variants,
|
||||
int32_t variantsLength,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* err) {
|
||||
/**
|
||||
* ULOC_FULLNAME_CAPACITY will provide enough capacity
|
||||
* that we can build a string that contains the language,
|
||||
* script and region code without worrying about overrunning
|
||||
* the user-supplied buffer.
|
||||
**/
|
||||
char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with the script and region first.
|
||||
**/
|
||||
if (scriptLength > 0 && regionLength > 0) {
|
||||
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with just the script.
|
||||
**/
|
||||
if (scriptLength > 0) {
|
||||
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with just the region.
|
||||
**/
|
||||
if (regionLength > 0) {
|
||||
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finally, try just the language.
|
||||
**/
|
||||
{
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
error:
|
||||
|
||||
if (!U_FAILURE(*err)) {
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t count = 0; \
|
||||
int32_t i; \
|
||||
|
@ -836,7 +441,6 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
const char* trailing = "";
|
||||
int32_t trailingLength = 0;
|
||||
int32_t trailingIndex = 0;
|
||||
UBool success = false;
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
|
@ -862,6 +466,9 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
|
||||
goto error;
|
||||
}
|
||||
if (langLength > 3) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Find the length of the trailing portion. */
|
||||
while (_isIDSeparator(localeID[trailingIndex])) {
|
||||
|
@ -871,30 +478,33 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
trailingLength = (int32_t)uprv_strlen(trailing);
|
||||
|
||||
CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
|
||||
|
||||
success =
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
{
|
||||
const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(icu::Locale::createFromName(localeID), true, *err);
|
||||
const char* language = lsr.language;
|
||||
if (uprv_strcmp(language, "und") == 0) {
|
||||
language = "";
|
||||
}
|
||||
createTagStringWithAlternates(
|
||||
language,
|
||||
(int32_t)uprv_strlen(language),
|
||||
lsr.script,
|
||||
(int32_t)uprv_strlen(lsr.script),
|
||||
lsr.region,
|
||||
(int32_t)uprv_strlen(lsr.region),
|
||||
trailing,
|
||||
trailingLength,
|
||||
nullptr,
|
||||
sink,
|
||||
err);
|
||||
|
||||
if (!success) {
|
||||
const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
|
||||
|
||||
/*
|
||||
* If we get here, we need to return localeID.
|
||||
*/
|
||||
sink.Append(localeID, localIDLength);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
return success;
|
||||
return true;
|
||||
|
||||
error:
|
||||
|
||||
|
@ -913,6 +523,7 @@ static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*)
|
|||
static void
|
||||
_uloc_minimizeSubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
bool favorScript,
|
||||
UErrorCode* err) {
|
||||
icu::CharString maximizedTagBuffer;
|
||||
|
||||
|
@ -925,7 +536,6 @@ _uloc_minimizeSubtags(const char* localeID,
|
|||
const char* trailing = "";
|
||||
int32_t trailingLength = 0;
|
||||
int32_t trailingIndex = 0;
|
||||
UBool successGetMax = false;
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
|
@ -964,213 +574,38 @@ _uloc_minimizeSubtags(const char* localeID,
|
|||
CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
|
||||
|
||||
{
|
||||
icu::CharString base;
|
||||
{
|
||||
icu::CharStringByteSink baseSink(&base);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
baseSink,
|
||||
err);
|
||||
}
|
||||
|
||||
/**
|
||||
* First, we need to first get the maximization
|
||||
* from AddLikelySubtags.
|
||||
**/
|
||||
{
|
||||
icu::CharStringByteSink maxSink(&maximizedTagBuffer);
|
||||
successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
|
||||
}
|
||||
}
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (!successGetMax) {
|
||||
/**
|
||||
* If we got here, return the locale ID parameter unchanged.
|
||||
**/
|
||||
const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
|
||||
sink.Append(localeID, localeIDLength);
|
||||
return;
|
||||
}
|
||||
|
||||
// In the following, the lang, script, region are referring to those in
|
||||
// the maximizedTagBuffer, not the one in the localeID.
|
||||
langLength = sizeof(lang);
|
||||
scriptLength = sizeof(script);
|
||||
regionLength = sizeof(region);
|
||||
parseTagString(
|
||||
maximizedTagBuffer.data(),
|
||||
lang,
|
||||
&langLength,
|
||||
script,
|
||||
&scriptLength,
|
||||
region,
|
||||
&regionLength,
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start first with just the language.
|
||||
**/
|
||||
{
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink tagSink(&tagBuffer);
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
tagSink,
|
||||
err);
|
||||
}
|
||||
|
||||
const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
else if (!tagBuffer.isEmpty() &&
|
||||
uprv_strnicmp(
|
||||
maximizedTagBuffer.data(),
|
||||
tagBuffer.data(),
|
||||
tagBuffer.length()) == 0) {
|
||||
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Next, try the language and region.
|
||||
**/
|
||||
if (regionLength > 0) {
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink tagSink(&tagBuffer);
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
tagSink,
|
||||
err);
|
||||
}
|
||||
|
||||
icu::LSR lsr = likelySubtags->minimizeSubtags(
|
||||
{lang, langLength},
|
||||
{script, scriptLength},
|
||||
{region, regionLength},
|
||||
favorScript,
|
||||
*err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
else if (!tagBuffer.isEmpty() &&
|
||||
uprv_strnicmp(
|
||||
maximizedTagBuffer.data(),
|
||||
tagBuffer.data(),
|
||||
tagBuffer.length()) == 0) {
|
||||
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
const char* language = lsr.language;
|
||||
if (uprv_strcmp(language, "und") == 0) {
|
||||
language = "";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finally, try the language and script. This is our last chance,
|
||||
* since trying with all three subtags would only yield the
|
||||
* maximal version that we already have.
|
||||
**/
|
||||
if (scriptLength > 0) {
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink tagSink(&tagBuffer);
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
tagSink,
|
||||
err);
|
||||
}
|
||||
|
||||
createTagStringWithAlternates(
|
||||
language,
|
||||
(int32_t)uprv_strlen(language),
|
||||
lsr.script,
|
||||
(int32_t)uprv_strlen(lsr.script),
|
||||
lsr.region,
|
||||
(int32_t)uprv_strlen(lsr.region),
|
||||
trailing,
|
||||
trailingLength,
|
||||
nullptr,
|
||||
sink,
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
else if (!tagBuffer.isEmpty() &&
|
||||
uprv_strnicmp(
|
||||
maximizedTagBuffer.data(),
|
||||
tagBuffer.data(),
|
||||
tagBuffer.length()) == 0) {
|
||||
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/**
|
||||
* If we got here, return the max + trail.
|
||||
**/
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1271,7 +706,7 @@ uloc_minimizeSubtags(const char* localeID,
|
|||
icu::CheckedArrayByteSink sink(
|
||||
minimizedLocaleID, minimizedLocaleIDCapacity);
|
||||
|
||||
ulocimp_minimizeSubtags(localeID, sink, status);
|
||||
ulocimp_minimizeSubtags(localeID, sink, false, status);
|
||||
int32_t reslen = sink.NumberOfBytesAppended();
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
|
@ -1291,6 +726,7 @@ uloc_minimizeSubtags(const char* localeID,
|
|||
U_CAPI void U_EXPORT2
|
||||
ulocimp_minimizeSubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
bool favorScript,
|
||||
UErrorCode* status) {
|
||||
PreflightingLocaleIDBuffer localeBuffer;
|
||||
do {
|
||||
|
@ -1298,7 +734,7 @@ ulocimp_minimizeSubtags(const char* localeID,
|
|||
localeBuffer.getCapacity(), status);
|
||||
} while (localeBuffer.needToTryAgain(status));
|
||||
|
||||
_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
|
||||
_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, favorScript, status);
|
||||
}
|
||||
|
||||
// Pairs of (language subtag, + or -) for finding out fast if common languages
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "uniquecharstr.h"
|
||||
#include "uresdata.h"
|
||||
#include "uresimp.h"
|
||||
#include "uvector.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -304,7 +305,7 @@ private:
|
|||
encoded %= 27 * 27;
|
||||
if (encoded < 27) {
|
||||
// Selected M49 code index, find the code from "m49" resource.
|
||||
return m49IndexToCode(m49Array, value, 2, errorCode);
|
||||
return m49IndexToCode(m49Array, value, encoded, errorCode);
|
||||
}
|
||||
char region[2];
|
||||
region[0] = 'A' + ((encoded % 27) - 1);
|
||||
|
@ -339,15 +340,52 @@ private:
|
|||
namespace {
|
||||
|
||||
XLikelySubtags *gLikelySubtags = nullptr;
|
||||
UVector *gMacroregions = nullptr;
|
||||
UInitOnce gInitOnce {};
|
||||
|
||||
// Cleanup callback (registered through ucln_common_registerCleanup):
// frees both cached objects, clears their pointers, and resets the
// init-once guard.
UBool U_CALLCONV cleanup() {
    delete gMacroregions;
    gMacroregions = nullptr;

    delete gLikelySubtags;
    gLikelySubtags = nullptr;

    gInitOnce.reset();
    return true;
}
|
||||
|
||||
static const char16_t RANGE_MARKER = 0x7E; /* '~' */
|
||||
/**
 * Reads the supplementalData / idValidity / region / macroregion list and
 * returns its entries as a vector of UnicodeString objects owned by the
 * vector.
 *
 * An entry containing RANGE_MARKER ('~') after its first character is a
 * range: the character just before the marker is incremented up to and
 * including the character just after the marker, and one code is added per
 * step (for example "AB~D" yields "AB", "AC", "AD").
 *
 * A range entry whose prefix does not fit the local buffer, or that has no
 * character after the marker, is rejected with U_INVALID_FORMAT_ERROR
 * instead of writing past the buffer or looping without end.
 *
 * @param status Error code; set on resource, allocation or format failure.
 * @return A newly allocated vector owned by the caller, or nullptr when
 *         status reports a failure.
 */
UVector* loadMacroregions(UErrorCode &status) {
    LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);

    LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status));
    LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",nullptr,&status));
    LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",nullptr,&status));
    LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",nullptr,&status));

    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Room for a range entry plus its terminating NUL.
    constexpr int32_t kBufCapacity = 6;

    while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) {
        UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status);
        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
        if (rangeMarkerLocation > 0) {
            // Validate before touching the buffer: the marker index must be
            // inside the buffer, and a range end must follow the marker.
            if (rangeMarkerLocation >= kBufCapacity ||
                    rangeMarkerLocation + 1 >= regionName.length()) {
                status = U_INVALID_FORMAT_ERROR;
                break;
            }
            char16_t buf[kBufCapacity];
            regionName.extract(buf, kBufCapacity, status);
            char16_t endRange = regionName.charAt(rangeMarkerLocation + 1);
            // Cut the string at the marker; what remains is the first code
            // of the range.
            buf[rangeMarkerLocation] = 0;
            while (U_SUCCESS(status) && buf[rangeMarkerLocation - 1] <= endRange) {
                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
                newMacroRegions->adoptElement(newRegion.orphan(), status);
                buf[rangeMarkerLocation - 1]++;
            }
        } else {
            // Plain entry: stored as is, no fixed-size buffer involved.
            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
            newMacroRegions->adoptElement(newRegion.orphan(), status);
        }
    }

    if (U_FAILURE(status)) {
        // Do not hand out a partially filled vector; the LocalPointer
        // deletes it on return.
        return nullptr;
    }
    return newMacroRegions.orphan();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
|
||||
|
@ -357,10 +395,14 @@ void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
|
|||
data.load(errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
gLikelySubtags = new XLikelySubtags(data);
|
||||
if (gLikelySubtags == nullptr) {
|
||||
gMacroregions = loadMacroregions(errorCode);
|
||||
if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
|
||||
delete gLikelySubtags;
|
||||
delete gMacroregions;
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
|
||||
}
|
||||
|
||||
|
@ -411,15 +453,28 @@ XLikelySubtags::~XLikelySubtags() {
|
|||
delete[] lsrs;
|
||||
}
|
||||
|
||||
/**
 * Maximizes the language/script/region of a Locale.
 *
 * A locale whose name starts with "@x=" (private use) is returned with its
 * whole name as the language and empty script and region. Otherwise the
 * locale's subtags are passed to makeMaximizedLsr(); if that yields an LSR
 * whose language, script and region are all empty, the locale's own subtags
 * are returned instead.
 *
 * @param locale The locale to maximize.
 * @param returnInputIfUnmatch Passed through to makeMaximizedLsr().
 * @param errorCode Error code for reporting failures.
 * @return The maximized LSR, or the input subtags when nothing matched.
 */
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
                                         bool returnInputIfUnmatch,
                                         UErrorCode &errorCode) const {
    const char *name = locale.getName();
    if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {  // name.startsWith("@x=")
        // Private use language tag x-subtag-subtag... which CLDR changes to
        // und-x-subtag-subtag...
        return LSR(name, "", "", LSR::EXPLICIT_LSR);
    }
    LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
                               locale.getVariant(), returnInputIfUnmatch, errorCode);

    if (max.language[0] == 0 &&
        max.script[0] == 0 &&
        max.region[0] == 0) {
        // No match. The ICU API requires that a locale which is already in
        // maximal form, or for which no maximization data is available, is
        // returned as given.
        return LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR::EXPLICIT_LSR, errorCode);
    }
    return max;
}
|
||||
|
||||
namespace {
|
||||
|
@ -432,7 +487,9 @@ const char *getCanonical(const CharStringMap &aliases, const char *alias) {
|
|||
} // namespace
|
||||
|
||||
LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
|
||||
const char *variant, UErrorCode &errorCode) const {
|
||||
const char *variant,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const {
|
||||
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
|
||||
// They should match only themselves,
|
||||
// not other locales with what looks like the same language and script subtags.
|
||||
|
@ -472,64 +529,91 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c
|
|||
language = getCanonical(languageAliases, language);
|
||||
// (We have no script mappings.)
|
||||
region = getCanonical(regionAliases, region);
|
||||
return maximize(language, script, region);
|
||||
return maximize(language, script, region, returnInputIfUnmatch, errorCode);
|
||||
}
|
||||
|
||||
/**
 * C-string overload of maximize(): wraps each NUL-terminated subtag in a
 * StringPiece of its strlen length and forwards to the StringPiece overload.
 *
 * @param language Language subtag (NUL-terminated).
 * @param script Script subtag (NUL-terminated).
 * @param region Region subtag (NUL-terminated).
 * @param returnInputIfUnmatch Passed through unchanged.
 * @param errorCode Error code for reporting failures.
 * @return The result of the StringPiece overload.
 */
LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region,
                             bool returnInputIfUnmatch,
                             UErrorCode &errorCode) const {
    return maximize({language, (int32_t)uprv_strlen(language)},
                    {script, (int32_t)uprv_strlen(script)},
                    {region, (int32_t)uprv_strlen(region)},
                    returnInputIfUnmatch,
                    errorCode);
}
|
||||
|
||||
/**
 * Reports whether a region code is in the macroregion list.
 *
 * In Java the Region class is used for this. In C++, Region lives in i18n,
 * so the same data that Region uses is read into gMacroregions to avoid a
 * dependency from common on i18n/region.cpp.
 *
 * @param region The region code to test.
 * @param errorCode Error code; the function returns false when it is, or
 *                  becomes, a failure.
 * @return true if gMacroregions contains the region.
 */
bool XLikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {
    if (U_SUCCESS(errorCode)) {
        // Runs initLikelySubtags through the init-once guard.
        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return false;
    }

    UnicodeString candidate(UnicodeString::fromUTF8(region));
    return gMacroregions->contains(static_cast<void *>(&candidate));
}
|
||||
|
||||
LSR XLikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
|
||||
}
|
||||
if (language.compare("und") == 0) {
|
||||
language = "";
|
||||
}
|
||||
if (uprv_strcmp(script, "Zzzz") == 0) {
|
||||
if (script.compare("Zzzz") == 0) {
|
||||
script = "";
|
||||
}
|
||||
if (uprv_strcmp(region, "ZZ") == 0) {
|
||||
if (region.compare("ZZ") == 0) {
|
||||
region = "";
|
||||
}
|
||||
if (*script != 0 && *region != 0 && *language != 0) {
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
|
||||
if (!script.empty() && !region.empty() && !language.empty()) {
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode); // already maximized
|
||||
}
|
||||
bool retainLanguage = false;
|
||||
bool retainScript = false;
|
||||
bool retainRegion = false;
|
||||
|
||||
uint32_t retainOldMask = 0;
|
||||
BytesTrie iter(trie);
|
||||
uint64_t state;
|
||||
int32_t value;
|
||||
// Small optimization: Array lookup for first language letter.
|
||||
int32_t c0;
|
||||
if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
|
||||
language[1] != 0 && // language.length() >= 2
|
||||
if (0 <= (c0 = uprv_lowerOrdinal(language.data()[0])) && c0 <= 25 &&
|
||||
language.length() >= 2 &&
|
||||
(state = trieFirstLetterStates[c0]) != 0) {
|
||||
value = trieNext(iter.resetToState64(state), language, 1);
|
||||
} else {
|
||||
value = trieNext(iter, language, 0);
|
||||
}
|
||||
bool matchLanguage = (value >= 0);
|
||||
bool matchScript = false;
|
||||
if (value >= 0) {
|
||||
if (*language != 0) {
|
||||
retainOldMask |= 4;
|
||||
}
|
||||
retainLanguage = !language.empty();
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 4;
|
||||
retainLanguage = true;
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
state = 0;
|
||||
}
|
||||
|
||||
if (value >= 0 && !script.empty()) {
|
||||
matchScript = true;
|
||||
}
|
||||
if (value > 0) {
|
||||
// Intermediate or final value from just language.
|
||||
if (value == SKIP_SCRIPT) {
|
||||
value = 0;
|
||||
}
|
||||
if (*script != 0) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
retainScript = !script.empty();
|
||||
} else {
|
||||
value = trieNext(iter, script, 0);
|
||||
if (value >= 0) {
|
||||
if (*script != 0) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
retainScript = !script.empty();
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 2;
|
||||
retainScript = true;
|
||||
if (state == 0) {
|
||||
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
|
||||
} else {
|
||||
|
@ -541,19 +625,19 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
}
|
||||
}
|
||||
|
||||
bool matchRegion = false;
|
||||
if (value > 0) {
|
||||
// Final value from just language or language+script.
|
||||
if (*region != 0) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
retainRegion = !region.empty();
|
||||
} else {
|
||||
value = trieNext(iter, region, 0);
|
||||
if (value >= 0) {
|
||||
if (*region != 0) {
|
||||
retainOldMask |= 1;
|
||||
if (!region.empty() && !isMacroregion(region, errorCode)) {
|
||||
retainRegion = true;
|
||||
matchRegion = true;
|
||||
}
|
||||
} else {
|
||||
retainOldMask |= 1;
|
||||
retainRegion = true;
|
||||
if (state == 0) {
|
||||
value = defaultLsrIndex;
|
||||
} else {
|
||||
|
@ -564,28 +648,33 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
}
|
||||
}
|
||||
U_ASSERT(value < lsrsLength);
|
||||
const LSR &result = lsrs[value];
|
||||
const LSR &matched = lsrs[value];
|
||||
|
||||
if (*language == 0) {
|
||||
language = "und";
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
|
||||
return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching.
|
||||
}
|
||||
if (language.empty()) {
|
||||
language = StringPiece("und");
|
||||
}
|
||||
|
||||
if (retainOldMask == 0) {
|
||||
if (!(retainLanguage || retainScript || retainRegion)) {
|
||||
// Quickly return a copy of the lookup-result LSR
|
||||
// without new allocation of the subtags.
|
||||
return LSR(result.language, result.script, result.region, result.flags);
|
||||
return LSR(matched.language, matched.script, matched.region, matched.flags);
|
||||
}
|
||||
if ((retainOldMask & 4) == 0) {
|
||||
language = result.language;
|
||||
if (!retainLanguage) {
|
||||
language = matched.language;
|
||||
}
|
||||
if ((retainOldMask & 2) == 0) {
|
||||
script = result.script;
|
||||
if (!retainScript) {
|
||||
script = matched.script;
|
||||
}
|
||||
if ((retainOldMask & 1) == 0) {
|
||||
region = result.region;
|
||||
if (!retainRegion) {
|
||||
region = matched.region;
|
||||
}
|
||||
int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
return LSR(language, script, region, retainOldMask);
|
||||
return LSR(language, script, region, retainMask, errorCode);
|
||||
}
|
||||
|
||||
int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
|
||||
|
@ -721,57 +810,97 @@ int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
|
|||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
|
||||
// in loclikely.cpp to this new code, including activating this
|
||||
// minimizeSubtags() function. The LocaleMatcher does not minimize.
|
||||
#if 0
|
||||
LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
|
||||
const char *regionIn, ULocale.Minimize fieldToFavor,
|
||||
UErrorCode &errorCode) const {
|
||||
LSR result = maximize(languageIn, scriptIn, regionIn);
|
||||
|
||||
// We could try just a series of checks, like:
|
||||
// LSR result2 = addLikelySubtags(languageIn, "", "");
|
||||
// if result.equals(result2) return result2;
|
||||
// However, we can optimize 2 of the cases:
|
||||
// (languageIn, "", "")
|
||||
// (languageIn, "", regionIn)
|
||||
|
||||
// value00 = lookup(result.language, "", "")
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
int value = trieNext(iter, result.language, 0);
|
||||
U_ASSERT(value >= 0);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value >= 0);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
int32_t XLikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
|
||||
UStringTrieResult result;
|
||||
uint8_t c;
|
||||
if (s.length() == i) {
|
||||
result = iter.next(u'*');
|
||||
} else {
|
||||
c = s.data()[i];
|
||||
for (;;) {
|
||||
c = uprv_invCharToAscii(c);
|
||||
// EBCDIC: If s[i] is not an invariant character,
|
||||
// then c is now 0 and will simply not match anything, which is harmless.
|
||||
if (i+1 != s.length()) {
|
||||
if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
|
||||
return -1;
|
||||
}
|
||||
c = s.data()[++i];
|
||||
} else {
|
||||
// last character of this subtag
|
||||
result = iter.next(c | 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
U_ASSERT(value > 0);
|
||||
LSR value00 = lsrs[value];
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
switch (result) {
|
||||
case USTRINGTRIE_NO_MATCH: return -1;
|
||||
case USTRINGTRIE_NO_VALUE: return 0;
|
||||
case USTRINGTRIE_INTERMEDIATE_VALUE:
|
||||
U_ASSERT(iter.getValue() == SKIP_SCRIPT);
|
||||
return SKIP_SCRIPT;
|
||||
case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
|
||||
default: return -1;
|
||||
}
|
||||
|
||||
// The last case is not as easy to optimize.
|
||||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (favorRegionOk) {
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Computes the minimized form of a language/script/region triple.
 *
 * The input is maximized first. Shorter candidates built from the maximized
 * subtags are then maximized again, in this order:
 *   1. language only
 *   2. language+region (favorScript == false) or language+script (favorScript == true)
 *   3. the remaining two-subtag combination
 * The first candidate whose maximization is equivalent to the maximized input
 * is returned. If none is, the full maximized triple is returned.
 *
 * @param language    input language subtag
 * @param script      input script subtag
 * @param region      input region subtag
 * @param favorScript if true, try to keep the script before trying to keep the region
 * @param errorCode   in/out ICU error code; when a maximize() call fails,
 *                    the result of the first maximize() call is returned
 * @return the minimized LSR, or a copy of the input subtags when the first
 *         maximize() call yields three empty subtags
 */
LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
                                    StringPiece region,
                                    bool favorScript,
                                    UErrorCode &errorCode) const {
    LSR max = maximize(language, script, region, true, errorCode);
    if (U_FAILURE(errorCode)) {
        return max;
    }

    const bool nothingMatched =
        max.language[0] == 0 && max.script[0] == 0 && max.region[0] == 0;
    if (nothingMatched) {
        // No match. The ICU API mandates:
        // "If this Locale is already in the minimal form, or not valid, or
        // there is no data available for minimization, the Locale will be
        // unchanged."
        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
    }

    // Script/region pair to combine with max.language for one probe.
    struct Candidate {
        const char *script;
        const char *region;
    };
    const Candidate languageOnly = {"", ""};
    const Candidate languageRegion = {"", max.region};
    const Candidate languageScript = {max.script, ""};
    const Candidate *const probes[] = {
        &languageOnly,
        favorScript ? &languageScript : &languageRegion,
        favorScript ? &languageRegion : &languageScript,
    };

    for (const Candidate *probe : probes) {
        LSR test = maximize(max.language, probe->script, probe->region, true, errorCode);
        if (U_FAILURE(errorCode)) {
            return max;
        }
        if (test.isEquivalentTo(max)) {
            return LSR(max.language, probe->script, probe->region,
                       LSR::DONT_CARE_FLAGS, errorCode);
        }
    }

    // Nothing shorter round-trips, so keep all three maximized subtags.
    return LSR(max.language, max.script, max.region, LSR::DONT_CARE_FLAGS, errorCode);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "charstrmap.h"
|
||||
|
@ -47,7 +48,9 @@ public:
|
|||
static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
|
||||
|
||||
// VisibleForTesting
|
||||
LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
|
||||
LSR makeMaximizedLsrFrom(const Locale &locale,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Tests whether lsr is "more likely" than other.
|
||||
|
@ -61,13 +64,9 @@ public:
|
|||
*/
|
||||
int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
|
||||
|
||||
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
|
||||
// in loclikely.cpp to this new code, including activating this
|
||||
// minimizeSubtags() function. The LocaleMatcher does not minimize.
|
||||
#if 0
|
||||
LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
|
||||
ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
|
||||
#endif
|
||||
LSR minimizeSubtags(StringPiece language, StringPiece script, StringPiece region,
|
||||
bool favorScript,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
// visible for LocaleDistance
|
||||
const LocaleDistanceData &getDistanceData() const { return distanceData; }
|
||||
|
@ -80,16 +79,25 @@ private:
|
|||
static void initLikelySubtags(UErrorCode &errorCode);
|
||||
|
||||
LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
|
||||
const char *variant, UErrorCode &errorCode) const;
|
||||
const char *variant,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
|
||||
*/
|
||||
LSR maximize(const char *language, const char *script, const char *region) const;
|
||||
LSR maximize(const char *language, const char *script, const char *region,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
LSR maximize(StringPiece language, StringPiece script, StringPiece region,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
int32_t getLikelyIndex(const char *language, const char *script) const;
|
||||
bool isMacroregion(StringPiece& region, UErrorCode &errorCode) const;
|
||||
|
||||
static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
|
||||
static int32_t trieNext(BytesTrie &iter, StringPiece s, int32_t i);
|
||||
|
||||
UResourceBundle *langInfoBundle;
|
||||
// We could store the strings by value, except that if there were few enough strings,
|
||||
|
|
|
@ -31,6 +31,26 @@ LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds an LSR that owns copies of its three subtags.
 *
 * The subtags are copied into a single allocation laid out as
 * "lang\0script\0region"; language, script and region point into it.
 *
 * @param lang      language subtag
 * @param scr       script subtag
 * @param r         region subtag
 * @param f         LSR flags value stored as-is
 * @param errorCode in/out ICU error code; on failure (on entry or while
 *                  copying) language, script and region stay nullptr and
 *                  regionIndex stays 0
 */
LSR::LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
         UErrorCode &errorCode) :
        language(nullptr), script(nullptr), region(nullptr),
        regionIndex(0), flags(f) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    CharString data;
    data.append(lang, errorCode).append('\0', errorCode);
    const int32_t scriptOffset = data.length();
    data.append(scr, errorCode).append('\0', errorCode);
    const int32_t regionOffset = data.length();
    data.append(r, errorCode);
    owned = data.cloneData(errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    language = owned;
    script = owned + scriptOffset;
    region = owned + regionOffset;
    // A StringPiece is a pointer+length view: r.data() may be nullptr or may
    // lack a terminating NUL, so it must not be handed to indexForRegion()
    // as a C string. Use the owned copy instead.
    // NOTE(review): assumes cloneData() keeps the terminating NUL, as the
    // use of `region` as a C string already requires.
    regionIndex = indexForRegion(region);
}
|
||||
|
||||
LSR::LSR(LSR &&other) noexcept :
|
||||
language(other.language), script(other.script), region(other.region), owned(other.owned),
|
||||
regionIndex(other.regionIndex), flags(other.flags),
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#ifndef __LSR_H__
|
||||
#define __LSR_H__
|
||||
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cstring.h"
|
||||
|
@ -45,6 +46,8 @@ struct LSR final : public UMemory {
|
|||
*/
|
||||
LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
|
||||
UErrorCode &errorCode);
|
||||
LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
|
||||
UErrorCode &errorCode);
|
||||
LSR(LSR &&other) noexcept;
|
||||
LSR(const LSR &other) = delete;
|
||||
inline ~LSR() {
|
||||
|
|
|
@ -237,6 +237,7 @@ ulocimp_addLikelySubtags(const char* localeID,
|
|||
*
|
||||
* @param localeID The locale to minimize
|
||||
* @param sink The output sink receiving the minimized locale
|
||||
* @param favorScript favor to keep script if true, region if false.
|
||||
* @param err Error information if minimizing the locale failed. If the length
|
||||
* of the localeID and the null-terminator is greater than the maximum allowed size,
|
||||
* or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
|
||||
|
@ -245,6 +246,7 @@ ulocimp_addLikelySubtags(const char* localeID,
|
|||
U_CAPI void U_EXPORT2
|
||||
ulocimp_minimizeSubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
bool favorScript,
|
||||
UErrorCode* err);
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
|
|
|
@ -1113,6 +1113,15 @@ protected: /* only protected for testing purposes. DO NOT USE. */
|
|||
* @internal
|
||||
*/
|
||||
void setFromPOSIXID(const char *posixID);
|
||||
/**
|
||||
* Minimize the subtags for this Locale, per the Likely Subtags algorithm described in UTS #35 (LDML).
|
||||
* @param favorScript favor to keep script if true, to keep region if false.
|
||||
* @param status error information if minimizing this Locale failed.
|
||||
* If this Locale is not well-formed, the error code is
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @internal
|
||||
*/
|
||||
void minimizeSubtags(bool favorScript, UErrorCode& status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
private:
|
||||
|
|
|
@ -4605,8 +4605,8 @@ const char* const full_data[][3] = {
|
|||
"am"
|
||||
}, {
|
||||
"und_Ethi_ER",
|
||||
"am_Ethi_ER",
|
||||
"am_ER"
|
||||
"ti_Ethi_ER",
|
||||
"ti_ER"
|
||||
}, {
|
||||
"und_FI",
|
||||
"fi_Latn_FI",
|
||||
|
@ -5381,8 +5381,8 @@ const char* const full_data[][3] = {
|
|||
"trv"
|
||||
}, {
|
||||
"und_Latn_HK",
|
||||
"zh_Latn_HK",
|
||||
"zh_Latn_HK"
|
||||
"en_Latn_HK",
|
||||
"en_HK"
|
||||
}, {
|
||||
"und_Latn_AQ",
|
||||
"_Latn_AQ",
|
||||
|
|
|
@ -655,17 +655,19 @@ group: resourcebundle
|
|||
localebuilder.o
|
||||
ulocale.o
|
||||
ulocbuilder.o
|
||||
loclikelysubtags.o
|
||||
deps
|
||||
udata ucol_swp
|
||||
sort stringenumeration uhash uvector
|
||||
uscript_props propname
|
||||
bytesinkutil
|
||||
errorcode
|
||||
lsr
|
||||
|
||||
group: localematcher
|
||||
localematcher.o
|
||||
deps
|
||||
resourcebundle localeprioritylist loclikelysubtags locdistance lsr
|
||||
resourcebundle localeprioritylist locdistance
|
||||
|
||||
group: localeprioritylist
|
||||
localeprioritylist.o
|
||||
|
@ -675,12 +677,7 @@ group: localeprioritylist
|
|||
group: locdistance
|
||||
locdistance.o
|
||||
deps
|
||||
loclikelysubtags
|
||||
|
||||
group: loclikelysubtags
|
||||
loclikelysubtags.o
|
||||
deps
|
||||
lsr resourcebundle
|
||||
resourcebundle
|
||||
|
||||
group: lsr
|
||||
lsr.o
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <iterator>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <cctype>
|
||||
|
||||
#include "loctest.h"
|
||||
#include "unicode/localebuilder.h"
|
||||
|
@ -234,6 +235,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
|||
TESTCASE_AUTO(TestAddLikelySubtags);
|
||||
TESTCASE_AUTO(TestMinimizeSubtags);
|
||||
TESTCASE_AUTO(TestAddLikelyAndMinimizeSubtags);
|
||||
TESTCASE_AUTO(TestDataDrivenLikelySubtags);
|
||||
TESTCASE_AUTO(TestKeywordVariants);
|
||||
TESTCASE_AUTO(TestCreateUnicodeKeywords);
|
||||
TESTCASE_AUTO(TestKeywordVariantParsing);
|
||||
|
@ -1711,6 +1713,11 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
|
|||
const char* const add;
|
||||
const char* const remove;
|
||||
} full_data[] = {
|
||||
{
|
||||
"und",
|
||||
"en_Latn_US",
|
||||
"en"
|
||||
},
|
||||
{
|
||||
"und_AQ",
|
||||
"_Latn_AQ",
|
||||
|
@ -2517,8 +2524,8 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
|
|||
"am"
|
||||
}, {
|
||||
"und_Ethi_ER",
|
||||
"am_Ethi_ER",
|
||||
"am_ER"
|
||||
"ti_Ethi_ER",
|
||||
"ti_ER"
|
||||
}, {
|
||||
"und_FI",
|
||||
"fi_Latn_FI",
|
||||
|
@ -3293,8 +3300,8 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
|
|||
"trv"
|
||||
}, {
|
||||
"und_Latn_HK",
|
||||
"zh_Latn_HK",
|
||||
"zh_Latn_HK"
|
||||
"en_Latn_HK",
|
||||
"en_HK"
|
||||
}, {
|
||||
"und_Latn_AQ",
|
||||
"_Latn_AQ",
|
||||
|
@ -3865,7 +3872,6 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
LocaleTest::TestKeywordVariants() {
|
||||
static const struct {
|
||||
|
@ -5546,6 +5552,184 @@ void LocaleTest::TestLocaleCanonicalizationFromFile()
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns a copy of s without leading and trailing whitespace
 * (as classified by std::isspace).
 *
 * An empty or all-whitespace input yields an empty string.
 */
std::string trim(const std::string &s) {
    // std::isspace has undefined behavior for negative char values,
    // so classify the byte as unsigned char.
    auto isSpace = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };

    auto start = s.begin();
    auto end = s.end();
    while (start != end && isSpace(*start)) {
        ++start;
    }
    // Walk back from the end without ever stepping before `start`;
    // this also keeps the empty-string case well defined.
    while (end != start && isSpace(*(end - 1))) {
        --end;
    }
    return std::string(start, end);
}
|
||||
|
||||
// A testing helper class which favorScript when minimizeSubtags.
|
||||
class FavorScriptLocale : public Locale {
|
||||
public:
|
||||
FavorScriptLocale(const Locale& l) :Locale(l) { }
|
||||
void minimizeSubtags(UErrorCode& status) {
|
||||
Locale::minimizeSubtags(true, status);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Returns true if the source language tag is one whose mismatches are reported
// as known issue ICU-20777 rather than as test failures: a fixed list of
// "und-<region>" tags, plus every tag starting with "und-Latn-".
bool isKnownSourceFor20777(const std::string& s) {
    static const char* const kExactSources[] = {
        "und-001", "und-AQ", "und-CC", "und-SL", "und-SS", "und-ZM",
    };
    for (const char* known : kExactSources) {
        if (s == known) {
            return true;
        }
    }
    static const char kLatnPrefix[] = "und-Latn-";
    // Prefix test: compare only the leading strlen(prefix) characters.
    return s.compare(0, sizeof(kLatnPrefix) - 1, kLatnPrefix) == 0;
}
|
||||
|
||||
void U_CALLCONV
|
||||
testLikelySubtagsLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
(void)fieldCount;
|
||||
LocaleTest* THIS = (LocaleTest*)context;
|
||||
std::string source(trim(std::string(fields[0][0], fields[0][1]-fields[0][0])));
|
||||
std::string addLikely(trim(std::string(fields[1][0], fields[1][1]-fields[1][0])));
|
||||
std::string removeFavorScript(trim(std::string(fields[2][0], fields[2][1]-fields[2][0])));
|
||||
if (removeFavorScript.length() == 0) {
|
||||
removeFavorScript = addLikely;
|
||||
}
|
||||
std::string removeFavorRegion(trim(std::string(fields[3][0], fields[3][1]-fields[3][0])));
|
||||
|
||||
if (removeFavorRegion.length() == 0) {
|
||||
removeFavorRegion = removeFavorScript;
|
||||
}
|
||||
Locale l = Locale::forLanguageTag(source, *pErrorCode);
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
THIS->errln("forLanguageTag(%s) return error %x %s", source.c_str(),
|
||||
*pErrorCode, u_errorName(*pErrorCode));
|
||||
*pErrorCode = U_ZERO_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
Locale actualMax(l);
|
||||
actualMax.addLikelySubtags(*pErrorCode);
|
||||
if (addLikely == "FAIL") {
|
||||
if (uprv_strcmp(l.getName(), actualMax.getName()) != 0) {
|
||||
THIS->errln("addLikelySubtags('%s') return should return the same but return '%s'",
|
||||
l.getName(), actualMax.getName());
|
||||
}
|
||||
} else {
|
||||
std::string max = actualMax.toLanguageTag<std::string>(*pErrorCode);
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
THIS->errln("toLanguageTag(%s) return error %x %s", actualMax.getName(),
|
||||
*pErrorCode, u_errorName(*pErrorCode));
|
||||
*pErrorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
if (max != addLikely) {
|
||||
if (isKnownSourceFor20777(source)) {
|
||||
THIS->logKnownIssue(
|
||||
"ICU-20777", "addLikelySubtags('%s') should return '%s' but got '%s'",
|
||||
source.c_str(), addLikely.c_str(), max.c_str());
|
||||
} else {
|
||||
THIS->errln("addLikelySubtags('%s') should return '%s' but got '%s'",
|
||||
source.c_str(), addLikely.c_str(), max.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Locale actualMin(l);
|
||||
actualMin.minimizeSubtags(*pErrorCode);
|
||||
if (removeFavorRegion == "FAIL") {
|
||||
if (uprv_strcmp(l.getName(), actualMin.getName()) != 0) {
|
||||
THIS->errln("minimizeSubtags('%s') return should return the same but return '%s'",
|
||||
l.getName(), actualMin.getName());
|
||||
}
|
||||
} else {
|
||||
std::string min = actualMin.toLanguageTag<std::string>(*pErrorCode);
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
THIS->errln("toLanguageTag(%s) return error %x %s", actualMin.getName(),
|
||||
*pErrorCode, u_errorName(*pErrorCode));
|
||||
*pErrorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
if (min != removeFavorRegion) {
|
||||
if (isKnownSourceFor20777(source)) {
|
||||
THIS->logKnownIssue(
|
||||
"ICU-20777", "minimizeSubtags('%s') should return '%s' but got '%s'",
|
||||
source.c_str(), removeFavorRegion.c_str(), min.c_str());
|
||||
} else {
|
||||
THIS->errln("minimizeSubtags('%s') should return '%s' but got '%s'",
|
||||
source.c_str(), removeFavorRegion.c_str(), min.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FavorScriptLocale actualMinFavorScript(l);
|
||||
actualMinFavorScript.minimizeSubtags(*pErrorCode);
|
||||
if (removeFavorScript == "FAIL") {
|
||||
if (uprv_strcmp(l.getName(), actualMinFavorScript.getName()) != 0) {
|
||||
THIS->errln("minimizeSubtags('%s') return should return the same but return '%s'",
|
||||
l.getName(), actualMinFavorScript.getName());
|
||||
}
|
||||
} else {
|
||||
std::string min = actualMinFavorScript.toLanguageTag<std::string>(*pErrorCode);
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
THIS->errln("toLanguageTag(%s) favor script return error %x %s", actualMinFavorScript.getName(),
|
||||
*pErrorCode, u_errorName(*pErrorCode));
|
||||
*pErrorCode = U_ZERO_ERROR;
|
||||
} else {
|
||||
if (min != removeFavorScript) {
|
||||
if (isKnownSourceFor20777(source)) {
|
||||
THIS->logKnownIssue(
|
||||
"ICU-20777",
|
||||
"minimizeSubtags('%s') favor script should return '%s' but got '%s'",
|
||||
source.c_str(), removeFavorScript.c_str(), min.c_str());
|
||||
} else {
|
||||
THIS->errln("minimizeSubtags('%s') favor script should return '%s' but got '%s'",
|
||||
source.c_str(), removeFavorScript.c_str(), min.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LocaleTest::TestDataDrivenLikelySubtags() {
|
||||
if (quick) {
|
||||
// This test is too slow to run. Only run in -e mode.
|
||||
return;
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestDataDrivenLikelySubtags()");
|
||||
const char* name = "likelySubtags.txt";
|
||||
const char *sourceTestDataPath = getSourceTestData(errorCode);
|
||||
if (errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
|
||||
"folder (getSourceTestData())")) {
|
||||
return;
|
||||
}
|
||||
CharString path(sourceTestDataPath, errorCode);
|
||||
path.appendPathPart(name, errorCode);
|
||||
LocalStdioFilePointer testFile(fopen(path.data(), "r"));
|
||||
if (testFile.isNull()) {
|
||||
errln("unable to open %s", path.data());
|
||||
return;
|
||||
}
|
||||
|
||||
// Columns (c1, c2,...) are separated by semicolons.
|
||||
// Leading and trailing spaces and tabs in each column are ignored.
|
||||
// Comments are indicated with hash marks.
|
||||
const int32_t kNumFields = 4;
|
||||
char *fields[kNumFields][2];
|
||||
|
||||
u_parseDelimitedFile(path.data(), ';', fields, kNumFields, testLikelySubtagsLineFn,
|
||||
this, errorCode);
|
||||
if (errorCode.errIfFailureAndReset("error parsing %s", name)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void LocaleTest::TestKnownCanonicalizedListCorrect()
|
||||
{
|
||||
IcuTestErrorCode status(*this, "TestKnownCanonicalizedListCorrect");
|
||||
|
|
|
@ -131,6 +131,7 @@ public:
|
|||
void TestAddLikelySubtags();
|
||||
void TestMinimizeSubtags();
|
||||
void TestAddLikelyAndMinimizeSubtags();
|
||||
void TestDataDrivenLikelySubtags();
|
||||
|
||||
void TestForLanguageTag();
|
||||
void TestForLanguageTagLegacyTagBug21676();
|
||||
|
|
1607
icu4c/source/test/testdata/likelySubtags.txt
vendored
Normal file
1607
icu4c/source/test/testdata/likelySubtags.txt
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -225,8 +225,8 @@ public class LocaleDistance {
|
|||
// VisibleForTesting
|
||||
public int testOnlyDistance(ULocale desired, ULocale supported,
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
|
||||
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
|
||||
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported, false);
|
||||
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired, false);
|
||||
int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, 1,
|
||||
shiftDistance(threshold), favorSubtag, LocaleMatcher.Direction.WITH_ONE_WAY);
|
||||
return getDistanceFloor(indexAndDistance);
|
||||
|
|
|
@ -15,6 +15,7 @@ import com.ibm.icu.impl.ICUData;
|
|||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.Region;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
public final class XLikelySubtags {
|
||||
|
@ -180,7 +181,7 @@ public final class XLikelySubtags {
|
|||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public LSR makeMaximizedLsrFrom(ULocale locale) {
|
||||
public LSR makeMaximizedLsrFrom(ULocale locale, boolean returnInputIfUnmatch) {
|
||||
String name = locale.getName(); // Faster than .toLanguageTag().
|
||||
if (name.startsWith("@x=")) {
|
||||
String tag = locale.toLanguageTag();
|
||||
|
@ -189,8 +190,12 @@ public final class XLikelySubtags {
|
|||
// und-x-subtag-subtag...
|
||||
return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant(), returnInputIfUnmatch);
|
||||
if (max.language.isEmpty() && max.script.isEmpty() && max.region.isEmpty()) {
|
||||
return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR.EXPLICIT_LSR);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
public LSR makeMaximizedLsrFrom(Locale locale) {
|
||||
|
@ -201,10 +206,10 @@ public final class XLikelySubtags {
|
|||
return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
locale.getVariant(), false);
|
||||
}
|
||||
|
||||
private LSR makeMaximizedLsr(String language, String script, String region, String variant) {
|
||||
private LSR makeMaximizedLsr(String language, String script, String region, String variant, boolean returnInputIfUnmatch) {
|
||||
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
|
||||
// They should match only themselves,
|
||||
// not other locales with what looks like the same language and script subtags.
|
||||
|
@ -248,13 +253,23 @@ public final class XLikelySubtags {
|
|||
language = getCanonical(languageAliases, language);
|
||||
// (We have no script mappings.)
|
||||
region = getCanonical(regionAliases, region);
|
||||
return maximize(language, script, region);
|
||||
return maximize(language, script, region, returnInputIfUnmatch);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to find out a region is a macroregion
|
||||
*/
|
||||
private boolean isMacroregion(String region) {
|
||||
Region.RegionType type = Region.getInstance(region).getType();
|
||||
return type == Region.RegionType.WORLD ||
|
||||
type == Region.RegionType.CONTINENT ||
|
||||
type == Region.RegionType.SUBCONTINENT ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
|
||||
*/
|
||||
private LSR maximize(String language, String script, String region) {
|
||||
private LSR maximize(String language, String script, String region, boolean returnInputIfUnmatch) {
|
||||
if (language.equals("und")) {
|
||||
language = "";
|
||||
}
|
||||
|
@ -268,7 +283,9 @@ public final class XLikelySubtags {
|
|||
return new LSR(language, script, region, LSR.EXPLICIT_LSR); // already maximized
|
||||
}
|
||||
|
||||
int retainOldMask = 0;
|
||||
boolean retainLanguage = false;
|
||||
boolean retainScript = false;
|
||||
boolean retainRegion = false;
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
long state;
|
||||
int value;
|
||||
|
@ -280,34 +297,33 @@ public final class XLikelySubtags {
|
|||
} else {
|
||||
value = trieNext(iter, language, 0);
|
||||
}
|
||||
boolean matchLanguage = (value >= 0);
|
||||
boolean matchScript = false;
|
||||
if (value >= 0) {
|
||||
if (!language.isEmpty()) {
|
||||
retainOldMask |= 4;
|
||||
}
|
||||
retainLanguage = ! language.isEmpty();
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 4;
|
||||
retainLanguage = true;
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
state = 0;
|
||||
}
|
||||
|
||||
if (value >= 0 && !script.isEmpty()) {
|
||||
matchScript = true;
|
||||
}
|
||||
if (value > 0) {
|
||||
// Intermediate or final value from just language.
|
||||
if (value == SKIP_SCRIPT) {
|
||||
value = 0;
|
||||
}
|
||||
if (!script.isEmpty()) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
retainScript = ! script.isEmpty();
|
||||
} else {
|
||||
value = trieNext(iter, script, 0);
|
||||
if (value >= 0) {
|
||||
if (!script.isEmpty()) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
retainScript = ! script.isEmpty();
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 2;
|
||||
retainScript = true;
|
||||
if (state == 0) {
|
||||
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
|
||||
} else {
|
||||
|
@ -319,19 +335,19 @@ public final class XLikelySubtags {
|
|||
}
|
||||
}
|
||||
|
||||
boolean matchRegion = false;
|
||||
if (value > 0) {
|
||||
// Final value from just language or language+script.
|
||||
if (!region.isEmpty()) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
retainRegion = ! region.isEmpty();
|
||||
} else {
|
||||
value = trieNext(iter, region, 0);
|
||||
if (value >= 0) {
|
||||
if (!region.isEmpty()) {
|
||||
retainOldMask |= 1;
|
||||
if (!region.isEmpty() && !isMacroregion(region)) {
|
||||
retainRegion = true;
|
||||
matchRegion = true;
|
||||
}
|
||||
} else {
|
||||
retainOldMask |= 1;
|
||||
retainRegion = true;
|
||||
if (state == 0) {
|
||||
value = defaultLsrIndex;
|
||||
} else {
|
||||
|
@ -343,25 +359,30 @@ public final class XLikelySubtags {
|
|||
}
|
||||
LSR result = lsrs[value];
|
||||
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.isEmpty())))) {
|
||||
return new LSR("", "", "", LSR.EXPLICIT_LSR); // no matching.
|
||||
}
|
||||
if (language.isEmpty()) {
|
||||
language = "und";
|
||||
}
|
||||
|
||||
if (retainOldMask == 0) {
|
||||
if (! (retainLanguage || retainScript || retainRegion)) {
|
||||
assert result.flags == LSR.IMPLICIT_LSR;
|
||||
return result;
|
||||
}
|
||||
if ((retainOldMask & 4) == 0) {
|
||||
if (!retainLanguage) {
|
||||
language = result.language;
|
||||
}
|
||||
if ((retainOldMask & 2) == 0) {
|
||||
if (!retainScript) {
|
||||
script = result.script;
|
||||
}
|
||||
if ((retainOldMask & 1) == 0) {
|
||||
if (!retainRegion) {
|
||||
region = result.region;
|
||||
}
|
||||
int retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
return new LSR(language, script, region, retainOldMask);
|
||||
return new LSR(language, script, region, retainMask);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -502,50 +523,37 @@ public final class XLikelySubtags {
|
|||
}
|
||||
}
|
||||
|
||||
LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
|
||||
public LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
|
||||
ULocale.Minimize fieldToFavor) {
|
||||
LSR result = maximize(languageIn, scriptIn, regionIn);
|
||||
|
||||
// We could try just a series of checks, like:
|
||||
// LSR result2 = addLikelySubtags(languageIn, "", "");
|
||||
// if result.equals(result2) return result2;
|
||||
// However, we can optimize 2 of the cases:
|
||||
// (languageIn, "", "")
|
||||
// (languageIn, "", regionIn)
|
||||
|
||||
// value00 = lookup(result.language, "", "")
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
int value = trieNext(iter, result.language, 0);
|
||||
assert value >= 0;
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value >= 0;
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
LSR max = maximize(languageIn, scriptIn, regionIn, true);
|
||||
if (max.language.isEmpty() && max.region.isEmpty() && max.script.isEmpty()) {
|
||||
// Cannot match, return as is
|
||||
return new LSR(languageIn, scriptIn, regionIn, LSR.EXPLICIT_LSR);
|
||||
}
|
||||
LSR test = maximize(max.language, "", "", true);
|
||||
if (test.isEquivalentTo(max)) {
|
||||
return new LSR(max.language, "", "", LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
if (ULocale.Minimize.FAVOR_REGION == fieldToFavor) {
|
||||
test = maximize(max.language, "", max.region, true);
|
||||
if (test.isEquivalentTo(max)) {
|
||||
return new LSR(max.language, "", max.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
test = maximize(max.language, max.script, "", true);
|
||||
if (test.isEquivalentTo(max)) {
|
||||
return new LSR(max.language, max.script, "", LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
} else {
|
||||
test = maximize(max.language, max.script, "", true);
|
||||
if (test.isEquivalentTo(max)) {
|
||||
return new LSR(max.language, max.script, "", LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
test = maximize(max.language, "", max.region, true);
|
||||
if (test.isEquivalentTo(max)) {
|
||||
return new LSR(max.language, "", max.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
}
|
||||
assert value > 0;
|
||||
LSR value00 = lsrs[value];
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
}
|
||||
|
||||
// The last case is not as easy to optimize.
|
||||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (favorRegionOk) {
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
return result;
|
||||
return new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
|
||||
private Map<String, LSR> getTable() {
|
||||
|
|
|
@ -796,7 +796,7 @@ public final class LocaleMatcher {
|
|||
if (locale.equals(UND_ULOCALE)) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,15 +42,16 @@ import com.ibm.icu.impl.locale.BaseLocale;
|
|||
import com.ibm.icu.impl.locale.Extension;
|
||||
import com.ibm.icu.impl.locale.InternalLocaleBuilder;
|
||||
import com.ibm.icu.impl.locale.KeyTypeData;
|
||||
import com.ibm.icu.impl.locale.LSR;
|
||||
import com.ibm.icu.impl.locale.LanguageTag;
|
||||
import com.ibm.icu.impl.locale.LocaleExtensions;
|
||||
import com.ibm.icu.impl.locale.LocaleSyntaxException;
|
||||
import com.ibm.icu.impl.locale.ParseStatus;
|
||||
import com.ibm.icu.impl.locale.UnicodeLocaleExtension;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.LocaleDisplayNames;
|
||||
import com.ibm.icu.text.LocaleDisplayNames.DialectHandling;
|
||||
|
||||
/**
|
||||
* {@icuenhanced java.util.Locale}.{@icu _usage_}
|
||||
*
|
||||
|
@ -2722,12 +2723,10 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
trailing = loc.localeID.substring(trailingIndex);
|
||||
}
|
||||
|
||||
String newLocaleID =
|
||||
createLikelySubtagsString(
|
||||
tags[0],
|
||||
tags[1],
|
||||
tags[2],
|
||||
trailing);
|
||||
LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(
|
||||
new ULocale(loc.getLanguage(), loc.getScript(), loc.getCountry()), true);
|
||||
String newLocaleID = createTagString(max.language, max.script, max.region,
|
||||
trailing);
|
||||
|
||||
return newLocaleID == null ? loc : new ULocale(newLocaleID);
|
||||
}
|
||||
|
@ -2819,148 +2818,22 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
@Deprecated
|
||||
public static ULocale minimizeSubtags(ULocale loc, Minimize fieldToFavor) {
|
||||
String[] tags = new String[3];
|
||||
String trailing = null;
|
||||
|
||||
int trailingIndex = parseTagString(
|
||||
loc.localeID,
|
||||
tags);
|
||||
|
||||
String originalLang = tags[0];
|
||||
String originalScript = tags[1];
|
||||
String originalRegion = tags[2];
|
||||
String originalTrailing = null;
|
||||
|
||||
if (trailingIndex < loc.localeID.length()) {
|
||||
/*
|
||||
* Create a String that contains everything
|
||||
* after the language, script, and region.
|
||||
*/
|
||||
originalTrailing = loc.localeID.substring(trailingIndex);
|
||||
trailing = loc.localeID.substring(trailingIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* First, we need to first get the maximization
|
||||
* by adding any likely subtags.
|
||||
**/
|
||||
String maximizedLocaleID =
|
||||
createLikelySubtagsString(
|
||||
originalLang,
|
||||
originalScript,
|
||||
originalRegion,
|
||||
null);
|
||||
LSR lsr = XLikelySubtags.INSTANCE.minimizeSubtags(
|
||||
loc.getLanguage(), loc.getScript(), loc.getCountry(), fieldToFavor);
|
||||
String newLocaleID = createTagString(lsr.language, lsr.script, lsr.region,
|
||||
trailing);
|
||||
|
||||
/**
|
||||
* If maximization fails, there's nothing
|
||||
* we can do.
|
||||
**/
|
||||
if (isEmptyString(maximizedLocaleID)) {
|
||||
return loc;
|
||||
}
|
||||
else {
|
||||
/**
|
||||
* Start first with just the language.
|
||||
**/
|
||||
String tag =
|
||||
createLikelySubtagsString(
|
||||
originalLang,
|
||||
null,
|
||||
null,
|
||||
null);
|
||||
|
||||
if (tag.equals(maximizedLocaleID)) {
|
||||
String newLocaleID =
|
||||
createTagString(
|
||||
originalLang,
|
||||
null,
|
||||
null,
|
||||
originalTrailing);
|
||||
|
||||
return new ULocale(newLocaleID);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Next, try the language and region.
|
||||
**/
|
||||
if (fieldToFavor == Minimize.FAVOR_REGION) {
|
||||
if (originalRegion.length() != 0) {
|
||||
String tag =
|
||||
createLikelySubtagsString(
|
||||
originalLang,
|
||||
null,
|
||||
originalRegion,
|
||||
null);
|
||||
|
||||
if (tag.equals(maximizedLocaleID)) {
|
||||
String newLocaleID =
|
||||
createTagString(
|
||||
originalLang,
|
||||
null,
|
||||
originalRegion,
|
||||
originalTrailing);
|
||||
|
||||
return new ULocale(newLocaleID);
|
||||
}
|
||||
}
|
||||
if (originalScript.length() != 0){
|
||||
String tag =
|
||||
createLikelySubtagsString(
|
||||
originalLang,
|
||||
originalScript,
|
||||
null,
|
||||
null);
|
||||
|
||||
if (tag.equals(maximizedLocaleID)) {
|
||||
String newLocaleID =
|
||||
createTagString(
|
||||
originalLang,
|
||||
originalScript,
|
||||
null,
|
||||
originalTrailing);
|
||||
|
||||
return new ULocale(newLocaleID);
|
||||
}
|
||||
}
|
||||
} else { // FAVOR_SCRIPT, so
|
||||
if (originalScript.length() != 0){
|
||||
String tag =
|
||||
createLikelySubtagsString(
|
||||
originalLang,
|
||||
originalScript,
|
||||
null,
|
||||
null);
|
||||
|
||||
if (tag.equals(maximizedLocaleID)) {
|
||||
String newLocaleID =
|
||||
createTagString(
|
||||
originalLang,
|
||||
originalScript,
|
||||
null,
|
||||
originalTrailing);
|
||||
|
||||
return new ULocale(newLocaleID);
|
||||
}
|
||||
}
|
||||
if (originalRegion.length() != 0) {
|
||||
String tag =
|
||||
createLikelySubtagsString(
|
||||
originalLang,
|
||||
null,
|
||||
originalRegion,
|
||||
null);
|
||||
|
||||
if (tag.equals(maximizedLocaleID)) {
|
||||
String newLocaleID =
|
||||
createTagString(
|
||||
originalLang,
|
||||
null,
|
||||
originalRegion,
|
||||
originalTrailing);
|
||||
|
||||
return new ULocale(newLocaleID);
|
||||
}
|
||||
}
|
||||
}
|
||||
return loc;
|
||||
return newLocaleID == null ? loc : new ULocale(newLocaleID);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3007,10 +2880,9 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
* @return The new tag string.
|
||||
**/
|
||||
private static String createTagString(String lang, String script, String region,
|
||||
String trailing, String alternateTags) {
|
||||
String trailing) {
|
||||
|
||||
LocaleIDParser parser = null;
|
||||
boolean regionAppended = false;
|
||||
|
||||
StringBuilder tag = new StringBuilder();
|
||||
|
||||
|
@ -3018,8 +2890,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
appendTag(
|
||||
lang,
|
||||
tag);
|
||||
}
|
||||
else if (isEmptyString(alternateTags)) {
|
||||
} else {
|
||||
/*
|
||||
* Append the value for an unknown language, if
|
||||
* we found no language.
|
||||
|
@ -3028,66 +2899,17 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
UNDEFINED_LANGUAGE,
|
||||
tag);
|
||||
}
|
||||
else {
|
||||
parser = new LocaleIDParser(alternateTags);
|
||||
|
||||
String alternateLang = parser.getLanguage();
|
||||
|
||||
/*
|
||||
* Append the value for an unknown language, if
|
||||
* we found no language.
|
||||
*/
|
||||
appendTag(
|
||||
!isEmptyString(alternateLang) ? alternateLang : UNDEFINED_LANGUAGE,
|
||||
tag);
|
||||
}
|
||||
|
||||
if (!isEmptyString(script)) {
|
||||
appendTag(
|
||||
script,
|
||||
tag);
|
||||
}
|
||||
else if (!isEmptyString(alternateTags)) {
|
||||
/*
|
||||
* Parse the alternateTags string for the script.
|
||||
*/
|
||||
if (parser == null) {
|
||||
parser = new LocaleIDParser(alternateTags);
|
||||
}
|
||||
|
||||
String alternateScript = parser.getScript();
|
||||
|
||||
if (!isEmptyString(alternateScript)) {
|
||||
appendTag(
|
||||
alternateScript,
|
||||
tag);
|
||||
}
|
||||
}
|
||||
|
||||
if (!isEmptyString(region)) {
|
||||
appendTag(
|
||||
region,
|
||||
tag);
|
||||
|
||||
regionAppended = true;
|
||||
}
|
||||
else if (!isEmptyString(alternateTags)) {
|
||||
/*
|
||||
* Parse the alternateTags string for the region.
|
||||
*/
|
||||
if (parser == null) {
|
||||
parser = new LocaleIDParser(alternateTags);
|
||||
}
|
||||
|
||||
String alternateRegion = parser.getCountry();
|
||||
|
||||
if (!isEmptyString(alternateRegion)) {
|
||||
appendTag(
|
||||
alternateRegion,
|
||||
tag);
|
||||
|
||||
regionAppended = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (trailing != null && trailing.length() > 1) {
|
||||
|
@ -3107,7 +2929,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
separators = 1;
|
||||
}
|
||||
|
||||
if (regionAppended) {
|
||||
if (!isEmptyString(region)) {
|
||||
/*
|
||||
* If we appended a region, we may need to strip
|
||||
* the extra separator from the variant portion.
|
||||
|
@ -3134,21 +2956,6 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
return tag.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a tag string from the supplied parameters. The lang, script and region
|
||||
* parameters may be null references.If the lang parameter is an empty string, the
|
||||
* default value for an unknown language is written to the output buffer.
|
||||
*
|
||||
* @param lang The language tag to use.
|
||||
* @param script The script tag to use.
|
||||
* @param region The region tag to use.
|
||||
* @param trailing Any trailing data to append to the new tag.
|
||||
* @return The new String.
|
||||
**/
|
||||
static String createTagString(String lang, String script, String region, String trailing) {
|
||||
return createTagString(lang, script, region, trailing, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the language, script, and region subtags from a tag string, and return the results.
|
||||
*
|
||||
|
@ -3214,144 +3021,6 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
}
|
||||
}
|
||||
|
||||
private static String lookupLikelySubtags(String localeId) {
|
||||
UResourceBundle bundle =
|
||||
UResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "likelySubtags");
|
||||
try {
|
||||
return bundle.getString(localeId);
|
||||
}
|
||||
catch(MissingResourceException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String createLikelySubtagsString(String lang, String script, String region,
|
||||
String variants) {
|
||||
|
||||
/**
|
||||
* Try the language with the script and region first.
|
||||
*/
|
||||
if (!isEmptyString(script) && !isEmptyString(region)) {
|
||||
|
||||
String searchTag =
|
||||
createTagString(
|
||||
lang,
|
||||
script,
|
||||
region,
|
||||
null);
|
||||
|
||||
String likelySubtags = lookupLikelySubtags(searchTag);
|
||||
|
||||
/*
|
||||
if (likelySubtags == null) {
|
||||
if (likelySubtags2 != null) {
|
||||
System.err.println("Tag mismatch: \"(null)\" \"" + likelySubtags2 + "\"");
|
||||
}
|
||||
}
|
||||
else if (likelySubtags2 == null) {
|
||||
System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"(null)\"");
|
||||
}
|
||||
else if (!likelySubtags.equals(likelySubtags2)) {
|
||||
System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"" + likelySubtags2
|
||||
+ "\"");
|
||||
}
|
||||
*/
|
||||
if (likelySubtags != null) {
|
||||
// Always use the language tag from the
|
||||
// maximal string, since it may be more
|
||||
// specific than the one provided.
|
||||
return createTagString(
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
variants,
|
||||
likelySubtags);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with just the script.
|
||||
**/
|
||||
if (!isEmptyString(script)) {
|
||||
|
||||
String searchTag =
|
||||
createTagString(
|
||||
lang,
|
||||
script,
|
||||
null,
|
||||
null);
|
||||
|
||||
String likelySubtags = lookupLikelySubtags(searchTag);
|
||||
if (likelySubtags != null) {
|
||||
// Always use the language tag from the
|
||||
// maximal string, since it may be more
|
||||
// specific than the one provided.
|
||||
return createTagString(
|
||||
null,
|
||||
null,
|
||||
region,
|
||||
variants,
|
||||
likelySubtags);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with just the region.
|
||||
**/
|
||||
if (!isEmptyString(region)) {
|
||||
|
||||
String searchTag =
|
||||
createTagString(
|
||||
lang,
|
||||
null,
|
||||
region,
|
||||
null);
|
||||
|
||||
String likelySubtags = lookupLikelySubtags(searchTag);
|
||||
|
||||
if (likelySubtags != null) {
|
||||
// Always use the language tag from the
|
||||
// maximal string, since it may be more
|
||||
// specific than the one provided.
|
||||
return createTagString(
|
||||
null,
|
||||
script,
|
||||
null,
|
||||
variants,
|
||||
likelySubtags);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finally, try just the language.
|
||||
**/
|
||||
{
|
||||
String searchTag =
|
||||
createTagString(
|
||||
lang,
|
||||
null,
|
||||
null,
|
||||
null);
|
||||
|
||||
String likelySubtags = lookupLikelySubtags(searchTag);
|
||||
|
||||
if (likelySubtags != null) {
|
||||
// Always use the language tag from the
|
||||
// maximal string, since it may be more
|
||||
// specific than the one provided.
|
||||
return createTagString(
|
||||
null,
|
||||
script,
|
||||
region,
|
||||
variants,
|
||||
likelySubtags);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
// --------------------------------
|
||||
// BCP47/OpenJDK APIs
|
||||
// --------------------------------
|
||||
|
|
1607
icu4j/main/tests/core/src/com/ibm/icu/dev/data/likelySubtags.txt
Normal file
1607
icu4j/main/tests/core/src/com/ibm/icu/dev/data/likelySubtags.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -24,7 +24,6 @@ import org.junit.Test;
|
|||
import org.junit.runner.RunWith;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
|
@ -869,19 +868,12 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
for (ULocale locale : list) {
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale, false);
|
||||
}
|
||||
}
|
||||
return System.nanoTime() - start;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() {
|
||||
XLikelySubtags.Data built = LikelySubtagsBuilder.build();
|
||||
XLikelySubtags.Data loaded = XLikelySubtags.Data.load();
|
||||
assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
|
||||
}
|
||||
|
||||
private static final class TestCase implements Cloneable {
|
||||
private static final String ENDL = System.getProperties().getProperty("line.separator");
|
||||
|
||||
|
|
|
@ -16,11 +16,13 @@ import java.io.BufferedReader;
|
|||
import java.io.IOException;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -54,7 +56,10 @@ import com.ibm.icu.util.ULocale.Minimize;
|
|||
import com.ibm.icu.util.UResourceBundle;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
import junitparams.JUnitParamsRunner;
|
||||
import junitparams.Parameters;
|
||||
|
||||
@RunWith(JUnitParamsRunner.class)
|
||||
public class ULocaleTest extends TestFmwk {
|
||||
|
||||
// Ticket #8078 and #11674
|
||||
|
@ -1947,7 +1952,7 @@ public class ULocaleTest extends TestFmwk {
|
|||
"de__POSIX_1901"
|
||||
}, {
|
||||
"und",
|
||||
""
|
||||
"en"
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -2760,8 +2765,8 @@ public class ULocaleTest extends TestFmwk {
|
|||
"am"
|
||||
}, {
|
||||
"und_Ethi_ER",
|
||||
"am_Ethi_ER",
|
||||
"am_ER"
|
||||
"ti_Ethi_ER",
|
||||
"ti_ER"
|
||||
}, {
|
||||
"und_FI",
|
||||
"fi_Latn_FI",
|
||||
|
@ -3536,8 +3541,8 @@ public class ULocaleTest extends TestFmwk {
|
|||
"trv"
|
||||
}, {
|
||||
"und_Latn_HK",
|
||||
"zh_Latn_HK",
|
||||
"zh_Latn_HK"
|
||||
"en_Latn_HK",
|
||||
"en_HK"
|
||||
}, {
|
||||
"und_Latn_AQ",
|
||||
"_Latn_AQ",
|
||||
|
@ -5417,4 +5422,103 @@ public class ULocaleTest extends TestFmwk {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
boolean isKnownSourceFor20777(String s) {
|
||||
return s.equals("und-001") ||
|
||||
s.equals("und-AQ") ||
|
||||
s.equals("und-CC") ||
|
||||
s.equals("und-SL") ||
|
||||
s.equals("und-SS") ||
|
||||
s.equals("und-ZM") ||
|
||||
s.startsWith("und-Latn-");
|
||||
}
|
||||
|
||||
private static final class TestCase implements Cloneable {
|
||||
private static final String ENDL = System.getProperties().getProperty("line.separator");
|
||||
|
||||
int lineNr = 0;
|
||||
|
||||
String source = "";
|
||||
String addLikely = "";
|
||||
String removeFavorScript = "";
|
||||
String removeFavorRegion = "";
|
||||
|
||||
@Override
|
||||
public TestCase clone() throws CloneNotSupportedException {
|
||||
return (TestCase) super.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (new StringBuilder(source))
|
||||
.append(";")
|
||||
.append(addLikely)
|
||||
.append(";")
|
||||
.append(removeFavorScript)
|
||||
.append(";")
|
||||
.append(removeFavorRegion)
|
||||
.toString();
|
||||
}
|
||||
}
|
||||
static List<TestCase> readLikelySubtagsTestCases() throws Exception {
|
||||
List<TestCase> tests = new ArrayList<>();
|
||||
TestCase test = new TestCase();
|
||||
BufferedReader testFile = TestUtil.getDataReader("likelySubtags.txt");
|
||||
try {
|
||||
String line;
|
||||
while ((line = testFile.readLine()) != null) {
|
||||
if (line.startsWith("#")) continue;
|
||||
String [] fields = line.split("[ \t]?;[ \t]?");
|
||||
if (fields.length < 2) continue;
|
||||
test.source = fields[0];
|
||||
test.addLikely = fields[1];
|
||||
test.removeFavorScript = (fields.length < 3) || fields[2].isEmpty() ? test.addLikely : fields[2];
|
||||
test.removeFavorRegion = (fields.length < 4) || fields[3].isEmpty() ? test.removeFavorScript : fields[3];
|
||||
tests.add(test.clone());
|
||||
}
|
||||
} finally {
|
||||
testFile.close();
|
||||
}
|
||||
return tests;
|
||||
}
|
||||
|
||||
@Test
|
||||
@Parameters(method = "readLikelySubtagsTestCases")
|
||||
public void likelySubtagsDataDriven(TestCase test) {
|
||||
ULocale l = ULocale.forLanguageTag(test.source);
|
||||
if (isKnownSourceFor20777(test.source)) {
|
||||
if (test.addLikely.equals(ULocale.addLikelySubtags(l).toLanguageTag())) {
|
||||
logKnownIssue("ICU-20777", "addLikelySubtags(" + test.source + ")");
|
||||
}
|
||||
if (test.removeFavorRegion.equals(ULocale.minimizeSubtags(l).toLanguageTag())) {
|
||||
logKnownIssue("ICU-20777", "minimizeSubtags(" + test.source + ")");
|
||||
}
|
||||
if (test.removeFavorScript.equals(ULocale.minimizeSubtags(
|
||||
l, ULocale.Minimize.FAVOR_SCRIPT).toLanguageTag())) {
|
||||
logKnownIssue("ICU-20777", "minimizeSubtags(" + test.source + ") - FAVOR_SCRIPT");
|
||||
}
|
||||
} else {
|
||||
if (test.addLikely.equals("FAIL")) {
|
||||
assertEquals("addLikelySubtags(" + test.source + ") should be unchanged",
|
||||
l, ULocale.addLikelySubtags(l));
|
||||
} else {
|
||||
assertEquals("addLikelySubtags(" + test.source + ")",
|
||||
test.addLikely, ULocale.addLikelySubtags(l).toLanguageTag());
|
||||
}
|
||||
if (test.removeFavorRegion.equals("FAIL")) {
|
||||
assertEquals("minimizeSubtags(" + test.source + ") should be unchanged",
|
||||
l, ULocale.minimizeSubtags(l));
|
||||
} else {
|
||||
assertEquals("minimizeSubtags(" + test.source + ")",
|
||||
test.removeFavorRegion, ULocale.minimizeSubtags(l).toLanguageTag());
|
||||
}
|
||||
if (test.removeFavorScript.equals("FAIL")) {
|
||||
assertEquals("minimizeSubtags(" + test.source + ") - FAVOR_SCRIPT should be unchanged",
|
||||
l, ULocale.minimizeSubtags(l, ULocale.Minimize.FAVOR_SCRIPT));
|
||||
} else {
|
||||
assertEquals("minimizeSubtags(" + test.source + ") - FAVOR_SCRIPT",
|
||||
test.removeFavorScript, ULocale.minimizeSubtags(l, ULocale.Minimize.FAVOR_SCRIPT).toLanguageTag());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,317 +0,0 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.dev.tool.locale;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.impl.locale.LSR;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.util.BytesTrieBuilder;
|
||||
import com.ibm.icu.util.ICUException;
|
||||
|
||||
/**
|
||||
* Builds data for XLikelySubtags.
|
||||
* Reads source data from ICU resource bundles.
|
||||
*/
|
||||
public class LikelySubtagsBuilder {
|
||||
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
|
||||
|
||||
private static ICUResourceBundle getSupplementalDataBundle(String name) {
|
||||
return ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, name,
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
}
|
||||
|
||||
private static final class AliasesBuilder {
|
||||
final Map<String, String> toCanonical = new HashMap<>();
|
||||
final Multimap<String, String> toAliases;
|
||||
|
||||
public Set<String> getAliases(String canonical) {
|
||||
Set<String> aliases = toAliases.get(canonical);
|
||||
return aliases == null ? Collections.singleton(canonical) : aliases;
|
||||
}
|
||||
|
||||
public AliasesBuilder(String type) {
|
||||
ICUResourceBundle metadata = getSupplementalDataBundle("metadata");
|
||||
UResource.Value value = metadata.getValueWithFallback("alias/" + type);
|
||||
UResource.Table aliases = value.getTable();
|
||||
UResource.Key key = new UResource.Key();
|
||||
for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
|
||||
String aliasFrom = key.toString();
|
||||
if (aliasFrom.contains("_") || aliasFrom.contains("-")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
UResource.Table table = value.getTable();
|
||||
if (table.findValue("reason", value) && value.getString().equals("overlong")) {
|
||||
continue;
|
||||
}
|
||||
if (!table.findValue("replacement", value)) {
|
||||
continue;
|
||||
}
|
||||
String aliasTo = value.getString();
|
||||
int spacePos = aliasTo.indexOf(' ');
|
||||
String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
|
||||
if (aliasFirst.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
toCanonical.put(aliasFrom, aliasFirst);
|
||||
}
|
||||
if (type.equals("language")) {
|
||||
toCanonical.put("mo", "ro"); // special case
|
||||
}
|
||||
toAliases = Multimaps.invertFrom(toCanonical, HashMultimap.<String, String>create());
|
||||
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** " + type + " aliases");
|
||||
for (Map.Entry<String, String> mapping : new TreeMap<>(toCanonical).entrySet()) {
|
||||
System.out.println(mapping);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TrieBuilder {
|
||||
byte[] bytes = new byte[24];
|
||||
int length = 0;
|
||||
BytesTrieBuilder tb = new BytesTrieBuilder();
|
||||
|
||||
void addValue(int value) {
|
||||
assert value >= 0;
|
||||
tb.add(bytes, length, value);
|
||||
}
|
||||
|
||||
void addStar() {
|
||||
bytes[length++] = '*';
|
||||
}
|
||||
|
||||
void addSubtag(String s) {
|
||||
assert !s.isEmpty();
|
||||
assert !s.equals("*");
|
||||
int end = s.length() - 1;
|
||||
for (int i = 0;; ++i) {
|
||||
char c = s.charAt(i);
|
||||
assert c <= 0x7f;
|
||||
if (i < end) {
|
||||
bytes[length++] = (byte) c;
|
||||
} else {
|
||||
// Mark the last character as a terminator to avoid overlap matches.
|
||||
bytes[length++] = (byte) (c | 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
byte[] build() {
|
||||
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
|
||||
// Allocate an array with just the necessary capacity,
|
||||
// so that we do not hold on to a larger array for a long time.
|
||||
byte[] bytes = new byte[buffer.remaining()];
|
||||
buffer.get(bytes);
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("likely subtags trie size: " + bytes.length + " bytes");
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static XLikelySubtags.Data build() {
|
||||
AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
|
||||
AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
|
||||
|
||||
Map<String, Map<String, Map<String, LSR>>> langTable =
|
||||
makeTable(languageAliasesBuilder, regionAliasesBuilder);
|
||||
|
||||
TrieBuilder trieBuilder = new TrieBuilder();
|
||||
Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
|
||||
// Reserve index 0 as "no value":
|
||||
// The runtime lookup returns 0 for an intermediate match with no value.
|
||||
lsrIndexes.put(new LSR("", "", "", LSR.DONT_CARE_FLAGS), 0); // arbitrary LSR
|
||||
// Reserve index 1 for SKIP_SCRIPT:
|
||||
// The runtime lookup returns 1 for an intermediate match with a value.
|
||||
// This LSR looks good when printing the data.
|
||||
lsrIndexes.put(new LSR("skip", "script", "", LSR.DONT_CARE_FLAGS), 1);
|
||||
// We could prefill the lsrList with common locales to give them small indexes,
|
||||
// and see if that improves performance a little.
|
||||
for (Map.Entry<String, Map<String, Map<String, LSR>>> ls : langTable.entrySet()) {
|
||||
trieBuilder.length = 0;
|
||||
String lang = ls.getKey();
|
||||
if (lang.equals("und")) {
|
||||
trieBuilder.addStar();
|
||||
} else {
|
||||
trieBuilder.addSubtag(lang);
|
||||
}
|
||||
Map<String, Map<String, LSR>> scriptTable = ls.getValue();
|
||||
boolean skipScript = false;
|
||||
if (scriptTable.size() == 1) {
|
||||
Map<String, LSR> regionTable = scriptTable.get("");
|
||||
if (regionTable.size() == 1) {
|
||||
// Prune the script and region levels from language with
|
||||
// only * for scripts and regions.
|
||||
int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
|
||||
trieBuilder.addValue(i);
|
||||
continue;
|
||||
} else {
|
||||
// Prune the script level from language with only * for scripts
|
||||
// but with real regions.
|
||||
// Set an intermediate value as a signal to the lookup code.
|
||||
trieBuilder.addValue(XLikelySubtags.SKIP_SCRIPT);
|
||||
skipScript = true;
|
||||
}
|
||||
}
|
||||
int scriptStartLength = trieBuilder.length;
|
||||
for (Map.Entry<String, Map<String, LSR>> sr : scriptTable.entrySet()) {
|
||||
trieBuilder.length = scriptStartLength;
|
||||
if (!skipScript) {
|
||||
String script = sr.getKey();
|
||||
if (script.isEmpty()) {
|
||||
trieBuilder.addStar();
|
||||
} else {
|
||||
trieBuilder.addSubtag(script);
|
||||
}
|
||||
}
|
||||
Map<String, LSR> regionTable = sr.getValue();
|
||||
if (regionTable.size() == 1) {
|
||||
// Prune the region level from language+script with only * for regions.
|
||||
int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
|
||||
trieBuilder.addValue(i);
|
||||
continue;
|
||||
}
|
||||
int regionStartLength = trieBuilder.length;
|
||||
for (Map.Entry<String, LSR> r2lsr : regionTable.entrySet()) {
|
||||
trieBuilder.length = regionStartLength;
|
||||
String region = r2lsr.getKey();
|
||||
// Map the whole lang+script+region to a unique, dense index of the LSR.
|
||||
if (region.isEmpty()) {
|
||||
trieBuilder.addStar();
|
||||
} else {
|
||||
trieBuilder.addSubtag(region);
|
||||
}
|
||||
int i = uniqueIdForLsr(lsrIndexes, r2lsr.getValue());
|
||||
trieBuilder.addValue(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
byte[] trie = trieBuilder.build();
|
||||
LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
|
||||
return new XLikelySubtags.Data(
|
||||
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
|
||||
}
|
||||
|
||||
private static int uniqueIdForLsr(Map<LSR, Integer> lsrIndexes, LSR lsr) {
|
||||
Integer index = lsrIndexes.get(lsr);
|
||||
if (index != null) {
|
||||
return index.intValue();
|
||||
} else {
|
||||
int i = lsrIndexes.size();
|
||||
lsrIndexes.put(lsr, i);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, Map<String, Map<String, LSR>>> makeTable(
|
||||
AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
|
||||
Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
|
||||
// set the base data
|
||||
ICUResourceBundle likelySubtags = getSupplementalDataBundle("likelySubtags");
|
||||
UResource.Value value = likelySubtags.getValueWithFallback("");
|
||||
UResource.Table table = value.getTable();
|
||||
UResource.Key key = new UResource.Key();
|
||||
for (int i = 0; table.getKeyAndValue(i, key, value); ++i) {
|
||||
LSR ltp = lsrFromLocaleID(key.toString()); // source
|
||||
final String language = ltp.language;
|
||||
final String script = ltp.script;
|
||||
final String region = ltp.region;
|
||||
|
||||
ltp = lsrFromLocaleID(value.getString()); // target
|
||||
set(result, language, script, region, ltp);
|
||||
|
||||
// now add aliases
|
||||
Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
|
||||
Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
|
||||
for (String languageAlias : languageAliases) {
|
||||
for (String regionAlias : regionAliases) {
|
||||
if (languageAlias.equals(language) && regionAlias.equals(region)) {
|
||||
continue;
|
||||
}
|
||||
set(result, languageAlias, script, regionAlias, ltp);
|
||||
}
|
||||
}
|
||||
}
|
||||
// hack
|
||||
set(result, "und", "Latn", "", new LSR("en", "Latn", "US", LSR.DONT_CARE_FLAGS));
|
||||
|
||||
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
|
||||
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
|
||||
|
||||
// so und-Latn-GH => ak-Latn-GH
|
||||
Map<String, Map<String, LSR>> undScriptMap = result.get("und");
|
||||
Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
|
||||
for (Map.Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
|
||||
final LSR lsr = regionEntry.getValue();
|
||||
set(result, "und", lsr.script, lsr.region, lsr);
|
||||
}
|
||||
//
|
||||
// check that every level has "" (or "und")
|
||||
if (!result.containsKey("und")) {
|
||||
throw new IllegalArgumentException("failure: base");
|
||||
}
|
||||
for (Map.Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
|
||||
String lang = langEntry.getKey();
|
||||
final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
|
||||
if (!scriptMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang);
|
||||
}
|
||||
for (Map.Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
|
||||
String script = scriptEntry.getKey();
|
||||
final Map<String, LSR> regionMap = scriptEntry.getValue();
|
||||
if (!regionMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang + "-" + script);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Parses locale IDs in the likelySubtags data, not arbitrary language tags.
|
||||
private static LSR lsrFromLocaleID(String languageIdentifier) {
|
||||
String[] parts = languageIdentifier.split("[-_]");
|
||||
if (parts.length < 1 || parts.length > 3) {
|
||||
throw new ICUException("too many subtags");
|
||||
}
|
||||
String lang = parts[0];
|
||||
String p2 = parts.length < 2 ? "" : parts[1];
|
||||
String p3 = parts.length < 3 ? "" : parts[2];
|
||||
return p2.length() < 4 ?
|
||||
new LSR(lang, "", p2, LSR.DONT_CARE_FLAGS) :
|
||||
new LSR(lang, p2, p3, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
|
||||
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
|
||||
final String language, final String script, final String region, LSR newValue) {
|
||||
Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
|
||||
Map<String, LSR> regionTable = getSubtable(scriptTable, script);
|
||||
regionTable.put(region, newValue);
|
||||
}
|
||||
|
||||
private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K subtag) {
|
||||
Map<V, T> subTable = table.get(subtag);
|
||||
if (subTable == null) {
|
||||
table.put(subtag, subTable = new TreeMap<>());
|
||||
}
|
||||
return subTable;
|
||||
}
|
||||
}
|
|
@ -489,7 +489,7 @@ public final class LocaleDistanceBuilder {
|
|||
Set<LSR> paradigmLSRs = new LinkedHashSet<>(); // could be TreeSet if LSR were Comparable
|
||||
for (String paradigm : paradigms) {
|
||||
ULocale pl = new ULocale(paradigm);
|
||||
LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);
|
||||
LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl, false);
|
||||
// Clear the LSR flags to make the data equality test in
|
||||
// LocaleDistanceTest happy.
|
||||
paradigmLSRs.add(new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS));
|
||||
|
@ -887,7 +887,7 @@ public final class LocaleDistanceBuilder {
|
|||
}
|
||||
|
||||
public static final void main(String[] args) throws IOException {
|
||||
XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
|
||||
XLikelySubtags.Data likelyData = XLikelySubtags.Data.load();
|
||||
LocaleDistance.Data distanceData = build();
|
||||
System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
|
||||
try (PrintWriter out = openWriter()) {
|
||||
|
|
Loading…
Add table
Reference in a new issue