mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-9695 port LocaleMatcher to C++
This commit is contained in:
parent
7942b58b81
commit
41c24b6c00
48 changed files with 6951 additions and 344 deletions
|
@ -88,8 +88,9 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
|||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
|
||||
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucurr.o \
|
||||
localebuilder.o \
|
||||
localebuilder.o localeprioritylist.o \
|
||||
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
|
||||
lsr.o loclikelysubtags.o locdistance.o localematcher.o \
|
||||
bytestream.o stringpiece.o bytesinkutil.o \
|
||||
stringtriebuilder.o bytestriebuilder.o \
|
||||
bytestrie.o bytestrieiterator.o \
|
||||
|
|
|
@ -35,6 +35,17 @@ CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
|
|||
return *this;
|
||||
}
|
||||
|
||||
/**
 * Returns a newly allocated copy of this string's bytes, including the
 * terminating NUL (len + 1 bytes in total).
 * The caller owns the result and must release it with uprv_free().
 *
 * @param errorCode in/out error code; nothing is done if it already indicates
 *                  failure, and it is set to U_MEMORY_ALLOCATION_ERROR when
 *                  the allocation fails.
 * @return the copy, or nullptr on failure.
 */
char *CharString::cloneData(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    char *copy = static_cast<char *>(uprv_malloc(len + 1));
    if (copy != nullptr) {
        // Copy the contents together with the trailing NUL.
        uprv_memcpy(copy, buffer.getAlias(), len + 1);
    } else {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
    return copy;
}
|
||||
|
||||
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
|
||||
if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
|
||||
len=s.len;
|
||||
|
@ -52,6 +63,18 @@ int32_t CharString::lastIndexOf(char c) const {
|
|||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Tests whether s occurs as a contiguous byte sequence in this string.
 * An empty s is never considered to be contained.
 *
 * @param s the byte sequence to look for
 * @return true if s is non-empty and occurs in this string
 */
bool CharString::contains(StringPiece s) const {
    if (s.empty()) {
        return false;
    }
    // Highest start offset at which s could still fit; negative if s is longer than this string.
    const int32_t maxOffset = len - s.length();
    if (maxOffset < 0) {
        return false;
    }
    const char *haystack = buffer.getAlias();
    int32_t offset = 0;
    while (offset <= maxOffset) {
        if (uprv_memcmp(haystack + offset, s.data(), s.length()) == 0) {
            return true;
        }
        ++offset;
    }
    return false;
}
|
||||
|
||||
CharString &CharString::truncate(int32_t newLength) {
|
||||
if(newLength<0) {
|
||||
newLength=0;
|
||||
|
|
|
@ -82,10 +82,24 @@ public:
|
|||
|
||||
const char *data() const { return buffer.getAlias(); }
|
||||
char *data() { return buffer.getAlias(); }
|
||||
/**
|
||||
* Allocates length()+1 chars and copies the NUL-terminated data().
|
||||
* The caller must uprv_free() the result.
|
||||
*/
|
||||
char *cloneData(UErrorCode &errorCode) const;
|
||||
|
||||
/** @return true if this string and other have the same length and the same bytes */
bool operator==(StringPiece other) const {
    if (len != other.length()) {
        return false;
    }
    // Two empty strings are equal; skip the byte comparison in that case.
    return len == 0 || uprv_memcmp(data(), other.data(), len) == 0;
}
|
||||
/** @return true if this string differs from other in length or contents */
bool operator!=(StringPiece other) const {
    return !(*this == other);
}
|
||||
|
||||
/** @return last index of c, or -1 if c is not in this string */
|
||||
int32_t lastIndexOf(char c) const;
|
||||
|
||||
bool contains(StringPiece s) const;
|
||||
|
||||
CharString &clear() { len=0; buffer[0]=0; return *this; }
|
||||
CharString &truncate(int32_t newLength);
|
||||
|
||||
|
|
|
@ -239,14 +239,20 @@
|
|||
<ClCompile Include="punycode.cpp" />
|
||||
<ClCompile Include="uidna.cpp" />
|
||||
<ClCompile Include="uts46.cpp" />
|
||||
<ClCompile Include="localebuilder.cpp" />
|
||||
<ClCompile Include="localematcher.cpp" />
|
||||
<ClCompile Include="localeprioritylist.cpp" />
|
||||
<ClCompile Include="locavailable.cpp" />
|
||||
<ClCompile Include="locbased.cpp" />
|
||||
<ClCompile Include="locdispnames.cpp" />
|
||||
<ClCompile Include="locdistance.cpp" />
|
||||
<ClCompile Include="locdspnm.cpp" />
|
||||
<ClCompile Include="locid.cpp" />
|
||||
<ClCompile Include="loclikely.cpp" />
|
||||
<ClCompile Include="loclikelysubtags.cpp" />
|
||||
<ClCompile Include="locresdata.cpp" />
|
||||
<ClCompile Include="locutil.cpp" />
|
||||
<ClCompile Include="lsr.cpp" />
|
||||
<ClCompile Include="resbund.cpp" />
|
||||
<ClCompile Include="resbund_cnv.cpp" />
|
||||
<ClCompile Include="ucat.cpp" />
|
||||
|
@ -257,7 +263,6 @@
|
|||
<ClCompile Include="uresdata.cpp" />
|
||||
<ClCompile Include="resource.cpp" />
|
||||
<ClCompile Include="ucurr.cpp" />
|
||||
<ClCompile Include="localebuilder.cpp" />
|
||||
<ClCompile Include="caniter.cpp" />
|
||||
<ClCompile Include="filterednormalizer2.cpp" />
|
||||
<ClCompile Include="loadednormalizer2impl.cpp" />
|
||||
|
@ -408,8 +413,12 @@
|
|||
<ClInclude Include="ustrfmt.h" />
|
||||
<ClInclude Include="util.h" />
|
||||
<ClInclude Include="punycode.h" />
|
||||
<ClInclude Include="localeprioritylist.h" />
|
||||
<ClInclude Include="locbased.h" />
|
||||
<ClInclude Include="locdistance.h" />
|
||||
<ClInclude Include="loclikelysubtags.h" />
|
||||
<ClInclude Include="locutil.h" />
|
||||
<ClInclude Include="lsr.h" />
|
||||
<ClInclude Include="sharedobject.h" />
|
||||
<ClCompile Include="sharedobject.cpp" />
|
||||
<ClInclude Include="ulocimp.h" />
|
||||
|
@ -449,7 +458,6 @@
|
|||
<ClInclude Include="ustr_imp.h" />
|
||||
<ClInclude Include="static_unicode_sets.h" />
|
||||
<ClInclude Include="capi_helper.h" />
|
||||
<ClInclude Include="unicode\localebuilder.h" />
|
||||
<ClInclude Include="restrace.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
|
|
@ -313,6 +313,15 @@
|
|||
<ClCompile Include="uts46.cpp">
|
||||
<Filter>idna</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="localebuilder.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="localematcher.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="localeprioritylist.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="locavailable.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
|
@ -322,18 +331,27 @@
|
|||
<ClCompile Include="locdispnames.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="locdistance.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="locid.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="loclikely.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="loclikelysubtags.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="locresdata.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="locutil.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="lsr.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="resbund.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
|
@ -361,9 +379,6 @@
|
|||
<ClCompile Include="resource.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="localebuilder.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="caniter.cpp">
|
||||
<Filter>normalization</Filter>
|
||||
</ClCompile>
|
||||
|
@ -816,12 +831,24 @@
|
|||
<ClInclude Include="punycode.h">
|
||||
<Filter>idna</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="localeprioritylist.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="locbased.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="locdistance.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="loclikelysubtags.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="locutil.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="lsr.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="ulocimp.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClInclude>
|
||||
|
@ -1078,6 +1105,12 @@
|
|||
<CustomBuild Include="unicode\uidna.h">
|
||||
<Filter>idna</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\localebuilder.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\localematcher.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\locid.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</CustomBuild>
|
||||
|
@ -1237,8 +1270,5 @@
|
|||
<CustomBuild Include="unicode\stringoptions.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\localebuilder.h">
|
||||
<Filter>locales & resources</Filter>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
|
@ -430,14 +430,20 @@
|
|||
<ClCompile Include="punycode.cpp" />
|
||||
<ClCompile Include="uidna.cpp" />
|
||||
<ClCompile Include="uts46.cpp" />
|
||||
<ClCompile Include="localebuilder.cpp" />
|
||||
<ClCompile Include="localematcher.cpp" />
|
||||
<ClCompile Include="localeprioritylist.cpp" />
|
||||
<ClCompile Include="locavailable.cpp" />
|
||||
<ClCompile Include="locbased.cpp" />
|
||||
<ClCompile Include="locdispnames.cpp" />
|
||||
<ClCompile Include="locdistance.cpp" />
|
||||
<ClCompile Include="locdspnm.cpp" />
|
||||
<ClCompile Include="locid.cpp" />
|
||||
<ClCompile Include="loclikely.cpp" />
|
||||
<ClCompile Include="loclikelysubtags.cpp" />
|
||||
<ClCompile Include="locresdata.cpp" />
|
||||
<ClCompile Include="locutil.cpp" />
|
||||
<ClCompile Include="lsr.cpp" />
|
||||
<ClCompile Include="resbund.cpp" />
|
||||
<ClCompile Include="resbund_cnv.cpp" />
|
||||
<ClCompile Include="ucat.cpp" />
|
||||
|
@ -448,7 +454,6 @@
|
|||
<ClCompile Include="uresdata.cpp" />
|
||||
<ClCompile Include="resource.cpp" />
|
||||
<ClCompile Include="ucurr.cpp" />
|
||||
<ClCompile Include="localebuilder.cpp" />
|
||||
<ClCompile Include="caniter.cpp" />
|
||||
<ClCompile Include="filterednormalizer2.cpp" />
|
||||
<ClCompile Include="loadednormalizer2impl.cpp" />
|
||||
|
@ -600,8 +605,12 @@
|
|||
<ClInclude Include="ustrfmt.h" />
|
||||
<ClInclude Include="util.h" />
|
||||
<ClInclude Include="punycode.h" />
|
||||
<ClInclude Include="localeprioritylist.h" />
|
||||
<ClInclude Include="locbased.h" />
|
||||
<ClInclude Include="locdistance.h" />
|
||||
<ClInclude Include="loclikelysubtags.h" />
|
||||
<ClInclude Include="locutil.h" />
|
||||
<ClInclude Include="lsr.h" />
|
||||
<ClInclude Include="sharedobject.h" />
|
||||
<ClCompile Include="sharedobject.cpp" />
|
||||
<ClInclude Include="ulocimp.h" />
|
||||
|
@ -640,7 +649,6 @@
|
|||
<ClInclude Include="ustr_imp.h" />
|
||||
<ClInclude Include="static_unicode_sets.h" />
|
||||
<ClInclude Include="capi_helper.h" />
|
||||
<ClInclude Include="unicode\localebuilder.h" />
|
||||
<ClInclude Include="restrace.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
|
|
@ -157,13 +157,18 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
|
|||
}
|
||||
|
||||
static void
|
||||
_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
|
||||
_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
|
||||
Locale& to, bool validate, UErrorCode& errorCode)
|
||||
{
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
|
||||
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
|
||||
LocalPointer<icu::StringEnumeration> ownedKeywords;
|
||||
if (keywords == nullptr) {
|
||||
ownedKeywords.adoptInstead(from.createKeywords(errorCode));
|
||||
if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
|
||||
keywords = ownedKeywords.getAlias();
|
||||
}
|
||||
const char* key;
|
||||
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
|
||||
while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
|
||||
CharString value;
|
||||
CharStringByteSink sink(&value);
|
||||
from.getKeywordValue(key, sink, errorCode);
|
||||
|
@ -176,34 +181,34 @@ _copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& error
|
|||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
to->setKeywordValue(key, value.data(), errorCode);
|
||||
to.setKeywordValue(key, value.data(), errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
}
|
||||
}
|
||||
|
||||
void static
|
||||
_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
|
||||
_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
|
||||
{
|
||||
// Clear Unicode attributes
|
||||
locale->setKeywordValue(kAttributeKey, "", errorCode);
|
||||
locale.setKeywordValue(kAttributeKey, "", errorCode);
|
||||
|
||||
// Clear all Unicode keyword values
|
||||
LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode));
|
||||
LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
|
||||
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
|
||||
const char* key;
|
||||
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
|
||||
locale->setUnicodeKeywordValue(key, nullptr, errorCode);
|
||||
locale.setUnicodeKeywordValue(key, nullptr, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
|
||||
_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
|
||||
{
|
||||
// Add the unicode extensions to extensions_
|
||||
CharString locale_str("und-u-", errorCode);
|
||||
locale_str.append(value, errorCode);
|
||||
_copyExtensions(
|
||||
Locale::forLanguageTag(locale_str.data(), errorCode),
|
||||
Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
|
||||
locale, false, errorCode);
|
||||
}
|
||||
|
||||
|
@ -235,10 +240,10 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
|
|||
status_);
|
||||
return *this;
|
||||
}
|
||||
_clearUAttributesAndKeyType(extensions_, status_);
|
||||
_clearUAttributesAndKeyType(*extensions_, status_);
|
||||
if (U_FAILURE(status_)) { return *this; }
|
||||
if (!value.empty()) {
|
||||
_setUnicodeExtensions(extensions_, value_str, status_);
|
||||
_setUnicodeExtensions(*extensions_, value_str, status_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -401,6 +406,24 @@ Locale makeBogusLocale() {
|
|||
return bogus;
|
||||
}
|
||||
|
||||
void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
|
||||
{
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
|
||||
if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
|
||||
// Error, or no extensions to copy.
|
||||
return;
|
||||
}
|
||||
if (extensions_ == nullptr) {
|
||||
extensions_ = new Locale();
|
||||
if (extensions_ == nullptr) {
|
||||
status_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
_copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
|
||||
}
|
||||
|
||||
Locale LocaleBuilder::build(UErrorCode& errorCode)
|
||||
{
|
||||
if (U_FAILURE(errorCode)) {
|
||||
|
@ -425,7 +448,7 @@ Locale LocaleBuilder::build(UErrorCode& errorCode)
|
|||
}
|
||||
Locale product(locale_str.data());
|
||||
if (extensions_ != nullptr) {
|
||||
_copyExtensions(*extensions_, &product, true, errorCode);
|
||||
_copyExtensions(*extensions_, nullptr, product, true, errorCode);
|
||||
}
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return makeBogusLocale();
|
||||
|
|
720
icu4c/source/common/localematcher.cpp
Normal file
720
icu4c/source/common/localematcher.cpp
Normal file
|
@ -0,0 +1,720 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// localematcher.cpp
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#ifndef __LOCMATCHER_H__
|
||||
#define __LOCMATCHER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localebuilder.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cstring.h"
|
||||
#include "localeprioritylist.h"
|
||||
#include "loclikelysubtags.h"
|
||||
#include "locdistance.h"
|
||||
#include "lsr.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
#include "uvector.h"
|
||||
|
||||
#define UND_LSR LSR("und", "", "")
|
||||
|
||||
/**
 * Tells the LocaleMatcher how long the desired-locale objects passed into it
 * remain valid.
 *
 * @draft ICU 65
 */
enum ULocMatchLifetime {
    /**
     * The Locale objects are temporary.
     * If a locale is needed beyond a single function call,
     * then the matcher makes its own copy.
     *
     * @draft ICU 65
     */
    ULOCMATCH_TEMPORARY_LOCALES,
    /**
     * The Locale objects are stored at least as long as the matcher is used.
     * If a locale is needed beyond a single function call,
     * then the matcher keeps only a pointer to it and does not copy it.
     *
     * @draft ICU 65
     */
    ULOCMATCH_STORED_LOCALES  // TODO: permanent? cached? clone?
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchLifetime ULocMatchLifetime;
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Move constructor: copies all fields from src. If src owned its desired
 * locale, ownership moves to this object and src is reset so that it no
 * longer refers to that locale.
 */
LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
        desiredLocale(src.desiredLocale),
        supportedLocale(src.supportedLocale),
        desiredIndex(src.desiredIndex),
        supportedIndex(src.supportedIndex),
        desiredIsOwned(src.desiredIsOwned) {
    if (!desiredIsOwned) {
        // Nothing was owned; src may keep its borrowed pointer.
        return;
    }
    src.desiredIsOwned = FALSE;
    src.desiredIndex = -1;
    src.desiredLocale = nullptr;
}
|
||||
|
||||
/** Destructor: deletes the desired locale only if this Result owns it. */
LocaleMatcher::Result::~Result() {
    if (!desiredIsOwned) {
        return;
    }
    delete desiredLocale;
}
|
||||
|
||||
/**
 * Move assignment: releases the desired locale owned by this Result (if any)
 * and takes over all fields of src. If src owned its desired locale,
 * ownership moves to this object and src is reset.
 *
 * @param src the Result to move from
 * @return *this
 */
LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
    // Self-move must leave the object untouched; without this guard the owned
    // locale would be deleted and the result emptied.
    if (this == &src) {
        return *this;
    }

    // Release the owned locale directly instead of calling the destructor
    // on an object that remains in use afterwards.
    if (desiredIsOwned) {
        delete desiredLocale;
    }

    desiredLocale = src.desiredLocale;
    supportedLocale = src.supportedLocale;
    desiredIndex = src.desiredIndex;
    supportedIndex = src.supportedIndex;
    desiredIsOwned = src.desiredIsOwned;

    if (desiredIsOwned) {
        // Ownership has moved here; src must not delete the locale again.
        src.desiredLocale = nullptr;
        src.desiredIndex = -1;
        src.desiredIsOwned = FALSE;
    }
    return *this;
}
|
||||
|
||||
/**
 * Builds a locale that combines the best-matching supported locale with the
 * region, variants and extensions of the best-matching desired locale.
 *
 * @param errorCode in/out error code
 * @return the root locale on failure or when there is no supported locale;
 *         the supported locale itself when there is no desired locale or
 *         both are equal; otherwise the combined locale.
 */
Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
        return Locale::getRoot();
    }
    const Locale *desired = getDesiredLocale();
    if (desired == nullptr) {
        return *supportedLocale;
    }
    if (*supportedLocale == *desired) {
        return *supportedLocale;
    }

    LocaleBuilder builder;
    builder.setLocale(*supportedLocale);

    // Take over the desired region, if there is one.
    const char *desiredRegion = desired->getCountry();
    if (desiredRegion[0] != 0) {
        builder.setRegion(desiredRegion);
    }

    // Take over the desired variants, if there are any.
    // This replaces all variants of the supported locale.
    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
    const char *desiredVariants = desired->getVariant();
    if (desiredVariants[0] != 0) {
        builder.setVariant(desiredVariants);
    }

    // Take over the desired extensions, if there are any.
    // C++ note: The following note, copied from Java, may not be true,
    // as long as C++ copies by legacy ICU keyword, not by extension singleton.
    // Note that this will override any supportedLocale extensions.
    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
    // (replacing calendar).
    builder.copyExtensionsFrom(*desired, errorCode);
    return builder.build(errorCode);
}
|
||||
|
||||
/**
 * Move constructor: takes over all settings of src, including ownership of
 * its supported-locales vector and default locale, and clears those two
 * pointers in src.
 */
LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
        errorCode_(src.errorCode_),
        supportedLocales_(src.supportedLocales_),
        thresholdDistance_(src.thresholdDistance_),
        demotion_(src.demotion_),
        defaultLocale_(src.defaultLocale_),
        favor_(src.favor_) {
    // The owned objects now belong to this Builder.
    src.defaultLocale_ = nullptr;
    src.supportedLocales_ = nullptr;
}
|
||||
|
||||
/** Destructor: deletes the default locale and the supported-locales vector. */
LocaleMatcher::Builder::~Builder() {
    delete defaultLocale_;
    delete supportedLocales_;
}
|
||||
|
||||
/**
 * Move assignment: deletes the objects owned by this Builder and takes over
 * all settings of src, including ownership of its supported-locales vector
 * and default locale.
 *
 * @param src the Builder to move from
 * @return *this
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
    // Self-move must leave the object untouched; without this guard the owned
    // objects would be deleted and the builder emptied.
    if (this == &src) {
        return *this;
    }

    // Release the owned objects directly instead of calling the destructor
    // on an object that remains in use afterwards.
    delete supportedLocales_;
    delete defaultLocale_;

    errorCode_ = src.errorCode_;
    supportedLocales_ = src.supportedLocales_;
    thresholdDistance_ = src.thresholdDistance_;
    demotion_ = src.demotion_;
    defaultLocale_ = src.defaultLocale_;
    favor_ = src.favor_;

    // Ownership has moved here; src must not delete these again.
    src.supportedLocales_ = nullptr;
    src.defaultLocale_ = nullptr;
    return *this;
}
|
||||
|
||||
void LocaleMatcher::Builder::clearSupportedLocales() {
|
||||
if (supportedLocales_ != nullptr) {
|
||||
supportedLocales_->removeAllElements();
|
||||
}
|
||||
}
|
||||
|
||||
bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
|
||||
if (U_FAILURE(errorCode_)) { return false; }
|
||||
if (supportedLocales_ != nullptr) { return true; }
|
||||
supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
|
||||
if (U_FAILURE(errorCode_)) { return false; }
|
||||
if (supportedLocales_ == nullptr) {
|
||||
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Replaces the supported locales with those parsed from a locale list string.
 * The string is parsed by LocalePriorityList; on a parse error the previous
 * supported locales are left unchanged and errorCode_ is set.
 *
 * @param locales the locale list string
 * @return this Builder object
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
        StringPiece locales) {
    LocalePriorityList parsed(locales, errorCode_);
    if (U_FAILURE(errorCode_)) {
        return *this;
    }
    clearSupportedLocales();
    if (!ensureSupportedLocaleVector()) {
        return *this;
    }
    const int32_t count = parsed.getLengthIncludingRemoved();
    for (int32_t index = 0; index < count; ++index) {
        Locale *orphan = parsed.orphanLocaleAt(index);
        if (orphan != nullptr) {
            supportedLocales_->addElement(orphan, errorCode_);
            if (U_FAILURE(errorCode_)) {
                // The locale was not added; delete it here.
                delete orphan;
                break;
            }
        }
    }
    return *this;
}
|
||||
|
||||
/**
 * Replaces the supported locales with clones of the locales delivered by the
 * iterator. Stops at the first allocation or insertion failure and records
 * the error in errorCode_.
 *
 * @param locales iterator over the new supported locales
 * @return this Builder object
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
    if (U_FAILURE(errorCode_)) {
        return *this;
    }
    clearSupportedLocales();
    if (!ensureSupportedLocaleVector()) {
        return *this;
    }
    while (locales.hasNext()) {
        Locale *copy = locales.next().clone();
        if (copy == nullptr) {
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        supportedLocales_->addElement(copy, errorCode_);
        if (U_FAILURE(errorCode_)) {
            // The clone was not added; delete it here.
            delete copy;
            return *this;
        }
    }
    return *this;
}
|
||||
|
||||
/**
 * Appends a clone of locale to the supported locales.
 * Allocation or insertion failures are recorded in errorCode_.
 *
 * @param locale the locale to add
 * @return this Builder object
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
    if (ensureSupportedLocaleVector()) {
        Locale *copy = locale.clone();
        if (copy != nullptr) {
            supportedLocales_->addElement(copy, errorCode_);
            if (U_FAILURE(errorCode_)) {
                // The clone was not added; delete it here.
                delete copy;
            }
        } else {
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return *this;
}
|
||||
|
||||
/**
 * Sets the default locale to a clone of defaultLocale, or clears it when
 * defaultLocale is nullptr. If cloning fails, the previous default locale
 * is kept and errorCode_ is set to U_MEMORY_ALLOCATION_ERROR.
 *
 * @param defaultLocale the new default locale, or nullptr for none
 * @return this Builder object
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
    if (U_FAILURE(errorCode_)) {
        return *this;
    }
    Locale *replacement = nullptr;
    if (defaultLocale != nullptr) {
        // Clone before deleting the old value, in case both are the same object.
        replacement = defaultLocale->clone();
        if (replacement == nullptr) {
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }
    delete defaultLocale_;
    defaultLocale_ = replacement;
    return *this;
}
|
||||
|
||||
/**
 * Stores the favor-subtag setting, unless the builder already has an error.
 *
 * @param subtag the subtag to favor
 * @return this Builder object
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
    if (U_SUCCESS(errorCode_)) {
        favor_ = subtag;
    }
    return *this;
}
|
||||
|
||||
/**
 * Stores the demotion setting, unless the builder already has an error.
 *
 * @param demotion the demotion per desired locale
 * @return this Builder object
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
    if (U_SUCCESS(errorCode_)) {
        demotion_ = demotion;
    }
    return *this;
}
|
||||
|
||||
#if 0
/**
 * <i>Internal only!</i>
 * Currently disabled (compiled out).
 *
 * @param thresholdDistance the thresholdDistance to set, with -1 = default;
 *                          values above 100 are clamped to 100
 * @return this Builder object
 * @internal
 * @deprecated This API is ICU internal only.
 */
LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
    if (U_FAILURE(errorCode_)) { return *this; }
    if (thresholdDistance > 100) {
        thresholdDistance = 100;
    }
    thresholdDistance_ = thresholdDistance;
    return *this;
}
#endif
|
||||
|
||||
/**
 * Copies the builder's error code to outErrorCode if the builder has an
 * error and outErrorCode does not already indicate a failure.
 *
 * @param outErrorCode in/out error code
 * @return TRUE if outErrorCode indicates a failure on return, otherwise FALSE
 */
UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
    if (U_SUCCESS(outErrorCode)) {
        if (U_SUCCESS(errorCode_)) {
            return FALSE;
        }
        outErrorCode = errorCode_;
    }
    return TRUE;
}
|
||||
|
||||
/**
 * Creates a LocaleMatcher from the current builder settings.
 * An error recorded in the builder is passed on through errorCode, unless
 * errorCode already indicates a failure.
 *
 * @param errorCode in/out error code
 * @return the new LocaleMatcher
 */
LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode_) && U_SUCCESS(errorCode)) {
        errorCode = errorCode_;
    }
    return LocaleMatcher(*this, errorCode);
}
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Returns the maximized LSR for the locale, or the "und" LSR when the error
 * code already indicates failure, the locale is bogus, or its name is empty.
 */
LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
                       UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return UND_LSR;
    }
    if (locale.isBogus()) {
        return UND_LSR;
    }
    if (locale.getName()[0] == 0) {  // empty name: "und"
        return UND_LSR;
    }
    return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
}
|
||||
|
||||
/** UHashtable key hash function: returns the hashCode stored in the LSR that the token points to. */
int32_t hashLSR(const UHashTok token) {
    return static_cast<const LSR *>(token.pointer)->hashCode;
}
|
||||
|
||||
/** UHashtable key comparator: compares the two LSR objects that the tokens point to. */
UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
    const LSR &left = *static_cast<const LSR *>(t1.pointer);
    const LSR &right = *static_cast<const LSR *>(t2.pointer);
    return left == right;
}
|
||||
|
||||
/**
 * Maps lsr to i in the hash table unless lsr already has a non-zero value.
 * The table stores a pointer to lsr, not a copy. i must be positive, because
 * a lookup result of 0 is treated as "absent".
 *
 * @return true if the entry was newly added; false if the error code already
 *         indicated failure, the key was present, or the insertion failed
 */
bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return false;
    }
    U_ASSERT(i > 0);
    if (uhash_geti(lsrToIndex, &lsr) != 0) {
        // There is already an entry for this LSR.
        return false;
    }
    uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
    return U_SUCCESS(errorCode);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
||||
likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
|
||||
localeDistance(*LocaleDistance::getSingleton(errorCode)),
|
||||
thresholdDistance(builder.thresholdDistance_),
|
||||
demotionPerDesiredLocale(0),
|
||||
favorSubtag(builder.favor_),
|
||||
supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
|
||||
supportedLsrToIndex(nullptr),
|
||||
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
|
||||
ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (thresholdDistance < 0) {
|
||||
thresholdDistance = localeDistance.getDefaultScriptDistance();
|
||||
}
|
||||
supportedLocalesLength = builder.supportedLocales_ != nullptr ?
|
||||
builder.supportedLocales_->size() : 0;
|
||||
const Locale *def = builder.defaultLocale_;
|
||||
int32_t idef = -1;
|
||||
if (supportedLocalesLength > 0) {
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., language tag strings)
|
||||
// we can return those by parallel index.
|
||||
supportedLocales = static_cast<const Locale **>(
|
||||
uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
|
||||
// Supported LSRs in input order.
|
||||
// In C++, we store these permanently to simplify ownership management
|
||||
// in the hash tables. Duplicate LSRs (if any) are unused overhead.
|
||||
lsrs = new LSR[supportedLocalesLength];
|
||||
if (supportedLocales == nullptr || lsrs == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
// If the constructor fails partway, we need null pointers for destructibility.
|
||||
uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
|
||||
// Also find the first supported locale whose LSR is
|
||||
// the same as that for the default locale.
|
||||
LSR builderDefaultLSR;
|
||||
const LSR *defLSR = nullptr;
|
||||
if (def != nullptr) {
|
||||
builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
defLSR = &builderDefaultLSR;
|
||||
}
|
||||
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
|
||||
const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
|
||||
supportedLocales[i] = locale.clone();
|
||||
if (supportedLocales[i] == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
const Locale &supportedLocale = *supportedLocales[i];
|
||||
LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
|
||||
lsr.setHashCode();
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
|
||||
idef = i;
|
||||
defLSR = &lsr; // owned pointer to put into supportedLsrToIndex
|
||||
if (*def == supportedLocale) {
|
||||
def = &supportedLocale; // owned pointer to keep
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We need an unordered map from LSR to first supported locale with that LSR,
|
||||
// and an ordered list of (LSR, supported index).
|
||||
// We insert the supported locales in the following order:
|
||||
// 1. Default locale, if it is supported.
|
||||
// 2. Priority locales (aka "paradigm locales") in builder order.
|
||||
// 3. Remaining locales in builder order.
|
||||
// In Java, we use a LinkedHashMap for both map & ordered lists.
|
||||
// In C++, we use separate structures.
|
||||
// We over-allocate arrays of LSRs and indexes for simplicity.
|
||||
// We reserve slots at the array starts for the default and paradigm locales,
|
||||
// plus enough for all supported locales.
|
||||
// If there are few paradigm locales and few duplicate supported LSRs,
|
||||
// then the amount of wasted space is small.
|
||||
supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
|
||||
supportedLocalesLength, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
|
||||
int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
|
||||
supportedLSRs = static_cast<const LSR **>(
|
||||
uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
|
||||
supportedIndexes = static_cast<int32_t *>(
|
||||
uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
|
||||
if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t paradigmIndex = 0;
|
||||
int32_t otherIndex = paradigmLimit;
|
||||
if (idef >= 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
|
||||
supportedLSRs[0] = defLSR;
|
||||
supportedIndexes[0] = idef;
|
||||
paradigmIndex = 1;
|
||||
}
|
||||
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
|
||||
if (i == idef) { continue; }
|
||||
const Locale &locale = *supportedLocales[i];
|
||||
const LSR &lsr = lsrs[i];
|
||||
if (defLSR == nullptr) {
|
||||
U_ASSERT(i == 0);
|
||||
def = &locale;
|
||||
defLSR = &lsr;
|
||||
idef = 0;
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
|
||||
supportedLSRs[0] = &lsr;
|
||||
supportedIndexes[0] = 0;
|
||||
paradigmIndex = 1;
|
||||
} else if (idef >= 0 && lsr == *defLSR) {
|
||||
// lsr == *defLSR means that this supported locale is
|
||||
// a duplicate of the default locale.
|
||||
// Either an explicit default locale is supported, and we added it before the loop,
|
||||
// or there is no explicit default locale, and this is
|
||||
// a duplicate of the first supported locale.
|
||||
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
|
||||
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
|
||||
} else {
|
||||
if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
|
||||
if (localeDistance.isParadigmLSR(lsr)) {
|
||||
supportedLSRs[paradigmIndex] = &lsr;
|
||||
supportedIndexes[paradigmIndex++] = i;
|
||||
} else {
|
||||
supportedLSRs[otherIndex] = &lsr;
|
||||
supportedIndexes[otherIndex++] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
}
|
||||
// Squeeze out unused array slots.
|
||||
if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
|
||||
uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
|
||||
(otherIndex - paradigmLimit) * sizeof(const LSR *));
|
||||
uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
|
||||
(otherIndex - paradigmLimit) * sizeof(int32_t));
|
||||
}
|
||||
supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
|
||||
}
|
||||
|
||||
if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
|
||||
ownedDefaultLocale = def->clone();
|
||||
if (ownedDefaultLocale == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
def = ownedDefaultLocale;
|
||||
}
|
||||
defaultLocale = def;
|
||||
defaultLocaleIndex = idef;
|
||||
|
||||
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
|
||||
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Move constructor: takes over every resource held by src.
 * Afterwards src holds no arrays, hashtable or owned default locale,
 * so destroying it releases nothing that now belongs to this object.
 */
LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
        likelySubtags(src.likelySubtags),
        localeDistance(src.localeDistance),
        thresholdDistance(src.thresholdDistance),
        demotionPerDesiredLocale(src.demotionPerDesiredLocale),
        favorSubtag(src.favorSubtag),
        supportedLocales(src.supportedLocales),
        lsrs(src.lsrs),
        supportedLocalesLength(src.supportedLocalesLength),
        supportedLsrToIndex(src.supportedLsrToIndex),
        supportedLSRs(src.supportedLSRs),
        supportedIndexes(src.supportedIndexes),
        supportedLSRsLength(src.supportedLSRsLength),
        ownedDefaultLocale(src.ownedDefaultLocale),
        defaultLocale(src.defaultLocale),
        defaultLocaleIndex(src.defaultLocaleIndex) {
    // Supported locales and their LSRs.
    src.supportedLocalesLength = 0;
    src.supportedLocales = nullptr;
    src.lsrs = nullptr;

    // LSR lookup structures.
    src.supportedLSRsLength = 0;
    src.supportedLsrToIndex = nullptr;
    src.supportedLSRs = nullptr;
    src.supportedIndexes = nullptr;

    // Default locale.
    src.defaultLocaleIndex = -1;
    src.defaultLocale = nullptr;
    src.ownedDefaultLocale = nullptr;
}
|
||||
|
||||
/**
 * Releases the supported Locale objects, the arrays that index them,
 * the LSR hashtable, and the separately owned default locale (if any).
 */
LocaleMatcher::~LocaleMatcher() {
    // Each supported locale is an individually allocated object.
    int32_t i = 0;
    while (i < supportedLocalesLength) {
        delete supportedLocales[i];
        ++i;
    }
    uprv_free(supportedLocales);

    delete[] lsrs;

    uhash_close(supportedLsrToIndex);
    uprv_free(supportedLSRs);
    uprv_free(supportedIndexes);

    delete ownedDefaultLocale;
}
|
||||
|
||||
/**
 * Move assignment: releases this matcher's resources and takes over src's.
 *
 * likelySubtags and localeDistance are not reassigned here
 * (NOTE(review): presumably they refer to shared singletons that are
 * identical for all matchers -- confirm against the class declaration).
 *
 * @param src the matcher to move from; left without any owned resources.
 * @return *this
 */
LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
    // Self-move must be a no-op: without this guard we would destroy our own
    // data and then copy the dangling pointers back into ourselves.
    if (this == &src) { return *this; }

    // Release everything we currently own.
    this->~LocaleMatcher();

    thresholdDistance = src.thresholdDistance;
    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
    favorSubtag = src.favorSubtag;
    supportedLocales = src.supportedLocales;
    lsrs = src.lsrs;
    supportedLocalesLength = src.supportedLocalesLength;
    supportedLsrToIndex = src.supportedLsrToIndex;
    supportedLSRs = src.supportedLSRs;
    supportedIndexes = src.supportedIndexes;
    supportedLSRsLength = src.supportedLSRsLength;
    ownedDefaultLocale = src.ownedDefaultLocale;
    defaultLocale = src.defaultLocale;
    defaultLocaleIndex = src.defaultLocaleIndex;

    // Leave src empty so that its destructor does not free what we now own.
    src.supportedLocales = nullptr;
    src.lsrs = nullptr;
    src.supportedLocalesLength = 0;
    src.supportedLsrToIndex = nullptr;
    src.supportedLSRs = nullptr;
    src.supportedIndexes = nullptr;
    src.supportedLSRsLength = 0;
    src.ownedDefaultLocale = nullptr;
    src.defaultLocale = nullptr;
    src.defaultLocaleIndex = -1;
    return *this;
}
|
||||
|
||||
class LocaleLsrIterator {
|
||||
public:
|
||||
LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
|
||||
ULocMatchLifetime lifetime) :
|
||||
likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
|
||||
|
||||
~LocaleLsrIterator() {
|
||||
if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
|
||||
delete remembered;
|
||||
}
|
||||
}
|
||||
|
||||
bool hasNext() const {
|
||||
return locales.hasNext();
|
||||
}
|
||||
|
||||
LSR next(UErrorCode &errorCode) {
|
||||
current = &locales.next();
|
||||
return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
|
||||
}
|
||||
|
||||
void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
bestDesiredIndex = desiredIndex;
|
||||
if (lifetime == ULOCMATCH_STORED_LOCALES) {
|
||||
remembered = current;
|
||||
} else {
|
||||
// ULOCMATCH_TEMPORARY_LOCALES
|
||||
delete remembered;
|
||||
remembered = new Locale(*current);
|
||||
if (remembered == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Locale *orphanRemembered() {
|
||||
const Locale *rem = remembered;
|
||||
remembered = nullptr;
|
||||
return rem;
|
||||
}
|
||||
|
||||
int32_t getBestDesiredIndex() const {
|
||||
return bestDesiredIndex;
|
||||
}
|
||||
|
||||
private:
|
||||
const XLikelySubtags &likelySubtags;
|
||||
Locale::Iterator &locales;
|
||||
ULocMatchLifetime lifetime;
|
||||
const Locale *current = nullptr, *remembered = nullptr;
|
||||
int32_t bestDesiredIndex = -1;
|
||||
};
|
||||
|
||||
/**
 * Finds the supported locale that best matches a single desired locale.
 *
 * @return the matching supported locale, the default locale if there is
 *         no match or the lookup fails, or nullptr if errorCode was
 *         already a failure on entry.
 */
const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return nullptr; }
    const int32_t bestIndex = getBestSuppIndex(
        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
        nullptr, errorCode);
    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
        return supportedLocales[bestIndex];
    }
    return defaultLocale;
}
|
||||
|
||||
/**
 * Finds the supported locale that best matches a sequence of desired locales.
 *
 * @return the matching supported locale; the default locale if the
 *         sequence is empty, nothing matches, or the lookup fails;
 *         nullptr if errorCode was already a failure on entry.
 */
const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
                                          UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return nullptr; }
    if (desiredLocales.hasNext()) {
        // The remembered desired locale is not returned from here, so a temporary copy suffices.
        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
        const int32_t bestIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
        if (U_SUCCESS(errorCode) && bestIndex >= 0) {
            return supportedLocales[bestIndex];
        }
    }
    return defaultLocale;
}
|
||||
|
||||
/**
 * Parses an accept-language style list string into a LocalePriorityList
 * and returns the best match for the resulting locale sequence.
 * A parsing failure is reported through errorCode, in which case
 * getBestMatch() returns nullptr.
 */
const Locale *LocaleMatcher::getBestMatchForListString(
        StringPiece desiredLocaleList, UErrorCode &errorCode) const {
    LocalePriorityList priorityList(desiredLocaleList, errorCode);
    LocalePriorityList::Iterator desired(priorityList.iterator());
    return getBestMatch(desired, errorCode);
}
|
||||
|
||||
/**
 * Like getBestMatch(const Locale &) but returns a Result that also carries
 * the desired locale and the supported-locale index.
 * On failure or when nothing matches, the Result has no desired locale
 * (index -1) and refers to the default locale.
 */
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
        const Locale &desiredLocale, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode)) {
        const int32_t bestIndex = getBestSuppIndex(
            getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
            nullptr, errorCode);
        if (U_SUCCESS(errorCode) && bestIndex >= 0) {
            // The caller's desired locale is aliased, not owned.
            return Result(&desiredLocale, supportedLocales[bestIndex], 0, bestIndex, FALSE);
        }
    }
    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
}
|
||||
|
||||
/**
 * Like getBestMatch(Locale::Iterator &) but returns a Result that also
 * carries the best desired locale (a copy owned by the Result) and
 * the desired and supported indexes.
 * On failure, for an empty sequence, or when nothing matches,
 * the Result has no desired locale (index -1) and refers to the default locale.
 */
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode) && desiredLocales.hasNext()) {
        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
        const int32_t bestIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
        if (U_SUCCESS(errorCode) && bestIndex >= 0) {
            // Ownership of the remembered copy moves into the Result.
            return Result(lsrIter.orphanRemembered(), supportedLocales[bestIndex],
                          lsrIter.getBestDesiredIndex(), bestIndex, TRUE);
        }
    }
    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
}
|
||||
|
||||
/**
 * Core matching loop shared by the getBestMatch...() functions.
 *
 * @param desiredLSR    maximized LSR of the first desired locale
 * @param remainingIter source of further desired LSRs, or nullptr if there is only one
 * @param errorCode     ICU error code; -1 is returned if it indicates a failure on entry
 * @return index into supportedLocales of the best match, or -1 if none
 */
int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
                                        UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return -1; }
    int32_t desiredIndex = 0;
    int32_t bestLsrIndex = -1;
    int32_t bestDistance = thresholdDistance;
    while (true) {
        // Quick check for an exact maximized LSR.
        // The table stores suppIndex+1 so that 0 can mean "not found".
        if (supportedLsrToIndex != nullptr) {
            desiredLSR.setHashCode();
            const int32_t indexPlus1 = uhash_geti(supportedLsrToIndex, &desiredLSR);
            if (indexPlus1 != 0) {
                if (remainingIter != nullptr) {
                    remainingIter->rememberCurrent(desiredIndex, errorCode);
                }
                return indexPlus1 - 1;
            }
        }

        // The result packs (index << 8) | distance; a negative value means no match.
        const int32_t packed = localeDistance.getBestIndexAndDistance(
                desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
        if (packed >= 0) {
            bestDistance = packed & 0xff;
            if (remainingIter != nullptr) {
                remainingIter->rememberCurrent(desiredIndex, errorCode);
                if (U_FAILURE(errorCode)) { return -1; }
            }
            bestLsrIndex = packed >> 8;
        }

        // Each further desired locale must beat a lower threshold.
        bestDistance -= demotionPerDesiredLocale;
        if (bestDistance <= 0 || remainingIter == nullptr || !remainingIter->hasNext()) {
            break;
        }
        desiredLSR = remainingIter->next(errorCode);
        if (U_FAILURE(errorCode)) { return -1; }
        ++desiredIndex;
    }
    // No good match: -1. Otherwise map the LSR slot back to the supported-locale index.
    return bestLsrIndex < 0 ? -1 : supportedIndexes[bestLsrIndex];
}
|
||||
|
||||
double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
|
||||
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
|
||||
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return 0; }
|
||||
const LSR *pSuppLSR = &suppLSR;
|
||||
int32_t distance = localeDistance.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
|
||||
&pSuppLSR, 1,
|
||||
thresholdDistance, favorSubtag) & 0xff;
|
||||
return (100 - distance) / 100.0;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __LOCMATCHER_H__
|
239
icu4c/source/common/localeprioritylist.cpp
Normal file
239
icu4c/source/common/localeprioritylist.cpp
Normal file
|
@ -0,0 +1,239 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// localeprioritylist.cpp
|
||||
// created: 2019jul11 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "localeprioritylist.h"
|
||||
#include "uarrsort.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
||||
/** UHashtable key hasher: the key pointer is a Locale; delegates to Locale::hashCode(). */
int32_t hashLocale(const UHashTok token) {
    return static_cast<const Locale *>(token.pointer)->hashCode();
}
|
||||
|
||||
/** UHashtable key comparator: both key pointers are Locales, compared with operator==. */
UBool compareLocales(const UHashTok t1, const UHashTok t2) {
    const Locale &left = *static_cast<const Locale *>(t1.pointer);
    const Locale &right = *static_cast<const Locale *>(t2.pointer);
    return left == right;
}
|
||||
|
||||
constexpr int32_t WEIGHT_ONE = 1000;
|
||||
|
||||
struct LocaleAndWeight {
|
||||
Locale *locale;
|
||||
int32_t weight; // 0..1000 = 0.0..1.0
|
||||
int32_t index; // force stable sort
|
||||
|
||||
int32_t compare(const LocaleAndWeight &other) const {
|
||||
int32_t diff = other.weight - weight; // descending: other-this
|
||||
if (diff != 0) { return diff; }
|
||||
return index - other.index;
|
||||
}
|
||||
};
|
||||
|
||||
/** uprv_sortArray() comparator for LocaleAndWeight items; the context is unused. */
int32_t U_CALLCONV
compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
    const LocaleAndWeight &lhs = *static_cast<const LocaleAndWeight *>(left);
    const LocaleAndWeight &rhs = *static_cast<const LocaleAndWeight *>(right);
    return lhs.compare(rhs);
}
|
||||
|
||||
/**
 * Skips ASCII space characters (only ' ', not tabs or other whitespace).
 *
 * @param p     start of the text
 * @param limit exclusive end of the text
 * @return pointer to the first non-space character, or limit if there is none
 */
const char *skipSpaces(const char *p, const char *limit) {
    for (; p < limit; ++p) {
        if (*p != ' ') { break; }
    }
    return p;
}
|
||||
|
||||
/**
 * Measures the language tag that starts at p.
 * The tag ends at the first accept-language delimiter (' ', ',' or ';')
 * or at limit. No other validation is done here; that is left to
 * the Locale constructor.
 *
 * @return the number of chars before the delimiter; 0 if p is at a delimiter or at limit
 */
int32_t findTagLength(const char *p, const char *limit) {
    const char *end = p;
    while (end < limit) {
        const char ch = *end;
        if (ch == ' ' || ch == ',' || ch == ';') { break; }
        ++end;
    }
    return static_cast<int32_t>(end - p);
}
|
||||
|
||||
/**
|
||||
* Parses and returns a qvalue weight in millis.
|
||||
* Advances p to after the parsed substring.
|
||||
* Returns a negative value if parsing fails.
|
||||
*/
|
||||
int32_t parseWeight(const char *&p, const char *limit) {
|
||||
p = skipSpaces(p, limit);
|
||||
char c;
|
||||
if (p == limit || ((c = *p) != '0' && c != '1')) { return -1; }
|
||||
int32_t weight = (c - '0') * 1000;
|
||||
if (++p == limit || *p != '.') { return weight; }
|
||||
int32_t multiplier = 100;
|
||||
while (++p != limit && '0' <= (c = *p) && c <= '9') {
|
||||
c -= '0';
|
||||
if (multiplier > 0) {
|
||||
weight += c * multiplier;
|
||||
multiplier /= 10;
|
||||
} else if (multiplier == 0) {
|
||||
// round up
|
||||
if (c >= 5) { ++weight; }
|
||||
multiplier = -1;
|
||||
} // else ignore further fraction digits
|
||||
}
|
||||
return weight <= WEIGHT_ONE ? weight : -1; // bad if > 1.0
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
|
||||
*
|
||||
* This wrapper exists (and is not in an anonymous namespace)
|
||||
* so that we can forward-declare it in the header file and
|
||||
* don't have to expose the MaybeStackArray specialization and
|
||||
* the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
|
||||
* Also, otherwise we would have to do a platform-specific
|
||||
* template export declaration of some kind for the MaybeStackArray specialization
|
||||
* to be properly exported from the common DLL.
|
||||
*/
|
||||
struct LocaleAndWeightArray : public UMemory {
|
||||
MaybeStackArray<LocaleAndWeight, 20> array;
|
||||
};
|
||||
|
||||
/**
 * Parses an accept-language style string such as "da, en-gb;q=0.8, en;q=0.7"
 * into a list of locales, then sorts it by descending weight.
 *
 * Empty range fields (",,") are skipped, and extra spaces are accepted
 * around tags, ';', 'q', '=' and the qvalue.
 *
 * @param s         the list string
 * @param errorCode ICU error code. Set to U_ILLEGAL_ARGUMENT_ERROR for syntax errors
 *                  and to U_MEMORY_ALLOCATION_ERROR for allocation failures
 *                  (also when the Locale constructed from a tag is bogus).
 *                  If it indicates a failure on entry, nothing is parsed.
 */
LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return; }
    list = new LocaleAndWeightArray();
    if (list == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    const char *p = s.data();
    const char *limit = p + s.length();
    while ((p = skipSpaces(p, limit)) != limit) {
        if (*p == ',') {  // empty range field
            ++p;
            continue;
        }
        int32_t tagLength = findTagLength(p, limit);
        if (tagLength == 0) {
            // p is at a ';' with no language tag before it.
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        // Copy the tag so that it is NUL-terminated for the Locale constructor.
        CharString tag(p, tagLength, errorCode);
        if (U_FAILURE(errorCode)) { return; }
        Locale locale = Locale(tag.data());
        if (locale.isBogus()) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        int32_t weight = WEIGHT_ONE;
        if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
            // Expect: ";" "q" "=" qvalue, each optionally preceded by spaces.
            if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
                    (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
                    (++p, (weight = parseWeight(p, limit)) < 0)) {
                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
            p = skipSpaces(p, limit);
        }
        if (p != limit && *p != ',') {  // trailing junk
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        // Stop right away if adding fails: continuing to parse could
        // overwrite the real error with U_ILLEGAL_ARGUMENT_ERROR.
        if (!add(locale, weight, errorCode)) { return; }
        if (p == limit) { break; }
        ++p;  // skip the ','
    }
    sort(errorCode);
}
|
||||
|
||||
/**
 * Deletes the Locale objects that are still in the list
 * (removed or orphaned slots hold nullptr), the list itself, and the lookup map.
 */
LocalePriorityList::~LocalePriorityList() {
    if (list != nullptr) {
        int32_t i = 0;
        while (i < listLength) {
            delete list->array[i].locale;
            ++i;
        }
        delete list;
    }
    uhash_close(map);
}
|
||||
|
||||
/**
 * Returns the locale stored in list slot i, which is nullptr for a slot
 * whose locale was removed or orphaned.
 * There is no bounds or null-list check here.
 */
const Locale *LocalePriorityList::localeAt(int32_t i) const {
    const LocaleAndWeight &item = list->array[i];
    return item.locale;
}
|
||||
|
||||
/**
 * Hands the locale in list slot i over to the caller and clears the slot,
 * so that this list's destructor will not delete it.
 *
 * @return the locale (may be nullptr if the slot was already empty),
 *         or nullptr if there is no list
 */
Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
    if (list == nullptr) { return nullptr; }
    LocaleAndWeight &item = list->array[i];
    Locale *orphan = item.locale;
    item.locale = nullptr;
    return orphan;
}
|
||||
|
||||
bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
if (map == nullptr) {
|
||||
if (weight <= 0) { return true; } // do not add q=0
|
||||
map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
LocalPointer<Locale> clone;
|
||||
int32_t index = uhash_geti(map, &locale);
|
||||
if (index != 0) {
|
||||
// Duplicate: Remove the old item and append it anew.
|
||||
LocaleAndWeight &lw = list->array[index - 1];
|
||||
clone.adoptInstead(lw.locale);
|
||||
lw.locale = nullptr;
|
||||
lw.weight = 0;
|
||||
++numRemoved;
|
||||
}
|
||||
if (weight <= 0) { // do not add q=0
|
||||
if (index != 0) {
|
||||
// Not strictly necessary but cleaner.
|
||||
uhash_removei(map, &locale);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (clone.isNull()) {
|
||||
clone.adoptInstead(locale.clone());
|
||||
if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (listLength == list->array.getCapacity()) {
|
||||
int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
|
||||
if (list->array.resize(newCapacity, listLength) == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
LocaleAndWeight &lw = list->array[listLength];
|
||||
lw.locale = clone.orphan();
|
||||
lw.weight = weight;
|
||||
lw.index = listLength++;
|
||||
if (weight < WEIGHT_ONE) { hasWeights = true; }
|
||||
U_ASSERT(uhash_count(map) == getLength());
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Sorts the list by descending weight.
 * The comparator falls back to the item index for equal weights,
 * which makes the result stable.
 * Nothing is done after a failure, for fewer than two locales,
 * or when every locale has the weight 1.0.
 */
void LocalePriorityList::sort(UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return; }
    if (getLength() <= 1) { return; }
    if (!hasWeights) { return; }  // no mix of weights
    uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
                   compareLocaleAndWeight, nullptr, FALSE, &errorCode);
}
|
||||
|
||||
U_NAMESPACE_END
|
115
icu4c/source/common/localeprioritylist.h
Normal file
115
icu4c/source/common/localeprioritylist.h
Normal file
|
@ -0,0 +1,115 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// localeprioritylist.h
|
||||
// created: 2019jul11 Markus W. Scherer
|
||||
|
||||
#ifndef __LOCALEPRIORITYLIST_H__
|
||||
#define __LOCALEPRIORITYLIST_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
struct UHashtable;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct LocaleAndWeightArray;
|
||||
|
||||
/**
|
||||
* Parses a list of locales from an accept-language string.
|
||||
* We are a bit more lenient than the spec:
|
||||
* We accept extra whitespace in more places, empty range fields,
|
||||
* and any number of qvalue fraction digits.
|
||||
*
|
||||
* https://tools.ietf.org/html/rfc2616#section-14.4
|
||||
* 14.4 Accept-Language
|
||||
*
|
||||
* Accept-Language = "Accept-Language" ":"
|
||||
* 1#( language-range [ ";" "q" "=" qvalue ] )
|
||||
* language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
|
||||
*
|
||||
* Each language-range MAY be given an associated quality value which
|
||||
* represents an estimate of the user's preference for the languages
|
||||
* specified by that range. The quality value defaults to "q=1". For
|
||||
* example,
|
||||
*
|
||||
* Accept-Language: da, en-gb;q=0.8, en;q=0.7
|
||||
*
|
||||
* https://tools.ietf.org/html/rfc2616#section-3.9
|
||||
* 3.9 Quality Values
|
||||
*
|
||||
* HTTP content negotiation (section 12) uses short "floating point"
|
||||
* numbers to indicate the relative importance ("weight") of various
|
||||
* negotiable parameters. A weight is normalized to a real number in
|
||||
* the range 0 through 1, where 0 is the minimum and 1 the maximum
|
||||
* value. If a parameter has a quality value of 0, then content with
|
||||
* this parameter is `not acceptable' for the client. HTTP/1.1
|
||||
* applications MUST NOT generate more than three digits after the
|
||||
* decimal point. User configuration of these values SHOULD also be
|
||||
* limited in this fashion.
|
||||
*
|
||||
* qvalue = ( "0" [ "." 0*3DIGIT ] )
|
||||
* | ( "1" [ "." 0*3("0") ] )
|
||||
*/
|
||||
class U_COMMON_API LocalePriorityList : public UMemory {
public:
    /**
     * Iterates over the locales that are in the list,
     * skipping slots whose locale pointer is nullptr.
     */
    class Iterator : public Locale::Iterator {
    public:
        UBool hasNext() const override { return count < length; }

        /** Returns the next non-null locale. Call only while hasNext() is true. */
        const Locale &next() override {
            const Locale *locale;
            do {
                locale = list.localeAt(index++);
            } while (locale == nullptr);
            ++count;
            return *locale;
        }

    private:
        friend class LocalePriorityList;

        Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}

        const LocalePriorityList &list;
        int32_t index = 0;  // next list slot to look at
        int32_t count = 0;  // number of locales returned so far
        const int32_t length;  // number of locales to return, from list.getLength()
    };

    /** Parses the list string; see the implementation for the accepted syntax. */
    LocalePriorityList(StringPiece s, UErrorCode &errorCode);

    ~LocalePriorityList();

    /** @return the number of list slots minus the number of removed entries */
    int32_t getLength() const { return listLength - numRemoved; }

    /** @return the number of list slots, including those of removed entries */
    int32_t getLengthIncludingRemoved() const { return listLength; }

    Iterator iterator() const { return Iterator(*this); }

    /** @return the locale in list slot i; may be nullptr */
    const Locale *localeAt(int32_t i) const;

    /** Returns the locale in list slot i and clears the slot. */
    Locale *orphanLocaleAt(int32_t i);

private:
    // Not copyable.
    LocalePriorityList(const LocalePriorityList &) = delete;
    LocalePriorityList &operator=(const LocalePriorityList &) = delete;

    bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);

    void sort(UErrorCode &errorCode);

    LocaleAndWeightArray *list = nullptr;
    int32_t listLength = 0;
    int32_t numRemoved = 0;
    bool hasWeights = false;  // other than 1.0
    UHashtable *map = nullptr;
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __LOCALEPRIORITYLIST_H__
|
364
icu4c/source/common/locdistance.cpp
Normal file
364
icu4c/source/common/locdistance.cpp
Normal file
|
@ -0,0 +1,364 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// locdistance.cpp
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "cstring.h"
|
||||
#include "locdistance.h"
|
||||
#include "loclikelysubtags.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uinvchar.h"
|
||||
#include "umutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Flag bit on the last character of a subtag in the trie.
 * The data builder and the lookup code must agree on it.
 */
constexpr int32_t END_OF_SUBTAG = 0x80;

/** Flag bit in a distance value; the builder sets it. */
constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;

/** Flag bit in a distance value; trieNext() sets it. */
constexpr int32_t DISTANCE_IS_FINAL = 0x100;

/** Both distance flag bits together, for masking. */
constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;

/** Distance value reported when no supported LSR is below the threshold. */
constexpr int32_t ABOVE_THRESHOLD = 100;

// Positions of the individual values in the array of distances.
enum {
    IX_DEF_LANG_DISTANCE,
    IX_DEF_SCRIPT_DISTANCE,
    IX_DEF_REGION_DISTANCE,
    IX_MIN_REGION_DISTANCE,
    IX_LIMIT
};
|
||||
|
||||
LocaleDistance *gLocaleDistance = nullptr;
|
||||
UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
UBool U_CALLCONV cleanup() {
|
||||
delete gLocaleDistance;
|
||||
gLocaleDistance = nullptr;
|
||||
gInitOnce.reset();
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * Creates the LocaleDistance singleton from the distance data held by
 * the XLikelySubtags singleton, and registers the cleanup function.
 *
 * Sets U_MISSING_RESOURCE_ERROR if required distance data is absent
 * (paradigm data is optional), and U_MEMORY_ALLOCATION_ERROR if
 * the singleton cannot be allocated.
 */
void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(gLocaleDistance == nullptr);
    // Check the error code before dereferencing:
    // the returned pointer must not be used after a failure.
    const XLikelySubtags *likely = XLikelySubtags::getSingleton(errorCode);
    if (U_FAILURE(errorCode)) { return; }
    const LocaleDistanceData &data = likely->getDistanceData();
    if (data.distanceTrieBytes == nullptr ||
            data.regionToPartitions == nullptr || data.partitions == nullptr ||
            // ok if no paradigms
            data.distances == nullptr) {
        errorCode = U_MISSING_RESOURCE_ERROR;
        return;
    }
    gLocaleDistance = new LocaleDistance(data);
    if (gLocaleDistance == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);
}
|
||||
|
||||
/**
 * Returns the LocaleDistance singleton, creating it on first use.
 *
 * @return the singleton; nullptr if errorCode indicates a failure on entry
 *         (the pointer is also null if the one-time initialization did not create it)
 */
const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
        return gLocaleDistance;
    }
    return nullptr;
}
|
||||
|
||||
/**
 * Sets up the distance lookup from the given data and
 * computes the default demotion per desired locale.
 */
LocaleDistance::LocaleDistance(const LocaleDistanceData &data) :
        trie(data.distanceTrieBytes),
        regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
        paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
        defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
        defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
        defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
        minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
    // The default demotion value is the distance between en-Latn-US and en-Latn-GB,
    // that is, the default region distance between unrelated Englishes.
    // As a result, unless demotion is turned off, a mere region difference
    // for one desired locale is as good as a perfect match
    // for the next following desired locale.
    // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
    LSR desiredEnglish("en", "Latn", "US");
    LSR supportedEnglish("en", "Latn", "GB");
    const LSR *supportedList = &supportedEnglish;
    const int32_t packed = getBestIndexAndDistance(desiredEnglish, &supportedList, 1,
                                                   50, ULOCMATCH_FAVOR_LANGUAGE);
    defaultDemotionPerDesiredLocale = packed & 0xff;
}
|
||||
|
||||
int32_t LocaleDistance::getBestIndexAndDistance(
|
||||
const LSR &desired,
|
||||
const LSR **supportedLSRs, int32_t supportedLSRsLength,
|
||||
int32_t threshold, ULocMatchFavorSubtag favorSubtag) const {
|
||||
BytesTrie iter(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
int32_t desLangDistance = trieNext(iter, desired.language, false);
|
||||
uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
|
||||
// Index of the supported LSR with the lowest distance.
|
||||
int32_t bestIndex = -1;
|
||||
for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
|
||||
const LSR &supported = *supportedLSRs[slIndex];
|
||||
bool star = false;
|
||||
int32_t distance = desLangDistance;
|
||||
if (distance >= 0) {
|
||||
U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
|
||||
if (slIndex != 0) {
|
||||
iter.resetToState64(desLangState);
|
||||
}
|
||||
distance = trieNext(iter, supported.language, true);
|
||||
}
|
||||
// Note: The data builder verifies that there are no rules with "any" (*) language and
|
||||
// real (non *) script or region subtags.
|
||||
// This means that if the lookup for either language fails we can use
|
||||
// the default distances without further lookups.
|
||||
int32_t flags;
|
||||
if (distance >= 0) {
|
||||
flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
|
||||
distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
|
||||
} else { // <*, *>
|
||||
if (uprv_strcmp(desired.language, supported.language) == 0) {
|
||||
distance = 0;
|
||||
} else {
|
||||
distance = defaultLanguageDistance;
|
||||
}
|
||||
flags = 0;
|
||||
star = true;
|
||||
}
|
||||
U_ASSERT(0 <= distance && distance <= 100);
|
||||
// We implement "favor subtag" by reducing the language subtag distance
|
||||
// (unscientifically reducing it to a quarter of the normal value),
|
||||
// so that the script distance is relatively more important.
|
||||
// For example, given a default language distance of 80, we reduce it to 20,
|
||||
// which is below the default threshold of 50, which is the default script distance.
|
||||
if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
|
||||
distance >>= 2;
|
||||
}
|
||||
if (distance >= threshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int32_t scriptDistance;
|
||||
if (star || flags != 0) {
|
||||
if (uprv_strcmp(desired.script, supported.script) == 0) {
|
||||
scriptDistance = 0;
|
||||
} else {
|
||||
scriptDistance = defaultScriptDistance;
|
||||
}
|
||||
} else {
|
||||
scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
|
||||
desired.script, supported.script);
|
||||
flags = scriptDistance & DISTANCE_IS_FINAL;
|
||||
scriptDistance &= ~DISTANCE_IS_FINAL;
|
||||
}
|
||||
distance += scriptDistance;
|
||||
if (distance >= threshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uprv_strcmp(desired.region, supported.region) == 0) {
|
||||
// regionDistance = 0
|
||||
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
|
||||
distance += defaultRegionDistance;
|
||||
} else {
|
||||
int32_t remainingThreshold = threshold - distance;
|
||||
if (minRegionDistance >= remainingThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// From here on we know the regions are not equal.
|
||||
// Map each region to zero or more partitions. (zero = one non-matching string)
|
||||
// (Each array of single-character partition strings is encoded as one string.)
|
||||
// If either side has more than one, then we find the maximum distance.
|
||||
// This could be optimized by adding some more structure, but probably not worth it.
|
||||
distance += getRegionPartitionsDistance(
|
||||
iter, iter.getState64(),
|
||||
partitionsForRegion(desired),
|
||||
partitionsForRegion(supported),
|
||||
remainingThreshold);
|
||||
}
|
||||
if (distance < threshold) {
|
||||
if (distance == 0) {
|
||||
return slIndex << 8;
|
||||
}
|
||||
bestIndex = slIndex;
|
||||
threshold = distance;
|
||||
}
|
||||
}
|
||||
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
|
||||
}
|
||||
|
||||
/**
 * Looks up the distance for a (desired, supported) script pair.
 *
 * First tries the specific rule by feeding both subtags to the trie via trieNext().
 * If that does not yield a value, falls back to the <*, *> entry reachable from startState:
 * identical scripts get distance 0, otherwise the fallback entry's value is used.
 * In the fallback case DISTANCE_IS_FINAL is ORed in when the trie result is a final value.
 *
 * @param iter       trie iterator, positioned after the language pair
 * @param startState state to return to for the <*, *> fallback lookup
 * @param desired    desired script subtag
 * @param supported  supported script subtag
 * @return the distance, possibly combined with the DISTANCE_IS_FINAL flag
 */
int32_t LocaleDistance::getDesSuppScriptDistance(
        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
    if (trieNext(iter, desired, false) >= 0) {
        const int32_t specific = trieNext(iter, supported, true);
        if (specific >= 0) {
            return specific;
        }
    }

    // No specific rule: use <*, *>.
    const UStringTrieResult starResult = iter.resetToState64(startState).next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_VALUE(starResult));
    int32_t fallback = 0;  // same script
    if (uprv_strcmp(desired, supported) != 0) {
        fallback = iter.getValue();
        U_ASSERT(fallback >= 0);
    }
    return starResult == USTRINGTRIE_FINAL_VALUE ? (fallback | DISTANCE_IS_FINAL) : fallback;
}
|
||||
|
||||
/**
 * Computes the region distance between two sets of single-character partitions.
 *
 * Each partition string is NUL-terminated and holds one character per partition.
 * With exactly one partition on each side, a single pair lookup is done.
 * Otherwise every (desired, supported) pair is looked up and the maximum distance is returned,
 * except that a pair distance at or above the threshold is returned immediately.
 * The <*, *> fallback is consulted at most once; later misses count as distance 0.
 *
 * @param iter                trie iterator positioned after the script pair
 * @param startState          state to return to for each desired partition and for the fallback
 * @param desiredPartitions   non-empty partition string of the desired region
 * @param supportedPartitions non-empty partition string of the supported region
 * @param threshold           early-exit limit
 * @return the region distance
 */
int32_t LocaleDistance::getRegionPartitionsDistance(
        BytesTrie &iter, uint64_t startState,
        const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
    char desired = *desiredPartitions++;
    char supported = *supportedPartitions++;
    U_ASSERT(desired != 0 && supported != 0);
    // The partition strings carry no explicit length; peek at the second character instead.
    const bool suppLengthGt1 = *supportedPartitions != 0;  // gt1: more than 1 character
    const bool desLengthGt1 = *desiredPartitions != 0;
    if (!desLengthGt1 && !suppLengthGt1) {
        // Fastpath: one desired and one supported partition.
        UStringTrieResult single = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
        if (USTRINGTRIE_HAS_NEXT(single)) {
            single = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
            if (USTRINGTRIE_HAS_VALUE(single)) {
                return iter.getValue();
            }
        }
        return getFallbackRegionDistance(iter, startState);
    }

    const char *const supportedStart = supportedPartitions - 1;  // restart point for the inner loop
    int32_t maxDistance = 0;
    // Fall back to * only once, not for each pair of partition strings.
    bool usedStar = false;
    do {
        // Each desired partition is looked up once, then combined with every supported one.
        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
        if (USTRINGTRIE_HAS_NEXT(result)) {
            const uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;
            do {
                result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
                int32_t pairDistance;
                if (USTRINGTRIE_HAS_VALUE(result)) {
                    pairDistance = iter.getValue();
                } else if (usedStar) {
                    pairDistance = 0;
                } else {
                    pairDistance = getFallbackRegionDistance(iter, startState);
                    usedStar = true;
                }
                if (pairDistance >= threshold) {
                    return pairDistance;
                }
                if (maxDistance < pairDistance) {
                    maxDistance = pairDistance;
                }
                supported = *supportedPartitions++;
                if (supported != 0) {
                    iter.resetToState64(desState);
                }
            } while (supported != 0);
        } else if (!usedStar) {
            const int32_t starDistance = getFallbackRegionDistance(iter, startState);
            if (starDistance >= threshold) {
                return starDistance;
            }
            if (maxDistance < starDistance) {
                maxDistance = starDistance;
            }
            usedStar = true;
        }
        desired = *desiredPartitions++;
        if (desired != 0) {
            // Rewind both the trie and the supported-partition cursor for the next desired one.
            iter.resetToState64(startState);
            supportedPartitions = supportedStart;
            supported = *supportedPartitions++;
        }
    } while (desired != 0);
    return maxDistance;
}
|
||||
|
||||
/**
 * Returns the value of the <*, *> region entry reachable from startState.
 * The iterator is reset to startState and then advanced by '*'.
 */
int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {
    iter.resetToState64(startState);
    // The lookup result is only inspected by the assertion below,
    // so the variable exists only in debug builds.
#if U_DEBUG
    UStringTrieResult result =
#endif
    iter.next(u'*');  // <*, *>
    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
    const int32_t fallbackDistance = iter.getValue();
    U_ASSERT(fallbackDistance >= 0);
    return fallbackDistance;
}
|
||||
|
||||
/**
 * Advances the trie iterator over one subtag.
 * The last character of the subtag is ORed with END_OF_SUBTAG before it is fed to the trie.
 *
 * @param iter      trie iterator to advance
 * @param s         NUL-terminated subtag
 * @param wantValue if true, the position after the subtag must have a value, which is returned
 *                  (with DISTANCE_IS_FINAL ORed in for a final value);
 *                  if false, the position must have a continuation and 0 is returned
 * @return the value or 0 as described, or -1 if the subtag is empty or does not match
 */
int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {
    if (*s == 0) {
        return -1;  // no empty subtags in the distance data
    }
    // All characters before the last one must simply have a continuation.
    // EBCDIC: A non-invariant character converts to 0 and will simply not match anything,
    // which is harmless.
    for (; s[1] != 0; ++s) {
        uint8_t b = static_cast<uint8_t>(*s);
        b = uprv_invCharToAscii(b);
        if (!USTRINGTRIE_HAS_NEXT(iter.next(b))) {
            return -1;
        }
    }
    // last character of this subtag
    uint8_t last = static_cast<uint8_t>(*s);
    last = uprv_invCharToAscii(last);
    const UStringTrieResult result = iter.next(last | END_OF_SUBTAG);
    if (!wantValue) {
        return USTRINGTRIE_HAS_NEXT(result) ? 0 : -1;
    }
    if (!USTRINGTRIE_HAS_VALUE(result)) {
        return -1;
    }
    int32_t value = iter.getValue();
    if (result == USTRINGTRIE_FINAL_VALUE) {
        value |= DISTANCE_IS_FINAL;
    }
    return value;
}
|
||||
|
||||
/**
 * Tells whether lsr compares equal to one of the paradigmLSRs entries.
 */
UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
    // The list is very short (length 6 as of 2019), so a linear scan is enough.
    // With many paradigm LSRs a hash set would be the better choice.
    U_ASSERT(paradigmLSRsLength <= 15);
    int32_t index = 0;
    while (index < paradigmLSRsLength) {
        if (lsr == paradigmLSRs[index]) {
            return true;
        }
        ++index;
    }
    return false;
}
|
||||
|
||||
U_NAMESPACE_END
|
109
icu4c/source/common/locdistance.h
Normal file
109
icu4c/source/common/locdistance.h
Normal file
|
@ -0,0 +1,109 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// locdistance.h
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#ifndef __LOCDISTANCE_H__
|
||||
#define __LOCDISTANCE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "lsr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct LocaleDistanceData;
|
||||
|
||||
/**
|
||||
* Offline-built data for LocaleMatcher.
|
||||
* Mostly but not only the data for computing the distance between a desired and a supported locale.
|
||||
*/
|
||||
class LocaleDistance final : public UMemory {
|
||||
public:
|
||||
static const LocaleDistance *getSingleton(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Finds the supported LSR with the smallest distance from the desired one.
|
||||
* Equivalent LSR subtags must be normalized into a canonical form.
|
||||
*
|
||||
* <p>Returns the index of the lowest-distance supported LSR in bits 31..8
|
||||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
*/
|
||||
int32_t getBestIndexAndDistance(const LSR &desired,
|
||||
const LSR **supportedLSRs, int32_t supportedLSRsLength,
|
||||
int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
|
||||
|
||||
int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
|
||||
|
||||
UBool isParadigmLSR(const LSR &lsr) const;
|
||||
|
||||
int32_t getDefaultScriptDistance() const {
|
||||
return defaultScriptDistance;
|
||||
}
|
||||
|
||||
int32_t getDefaultDemotionPerDesiredLocale() const {
|
||||
return defaultDemotionPerDesiredLocale;
|
||||
}
|
||||
|
||||
private:
|
||||
LocaleDistance(const LocaleDistanceData &data);
|
||||
LocaleDistance(const LocaleDistance &other) = delete;
|
||||
LocaleDistance &operator=(const LocaleDistance &other) = delete;
|
||||
|
||||
static void initLocaleDistance(UErrorCode &errorCode);
|
||||
|
||||
static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
|
||||
const char *desired, const char *supported);
|
||||
|
||||
static int32_t getRegionPartitionsDistance(
|
||||
BytesTrie &iter, uint64_t startState,
|
||||
const char *desiredPartitions, const char *supportedPartitions,
|
||||
int32_t threshold);
|
||||
|
||||
static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
|
||||
|
||||
static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
|
||||
|
||||
const char *partitionsForRegion(const LSR &lsr) const {
|
||||
// ill-formed region -> one non-matching string
|
||||
int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
|
||||
return partitionArrays[pIndex];
|
||||
}
|
||||
|
||||
int32_t getDefaultRegionDistance() const {
|
||||
return defaultRegionDistance;
|
||||
}
|
||||
|
||||
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
|
||||
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
|
||||
// There is also a trie value for each subsequence of whole subtags.
|
||||
// One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
|
||||
BytesTrie trie;
|
||||
|
||||
/**
|
||||
* Maps each region to zero or more single-character partitions.
|
||||
*/
|
||||
const uint8_t *regionToPartitionsIndex;
|
||||
const char **partitionArrays;
|
||||
|
||||
/**
|
||||
* Used to get the paradigm region for a cluster, if there is one.
|
||||
*/
|
||||
const LSR *paradigmLSRs;
|
||||
int32_t paradigmLSRsLength;
|
||||
|
||||
int32_t defaultLanguageDistance;
|
||||
int32_t defaultScriptDistance;
|
||||
int32_t defaultRegionDistance;
|
||||
int32_t minRegionDistance;
|
||||
int32_t defaultDemotionPerDesiredLocale;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __LOCDISTANCE_H__
|
|
@ -1396,5 +1396,7 @@ Locale::getBaseName() const {
|
|||
return baseName;
|
||||
}
|
||||
|
||||
// Out-of-line definition of the Locale::Iterator destructor, explicitly defaulted.
Locale::Iterator::~Iterator() = default;
|
||||
|
||||
//eof
|
||||
U_NAMESPACE_END
|
||||
|
|
638
icu4c/source/common/loclikelysubtags.cpp
Normal file
638
icu4c/source/common/loclikelysubtags.cpp
Normal file
|
@ -0,0 +1,638 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// loclikelysubtags.cpp
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#include <utility>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "charstr.h"
|
||||
#include "cstring.h"
|
||||
#include "loclikelysubtags.h"
|
||||
#include "lsr.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uhash.h"
|
||||
#include "uinvchar.h"
|
||||
#include "umutex.h"
|
||||
#include "uresdata.h"
|
||||
#include "uresimp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
|
||||
constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
|
||||
constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
|
||||
|
||||
/**
|
||||
* Stores NUL-terminated strings with duplicate elimination.
|
||||
* Checks for unique UTF-16 string pointers and converts to invariant characters.
|
||||
*/
|
||||
class UniqueCharStrings {
|
||||
public:
|
||||
UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
|
||||
uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
strings = new CharString();
|
||||
if (strings == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
~UniqueCharStrings() {
|
||||
uhash_close(&map);
|
||||
delete strings;
|
||||
}
|
||||
|
||||
/** Returns/orphans the CharString that contains all strings. */
|
||||
CharString *orphanCharStrings() {
|
||||
CharString *result = strings;
|
||||
strings = nullptr;
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Adds a string and returns a unique number for it. */
|
||||
int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return 0; }
|
||||
if (isFrozen) {
|
||||
errorCode = U_NO_WRITE_PERMISSION;
|
||||
return 0;
|
||||
}
|
||||
// The string points into the resource bundle.
|
||||
const char16_t *p = s.getBuffer();
|
||||
int32_t oldIndex = uhash_geti(&map, p);
|
||||
if (oldIndex != 0) { // found duplicate
|
||||
return oldIndex;
|
||||
}
|
||||
// Explicit NUL terminator for the previous string.
|
||||
// The strings object is also terminated with one implicit NUL.
|
||||
strings->append(0, errorCode);
|
||||
int32_t newIndex = strings->length();
|
||||
strings->appendInvariantChars(s, errorCode);
|
||||
uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
|
||||
return newIndex;
|
||||
}
|
||||
|
||||
void freeze() { isFrozen = true; }
|
||||
|
||||
/**
|
||||
* Returns a string pointer for its unique number, if this object is frozen.
|
||||
* Otherwise nullptr.
|
||||
*/
|
||||
const char *get(int32_t i) const {
|
||||
U_ASSERT(isFrozen);
|
||||
return isFrozen && i > 0 ? strings->data() + i : nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
UHashtable map;
|
||||
CharString *strings;
|
||||
bool isFrozen = false;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Move constructor: copies every field and takes over the two arrays
// that the destructor releases (partitions and paradigms).
LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
        distanceTrieBytes(data.distanceTrieBytes),
        regionToPartitions(data.regionToPartitions),
        partitions(data.partitions),
        paradigms(data.paradigms),
        paradigmsLength(data.paradigmsLength),
        distances(data.distances) {
    // The source must no longer release what this object now owns.
    data.paradigms = nullptr;
    data.partitions = nullptr;
}
|
||||
|
||||
// Releases the two owned arrays; the other pointers are not freed here.
LocaleDistanceData::~LocaleDistanceData() {
    delete[] paradigms;
    uprv_free(partitions);
}
|
||||
|
||||
// TODO(ICU-20777): Rename to just LikelySubtagsData.
|
||||
struct XLikelySubtagsData {
|
||||
UResourceBundle *langInfoBundle = nullptr;
|
||||
UniqueCharStrings strings;
|
||||
CharStringMap languageAliases;
|
||||
CharStringMap regionAliases;
|
||||
const uint8_t *trieBytes = nullptr;
|
||||
LSR *lsrs = nullptr;
|
||||
int32_t lsrsLength = 0;
|
||||
|
||||
LocaleDistanceData distanceData;
|
||||
|
||||
XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
|
||||
|
||||
~XLikelySubtagsData() {
|
||||
ures_close(langInfoBundle);
|
||||
delete[] lsrs;
|
||||
}
|
||||
|
||||
void load(UErrorCode &errorCode) {
|
||||
langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
StackUResourceBundle stackTempBundle;
|
||||
ResourceDataValue value;
|
||||
ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
|
||||
value, errorCode);
|
||||
ResourceTable likelyTable = value.getTable(errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
|
||||
// Read all strings in the resource bundle and convert them to invariant char *.
|
||||
LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
|
||||
int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
|
||||
if (!readStrings(likelyTable, "languageAliases", value,
|
||||
languageIndexes, languagesLength, errorCode) ||
|
||||
!readStrings(likelyTable, "regionAliases", value,
|
||||
regionIndexes, regionsLength, errorCode) ||
|
||||
!readStrings(likelyTable, "lsrs", value,
|
||||
lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
|
||||
return;
|
||||
}
|
||||
if ((languagesLength & 1) != 0 ||
|
||||
(regionsLength & 1) != 0 ||
|
||||
(lsrSubtagsLength % 3) != 0) {
|
||||
errorCode = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (lsrSubtagsLength == 0) {
|
||||
errorCode = U_MISSING_RESOURCE_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!likelyTable.findValue("trie", value)) {
|
||||
errorCode = U_MISSING_RESOURCE_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t length;
|
||||
trieBytes = value.getBinary(length, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
|
||||
// Also read distance/matcher data if available,
|
||||
// to open & keep only one resource bundle pointer
|
||||
// and to use one single UniqueCharStrings.
|
||||
UErrorCode matchErrorCode = U_ZERO_ERROR;
|
||||
ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
|
||||
value, matchErrorCode);
|
||||
LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
|
||||
int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
|
||||
if (U_SUCCESS(matchErrorCode)) {
|
||||
ResourceTable matchTable = value.getTable(errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
|
||||
if (matchTable.findValue("trie", value)) {
|
||||
distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
}
|
||||
|
||||
if (matchTable.findValue("regionToPartitions", value)) {
|
||||
distanceData.regionToPartitions = value.getBinary(length, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (length < LSR::REGION_INDEX_LIMIT) {
|
||||
errorCode = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!readStrings(matchTable, "partitions", value,
|
||||
partitionIndexes, partitionsLength, errorCode) ||
|
||||
!readStrings(matchTable, "paradigms", value,
|
||||
paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
|
||||
return;
|
||||
}
|
||||
if ((paradigmSubtagsLength % 3) != 0) {
|
||||
errorCode = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
if (matchTable.findValue("distances", value)) {
|
||||
distanceData.distances = value.getIntVector(length, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (length < 4) { // LocaleDistance IX_LIMIT
|
||||
errorCode = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
|
||||
// ok for likely subtags
|
||||
} else { // error other than missing resource
|
||||
errorCode = matchErrorCode;
|
||||
return;
|
||||
}
|
||||
|
||||
// Fetch & store invariant-character versions of strings
|
||||
// only after we have collected and de-duplicated all of them.
|
||||
strings.freeze();
|
||||
|
||||
languageAliases = CharStringMap(languagesLength / 2, errorCode);
|
||||
for (int32_t i = 0; i < languagesLength; i += 2) {
|
||||
languageAliases.put(strings.get(languageIndexes[i]),
|
||||
strings.get(languageIndexes[i + 1]), errorCode);
|
||||
}
|
||||
|
||||
regionAliases = CharStringMap(regionsLength / 2, errorCode);
|
||||
for (int32_t i = 0; i < regionsLength; i += 2) {
|
||||
regionAliases.put(strings.get(regionIndexes[i]),
|
||||
strings.get(regionIndexes[i + 1]), errorCode);
|
||||
}
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
|
||||
lsrsLength = lsrSubtagsLength / 3;
|
||||
lsrs = new LSR[lsrsLength];
|
||||
if (lsrs == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
|
||||
lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
|
||||
strings.get(lsrSubtagIndexes[i + 1]),
|
||||
strings.get(lsrSubtagIndexes[i + 2]));
|
||||
}
|
||||
|
||||
if (partitionsLength > 0) {
|
||||
distanceData.partitions = static_cast<const char **>(
|
||||
uprv_malloc(partitionsLength * sizeof(const char *)));
|
||||
if (distanceData.partitions == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
for (int32_t i = 0; i < partitionsLength; ++i) {
|
||||
distanceData.partitions[i] = strings.get(partitionIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (paradigmSubtagsLength > 0) {
|
||||
distanceData.paradigmsLength = paradigmSubtagsLength / 3;
|
||||
LSR *paradigms = new LSR[distanceData.paradigmsLength];
|
||||
if (paradigms == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
|
||||
paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
|
||||
strings.get(paradigmSubtagIndexes[i + 1]),
|
||||
strings.get(paradigmSubtagIndexes[i + 2]));
|
||||
}
|
||||
distanceData.paradigms = paradigms;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
|
||||
LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
|
||||
if (table.findValue(key, value)) {
|
||||
ResourceArray stringArray = value.getArray(errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
length = stringArray.getSize();
|
||||
if (length == 0) { return true; }
|
||||
int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
|
||||
if (rawIndexes == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < length; ++i) {
|
||||
stringArray.getValue(i, value); // returns TRUE because i < length
|
||||
rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
XLikelySubtags *gLikelySubtags = nullptr;
|
||||
UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
// Cleanup callback: deletes the singleton and resets the init-once flag.
UBool U_CALLCONV cleanup() {
    XLikelySubtags *doomed = gLikelySubtags;
    gLikelySubtags = nullptr;
    delete doomed;
    gInitOnce.reset();
    return TRUE;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Loads the data and creates the singleton.
// This function is invoked only via umtx_initOnce().
void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
    U_ASSERT(gLikelySubtags == nullptr);
    XLikelySubtagsData data(errorCode);
    data.load(errorCode);
    if (U_FAILURE(errorCode)) { return; }
    XLikelySubtags *created = new XLikelySubtags(data);
    if (created == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    gLikelySubtags = created;
    ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
}
|
||||
|
||||
// Returns the singleton, creating it on first use.
// Returns nullptr right away if errorCode already indicates a failure.
const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
        return gLikelySubtags;
    }
    return nullptr;
}
|
||||
|
||||
/**
 * Takes over the loaded data and precomputes a few trie states.
 * Ownership of the resource bundle, the strings, the alias maps, the LSR array
 * and the distance data moves from data into this object.
 */
XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
        langInfoBundle(data.langInfoBundle),
        strings(data.strings.orphanCharStrings()),
        languageAliases(std::move(data.languageAliases)),
        regionAliases(std::move(data.regionAliases)),
        trie(data.trieBytes),
        lsrs(data.lsrs),
#if U_DEBUG
        lsrsLength(data.lsrsLength),
#endif
        distanceData(std::move(data.distanceData)) {
    // These two are released by ~XLikelySubtags() from now on.
    data.langInfoBundle = nullptr;
    data.lsrs = nullptr;

    // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
    UStringTrieResult result = trie.next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
    trieUndState = trie.getState64();
    result = trie.next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
    trieUndZzzzState = trie.getState64();
    result = trie.next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
    defaultLsrIndex = trie.getValue();
    trie.reset();

    // Cache the state after each possible first language letter.
    // maximize() treats 0 as "no cached state for this letter",
    // so every entry is written here, including the ones without a usable state.
    for (char16_t c = u'a'; c <= u'z'; ++c) {
        result = trie.next(c);
        trieFirstLetterStates[c - u'a'] =
            (result == USTRINGTRIE_NO_VALUE) ? trie.getState64() : 0;
        trie.reset();
    }
}
|
||||
|
||||
// Releases the LSR array, the string storage and the resource bundle.
XLikelySubtags::~XLikelySubtags() {
    delete[] lsrs;
    delete strings;
    ures_close(langInfoBundle);
}
|
||||
|
||||
/**
 * Builds the maximized LSR for a Locale.
 * A locale whose name starts with "@x=" is returned as an LSR with the whole name
 * as its language and empty script and region.
 */
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
    const char *name = locale.getName();
    // name.startsWith("@x="): private use language tag x-subtag-subtag...
    const bool isPrivateUse = uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=';
    if (!isPrivateUse) {
        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
                                locale.getVariant(), errorCode);
    }
    return LSR(name, "", "");
}
|
||||
|
||||
namespace {
|
||||
|
||||
const char *getCanonical(const CharStringMap &aliases, const char *alias) {
|
||||
const char *canonical = aliases.get(alias);
|
||||
return canonical == nullptr ? alias : canonical;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * Builds the maximized LSR for the given subtags.
 *
 * Pseudolocales like en-XA, ar-XB, fr-PSCRACK get a special LSR with a prefix character
 * and are not maximized. They should match only themselves,
 * not other locales with what looks like the same language and script subtags.
 * All other input has its language and region aliases replaced and is then maximized.
 */
LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
                                     const char *variant, UErrorCode &errorCode) const {
    // Pseudolocale by region: exactly two characters, "XA", "XB" or "XC".
    if (region[0] == 'X' && region[1] != 0 && region[2] == 0) {
        char prefix = 0;
        switch (region[1]) {
        case 'A':
            prefix = PSEUDO_ACCENTS_PREFIX;
            break;
        case 'B':
            prefix = PSEUDO_BIDI_PREFIX;
            break;
        case 'C':
            prefix = PSEUDO_CRACKED_PREFIX;
            break;
        default:  // normal locale
            break;
        }
        if (prefix != 0) {
            return LSR(prefix, language, script, region, errorCode);
        }
    }

    // Pseudolocale by variant; an empty region is replaced with the matching X? region.
    if (variant[0] == 'P' && variant[1] == 'S') {
        char prefix = 0;
        const char *pseudoRegion = nullptr;
        if (uprv_strcmp(variant, "PSACCENT") == 0) {
            prefix = PSEUDO_ACCENTS_PREFIX;
            pseudoRegion = "XA";
        } else if (uprv_strcmp(variant, "PSBIDI") == 0) {
            prefix = PSEUDO_BIDI_PREFIX;
            pseudoRegion = "XB";
        } else if (uprv_strcmp(variant, "PSCRACK") == 0) {
            prefix = PSEUDO_CRACKED_PREFIX;
            pseudoRegion = "XC";
        }
        if (prefix != 0) {
            return LSR(prefix, language, script,
                       *region == 0 ? pseudoRegion : region, errorCode);
        }
        // else normal locale
    }

    // (We have no script mappings.)
    const char *canonicalLanguage = getCanonical(languageAliases, language);
    const char *canonicalRegion = getCanonical(regionAliases, region);
    return maximize(canonicalLanguage, script, canonicalRegion);
}
|
||||
|
||||
/**
 * Fills in missing subtags from the likely-subtags trie.
 *
 * "und", "Zzzz" and "ZZ" are treated like empty subtags.
 * If all three subtags are present, they are returned unchanged.
 * Otherwise language, script and region are looked up in turn, and each input subtag
 * that should be kept is combined with the subtags of the LSR found in lsrs.
 */
LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
    // Bits of retainMask: which of the input subtags override the looked-up LSR.
    constexpr uint32_t KEEP_LANGUAGE = 4;
    constexpr uint32_t KEEP_SCRIPT = 2;
    constexpr uint32_t KEEP_REGION = 1;

    if (uprv_strcmp(language, "und") == 0) { language = ""; }
    if (uprv_strcmp(script, "Zzzz") == 0) { script = ""; }
    if (uprv_strcmp(region, "ZZ") == 0) { region = ""; }
    if (*language != 0 && *script != 0 && *region != 0) {
        return LSR(language, script, region);  // already maximized
    }

    uint32_t retainMask = 0;
    BytesTrie iter(trie);
    uint64_t state = 0;
    int32_t lsrIndex;

    // ---- language ----
    // Small optimization: Array lookup for the first language letter,
    // if the language has at least two characters and a state was cached for that letter.
    const int32_t letter = uprv_lowerOrdinal(language[0]);
    if (0 <= letter && letter <= 25 &&
            language[1] != 0 &&
            (state = trieFirstLetterStates[letter]) != 0) {
        lsrIndex = trieNext(iter.resetToState64(state), language, 1);
    } else {
        lsrIndex = trieNext(iter, language, 0);
    }
    if (lsrIndex < 0) {
        // Unknown language: keep it and continue from "und" ("*").
        retainMask |= KEEP_LANGUAGE;
        iter.resetToState64(trieUndState);
        state = 0;
    } else {
        if (*language != 0) {
            retainMask |= KEEP_LANGUAGE;
        }
        state = iter.getState64();
    }

    // ---- script ----
    if (lsrIndex > 0) {
        // Intermediate or final value from just language.
        if (lsrIndex == SKIP_SCRIPT) {
            lsrIndex = 0;
        }
        if (*script != 0) {
            retainMask |= KEEP_SCRIPT;
        }
    } else {
        lsrIndex = trieNext(iter, script, 0);
        if (lsrIndex < 0) {
            retainMask |= KEEP_SCRIPT;
            if (state != 0) {
                // Known language, unknown script: continue with the language's "*" script.
                iter.resetToState64(state);
                lsrIndex = trieNext(iter, "", 0);
                U_ASSERT(lsrIndex >= 0);
                state = iter.getState64();
            } else {
                iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
            }
        } else {
            if (*script != 0) {
                retainMask |= KEEP_SCRIPT;
            }
            state = iter.getState64();
        }
    }

    // ---- region ----
    if (lsrIndex > 0) {
        // Final value from just language or language+script.
        if (*region != 0) {
            retainMask |= KEEP_REGION;
        }
    } else {
        lsrIndex = trieNext(iter, region, 0);
        if (lsrIndex < 0) {
            retainMask |= KEEP_REGION;
            if (state != 0) {
                iter.resetToState64(state);
                lsrIndex = trieNext(iter, "", 0);
                U_ASSERT(lsrIndex > 0);
            } else {
                lsrIndex = defaultLsrIndex;
            }
        } else if (*region != 0) {
            retainMask |= KEEP_REGION;
        }
    }
    U_ASSERT(lsrIndex < lsrsLength);
    const LSR &found = lsrs[lsrIndex];

    if (*language == 0) {
        language = "und";
    }

    if (retainMask == 0) {
        // Nothing to keep from the input: return a copy of the lookup-result LSR
        // quickly, without new allocation of the subtags.
        return LSR(found.language, found.script, found.region);
    }
    if ((retainMask & KEEP_LANGUAGE) == 0) {
        language = found.language;
    }
    if ((retainMask & KEEP_SCRIPT) == 0) {
        script = found.script;
    }
    if ((retainMask & KEEP_REGION) == 0) {
        region = found.region;
    }
    return LSR(language, script, region);
}
|
||||
|
||||
/**
 * Advances the trie iterator over one subtag, starting at s[i].
 * An empty remainder is looked up as '*'.
 * The last character of a non-empty subtag is ORed with 0x80 before it is fed to the trie.
 *
 * @return -1 if there is no match, 0 if the position has no value,
 *         SKIP_SCRIPT for an intermediate value, or the final value
 */
int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
    const char *p = s + i;
    UStringTrieResult result;
    if (*p == 0) {
        result = iter.next(u'*');
    } else {
        // All characters before the last one must simply have a continuation.
        // EBCDIC: A non-invariant character converts to 0 and will simply not match anything,
        // which is harmless.
        for (; p[1] != 0; ++p) {
            uint8_t b = static_cast<uint8_t>(*p);
            b = uprv_invCharToAscii(b);
            if (!USTRINGTRIE_HAS_NEXT(iter.next(b))) {
                return -1;
            }
        }
        // last character of this subtag
        uint8_t last = static_cast<uint8_t>(*p);
        last = uprv_invCharToAscii(last);
        result = iter.next(last | 0x80);
    }
    if (result == USTRINGTRIE_NO_VALUE) {
        return 0;
    }
    if (result == USTRINGTRIE_INTERMEDIATE_VALUE) {
        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
        return SKIP_SCRIPT;
    }
    if (result == USTRINGTRIE_FINAL_VALUE) {
        return iter.getValue();
    }
    // USTRINGTRIE_NO_MATCH and anything unexpected
    return -1;
}
|
||||
|
||||
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
|
||||
// in loclikely.cpp to this new code, including activating this
|
||||
// minimizeSubtags() function. The LocaleMatcher does not minimize.
|
||||
#if 0
|
||||
// NOTE(review): This function is compiled out by the enclosing "#if 0" and is
// not valid C++ yet. It still uses Java constructs (ULocale.Minimize,
// "new BytesTrie(...)", "boolean", ".equals(...)", "new LSR(...)") and must be
// ported before the preprocessor guard can be removed.
// Outline of the disabled logic: maximize the input, look up the likely
// subtags for the maximized language alone (value00), and return the shortest
// LSR whose maximization equals the maximized input, preferring to keep the
// region when fieldToFavor asks for it.
LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
|
||||
const char *regionIn, ULocale.Minimize fieldToFavor,
|
||||
UErrorCode &errorCode) const {
|
||||
LSR result = maximize(languageIn, scriptIn, regionIn);
|
||||
|
||||
// We could try just a series of checks, like:
|
||||
// LSR result2 = addLikelySubtags(languageIn, "", "");
|
||||
// if result.equals(result2) return result2;
|
||||
// However, we can optimize 2 of the cases:
|
||||
// (languageIn, "", "")
|
||||
// (languageIn, "", regionIn)
|
||||
|
||||
// value00 = lookup(result.language, "", "")
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
int value = trieNext(iter, result.language, 0);
|
||||
U_ASSERT(value >= 0);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value >= 0);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
}
|
||||
}
|
||||
U_ASSERT(value > 0);
|
||||
LSR value00 = lsrs[value];
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return new LSR(result.language, "", "");
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
}
|
||||
|
||||
// The last case is not as easy to optimize.
|
||||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return new LSR(result.language, result.script, "");
|
||||
} else if (favorRegionOk) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
143
icu4c/source/common/loclikelysubtags.h
Normal file
143
icu4c/source/common/loclikelysubtags.h
Normal file
|
@ -0,0 +1,143 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// loclikelysubtags.h
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#ifndef __LOCLIKELYSUBTAGS_H__
|
||||
#define __LOCLIKELYSUBTAGS_H__
|
||||
|
||||
#include <utility>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "lsr.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct XLikelySubtagsData;
|
||||
|
||||
/**
|
||||
* Map of const char * keys & values.
|
||||
* Stores pointers as is: Does not own/copy/adopt/release strings.
|
||||
*/
|
||||
class CharStringMap final : public UMemory {
|
||||
public:
|
||||
/** Constructs an unusable non-map. */
|
||||
CharStringMap() : map(nullptr) {}
|
||||
CharStringMap(int32_t size, UErrorCode &errorCode) {
|
||||
map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
|
||||
size, &errorCode);
|
||||
}
|
||||
CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
|
||||
other.map = nullptr;
|
||||
}
|
||||
CharStringMap(const CharStringMap &other) = delete;
|
||||
~CharStringMap() {
|
||||
uhash_close(map);
|
||||
}
|
||||
|
||||
CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
|
||||
map = other.map;
|
||||
other.map = nullptr;
|
||||
return *this;
|
||||
}
|
||||
CharStringMap &operator=(const CharStringMap &other) = delete;
|
||||
|
||||
const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
|
||||
void put(const char *key, const char *value, UErrorCode &errorCode) {
|
||||
uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
|
||||
}
|
||||
|
||||
private:
|
||||
UHashtable *map;
|
||||
};
|
||||
|
||||
struct LocaleDistanceData {
|
||||
LocaleDistanceData() = default;
|
||||
LocaleDistanceData(LocaleDistanceData &&data);
|
||||
~LocaleDistanceData();
|
||||
|
||||
const uint8_t *distanceTrieBytes = nullptr;
|
||||
const uint8_t *regionToPartitions = nullptr;
|
||||
const char **partitions = nullptr;
|
||||
const LSR *paradigms = nullptr;
|
||||
int32_t paradigmsLength = 0;
|
||||
const int32_t *distances = nullptr;
|
||||
|
||||
private:
|
||||
LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
|
||||
};
|
||||
|
||||
// TODO(ICU-20777): Rename to just LikelySubtags.
|
||||
class XLikelySubtags final : public UMemory {
|
||||
public:
|
||||
~XLikelySubtags();
|
||||
|
||||
static constexpr int32_t SKIP_SCRIPT = 1;
|
||||
|
||||
// VisibleForTesting
|
||||
static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
|
||||
|
||||
// VisibleForTesting
|
||||
LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
|
||||
|
||||
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
|
||||
// in loclikely.cpp to this new code, including activating this
|
||||
// minimizeSubtags() function. The LocaleMatcher does not minimize.
|
||||
#if 0
|
||||
LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
|
||||
ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
|
||||
#endif
|
||||
|
||||
// visible for LocaleDistance
|
||||
const LocaleDistanceData &getDistanceData() const { return distanceData; }
|
||||
|
||||
private:
|
||||
XLikelySubtags(XLikelySubtagsData &data);
|
||||
XLikelySubtags(const XLikelySubtags &other) = delete;
|
||||
XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
|
||||
|
||||
static void initLikelySubtags(UErrorCode &errorCode);
|
||||
|
||||
LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
|
||||
const char *variant, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
|
||||
*/
|
||||
LSR maximize(const char *language, const char *script, const char *region) const;
|
||||
|
||||
static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
|
||||
|
||||
UResourceBundle *langInfoBundle;
|
||||
// We could store the strings by value, except that if there were few enough strings,
|
||||
// moving the contents could copy it to a different array,
|
||||
// invalidating the pointers stored in the maps.
|
||||
CharString *strings;
|
||||
CharStringMap languageAliases;
|
||||
CharStringMap regionAliases;
|
||||
|
||||
// The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
|
||||
// There is also a trie value for each intermediate lang and lang+script.
|
||||
// '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
|
||||
BytesTrie trie;
|
||||
uint64_t trieUndState;
|
||||
uint64_t trieUndZzzzState;
|
||||
int32_t defaultLsrIndex;
|
||||
uint64_t trieFirstLetterStates[26];
|
||||
const LSR *lsrs;
|
||||
#if U_DEBUG
|
||||
int32_t lsrsLength;
|
||||
#endif
|
||||
|
||||
// distance/matcher data: see comment in XLikelySubtagsData::load()
|
||||
LocaleDistanceData distanceData;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __LOCLIKELYSUBTAGS_H__
|
101
icu4c/source/common/lsr.cpp
Normal file
101
icu4c/source/common/lsr.cpp
Normal file
|
@ -0,0 +1,101 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// lsr.cpp
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "lsr.h"
|
||||
#include "uinvchar.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Builds an LSR whose language and script are each prefixed with `prefix`
 * and stored together in one heap buffer ("<prefix><lang>\0<prefix><scr>\0").
 * The region pointer is aliased, not copied.
 *
 * If errorCode indicates a failure on entry, or building/cloning the buffer
 * fails, language and script remain nullptr and nothing is owned.
 */
LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) :
        language(nullptr), script(nullptr), region(r),
        regionIndex(indexForRegion(region)) {
    if (U_FAILURE(errorCode)) { return; }

    CharString buffer;
    buffer.append(prefix, errorCode);
    buffer.append(lang, errorCode);
    // Explicit NUL so that the language is a C string of its own inside the buffer.
    buffer.append('\0', errorCode);
    const int32_t scriptStart = buffer.length();
    buffer.append(prefix, errorCode);
    buffer.append(scr, errorCode);

    owned = buffer.cloneData(errorCode);
    if (U_FAILURE(errorCode)) { return; }

    language = owned;
    script = owned + scriptStart;
}
|
||||
|
||||
/**
 * Move constructor. Copies all fields from other.
 * If other owned its language/script buffer, ownership moves to this object
 * and other is reset to empty language/script strings with no hash code.
 */
LSR::LSR(LSR &&other) U_NOEXCEPT :
        language(other.language), script(other.script), region(other.region), owned(other.owned),
        regionIndex(other.regionIndex), hashCode(other.hashCode) {
    if (owned == nullptr) {
        // Everything is aliased; the source may keep its pointers.
        return;
    }
    // The source must no longer point into (or free) the buffer that we own now.
    const char *empty = "";
    other.script = empty;
    other.language = empty;
    other.owned = nullptr;
    other.hashCode = 0;
}
|
||||
|
||||
// Frees the heap buffer that `owned` points to.
// Kept out of line so that the inline ~LSR() only has to test `owned` for
// nullptr and calls this function just for instances that own memory.
// Does not reset `owned`.
void LSR::deleteOwned() {
|
||||
uprv_free(owned);
|
||||
}
|
||||
|
||||
/**
 * Move assignment. Releases any buffer owned by this object, then takes over
 * all fields of other. If other owned its language/script buffer, ownership
 * moves to this object and other is reset to empty language/script strings
 * with no hash code.
 *
 * Self-assignment is a no-op. (Without the guard, an owning LSR would free
 * its own buffer and end up with empty language/script strings.)
 */
LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
    if (this == &other) { return *this; }

    // Release our own buffer directly instead of invoking the destructor
    // on an object that remains in use afterwards.
    if (owned != nullptr) {
        deleteOwned();
    }

    language = other.language;
    script = other.script;
    region = other.region;
    regionIndex = other.regionIndex;
    owned = other.owned;
    hashCode = other.hashCode;

    if (owned != nullptr) {
        // The source must no longer point into (or free) the buffer that we own now.
        other.language = other.script = "";
        other.owned = nullptr;
        other.hashCode = 0;
    }
    return *this;
}
|
||||
|
||||
/**
 * Two LSRs are equal if their language and script strings match and their
 * regions match. Well-formed regions (regionIndex > 0) are compared via their
 * indexes; the region strings are compared only when both are ill-formed.
 */
UBool LSR::operator==(const LSR &other) const {
    if (uprv_strcmp(language, other.language) != 0) { return false; }
    if (uprv_strcmp(script, other.script) != 0) { return false; }
    if (regionIndex != other.regionIndex) { return false; }
    // Same index: a positive one identifies the region uniquely.
    if (regionIndex > 0) { return true; }
    // Compare regions if both are ill-formed (and their indexes are 0).
    return uprv_strcmp(region, other.region) == 0;
}
|
||||
|
||||
/**
 * Maps a region code to a positive index, or 0 if the string is ill-formed.
 * Three digits "000".."999" map to 1..1000.
 * Two uppercase letters "AA".."ZZ" map to 1001..1676.
 */
int32_t LSR::indexForRegion(const char *region) {
    const int32_t first = region[0];
    const int32_t digit0 = first - '0';
    if (digit0 < 0 || 9 < digit0) {
        // letters: "DE"
        const int32_t letter0 = uprv_upperOrdinal(first);
        if (letter0 < 0 || 25 < letter0) { return 0; }
        const int32_t letter1 = uprv_upperOrdinal(region[1]);
        if (letter1 < 0 || 25 < letter1 || region[2] != 0) { return 0; }
        return 26 * letter0 + letter1 + 1001;
    }
    // digits: "419"
    const int32_t digit1 = region[1] - '0';
    if (digit1 < 0 || 9 < digit1) { return 0; }
    const int32_t digit2 = region[2] - '0';
    if (digit2 < 0 || 9 < digit2 || region[3] != 0) { return 0; }
    return (10 * digit0 + digit1) * 10 + digit2 + 1;
}
|
||||
|
||||
/**
 * Computes and caches the hash code from language, script and regionIndex,
 * unless a non-zero hash code has been set already.
 *
 * @return *this
 */
LSR &LSR::setHashCode() {
    if (hashCode == 0) {
        // Mix in unsigned arithmetic: multiplying an arbitrary int32_t hash by 37
        // overflows, which is undefined behavior for signed integers.
        // Unsigned arithmetic wraps around with well-defined results.
        uint32_t h = static_cast<uint32_t>(
            ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language))));
        h = h * 37 + static_cast<uint32_t>(
            ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script))));
        h = h * 37 + static_cast<uint32_t>(regionIndex);
        hashCode = static_cast<int32_t>(h);
    }
    return *this;
}
|
||||
|
||||
U_NAMESPACE_END
|
72
icu4c/source/common/lsr.h
Normal file
72
icu4c/source/common/lsr.h
Normal file
|
@ -0,0 +1,72 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// lsr.h
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#ifndef __LSR_H__
|
||||
#define __LSR_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cstring.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct LSR final : public UMemory {
|
||||
static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
|
||||
|
||||
const char *language;
|
||||
const char *script;
|
||||
const char *region;
|
||||
char *owned = nullptr;
|
||||
/** Index for region, 0 if ill-formed. @see indexForRegion */
|
||||
int32_t regionIndex = 0;
|
||||
/** Only set for LSRs that will be used in a hash table. */
|
||||
int32_t hashCode = 0;
|
||||
|
||||
LSR() : language("und"), script(""), region("") {}
|
||||
|
||||
/** Constructor which aliases all subtag pointers. */
|
||||
LSR(const char *lang, const char *scr, const char *r) :
|
||||
language(lang), script(scr), region(r),
|
||||
regionIndex(indexForRegion(region)) {}
|
||||
/**
|
||||
* Constructor which prepends the prefix to the language and script,
|
||||
* copies those into owned memory, and aliases the region.
|
||||
*/
|
||||
LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode);
|
||||
LSR(LSR &&other) U_NOEXCEPT;
|
||||
LSR(const LSR &other) = delete;
|
||||
inline ~LSR() {
|
||||
// Pure inline code for almost all instances.
|
||||
if (owned != nullptr) {
|
||||
deleteOwned();
|
||||
}
|
||||
}
|
||||
|
||||
LSR &operator=(LSR &&other) U_NOEXCEPT;
|
||||
LSR &operator=(const LSR &other) = delete;
|
||||
|
||||
/**
|
||||
* Returns a positive index (>0) for a well-formed region code.
|
||||
* Do not rely on a particular region->index mapping; it may change.
|
||||
* Returns 0 for ill-formed strings.
|
||||
*/
|
||||
static int32_t indexForRegion(const char *region);
|
||||
|
||||
UBool operator==(const LSR &other) const;
|
||||
|
||||
inline UBool operator!=(const LSR &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
LSR &setHashCode();
|
||||
|
||||
private:
|
||||
void deleteOwned();
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __LSR_H__
|
|
@ -94,13 +94,20 @@ public:
|
|||
*/
|
||||
int32_t getSize() const { return length; }
|
||||
/**
|
||||
* @param i Array item index.
|
||||
* @param i Table item index.
|
||||
* @param key Output-only, receives the key of the i'th item.
|
||||
* @param value Output-only, receives the value of the i'th item.
|
||||
* @return TRUE if i is non-negative and less than getSize().
|
||||
*/
|
||||
UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;
|
||||
|
||||
/**
|
||||
* @param key Key string to find in the table.
|
||||
* @param value Output-only, receives the value of the item with that key.
|
||||
* @return TRUE if the table contains the key.
|
||||
*/
|
||||
UBool findValue(const char *key, ResourceValue &value) const;
|
||||
|
||||
private:
|
||||
const uint16_t *keys16;
|
||||
const int32_t *keys32;
|
||||
|
|
|
@ -39,6 +39,8 @@ typedef enum ECleanupCommonType {
|
|||
UCLN_COMMON_LOCALE_KEY_TYPE,
|
||||
UCLN_COMMON_LOCALE,
|
||||
UCLN_COMMON_LOCALE_AVAILABLE,
|
||||
UCLN_COMMON_LIKELY_SUBTAGS,
|
||||
UCLN_COMMON_LOCALE_DISTANCE,
|
||||
UCLN_COMMON_ULOC,
|
||||
UCLN_COMMON_CURRENCY,
|
||||
UCLN_COMMON_LOADED_NORMALIZER2,
|
||||
|
|
|
@ -445,6 +445,13 @@ uprv_copyEbcdic(const UDataSwapper *ds,
|
|||
return length;
|
||||
}
|
||||
|
||||
/*
 * Returns whether c is one of the byte values listed in the at-sign table below.
 * NUL is never an at sign.
 */
U_CFUNC UBool
uprv_isEbcdicAtSign(char c) {
    // The trailing 0x00 terminates the table so that it can be searched as a C string.
    static const uint8_t ebcdicAtSigns[] = {
        0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    if (c == 0) {
        // strchr() would "find" the terminator itself.
        return false;
    }
    const char *table = reinterpret_cast<const char *>(ebcdicAtSigns);
    return uprv_strchr(table, c) != nullptr;
}
|
||||
|
||||
/* compare invariant strings; variant characters compare less than others and unlike each other */
|
||||
U_CFUNC int32_t
|
||||
uprv_compareInvAscii(const UDataSwapper *ds,
|
||||
|
@ -561,6 +568,11 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Looks up c, taken as an unsigned byte value, in the asciiFromEbcdic table. */
U_CAPI char U_EXPORT2
uprv_ebcdicToAscii(char c) {
    // Index with the unsigned byte value: char may be signed.
    const uint8_t index = static_cast<uint8_t>(c);
    return static_cast<char>(asciiFromEbcdic[index]);
}
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_ebcdicToLowercaseAscii(char c) {
|
||||
return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
|
||||
|
|
|
@ -68,6 +68,75 @@ uprv_isInvariantUString(const UChar *s, int32_t length);
|
|||
# error Unknown charset family!
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Validating variant of U_UPPER_ORDINAL(x).
 * Maps the uppercase letters A..Z to 0..25;
 * any other input yields a value outside 0..25.
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    // ASCII: A..Z are contiguous.
    const int32_t ordinal = c - 'A';
    return ordinal;
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c > 'R') {
        // gap between R and S, or S-Z --> 18..25
        return (c < 'S') ? -1 : c - 'S' + 18;
    }
    if (c > 'I') {
        // gap between I and J, or J-R --> 9..17
        return (c < 'J') ? -1 : c - 'J' + 9;
    }
    return c - 'A';  // A-I --> 0-8
#else
# error Unknown charset family!
#endif
}
|
||||
|
||||
// Lowercase counterpart of the validating U_UPPER_ORDINAL(x) variant.
// Maps the lowercase letters a..z to 0..25;
// any other input yields a value outside 0..25.
inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    // ASCII: a..z are contiguous.
    const int32_t ordinal = c - 'a';
    return ordinal;
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c > 'r') {
        // gap between r and s, or s-z --> 18..25
        return (c < 's') ? -1 : c - 's' + 18;
    }
    if (c > 'i') {
        // gap between i and j, or j-r --> 9..17
        return (c < 'j') ? -1 : c - 'j' + 9;
    }
    return c - 'a';  // a-i --> 0-8
#else
# error Unknown charset family!
#endif
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns true if c == '@' is possible.
|
||||
* The @ sign is variant, and the @ sign used on one
|
||||
* EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
uprv_isEbcdicAtSign(char c);
|
||||
|
||||
/**
|
||||
* \def uprv_isAtSign
|
||||
* Returns true if c == '@' is possible.
|
||||
* For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
|
||||
* @internal
|
||||
*/
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_isAtSign(c) ((c)=='@')
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
|
||||
#else
|
||||
# error Unknown charset family!
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Compare two EBCDIC invariant-character strings in ASCII order.
|
||||
* @internal
|
||||
|
@ -88,6 +157,26 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
|
|||
# error Unknown charset family!
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Converts an EBCDIC invariant character to ASCII.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL char U_EXPORT2
|
||||
uprv_ebcdicToAscii(char c);
|
||||
|
||||
/**
|
||||
* \def uprv_invCharToAscii
|
||||
* Converts an invariant character to ASCII.
|
||||
* @internal
|
||||
*/
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_invCharToAscii(c) (c)
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
|
||||
#else
|
||||
# error Unknown charset family!
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Converts an EBCDIC invariant character to lowercase ASCII.
|
||||
* @internal
|
||||
|
|
|
@ -8,10 +8,10 @@
|
|||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* \file
|
||||
|
@ -291,6 +291,10 @@ public:
|
|||
UBool copyErrorTo(UErrorCode &outErrorCode) const;
|
||||
|
||||
private:
|
||||
friend class LocaleMatcher::Result;
|
||||
|
||||
void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
|
||||
|
||||
UErrorCode status_;
|
||||
char language_[9];
|
||||
char script_[5];
|
||||
|
|
605
icu4c/source/common/unicode/localematcher.h
Normal file
605
icu4c/source/common/unicode/localematcher.h
Normal file
|
@ -0,0 +1,605 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// localematcher.h
|
||||
// created: 2019may08 Markus W. Scherer
|
||||
|
||||
#ifndef __LOCALEMATCHER_H__
|
||||
#define __LOCALEMATCHER_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
|
||||
*/
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Builder option for whether the language subtag or the script subtag is most important.
|
||||
*
|
||||
* @see LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag)
|
||||
* @draft ICU 65
|
||||
*/
|
||||
enum ULocMatchFavorSubtag {
|
||||
/**
|
||||
* Language differences are most important, then script differences, then region differences.
|
||||
* (This is the default behavior.)
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
ULOCMATCH_FAVOR_LANGUAGE,
|
||||
/**
|
||||
* Makes script differences matter relatively more than language differences.
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
ULOCMATCH_FAVOR_SCRIPT
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Builder option for whether all desired locales are treated equally or
|
||||
* earlier ones are preferred.
|
||||
*
|
||||
* @see LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion)
|
||||
* @draft ICU 65
|
||||
*/
|
||||
enum ULocMatchDemotion {
|
||||
/**
|
||||
* All desired locales are treated equally.
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
ULOCMATCH_DEMOTION_NONE,
|
||||
/**
|
||||
* Earlier desired locales are preferred.
|
||||
*
|
||||
* <p>From each desired locale to the next,
|
||||
* the distance to any supported locale is increased by an additional amount
|
||||
* which is at least as large as most region mismatches.
|
||||
* A later desired locale has to have a better match with some supported locale
|
||||
* due to more than merely having the same region subtag.
|
||||
*
|
||||
* <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
|
||||
* yields <code>Result(en-GB, en)</code> because
|
||||
* with the demotion of sv its perfect match is no better than
|
||||
* the region distance between the earlier desired locale en-GB and en=en-US.
|
||||
*
|
||||
* <p>Notes:
|
||||
* <ul>
|
||||
* <li>In some cases, language and/or script differences can be as small as
|
||||
* the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
|
||||
* <li>It is possible for certain region differences to be larger than usual,
|
||||
* and larger than the demotion.
|
||||
* (As of CLDR 35 there is no such case, but
|
||||
* this is possible in future versions of the data.)
|
||||
* </ul>
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
ULOCMATCH_DEMOTION_REGION
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum ULocMatchDemotion ULocMatchDemotion;
|
||||
#endif
|
||||
|
||||
struct UHashtable;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
struct LSR;
|
||||
|
||||
class LocaleDistance;
|
||||
class LocaleLsrIterator;
|
||||
class UVector;
|
||||
class XLikelySubtags;
|
||||
|
||||
/**
|
||||
* Immutable class that picks the best match between a user's desired locales and
|
||||
* an application's supported locales.
|
||||
* Movable but not copyable.
|
||||
*
|
||||
* <p>Example:
|
||||
* <pre>
|
||||
* UErrorCode errorCode = U_ZERO_ERROR;
|
||||
* LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
|
||||
* const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en"
|
||||
* </pre>
|
||||
*
|
||||
* <p>A matcher takes into account when languages are close to one another,
|
||||
* such as Danish and Norwegian,
|
||||
* and when regional variants are close, like en-GB and en-AU as opposed to en-US.
|
||||
*
|
||||
* <p>If there are multiple supported locales with the same (language, script, region)
|
||||
* likely subtags, then the current implementation returns the first of those locales.
|
||||
* It ignores variant subtags (except for pseudolocale variants) and extensions.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>For example, the current implementation does not distinguish between
|
||||
* de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
|
||||
*
|
||||
* <p>If you prefer one equivalent locale over another, then provide only the preferred one,
|
||||
* or place it earlier in the list of supported locales.
|
||||
*
|
||||
* <p>Otherwise, the order of supported locales may have no effect on the best-match results.
|
||||
* The current implementation compares each desired locale with supported locales
|
||||
* in the following order:
|
||||
* 1. Default locale, if supported;
|
||||
* 2. CLDR "paradigm locales" like en-GB and es-419;
|
||||
* 3. other supported locales.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>Often a product will just need one matcher instance, built with the languages
|
||||
* that it supports. However, it may want multiple instances with different
|
||||
* default languages based on additional information, such as the domain.
|
||||
*
|
||||
* <p>This class is not intended for public subclassing.
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
class U_COMMON_API LocaleMatcher : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Data for the best-matching pair of a desired and a supported locale.
|
||||
* Movable but not copyable.
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
class U_COMMON_API Result : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Move constructor; might modify the source.
|
||||
* This object will have the same contents that the source object had.
|
||||
*
|
||||
* @param src Result to move contents from.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Result(Result &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
~Result();
|
||||
|
||||
/**
|
||||
* Move assignment; might modify the source.
|
||||
* This object will have the same contents that the source object had.
|
||||
*
|
||||
* @param src Result to move contents from.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Result &operator=(Result &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Returns the best-matching desired locale.
|
||||
* nullptr if the list of desired locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the best-matching desired locale, or nullptr.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
inline const Locale *getDesiredLocale() const { return desiredLocale; }
|
||||
|
||||
/**
|
||||
* Returns the best-matching supported locale.
|
||||
* If none matched well enough, this is the default locale.
|
||||
* The default locale is nullptr if the list of supported locales is empty and
|
||||
* no explicit default locale is set.
|
||||
*
|
||||
* @return the best-matching supported locale, or nullptr.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
inline const Locale *getSupportedLocale() const { return supportedLocale; }
|
||||
|
||||
/**
|
||||
* Returns the index of the best-matching desired locale in the input iteration order.
|
||||
* -1 if the list of desired locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the index of the best-matching desired locale, or -1.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
inline int32_t getDesiredIndex() const { return desiredIndex; }
|
||||
|
||||
/**
|
||||
* Returns the index of the best-matching supported locale in the
|
||||
* constructor’s or builder’s input order (“set” locales plus “added” locales).
|
||||
* If the matcher was built from a locale list string, then the iteration order is that
|
||||
* of a LocalePriorityList built from the same string.
|
||||
* -1 if the list of supported locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the index of the best-matching supported locale, or -1.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
inline int32_t getSupportedIndex() const { return supportedIndex; }
|
||||
|
||||
/**
|
||||
* Takes the best-matching supported locale and adds relevant fields of the
|
||||
* best-matching desired locale, such as the -t- and -u- extensions.
|
||||
* May replace some fields of the supported locale.
|
||||
* The result is the locale that should be used for date and number formatting, collation, etc.
|
||||
* Returns the root locale if getSupportedLocale() returns nullptr.
|
||||
*
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
|
||||
*
|
||||
* @return a locale combining the best-matching desired and supported locales.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Locale makeResolvedLocale(UErrorCode &errorCode) const;
|
||||
|
||||
private:
|
||||
Result(const Locale *desired, const Locale *supported,
|
||||
int32_t desIndex, int32_t suppIndex, UBool owned) :
|
||||
desiredLocale(desired), supportedLocale(supported),
|
||||
desiredIndex(desIndex), supportedIndex(suppIndex),
|
||||
desiredIsOwned(owned) {}
|
||||
|
||||
Result(const Result &other) = delete;
|
||||
Result &operator=(const Result &other) = delete;
|
||||
|
||||
const Locale *desiredLocale;
|
||||
const Locale *supportedLocale;
|
||||
int32_t desiredIndex;
|
||||
int32_t supportedIndex;
|
||||
UBool desiredIsOwned;
|
||||
|
||||
friend class LocaleMatcher;
|
||||
};
|
||||
|
||||
/**
|
||||
* LocaleMatcher builder.
|
||||
* Movable but not copyable.
|
||||
*
|
||||
* @see LocaleMatcher#builder()
|
||||
* @draft ICU 65
|
||||
*/
|
||||
class U_COMMON_API Builder : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs a builder used in chaining parameters for building a LocaleMatcher.
|
||||
*
|
||||
* @return a new Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder() {}
|
||||
|
||||
/**
|
||||
* Move constructor; might modify the source.
|
||||
* This builder will have the same contents that the source builder had.
|
||||
*
|
||||
* @param src Builder to move contents from.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder(Builder &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*
|
||||
* @draft ICU 65
|
||||
*/
|
||||
~Builder();
|
||||
|
||||
/**
|
||||
* Move assignment; might modify the source.
|
||||
* This builder will have the same contents that the source builder had.
|
||||
*
|
||||
* @param src Builder to move contents from.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &operator=(Builder &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Parses an Accept-Language string
|
||||
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
|
||||
* such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
|
||||
* Allows whitespace in more places but does not allow "*".
|
||||
* Clears any previously set/added supported locales first.
|
||||
*
|
||||
* @param locales the Accept-Language string of locales to set
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &setSupportedLocalesFromListString(StringPiece locales);
|
||||
|
||||
/**
|
||||
* Copies the supported locales, preserving iteration order.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locales the list of locales
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &setSupportedLocales(Locale::Iterator &locales);
|
||||
|
||||
/**
|
||||
* Copies the supported locales from the begin/end range, preserving iteration order.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* Each of the iterator parameter values must be an
|
||||
* input iterator whose value is convertible to const Locale &.
|
||||
*
|
||||
* @param begin Start of range.
|
||||
* @param end Exclusive end of range.
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
template<typename Iter>
|
||||
Builder &setSupportedLocales(Iter begin, Iter end) {
|
||||
if (U_FAILURE(errorCode_)) { return *this; }
|
||||
clearSupportedLocales();
|
||||
while (begin != end) {
|
||||
addSupportedLocale(*begin++);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the supported locales from the begin/end range, preserving iteration order.
|
||||
* Calls the converter to convert each *begin to a Locale or const Locale &.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* Each of the iterator parameter values must be an
|
||||
* input iterator whose value the converter can convert to const Locale & (or compatible).
|
||||
*
|
||||
* @param begin Start of range.
|
||||
* @param end Exclusive end of range.
|
||||
* @param converter Converter from *begin to const Locale & or compatible.
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
template<typename Iter, typename Conv>
|
||||
Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
|
||||
if (U_FAILURE(errorCode_)) { return *this; }
|
||||
clearSupportedLocales();
|
||||
while (begin != end) {
|
||||
addSupportedLocale(converter(*begin++));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds another supported locale.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locale another locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &addSupportedLocale(const Locale &locale);
|
||||
|
||||
/**
|
||||
* Sets the default locale; if nullptr, or if it is not set explicitly,
|
||||
* then the first supported locale is used as the default locale.
|
||||
*
|
||||
* @param defaultLocale the default locale (will be copied)
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &setDefaultLocale(const Locale *defaultLocale);
|
||||
|
||||
/**
|
||||
* If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
|
||||
* differences.
|
||||
* This is used in situations (such as maps) where
|
||||
* it is better to fall back to the same script than a similar language.
|
||||
*
|
||||
* @param subtag the subtag to favor
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
|
||||
|
||||
/**
|
||||
* Option for whether all desired locales are treated equally or
|
||||
* earlier ones are preferred (this is the default).
|
||||
*
|
||||
* @param demotion the demotion per desired locale to set.
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
|
||||
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while setting parameters.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
*
|
||||
* @param outErrorCode Set to an error code if it does not contain one already
|
||||
* and an error occurred while setting parameters.
|
||||
* Otherwise unchanged.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @draft ICU 65
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode) const;
|
||||
|
||||
/**
|
||||
* Builds and returns a new locale matcher.
|
||||
* This builder can continue to be used.
|
||||
*
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return new LocaleMatcher.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
LocaleMatcher build(UErrorCode &errorCode) const;
|
||||
|
||||
private:
|
||||
friend class LocaleMatcher;
|
||||
|
||||
Builder(const Builder &other) = delete;
|
||||
Builder &operator=(const Builder &other) = delete;
|
||||
|
||||
void clearSupportedLocales();
|
||||
bool ensureSupportedLocaleVector();
|
||||
|
||||
UErrorCode errorCode_ = U_ZERO_ERROR;
|
||||
UVector *supportedLocales_ = nullptr;
|
||||
int32_t thresholdDistance_ = -1;
|
||||
ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
|
||||
Locale *defaultLocale_ = nullptr;
|
||||
ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
|
||||
};
|
||||
|
||||
// FYI No public LocaleMatcher constructors in C++; use the Builder.
|
||||
|
||||
/**
|
||||
* Move constructor; might modify the source.
|
||||
* This matcher will have the same settings that the source matcher had.
|
||||
* @param src source matcher
|
||||
* @draft ICU 65
|
||||
*/
|
||||
LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
~LocaleMatcher();
|
||||
|
||||
/**
|
||||
* Move assignment operator; might modify the source.
|
||||
* This matcher will have the same settings that the source matcher had.
|
||||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source matcher
|
||||
* @return *this
|
||||
* @draft ICU 65
|
||||
*/
|
||||
LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Returns the supported locale which best matches the desired locale.
|
||||
*
|
||||
* @param desiredLocale Typically a user's language.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching supported locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the supported locale which best matches one of the desired locales.
|
||||
*
|
||||
* @param desiredLocales Typically a user's languages, in order of preference (descending).
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching supported locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Parses an Accept-Language string
|
||||
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
|
||||
* such as "af, en, fr;q=0.9",
|
||||
* and returns the supported locale which best matches one of the desired locales.
|
||||
* Allows whitespace in more places but does not allow "*".
|
||||
*
|
||||
* @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching supported locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the best match between the desired locale and the supported locales.
|
||||
* If the result's desired locale is not nullptr, then it is the address of the input locale.
|
||||
* It has not been cloned.
|
||||
*
|
||||
* @param desiredLocale Typically a user's language.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching pair of the desired and a supported locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Returns the best match between the desired and supported locales.
|
||||
* If the result's desired locale is not nullptr, then it is a clone of
|
||||
* the best-matching desired locale. The Result object owns the clone.
|
||||
*
|
||||
* @param desiredLocales Typically a user's languages, in order of preference (descending).
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return the best-matching pair of a desired and a supported locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
* perfect match, and 0 means that they are completely different.
|
||||
*
|
||||
* <p>This is mostly an implementation detail, and the precise values may change over time.
|
||||
* The implementation may use either the maximized forms or the other ones, or both.
|
||||
* The implementation may or may not rely on the forms to be consistent with each other.
|
||||
*
|
||||
* <p>Callers should construct and use a matcher rather than match pairs of locales directly.
|
||||
*
|
||||
* @param desired Desired locale.
|
||||
* @param supported Supported locale.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return value between 0 and 1, inclusive.
|
||||
* @internal (has a known user)
|
||||
*/
|
||||
double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
private:
|
||||
LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
|
||||
LocaleMatcher(const LocaleMatcher &other) = delete;
|
||||
LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
|
||||
|
||||
int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
|
||||
|
||||
const XLikelySubtags &likelySubtags;
|
||||
const LocaleDistance &localeDistance;
|
||||
int32_t thresholdDistance;
|
||||
int32_t demotionPerDesiredLocale;
|
||||
ULocMatchFavorSubtag favorSubtag;
|
||||
|
||||
// These are in input order.
|
||||
const Locale ** supportedLocales;
|
||||
LSR *lsrs;
|
||||
int32_t supportedLocalesLength;
|
||||
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
|
||||
UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
|
||||
// Array versions of the supportedLsrToIndex keys and values.
|
||||
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
|
||||
const LSR **supportedLSRs;
|
||||
int32_t *supportedIndexes;
|
||||
int32_t supportedLSRsLength;
|
||||
Locale *ownedDefaultLocale;
|
||||
const Locale *defaultLocale;
|
||||
int32_t defaultLocaleIndex;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
#endif // __LOCALEMATCHER_H__
|
|
@ -1011,6 +1011,104 @@ public:
|
|||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* A Locale iterator interface similar to a Java Iterator<Locale>.
* Both hasNext() and next() are pure virtual, so this class only defines the
* iteration protocol; concrete subclasses supply the actual sequence.
* Typical use: while (iter.hasNext()) { const Locale &loc = iter.next(); ... }
|
||||
* @draft ICU 65
|
||||
*/
|
||||
class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ {
|
||||
public:
|
||||
/**
 * Virtual destructor, so that a subclass object can be destroyed
 * through a pointer or reference to Iterator.
 * @draft ICU 65
 */
|
||||
virtual ~Iterator();
|
||||
|
||||
/**
|
||||
* Tests whether another locale is available. Const: does not advance the iterator.
* @return TRUE if next() can be called again.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
virtual UBool hasNext() const = 0;
|
||||
|
||||
/**
|
||||
* Non-const because implementations advance their position.
* Returns a reference, not a copy.
* NOTE(review): the lifetime of the referenced Locale is decided by the
* implementation -- confirm before keeping the reference past the next call.
* @return the next locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
virtual const Locale &next() = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* A generic Locale iterator implementation over Locale input iterators.
* Stores its own copies of the begin and end iterators and walks from
* begin towards end, one element per next() call.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
template<typename Iter>
|
||||
class RangeIterator : public Iterator, public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an iterator from a begin/end range.
|
||||
* Each of the iterator parameter values must be an
|
||||
* input iterator whose value is convertible to const Locale &.
* Both iterators are copied into this object.
|
||||
*
|
||||
* @param begin Start of range.
|
||||
* @param end Exclusive end of range.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {}
|
||||
|
||||
/**
|
||||
* Compares the current position with the end of the range using operator!=.
* @return TRUE if next() can be called again.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
UBool hasNext() const override { return it_ != end_; }
|
||||
|
||||
/**
|
||||
* Dereferences the current position and then advances it (post-increment).
* There is no bounds check here: call only after hasNext() returned TRUE.
* @return the next locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
const Locale &next() override { return *it_++; }
|
||||
|
||||
private:
|
||||
// Current position; advanced by next().
Iter it_;
|
||||
// Exclusive end of the range; never modified after construction.
const Iter end_;
|
||||
};
|
||||
|
||||
/**
|
||||
* A generic Locale iterator implementation over Locale input iterators.
|
||||
* Calls the converter to convert each *begin to a const Locale &.
* Stores its own copies of the begin and end iterators and of the converter.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
template<typename Iter, typename Conv>
|
||||
class ConvertingIterator : public Iterator, public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an iterator from a begin/end range.
|
||||
* Each of the iterator parameter values must be an
|
||||
* input iterator whose value the converter converts to const Locale &.
* The iterators and the converter are copied into this object.
|
||||
*
|
||||
* @param begin Start of range.
|
||||
* @param end Exclusive end of range.
|
||||
* @param converter Converter from *begin to const Locale & or compatible.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
ConvertingIterator(Iter begin, Iter end, Conv converter) :
|
||||
it_(begin), end_(end), converter_(converter) {}
|
||||
|
||||
/**
|
||||
* Compares the current position with the end of the range using operator!=.
* @return TRUE if next() can be called again.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
UBool hasNext() const override { return it_ != end_; }
|
||||
|
||||
/**
|
||||
* Passes the current element to the converter and then advances the
* position (post-increment). There is no bounds check here: call only
* after hasNext() returned TRUE.
* NOTE(review): the result is returned as const Locale &; if a converter
* returned a Locale by value, the reference would refer to a temporary --
* confirm that converters return references to longer-lived objects.
* @return the next locale.
|
||||
* @draft ICU 65
|
||||
*/
|
||||
const Locale &next() override { return converter_(*it_++); }
|
||||
|
||||
private:
|
||||
// Current position; advanced by next().
Iter it_;
|
||||
// Exclusive end of the range; never modified after construction.
const Iter end_;
|
||||
// Copy of the caller's converter, applied to each element in next().
Conv converter_;
|
||||
};
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
protected: /* only protected for testing purposes. DO NOT USE. */
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "umutex.h"
|
||||
#include "putilimp.h"
|
||||
#include "uassert.h"
|
||||
#include "uresdata.h"
|
||||
|
||||
using namespace icu;
|
||||
|
||||
|
@ -1952,7 +1953,7 @@ void getAllItemsWithFallback(
|
|||
// When the sink sees the no-fallback/no-inheritance marker,
|
||||
// then it would remove the parent's item.
|
||||
// We would deserialize parent values even though they are overridden in a child bundle.
|
||||
value.pResData = &bundle->fResData;
|
||||
value.setData(&bundle->fResData);
|
||||
UResourceDataEntry *parentEntry = bundle->fData->fParent;
|
||||
UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus);
|
||||
value.setResource(bundle->fRes, ResourceTracer(bundle));
|
||||
|
@ -2000,31 +2001,60 @@ void getAllItemsWithFallback(
|
|||
|
||||
} // namespace
|
||||
|
||||
// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue.
|
||||
// Unfortunately, the caller must know which subclass to make and pass in.
|
||||
// Alternatively, we could make it as polymorphic as in Java by
|
||||
// returning a ResourceValue pointer (possibly wrapped into a LocalPointer)
|
||||
// that the caller then owns.
|
||||
//
|
||||
// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer
|
||||
// can point to a non-local bundle.
|
||||
// Without tracing, the child bundle could be a function-local object.
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
|
||||
icu::ResourceSink &sink, UErrorCode &errorCode) {
|
||||
ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
|
||||
UResourceBundle *tempFillIn,
|
||||
ResourceDataValue &value, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (path == NULL) {
|
||||
if (path == nullptr) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
UResourceBundle stackBundle;
|
||||
ures_initStackObject(&stackBundle);
|
||||
const UResourceBundle *rb;
|
||||
if (*path == 0) {
|
||||
// empty path
|
||||
rb = bundle;
|
||||
} else {
|
||||
rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode);
|
||||
rb = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
value.setData(&rb->fResData);
|
||||
value.setResource(rb->fRes, ResourceTracer(rb));
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
|
||||
icu::ResourceSink &sink, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (path == nullptr) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
StackUResourceBundle stackBundle;
|
||||
const UResourceBundle *rb;
|
||||
if (*path == 0) {
|
||||
// empty path
|
||||
rb = bundle;
|
||||
} else {
|
||||
rb = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
ures_close(&stackBundle);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Get all table items with fallback.
|
||||
ResourceDataValue value;
|
||||
getAllItemsWithFallback(rb, value, sink, errorCode);
|
||||
ures_close(&stackBundle);
|
||||
}
|
||||
|
||||
U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) {
|
||||
|
|
|
@ -509,7 +509,7 @@ const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode
|
|||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
const UChar *s = res_getString(fTraceInfo, pResData, res, &length);
|
||||
const UChar *s = res_getString(fTraceInfo, &getData(), res, &length);
|
||||
if(s == NULL) {
|
||||
errorCode = U_RESOURCE_TYPE_MISMATCH;
|
||||
}
|
||||
|
@ -520,7 +520,7 @@ const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &erro
|
|||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
const UChar *s = res_getAlias(pResData, res, &length);
|
||||
const UChar *s = res_getAlias(&getData(), res, &length);
|
||||
if(s == NULL) {
|
||||
errorCode = U_RESOURCE_TYPE_MISMATCH;
|
||||
}
|
||||
|
@ -551,7 +551,7 @@ const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &erro
|
|||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
const int32_t *iv = res_getIntVector(fTraceInfo, pResData, res, &length);
|
||||
const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length);
|
||||
if(iv == NULL) {
|
||||
errorCode = U_RESOURCE_TYPE_MISMATCH;
|
||||
}
|
||||
|
@ -562,7 +562,7 @@ const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCo
|
|||
if(U_FAILURE(errorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
const uint8_t *b = res_getBinary(fTraceInfo, pResData, res, &length);
|
||||
const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length);
|
||||
if(b == NULL) {
|
||||
errorCode = U_RESOURCE_TYPE_MISMATCH;
|
||||
}
|
||||
|
@ -580,12 +580,12 @@ ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const {
|
|||
switch(RES_GET_TYPE(res)) {
|
||||
case URES_ARRAY:
|
||||
if (offset!=0) { // empty if offset==0
|
||||
items32 = (const Resource *)pResData->pRoot+offset;
|
||||
items32 = (const Resource *)getData().pRoot+offset;
|
||||
length = *items32++;
|
||||
}
|
||||
break;
|
||||
case URES_ARRAY16:
|
||||
items16 = pResData->p16BitUnits+offset;
|
||||
items16 = getData().p16BitUnits+offset;
|
||||
length = *items16++;
|
||||
break;
|
||||
default:
|
||||
|
@ -608,19 +608,19 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
|
|||
switch(RES_GET_TYPE(res)) {
|
||||
case URES_TABLE:
|
||||
if (offset != 0) { // empty if offset==0
|
||||
keys16 = (const uint16_t *)(pResData->pRoot+offset);
|
||||
keys16 = (const uint16_t *)(getData().pRoot+offset);
|
||||
length = *keys16++;
|
||||
items32 = (const Resource *)(keys16+length+(~length&1));
|
||||
}
|
||||
break;
|
||||
case URES_TABLE16:
|
||||
keys16 = pResData->p16BitUnits+offset;
|
||||
keys16 = getData().p16BitUnits+offset;
|
||||
length = *keys16++;
|
||||
items16 = keys16 + length;
|
||||
break;
|
||||
case URES_TABLE32:
|
||||
if (offset != 0) { // empty if offset==0
|
||||
keys32 = pResData->pRoot+offset;
|
||||
keys32 = getData().pRoot+offset;
|
||||
length = *keys32++;
|
||||
items32 = (const Resource *)keys32 + length;
|
||||
}
|
||||
|
@ -633,18 +633,18 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
|
|||
}
|
||||
|
||||
UBool ResourceDataValue::isNoInheritanceMarker() const {
|
||||
return ::isNoInheritanceMarker(pResData, res);
|
||||
return ::isNoInheritanceMarker(&getData(), res);
|
||||
}
|
||||
|
||||
int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity,
|
||||
UErrorCode &errorCode) const {
|
||||
return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
|
||||
return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
|
||||
}
|
||||
|
||||
int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
|
||||
UErrorCode &errorCode) const {
|
||||
if(URES_IS_ARRAY(res)) {
|
||||
return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
|
||||
return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
|
||||
}
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
|
@ -658,7 +658,7 @@ int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, in
|
|||
return 1;
|
||||
}
|
||||
int32_t sLength;
|
||||
const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
|
||||
const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
|
||||
if(s != NULL) {
|
||||
dest[0].setTo(TRUE, s, sLength);
|
||||
return 1;
|
||||
|
@ -673,7 +673,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode)
|
|||
return us;
|
||||
}
|
||||
int32_t sLength;
|
||||
const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
|
||||
const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
|
||||
if(s != NULL) {
|
||||
us.setTo(TRUE, s, sLength);
|
||||
return us;
|
||||
|
@ -684,7 +684,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode)
|
|||
}
|
||||
if(array.getSize() > 0) {
|
||||
// Tracing is already performed above (unimportant for trace that this is an array)
|
||||
s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, 0), &sLength);
|
||||
s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength);
|
||||
if(s != NULL) {
|
||||
us.setTo(TRUE, s, sLength);
|
||||
return us;
|
||||
|
@ -821,14 +821,14 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i,
|
|||
const char *&key, icu::ResourceValue &value) const {
|
||||
if(0 <= i && i < length) {
|
||||
icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
|
||||
if (keys16 != NULL) {
|
||||
key = RES_GET_KEY16(rdValue.pResData, keys16[i]);
|
||||
if (keys16 != nullptr) {
|
||||
key = RES_GET_KEY16(&rdValue.getData(), keys16[i]);
|
||||
} else {
|
||||
key = RES_GET_KEY32(rdValue.pResData, keys32[i]);
|
||||
key = RES_GET_KEY32(&rdValue.getData(), keys32[i]);
|
||||
}
|
||||
Resource res;
|
||||
if (items16 != NULL) {
|
||||
res = makeResourceFrom16(rdValue.pResData, items16[i]);
|
||||
if (items16 != nullptr) {
|
||||
res = makeResourceFrom16(&rdValue.getData(), items16[i]);
|
||||
} else {
|
||||
res = items32[i];
|
||||
}
|
||||
|
@ -842,6 +842,29 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i,
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/**
 * Looks up the item with the given key in this table.
 *
 * @param key   the table key to search for
 * @param value receives the found item; it is cast to ResourceDataValue,
 *              and its resource data is used for the lookup.
 *              Left unchanged when the key is not found.
 * @return TRUE if the key was found and value was set, otherwise FALSE
 */
UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const {
    icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
    const ResourceData *data = &rdValue.getData();
    const char *realKey = nullptr;

    // The table stores its keys either as 16-bit or as 32-bit offsets;
    // use the search function that matches the representation.
    const int32_t index = (keys16 != nullptr)
        ? _res_findTableItem(data, keys16, length, key, &realKey)
        : _res_findTable32Item(data, keys32, length, key, &realKey);
    if (index < 0) {
        // A negative index means there is no such key.
        return FALSE;
    }

    // Likewise the items are either 16-bit units that must be expanded
    // into a full Resource, or ready-made 32-bit Resource values.
    const Resource res = (items16 != nullptr)
        ? makeResourceFrom16(data, items16[index])
        : items32[index];

    // Same note about lifetime as in getKeyAndValue().
    rdValue.setResource(res, ResourceTracer(fTraceInfo, key));
    return TRUE;
}
|
||||
|
||||
U_CAPI Resource U_EXPORT2
|
||||
res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) {
|
||||
uint32_t offset=RES_GET_OFFSET(array);
|
||||
|
@ -887,7 +910,7 @@ UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const {
|
|||
// alive for the duration that fields are being read from it
|
||||
// (including nested fields).
|
||||
rdValue.setResource(
|
||||
internalGetResource(rdValue.pResData, i),
|
||||
internalGetResource(&rdValue.getData(), i),
|
||||
ResourceTracer(fTraceInfo, i));
|
||||
return TRUE;
|
||||
}
|
||||
|
|
|
@ -511,13 +511,12 @@ inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) {
|
|||
class ResourceDataValue : public ResourceValue {
|
||||
public:
|
||||
ResourceDataValue() :
|
||||
pResData(NULL),
|
||||
res(static_cast<Resource>(URES_NONE)),
|
||||
fTraceInfo() {}
|
||||
virtual ~ResourceDataValue();
|
||||
|
||||
void setData(const ResourceData *data) {
|
||||
pResData = data;
|
||||
resData = *data;
|
||||
}
|
||||
|
||||
void setResource(Resource r, ResourceTracer&& traceInfo) {
|
||||
|
@ -525,6 +524,7 @@ public:
|
|||
fTraceInfo = traceInfo;
|
||||
}
|
||||
|
||||
const ResourceData &getData() const { return resData; }
|
||||
virtual UResType getType() const;
|
||||
virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
|
||||
virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
|
||||
|
@ -541,9 +541,10 @@ public:
|
|||
UErrorCode &errorCode) const;
|
||||
virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;
|
||||
|
||||
const ResourceData *pResData;
|
||||
|
||||
private:
|
||||
// TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
|
||||
// then remove this value field again and just store a pResData pointer.
|
||||
ResourceData resData;
|
||||
Resource res;
|
||||
ResourceTracer fTraceInfo;
|
||||
};
|
||||
|
|
|
@ -67,6 +67,9 @@ struct UResourceBundle {
|
|||
char *fVersion;
|
||||
UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
|
||||
char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
|
||||
// TODO(ICU-20769): Try to change the by-value fResData into a pointer,
|
||||
// with the struct in only one place for each bundle.
|
||||
// Also replace class ResourceDataValue.resData with a pResData pointer again.
|
||||
ResourceData fResData;
|
||||
char fResBuf[RES_BUFSIZE];
|
||||
int32_t fResPathLen;
|
||||
|
@ -281,6 +284,11 @@ ures_getStringByKeyWithFallback(const UResourceBundle *resB,
|
|||
|
||||
#ifdef __cplusplus
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
|
||||
UResourceBundle *tempFillIn,
|
||||
icu::ResourceDataValue &value, UErrorCode &errorCode);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
|
||||
icu::ResourceSink &sink, UErrorCode &errorCode);
|
||||
|
|
|
@ -27,11 +27,17 @@ void addCStringTest(TestNode** root);
|
|||
|
||||
static void TestInvariant(void);
|
||||
static void TestCompareInvEbcdicAsAscii(void);
|
||||
static void TestLocaleAtSign(void);
|
||||
static void TestNoInvariantAtSign(void);
|
||||
static void TestInvCharToAscii(void);
|
||||
|
||||
void addCStringTest(TestNode** root) {
|
||||
addTest(root, &TestAPI, "tsutil/cstrtest/TestAPI");
|
||||
addTest(root, &TestInvariant, "tsutil/cstrtest/TestInvariant");
|
||||
addTest(root, &TestAPI, "tsutil/cstrtest/TestAPI");
|
||||
addTest(root, &TestInvariant, "tsutil/cstrtest/TestInvariant");
|
||||
addTest(root, &TestCompareInvEbcdicAsAscii, "tsutil/cstrtest/TestCompareInvEbcdicAsAscii");
|
||||
addTest(root, &TestLocaleAtSign, "tsutil/cstrtest/TestLocaleAtSign");
|
||||
addTest(root, &TestNoInvariantAtSign, "tsutil/cstrtest/TestNoInvariantAtSign");
|
||||
addTest(root, &TestInvCharToAscii, "tsutil/cstrtest/TestInvCharToAscii");
|
||||
}
|
||||
|
||||
static void TestAPI(void)
|
||||
|
@ -339,3 +345,53 @@ TestCompareInvEbcdicAsAscii() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// See U_CHARSET_FAMILY in unicode/platform.h.
|
||||
static const char *nativeInvChars =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789 \"%&'()*+,-./:;<=>?_";
|
||||
static const UChar *asciiInvChars =
|
||||
u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
u"abcdefghijklmnopqrstuvwxyz"
|
||||
u"0123456789 \"%&'()*+,-./:;<=>?_";
|
||||
|
||||
static void
|
||||
TestLocaleAtSign() {
|
||||
static const char *invLocale = "de-Latn_DE@PHONEBOOK";
|
||||
for (int32_t i = 0;; ++i) {
|
||||
char ic = invLocale[i];
|
||||
if (ic == 0) { break; }
|
||||
UBool expected = i == 10;
|
||||
UBool actual = uprv_isAtSign(ic);
|
||||
if (actual != expected) {
|
||||
log_err("uprv_isAtSign('%c')=%d is wrong\n", ic, (int)actual);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The at sign is not an invariant character.
|
||||
static void
|
||||
TestNoInvariantAtSign() {
|
||||
for (int32_t i = 0;; ++i) {
|
||||
char ic = nativeInvChars[i];
|
||||
UBool actual = uprv_isAtSign(ic);
|
||||
if (actual) {
|
||||
log_err("uprv_isAtSign(invariant '%c')=TRUE is wrong\n", ic);
|
||||
}
|
||||
if (ic == 0) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
TestInvCharToAscii() {
|
||||
for (int32_t i = 0;; ++i) {
|
||||
char ic = nativeInvChars[i];
|
||||
uint8_t ac = asciiInvChars[i];
|
||||
uint8_t actual = uprv_invCharToAscii(ic);
|
||||
if (actual != ac) {
|
||||
log_err("uprv_invCharToAscii('%c') did not convert to ASCII 0x%02x\n", ic, (int)ac);
|
||||
}
|
||||
if (ic == 0) { break; }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -187,7 +187,8 @@ static void U_CALLCONV testTraceData(const void *context, int32_t fnNumber, int3
|
|||
/* printf(" %s() %s\n", fnName, buf); */
|
||||
}
|
||||
|
||||
static UConverter * psuedo_ucnv_open(const char *name, UErrorCode * err)
|
||||
#if !ENABLE_TRACING_ORIG_VAL
|
||||
static UConverter * pseudo_ucnv_open(const char *name, UErrorCode * err)
|
||||
{
|
||||
UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
|
||||
|
||||
|
@ -196,13 +197,13 @@ static UConverter * psuedo_ucnv_open(const char *name, UErrorCode * err)
|
|||
UTRACE_EXIT_PTR_STATUS(NULL, *err);
|
||||
return NULL;
|
||||
}
|
||||
static void psuedo_ucnv_close(UConverter * cnv)
|
||||
static void pseudo_ucnv_close(UConverter * cnv)
|
||||
{
|
||||
UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
|
||||
UTRACE_DATA1(UTRACE_OPEN_CLOSE, "unload converter %p", cnv);
|
||||
UTRACE_EXIT_VALUE((int32_t)TRUE);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TestTraceAPI
|
||||
|
@ -282,9 +283,9 @@ static void TestTraceAPI() {
|
|||
TEST_ASSERT(U_SUCCESS(status));
|
||||
ucnv_close(cnv);
|
||||
#else
|
||||
cnv = psuedo_ucnv_open(NULL, &status);
|
||||
cnv = pseudo_ucnv_open(NULL, &status);
|
||||
TEST_ASSERT(U_SUCCESS(status));
|
||||
psuedo_ucnv_close(cnv);
|
||||
pseudo_ucnv_close(cnv);
|
||||
#endif
|
||||
TEST_ASSERT(gTraceEntryCount > 0);
|
||||
TEST_ASSERT(gTraceExitCount > 0);
|
||||
|
|
|
@ -184,7 +184,7 @@ library: common
|
|||
uinit utypes errorcode
|
||||
icuplug
|
||||
platform
|
||||
localebuilder
|
||||
localebuilder localematcher
|
||||
|
||||
group: pluralmap
|
||||
# TODO: Move to i18n library, ticket #11926.
|
||||
|
@ -631,7 +631,7 @@ group: resourcebundle
|
|||
# We can probably only disentangle basic locale ID handling from resource bundle code
|
||||
# by hardcoding all of the locale ID data.
|
||||
locid.o locmap.o wintz.o
|
||||
# Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
|
||||
# Do we need class LocaleBased? https://unicode-org.atlassian.net/browse/ICU-8608
|
||||
locbased.o
|
||||
loclikely.o
|
||||
deps
|
||||
|
@ -646,6 +646,31 @@ group: localebuilder
|
|||
deps
|
||||
resourcebundle
|
||||
|
||||
group: localematcher
|
||||
localematcher.o
|
||||
deps
|
||||
localebuilder localeprioritylist loclikelysubtags locdistance lsr
|
||||
|
||||
group: localeprioritylist
|
||||
localeprioritylist.o
|
||||
deps
|
||||
resourcebundle
|
||||
|
||||
group: locdistance
|
||||
locdistance.o
|
||||
deps
|
||||
loclikelysubtags
|
||||
|
||||
group: loclikelysubtags
|
||||
loclikelysubtags.o
|
||||
deps
|
||||
lsr resourcebundle
|
||||
|
||||
group: lsr
|
||||
lsr.o
|
||||
deps
|
||||
platform
|
||||
|
||||
group: udata
|
||||
udata.o ucmndata.o udatamem.o restrace.o
|
||||
umapfile.o
|
||||
|
|
|
@ -44,7 +44,8 @@ caltztst.o canittst.o citrtest.o colldata.o convtest.o currcoll.o collationtest.
|
|||
fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \
|
||||
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
|
||||
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
|
||||
loctest.o localebuildertest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
|
||||
loctest.o localebuildertest.o localematchertest.o \
|
||||
miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
|
||||
numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
|
||||
sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
|
||||
tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \
|
||||
|
|
|
@ -366,6 +366,7 @@
|
|||
<ClCompile Include="listformattertest.cpp" />
|
||||
<ClCompile Include="formattedvaluetest.cpp" />
|
||||
<ClCompile Include="localebuildertest.cpp" />
|
||||
<ClCompile Include="localematchertest.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="colldata.h" />
|
||||
|
|
|
@ -547,6 +547,9 @@
|
|||
<ClCompile Include="localebuildertest.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="localematchertest.cpp">
|
||||
<Filter>locales & resources</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="itrbbi.h">
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "usettest.h"
|
||||
|
||||
extern IntlTest *createBytesTrieTest();
|
||||
extern IntlTest *createLocaleMatcherTest();
|
||||
static IntlTest *createLocalPointerTest();
|
||||
extern IntlTest *createUCharsTrieTest();
|
||||
static IntlTest *createEnumSetTest();
|
||||
|
@ -46,113 +47,40 @@ extern IntlTest *createPluralMapTest();
|
|||
extern IntlTest *createStaticUnicodeSetsTest();
|
||||
#endif
|
||||
|
||||
|
||||
#define CASE(id, test) case id: \
|
||||
name = #test; \
|
||||
if (exec) { \
|
||||
logln(#test "---"); logln(); \
|
||||
test t; \
|
||||
callTest(t, par); \
|
||||
} \
|
||||
break
|
||||
|
||||
void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
|
||||
{
|
||||
if (exec) logln("TestSuite Utilities: ");
|
||||
switch (index) {
|
||||
CASE(0, MultithreadTest);
|
||||
CASE(1, StringTest);
|
||||
CASE(2, UnicodeStringTest);
|
||||
CASE(3, LocaleTest);
|
||||
CASE(4, CharIterTest);
|
||||
CASE(5, UObjectTest);
|
||||
CASE(6, UnicodeTest);
|
||||
CASE(7, ResourceBundleTest);
|
||||
CASE(8, NewResourceBundleTest);
|
||||
CASE(9, PUtilTest);
|
||||
CASE(10, UVector32Test);
|
||||
CASE(11, UVectorTest);
|
||||
CASE(12, UTextTest);
|
||||
CASE(13, LocaleAliasTest);
|
||||
CASE(14, UnicodeSetTest);
|
||||
CASE(15, ErrorCodeTest);
|
||||
case 16:
|
||||
name = "LocalPointerTest";
|
||||
if (exec) {
|
||||
logln("TestSuite LocalPointerTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createLocalPointerTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 17:
|
||||
name = "BytesTrieTest";
|
||||
if (exec) {
|
||||
logln("TestSuite BytesTrieTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createBytesTrieTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 18:
|
||||
name = "UCharsTrieTest";
|
||||
if (exec) {
|
||||
logln("TestSuite UCharsTrieTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createUCharsTrieTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 19:
|
||||
name = "EnumSetTest";
|
||||
if (exec) {
|
||||
logln("TestSuite EnumSetTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createEnumSetTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 20:
|
||||
name = "SimpleFormatterTest";
|
||||
if (exec) {
|
||||
logln("TestSuite SimpleFormatterTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createSimpleFormatterTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 21:
|
||||
name = "UnifiedCacheTest";
|
||||
if (exec) {
|
||||
logln("TestSuite UnifiedCacheTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createUnifiedCacheTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 22:
|
||||
name = "QuantityFormatterTest";
|
||||
if (exec) {
|
||||
logln("TestSuite QuantityFormatterTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createQuantityFormatterTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 23:
|
||||
name = "PluralMapTest";
|
||||
if (exec) {
|
||||
logln("TestSuite PluralMapTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createPluralMapTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 24:
|
||||
name = "StaticUnicodeSetsTest";
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO_CLASS(MultithreadTest);
|
||||
TESTCASE_AUTO_CLASS(StringTest);
|
||||
TESTCASE_AUTO_CLASS(UnicodeStringTest);
|
||||
TESTCASE_AUTO_CLASS(LocaleTest);
|
||||
TESTCASE_AUTO_CLASS(CharIterTest);
|
||||
TESTCASE_AUTO_CLASS(UObjectTest);
|
||||
TESTCASE_AUTO_CLASS(UnicodeTest);
|
||||
TESTCASE_AUTO_CLASS(ResourceBundleTest);
|
||||
TESTCASE_AUTO_CLASS(NewResourceBundleTest);
|
||||
TESTCASE_AUTO_CLASS(PUtilTest);
|
||||
TESTCASE_AUTO_CLASS(UVector32Test);
|
||||
TESTCASE_AUTO_CLASS(UVectorTest);
|
||||
TESTCASE_AUTO_CLASS(UTextTest);
|
||||
TESTCASE_AUTO_CLASS(LocaleAliasTest);
|
||||
TESTCASE_AUTO_CLASS(UnicodeSetTest);
|
||||
TESTCASE_AUTO_CLASS(ErrorCodeTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(LocalPointerTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(BytesTrieTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(UCharsTrieTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(EnumSetTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(SimpleFormatterTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(UnifiedCacheTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(QuantityFormatterTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(PluralMapTest);
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
if (exec) {
|
||||
logln("TestSuite StaticUnicodeSetsTest---"); logln();
|
||||
LocalPointer<IntlTest> test(createStaticUnicodeSetsTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
TESTCASE_AUTO_CREATE_CLASS(StaticUnicodeSetsTest);
|
||||
#endif
|
||||
break;
|
||||
CASE(25, LocaleBuilderTest);
|
||||
default: name = ""; break; //needed to end loop
|
||||
}
|
||||
TESTCASE_AUTO_CLASS(LocaleBuilderTest);
|
||||
TESTCASE_AUTO_CREATE_CLASS(LocaleMatcherTest);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void ErrorCodeTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {
|
||||
|
|
589
icu4c/source/test/intltest/localematchertest.cpp
Normal file
589
icu4c/source/test/intltest/localematchertest.cpp
Normal file
|
@ -0,0 +1,589 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
||||
// localematchertest.cpp
|
||||
// created: 2019jul04 Markus W. Scherer
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localematcher.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "intltest.h"
|
||||
#include "localeprioritylist.h"
|
||||
#include "ucbuf.h"
|
||||
|
||||
#define ARRAY_RANGE(array) (array), ((array) + UPRV_LENGTHOF(array))
|
||||
|
||||
namespace {
|
||||
|
||||
const char *locString(const Locale *loc) {
|
||||
return loc != nullptr ? loc->getName() : "(null)";
|
||||
}
|
||||
|
||||
struct TestCase {
|
||||
int32_t lineNr = 0;
|
||||
|
||||
CharString supported;
|
||||
CharString def;
|
||||
UnicodeString favor;
|
||||
UnicodeString threshold;
|
||||
CharString desired;
|
||||
CharString expMatch;
|
||||
CharString expDesired;
|
||||
CharString expCombined;
|
||||
|
||||
void reset() {
|
||||
supported.clear();
|
||||
def.clear();
|
||||
favor.remove();
|
||||
threshold.remove();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
class LocaleMatcherTest : public IntlTest {
|
||||
public:
|
||||
LocaleMatcherTest() {}
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
||||
|
||||
void testEmpty();
|
||||
void testCopyErrorTo();
|
||||
void testBasics();
|
||||
void testSupportedDefault();
|
||||
void testUnsupportedDefault();
|
||||
void testDemotion();
|
||||
void testMatch();
|
||||
void testResolvedLocale();
|
||||
void testDataDriven();
|
||||
|
||||
private:
|
||||
UBool dataDriven(const TestCase &test, IcuTestErrorCode &errorCode);
|
||||
};
|
||||
|
||||
// Factory for this suite: allocates a LocaleMatcherTest with `new` and hands
// it back through the IntlTest base-class pointer.
extern IntlTest *createLocaleMatcherTest() {
    IntlTest *suite = new LocaleMatcherTest();
    return suite;
}
|
||||
|
||||
void LocaleMatcherTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
if(exec) {
|
||||
logln("TestSuite LocaleMatcherTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(testEmpty);
|
||||
TESTCASE_AUTO(testCopyErrorTo);
|
||||
TESTCASE_AUTO(testBasics);
|
||||
TESTCASE_AUTO(testSupportedDefault);
|
||||
TESTCASE_AUTO(testUnsupportedDefault);
|
||||
TESTCASE_AUTO(testDemotion);
|
||||
TESTCASE_AUTO(testMatch);
|
||||
TESTCASE_AUTO(testResolvedLocale);
|
||||
TESTCASE_AUTO(testDataDriven);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testEmpty() {
|
||||
IcuTestErrorCode errorCode(*this, "testEmpty");
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch(Locale::getFrench(), errorCode);
|
||||
assertEquals("getBestMatch(fr)", "(null)", locString(best));
|
||||
LocaleMatcher::Result result = matcher.getBestMatchResult("fr", errorCode);
|
||||
assertEquals("getBestMatchResult(fr).des", "(null)", locString(result.getDesiredLocale()));
|
||||
assertEquals("getBestMatchResult(fr).desIndex", -1, result.getDesiredIndex());
|
||||
assertEquals("getBestMatchResult(fr).supp",
|
||||
"(null)", locString(result.getSupportedLocale()));
|
||||
assertEquals("getBestMatchResult(fr).suppIndex",
|
||||
-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testCopyErrorTo() {
|
||||
IcuTestErrorCode errorCode(*this, "testCopyErrorTo");
|
||||
// The builder does not set any errors except out-of-memory.
|
||||
// Test what we can.
|
||||
LocaleMatcher::Builder builder;
|
||||
UErrorCode success = U_ZERO_ERROR;
|
||||
assertFalse("no error", builder.copyErrorTo(success));
|
||||
assertTrue("still success", U_SUCCESS(success));
|
||||
UErrorCode failure = U_INVALID_FORMAT_ERROR;
|
||||
assertTrue("failure passed in", builder.copyErrorTo(failure));
|
||||
assertEquals("same failure", U_INVALID_FORMAT_ERROR, failure);
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testBasics() {
|
||||
IcuTestErrorCode errorCode(*this, "testBasics");
|
||||
Locale locales[] = { "fr", "en_GB", "en" };
|
||||
{
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocales(ARRAY_RANGE(locales)).build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(ja_JP)", "fr", locString(best));
|
||||
}
|
||||
// Code coverage: Variations of setting supported locales.
|
||||
{
|
||||
std::vector<Locale> locales{ "fr", "en_GB", "en" };
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocales(locales.begin(), locales.end()).build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("fromRange.getBestMatch(ja_JP)", "fr", locString(best));
|
||||
}
|
||||
{
|
||||
Locale::RangeIterator<Locale *> iter(ARRAY_RANGE(locales));
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocales(iter).build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("fromIter.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("fromIter.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("fromIter.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("fromIter.getBestMatch(ja_JP)", "fr", locString(best));
|
||||
}
|
||||
{
|
||||
Locale *pointers[] = { locales, locales + 1, locales + 2 };
|
||||
// Lambda with explicit reference return type to prevent copy-constructing a temporary
|
||||
// which would be destructed right away.
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocalesViaConverter(
|
||||
ARRAY_RANGE(pointers), [](const Locale *p) -> const Locale & { return *p; }).
|
||||
build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("viaConverter.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("viaConverter.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("viaConverter.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("viaConverter.getBestMatch(ja_JP)", "fr", locString(best));
|
||||
}
|
||||
{
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
addSupportedLocale(locales[0]).
|
||||
addSupportedLocale(locales[1]).
|
||||
addSupportedLocale(locales[2]).
|
||||
build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("added.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("added.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("added.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("added.getBestMatch(ja_JP)", "fr", locString(best));
|
||||
}
|
||||
{
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocalesFromListString(
|
||||
" el, fr;q=0.555555, en-GB ; q = 0.88 , el; q =0, en;q=0.88 , fr ").
|
||||
build(errorCode);
|
||||
const Locale *best = matcher.getBestMatchForListString("el, fr, fr;q=0, en-GB", errorCode);
|
||||
assertEquals("fromList.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("fromList.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("fromList.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("fromList.getBestMatch(ja_JP)", "fr", locString(best));
|
||||
}
|
||||
// more API coverage
|
||||
{
|
||||
LocalePriorityList list("fr, en-GB", errorCode);
|
||||
LocalePriorityList::Iterator iter(list.iterator());
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocales(iter).
|
||||
addSupportedLocale(Locale::getEnglish()).
|
||||
setDefaultLocale(&Locale::getGerman()).
|
||||
build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("withDefault.getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("withDefault.getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("withDefault.getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("withDefault.getBestMatch(ja_JP)", "de", locString(best));
|
||||
|
||||
Locale desired("en_GB"); // distinct object from Locale.UK
|
||||
LocaleMatcher::Result result = matcher.getBestMatchResult(desired, errorCode);
|
||||
assertTrue("withDefault: exactly desired en-GB object",
|
||||
&desired == result.getDesiredLocale());
|
||||
assertEquals("withDefault: en-GB desired index", 0, result.getDesiredIndex());
|
||||
assertEquals("withDefault: en-GB supported",
|
||||
"en_GB", locString(result.getSupportedLocale()));
|
||||
assertEquals("withDefault: en-GB supported index", 1, result.getSupportedIndex());
|
||||
|
||||
LocalePriorityList list2("ja-JP, en-US", errorCode);
|
||||
LocalePriorityList::Iterator iter2(list2.iterator());
|
||||
result = matcher.getBestMatchResult(iter2, errorCode);
|
||||
assertEquals("withDefault: ja-JP, en-US desired index", 1, result.getDesiredIndex());
|
||||
assertEquals("withDefault: ja-JP, en-US desired",
|
||||
"en_US", locString(result.getDesiredLocale()));
|
||||
|
||||
desired = Locale("en", "US"); // distinct object from Locale.US
|
||||
result = matcher.getBestMatchResult(desired, errorCode);
|
||||
assertTrue("withDefault: exactly desired en-US object",
|
||||
&desired == result.getDesiredLocale());
|
||||
assertEquals("withDefault: en-US desired index", 0, result.getDesiredIndex());
|
||||
assertEquals("withDefault: en-US supported", "en", locString(result.getSupportedLocale()));
|
||||
assertEquals("withDefault: en-US supported index", 2, result.getSupportedIndex());
|
||||
|
||||
result = matcher.getBestMatchResult("ja_JP", errorCode);
|
||||
assertEquals("withDefault: ja-JP desired", "(null)", locString(result.getDesiredLocale()));
|
||||
assertEquals("withDefault: ja-JP desired index", -1, result.getDesiredIndex());
|
||||
assertEquals("withDefault: ja-JP supported", "de", locString(result.getSupportedLocale()));
|
||||
assertEquals("withDefault: ja-JP supported index", -1, result.getSupportedIndex());
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testSupportedDefault() {
|
||||
// The default locale is one of the supported locales.
|
||||
IcuTestErrorCode errorCode(*this, "testSupportedDefault");
|
||||
Locale locales[] = { "fr", "en_GB", "en" };
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocales(ARRAY_RANGE(locales)).
|
||||
setDefaultLocale(&locales[1]).
|
||||
build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("getBestMatch(ja_JP)", "en_GB", locString(best));
|
||||
LocaleMatcher::Result result = matcher.getBestMatchResult("ja_JP", errorCode);
|
||||
assertEquals("getBestMatchResult(ja_JP).supp",
|
||||
"en_GB", locString(result.getSupportedLocale()));
|
||||
assertEquals("getBestMatchResult(ja_JP).suppIndex",
|
||||
1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testUnsupportedDefault() {
|
||||
// The default locale does not match any of the supported locales.
|
||||
IcuTestErrorCode errorCode(*this, "testUnsupportedDefault");
|
||||
Locale locales[] = { "fr", "en_GB", "en" };
|
||||
Locale def("de");
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
setSupportedLocales(ARRAY_RANGE(locales)).
|
||||
setDefaultLocale(&def).
|
||||
build(errorCode);
|
||||
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
|
||||
assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US", errorCode);
|
||||
assertEquals("getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR", errorCode);
|
||||
assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP", errorCode);
|
||||
assertEquals("getBestMatch(ja_JP)", "de", locString(best));
|
||||
LocaleMatcher::Result result = matcher.getBestMatchResult("ja_JP", errorCode);
|
||||
assertEquals("getBestMatchResult(ja_JP).supp",
|
||||
"de", locString(result.getSupportedLocale()));
|
||||
assertEquals("getBestMatchResult(ja_JP).suppIndex",
|
||||
-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testDemotion() {
|
||||
IcuTestErrorCode errorCode(*this, "testDemotion");
|
||||
Locale supported[] = { "fr", "de-CH", "it" };
|
||||
Locale desired[] = { "fr-CH", "de-CH", "it" };
|
||||
{
|
||||
LocaleMatcher noDemotion = LocaleMatcher::Builder().
|
||||
setSupportedLocales(ARRAY_RANGE(supported)).
|
||||
setDemotionPerDesiredLocale(ULOCMATCH_DEMOTION_NONE).build(errorCode);
|
||||
Locale::RangeIterator<Locale *> desiredIter(ARRAY_RANGE(desired));
|
||||
assertEquals("no demotion",
|
||||
"de_CH", locString(noDemotion.getBestMatch(desiredIter, errorCode)));
|
||||
}
|
||||
|
||||
{
|
||||
LocaleMatcher regionDemotion = LocaleMatcher::Builder().
|
||||
setSupportedLocales(ARRAY_RANGE(supported)).
|
||||
setDemotionPerDesiredLocale(ULOCMATCH_DEMOTION_REGION).build(errorCode);
|
||||
Locale::RangeIterator<Locale *> desiredIter(ARRAY_RANGE(desired));
|
||||
assertEquals("region demotion",
|
||||
"fr", locString(regionDemotion.getBestMatch(desiredIter, errorCode)));
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testMatch() {
|
||||
IcuTestErrorCode errorCode(*this, "testMatch");
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
|
||||
|
||||
// Java test function testMatch_exact()
|
||||
Locale en_CA("en_CA");
|
||||
assertEquals("exact match", 1.0, matcher.internalMatch(en_CA, en_CA, errorCode));
|
||||
|
||||
// testMatch_none
|
||||
Locale ar_MK("ar_MK");
|
||||
double match = matcher.internalMatch(ar_MK, en_CA, errorCode);
|
||||
assertTrue("mismatch: 0<=match<0.2", 0 <= match && match < 0.2);
|
||||
|
||||
// testMatch_matchOnMaximized
|
||||
Locale und_TW("und_TW");
|
||||
Locale zh("zh");
|
||||
Locale zh_Hant("zh_Hant");
|
||||
double matchZh = matcher.internalMatch(und_TW, zh, errorCode);
|
||||
double matchZhHant = matcher.internalMatch(und_TW, zh_Hant, errorCode);
|
||||
assertTrue("und_TW should be closer to zh_Hant than to zh",
|
||||
matchZh < matchZhHant);
|
||||
Locale en_Hant_TW("en_Hant_TW");
|
||||
double matchEnHantTw = matcher.internalMatch(en_Hant_TW, zh_Hant, errorCode);
|
||||
assertTrue("zh_Hant should be closer to und_TW than to en_Hant_TW",
|
||||
matchEnHantTw < matchZhHant);
|
||||
assertTrue("zh should be closer to und_TW than to en_Hant_TW",
|
||||
matchEnHantTw < matchZh);
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testResolvedLocale() {
|
||||
IcuTestErrorCode errorCode(*this, "testResolvedLocale");
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().
|
||||
addSupportedLocale("ar-EG").
|
||||
build(errorCode);
|
||||
Locale desired("ar-SA-u-nu-latn");
|
||||
LocaleMatcher::Result result = matcher.getBestMatchResult(desired, errorCode);
|
||||
assertEquals("best", "ar_EG", locString(result.getSupportedLocale()));
|
||||
Locale resolved = result.makeResolvedLocale(errorCode);
|
||||
assertEquals("ar-EG + ar-SA-u-nu-latn = ar-SA-u-nu-latn",
|
||||
"ar-SA-u-nu-latn",
|
||||
resolved.toLanguageTag<std::string>(errorCode).data());
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
bool toInvariant(const UnicodeString &s, CharString &inv, ErrorCode &errorCode) {
|
||||
if (errorCode.isSuccess()) {
|
||||
inv.clear().appendInvariantChars(s, errorCode);
|
||||
return errorCode.isSuccess();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool getSuffixAfterPrefix(const UnicodeString &s, int32_t limit,
|
||||
const UnicodeString &prefix, UnicodeString &suffix) {
|
||||
if (prefix.length() <= limit && s.startsWith(prefix)) {
|
||||
suffix.setTo(s, prefix.length(), limit - prefix.length());
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool getInvariantSuffixAfterPrefix(const UnicodeString &s, int32_t limit,
|
||||
const UnicodeString &prefix, CharString &suffix,
|
||||
ErrorCode &errorCode) {
|
||||
UnicodeString u_suffix;
|
||||
return getSuffixAfterPrefix(s, limit, prefix, u_suffix) &&
|
||||
toInvariant(u_suffix, suffix, errorCode);
|
||||
}
|
||||
|
||||
bool readTestCase(const UnicodeString &line, TestCase &test, IcuTestErrorCode &errorCode) {
|
||||
if (errorCode.isFailure()) { return false; }
|
||||
++test.lineNr;
|
||||
// Start of comment, or end of line, minus trailing spaces.
|
||||
int32_t limit = line.indexOf(u'#');
|
||||
if (limit < 0) {
|
||||
limit = line.length();
|
||||
// Remove trailing CR LF.
|
||||
char16_t c;
|
||||
while (limit > 0 && ((c = line.charAt(limit - 1)) == u'\n' || c == u'\r')) {
|
||||
--limit;
|
||||
}
|
||||
}
|
||||
// Remove spaces before comment or at the end of the line.
|
||||
char16_t c;
|
||||
while (limit > 0 && ((c = line.charAt(limit - 1)) == u' ' || c == u'\t')) {
|
||||
--limit;
|
||||
}
|
||||
if (limit == 0) { // empty line
|
||||
return false;
|
||||
}
|
||||
if (line.startsWith(u"** test: ")) {
|
||||
test.reset();
|
||||
} else if (getInvariantSuffixAfterPrefix(line, limit, u"@supported=",
|
||||
test.supported, errorCode)) {
|
||||
} else if (getInvariantSuffixAfterPrefix(line, limit, u"@default=",
|
||||
test.def, errorCode)) {
|
||||
} else if (getSuffixAfterPrefix(line, limit, u"@favor=", test.favor)) {
|
||||
} else if (getSuffixAfterPrefix(line, limit, u"@threshold=", test.threshold)) {
|
||||
} else {
|
||||
int32_t matchSep = line.indexOf(u">>");
|
||||
// >> before an inline comment, and followed by more than white space.
|
||||
if (0 <= matchSep && (matchSep + 2) < limit) {
|
||||
toInvariant(line.tempSubStringBetween(0, matchSep).trim(), test.desired, errorCode);
|
||||
test.expDesired.clear();
|
||||
test.expCombined.clear();
|
||||
int32_t start = matchSep + 2;
|
||||
int32_t expLimit = line.indexOf(u'|', start);
|
||||
if (expLimit < 0) {
|
||||
toInvariant(line.tempSubStringBetween(start, limit).trim(),
|
||||
test.expMatch, errorCode);
|
||||
} else {
|
||||
toInvariant(line.tempSubStringBetween(start, expLimit).trim(),
|
||||
test.expMatch, errorCode);
|
||||
start = expLimit + 1;
|
||||
expLimit = line.indexOf(u'|', start);
|
||||
if (expLimit < 0) {
|
||||
toInvariant(line.tempSubStringBetween(start, limit).trim(),
|
||||
test.expDesired, errorCode);
|
||||
} else {
|
||||
toInvariant(line.tempSubStringBetween(start, expLimit).trim(),
|
||||
test.expDesired, errorCode);
|
||||
toInvariant(line.tempSubStringBetween(expLimit + 1, limit).trim(),
|
||||
test.expCombined, errorCode);
|
||||
}
|
||||
}
|
||||
return errorCode.isSuccess();
|
||||
} else {
|
||||
errorCode.set(U_INVALID_FORMAT_ERROR);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Turns a locale ID from the test data into a Locale pointer. The literal
// "null" gives nullptr; any other ID is assigned into the caller-provided
// locale object, whose address is returned.
Locale *getLocaleOrNull(const CharString &s, Locale &locale) {
    if (s == "null") {
        return nullptr;
    }
    locale = Locale(s.data());
    return &locale;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Runs one test case from the data file.
//
// Builds a matcher from the settings in test, then checks the best match.
// When only expMatch is given, the desired locales are tried as a list string,
// and additionally as a single Locale or as a Locale iterator. When expDesired
// and/or expCombined are given, the full Result is inspected instead.
//
// Returns TRUE if all assertions passed or the case was skipped (non-default
// threshold); FALSE on an assertion failure, an unsupported @favor value, or a
// failure to build the matcher.
UBool LocaleMatcherTest::dataDriven(const TestCase &test, IcuTestErrorCode &errorCode) {
    LocaleMatcher::Builder builder;
    builder.setSupportedLocalesFromListString(test.supported.toStringPiece());
    if (!test.def.isEmpty()) {
        Locale defaultLocale(test.def.data());
        builder.setDefaultLocale(&defaultLocale);
    }
    if (!test.favor.isEmpty()) {
        if (test.favor == u"normal") {
            builder.setFavorSubtag(ULOCMATCH_FAVOR_LANGUAGE);
        } else if (test.favor == u"script") {
            builder.setFavorSubtag(ULOCMATCH_FAVOR_SCRIPT);
        } else {
            errln(UnicodeString(u"unsupported FavorSubtag value ") + test.favor);
            return FALSE;
        }
    }
    if (!test.threshold.isEmpty()) {
        // The threshold distance cannot be set through the API used here,
        // so such cases are reported and counted as passing.
        infoln("skipping test case on line %d with non-default threshold: not exposed via API",
               (int)test.lineNr);
        return TRUE;
    }
    LocaleMatcher matcher = builder.build(errorCode);
    if (errorCode.errIfFailureAndReset("LocaleMatcher::Builder::build()")) {
        return FALSE;
    }

    Locale expMatchStorage("");
    Locale *expMatch = getLocaleOrNull(test.expMatch, expMatchStorage);
    const char *expMatchName = locString(expMatch);

    if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
        // Only the best supported locale is specified.
        StringPiece desiredSP = test.desired.toStringPiece();
        const Locale *bestSupported = matcher.getBestMatchForListString(desiredSP, errorCode);
        if (!assertEquals("bestSupported from string",
                          expMatchName, locString(bestSupported))) {
            return FALSE;
        }
        LocalePriorityList desired(test.desired.toStringPiece(), errorCode);
        LocalePriorityList::Iterator desiredIter = desired.iterator();
        if (desired.getLength() != 1) {
            bestSupported = matcher.getBestMatch(desiredIter, errorCode);
            return assertEquals("bestSupported from Locale iterator",
                                expMatchName, locString(bestSupported));
        }
        // Exactly one desired locale: also exercise the single-Locale overloads.
        const Locale &desiredLocale = desiredIter.next();
        bestSupported = matcher.getBestMatch(desiredLocale, errorCode);
        UBool ok = assertEquals("bestSupported from Locale",
                                expMatchName, locString(bestSupported));

        LocaleMatcher::Result res = matcher.getBestMatchResult(desiredLocale, errorCode);
        ok &= assertEquals("result.getSupportedLocale from Locale",
                           expMatchName, locString(res.getSupportedLocale()));
        return ok;
    }

    // The expected desired and/or combined locale is specified as well.
    LocalePriorityList desired(test.desired.toStringPiece(), errorCode);
    LocalePriorityList::Iterator desiredIter = desired.iterator();
    LocaleMatcher::Result res = matcher.getBestMatchResult(desiredIter, errorCode);
    UBool ok = assertEquals("result.getSupportedLocale from Locales",
                            expMatchName, locString(res.getSupportedLocale()));
    if (!test.expDesired.isEmpty()) {
        Locale expDesiredStorage("");
        Locale *expDesired = getLocaleOrNull(test.expDesired, expDesiredStorage);
        ok &= assertEquals("result.getDesiredLocale from Locales",
                           locString(expDesired), locString(res.getDesiredLocale()));
    }
    if (!test.expCombined.isEmpty()) {
        if (test.expMatch.contains("-u-")) {
            // Known issue: the resolved-locale check is skipped when the
            // expected match carries a "-u-" extension.
            logKnownIssue("20727",
                          UnicodeString(u"ignoring makeResolvedLocale() line ") + test.lineNr);
            return ok;
        }
        Locale expCombinedStorage("");
        Locale *expCombined = getLocaleOrNull(test.expCombined, expCombinedStorage);
        Locale combined = res.makeResolvedLocale(errorCode);
        ok &= assertEquals("combined Locale from Locales",
                           locString(expCombined), locString(&combined));
    }
    return ok;
}
|
||||
|
||||
void LocaleMatcherTest::testDataDriven() {
|
||||
IcuTestErrorCode errorCode(*this, "testDataDriven");
|
||||
CharString path(getSourceTestData(errorCode), errorCode);
|
||||
path.appendPathPart("localeMatcherTest.txt", errorCode);
|
||||
const char *codePage = "UTF-8";
|
||||
LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, errorCode));
|
||||
if(errorCode.errIfFailureAndReset("ucbuf_open(localeMatcherTest.txt)")) {
|
||||
return;
|
||||
}
|
||||
int32_t lineLength;
|
||||
const UChar *p;
|
||||
UnicodeString line;
|
||||
TestCase test;
|
||||
int32_t numPassed = 0;
|
||||
while ((p = ucbuf_readline(f.getAlias(), &lineLength, errorCode)) != nullptr &&
|
||||
errorCode.isSuccess()) {
|
||||
line.setTo(FALSE, p, lineLength);
|
||||
if (!readTestCase(line, test, errorCode)) {
|
||||
if (errorCode.errIfFailureAndReset(
|
||||
"test data syntax error on line %d", (int)test.lineNr)) {
|
||||
infoln(line);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
UBool ok = dataDriven(test, errorCode);
|
||||
if (errorCode.errIfFailureAndReset("test error on line %d", (int)test.lineNr)) {
|
||||
infoln(line);
|
||||
} else if (!ok) {
|
||||
infoln("test failure on line %d", (int)test.lineNr);
|
||||
infoln(line);
|
||||
} else {
|
||||
++numPassed;
|
||||
}
|
||||
}
|
||||
infoln("number of passing test cases: %d", (int)numPassed);
|
||||
}
|
|
@ -6,6 +6,7 @@
|
|||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
|
@ -266,6 +267,10 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
|||
TESTCASE_AUTO(TestUndScript);
|
||||
TESTCASE_AUTO(TestUndRegion);
|
||||
TESTCASE_AUTO(TestUndCAPI);
|
||||
TESTCASE_AUTO(TestRangeIterator);
|
||||
TESTCASE_AUTO(TestPointerConvertingIterator);
|
||||
TESTCASE_AUTO(TestTagConvertingIterator);
|
||||
TESTCASE_AUTO(TestCapturingTagConvertingIterator);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -3832,3 +3837,118 @@ void LocaleTest::TestUndCAPI() {
|
|||
assertTrue("reslen >= 0", reslen >= 0);
|
||||
assertEquals("uloc_getLanguage()", empty, tmp);
|
||||
}
|
||||
|
||||
// Expands to TWO comma-separated arguments for a fixed-size array:
// the array itself (its begin pointer) and the array plus UPRV_LENGTHOF(array) (its end pointer).
// Intended for constructors/functions that take a (begin, end) iterator pair.
#define ARRAY_RANGE(array) (array), ((array) + UPRV_LENGTHOF(array))
|
||||
|
||||
void LocaleTest::TestRangeIterator() {
|
||||
IcuTestErrorCode status(*this, "TestRangeIterator");
|
||||
Locale locales[] = { "fr", "en_GB", "en" };
|
||||
Locale::RangeIterator<Locale *> iter(ARRAY_RANGE(locales));
|
||||
|
||||
assertTrue("0.hasNext()", iter.hasNext());
|
||||
const Locale &l0 = iter.next();
|
||||
assertEquals("0.next()", "fr", l0.getName());
|
||||
assertTrue("&0.next()", &l0 == &locales[0]);
|
||||
|
||||
assertTrue("1.hasNext()", iter.hasNext());
|
||||
const Locale &l1 = iter.next();
|
||||
assertEquals("1.next()", "en_GB", l1.getName());
|
||||
assertTrue("&1.next()", &l1 == &locales[1]);
|
||||
|
||||
assertTrue("2.hasNext()", iter.hasNext());
|
||||
const Locale &l2 = iter.next();
|
||||
assertEquals("2.next()", "en", l2.getName());
|
||||
assertTrue("&2.next()", &l2 == &locales[2]);
|
||||
|
||||
assertFalse("3.hasNext()", iter.hasNext());
|
||||
}
|
||||
|
||||
void LocaleTest::TestPointerConvertingIterator() {
|
||||
IcuTestErrorCode status(*this, "TestPointerConvertingIterator");
|
||||
Locale locales[] = { "fr", "en_GB", "en" };
|
||||
Locale *pointers[] = { locales, locales + 1, locales + 2 };
|
||||
// Lambda with explicit reference return type to prevent copy-constructing a temporary
|
||||
// which would be destructed right away.
|
||||
Locale::ConvertingIterator<Locale **, std::function<const Locale &(const Locale *)>> iter(
|
||||
ARRAY_RANGE(pointers), [](const Locale *p) -> const Locale & { return *p; });
|
||||
|
||||
assertTrue("0.hasNext()", iter.hasNext());
|
||||
const Locale &l0 = iter.next();
|
||||
assertEquals("0.next()", "fr", l0.getName());
|
||||
assertTrue("&0.next()", &l0 == pointers[0]);
|
||||
|
||||
assertTrue("1.hasNext()", iter.hasNext());
|
||||
const Locale &l1 = iter.next();
|
||||
assertEquals("1.next()", "en_GB", l1.getName());
|
||||
assertTrue("&1.next()", &l1 == pointers[1]);
|
||||
|
||||
assertTrue("2.hasNext()", iter.hasNext());
|
||||
const Locale &l2 = iter.next();
|
||||
assertEquals("2.next()", "en", l2.getName());
|
||||
assertTrue("&2.next()", &l2 == pointers[2]);
|
||||
|
||||
assertFalse("3.hasNext()", iter.hasNext());
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class LocaleFromTag {
|
||||
public:
|
||||
LocaleFromTag() : locale(Locale::getRoot()) {}
|
||||
const Locale &operator()(const char *tag) { return locale = Locale(tag); }
|
||||
|
||||
private:
|
||||
// Store the locale in the converter, rather than return a reference to a temporary,
|
||||
// or a value which could go out of scope with the caller's reference to it.
|
||||
Locale locale;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void LocaleTest::TestTagConvertingIterator() {
|
||||
IcuTestErrorCode status(*this, "TestTagConvertingIterator");
|
||||
const char *tags[] = { "fr", "en_GB", "en" };
|
||||
LocaleFromTag converter;
|
||||
Locale::ConvertingIterator<const char **, LocaleFromTag> iter(ARRAY_RANGE(tags), converter);
|
||||
|
||||
assertTrue("0.hasNext()", iter.hasNext());
|
||||
const Locale &l0 = iter.next();
|
||||
assertEquals("0.next()", "fr", l0.getName());
|
||||
|
||||
assertTrue("1.hasNext()", iter.hasNext());
|
||||
const Locale &l1 = iter.next();
|
||||
assertEquals("1.next()", "en_GB", l1.getName());
|
||||
|
||||
assertTrue("2.hasNext()", iter.hasNext());
|
||||
const Locale &l2 = iter.next();
|
||||
assertEquals("2.next()", "en", l2.getName());
|
||||
|
||||
assertFalse("3.hasNext()", iter.hasNext());
|
||||
}
|
||||
|
||||
void LocaleTest::TestCapturingTagConvertingIterator() {
|
||||
IcuTestErrorCode status(*this, "TestCapturingTagConvertingIterator");
|
||||
const char *tags[] = { "fr", "en_GB", "en" };
|
||||
// Store the converted locale in a locale variable,
|
||||
// rather than return a reference to a temporary,
|
||||
// or a value which could go out of scope with the caller's reference to it.
|
||||
Locale locale;
|
||||
// Lambda with explicit reference return type to prevent copy-constructing a temporary
|
||||
// which would be destructed right away.
|
||||
Locale::ConvertingIterator<const char **, std::function<const Locale &(const char *)>> iter(
|
||||
ARRAY_RANGE(tags), [&](const char *tag) -> const Locale & { return locale = Locale(tag); });
|
||||
|
||||
assertTrue("0.hasNext()", iter.hasNext());
|
||||
const Locale &l0 = iter.next();
|
||||
assertEquals("0.next()", "fr", l0.getName());
|
||||
|
||||
assertTrue("1.hasNext()", iter.hasNext());
|
||||
const Locale &l1 = iter.next();
|
||||
assertEquals("1.next()", "en_GB", l1.getName());
|
||||
|
||||
assertTrue("2.hasNext()", iter.hasNext());
|
||||
const Locale &l2 = iter.next();
|
||||
assertEquals("2.next()", "en", l2.getName());
|
||||
|
||||
assertFalse("3.hasNext()", iter.hasNext());
|
||||
}
|
||||
|
|
|
@ -141,6 +141,10 @@ public:
|
|||
void TestUndScript();
|
||||
void TestUndRegion();
|
||||
void TestUndCAPI();
|
||||
void TestRangeIterator();
|
||||
void TestPointerConvertingIterator();
|
||||
void TestTagConvertingIterator();
|
||||
void TestCapturingTagConvertingIterator();
|
||||
|
||||
private:
|
||||
void _checklocs(const char* label,
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "cstr.h"
|
||||
#include "intltest.h"
|
||||
#include "strtest.h"
|
||||
#include "uinvchar.h"
|
||||
|
||||
StringTest::~StringTest() {}
|
||||
|
||||
|
@ -147,6 +148,64 @@ StringTest::Test_UNICODE_STRING_SIMPLE() {
|
|||
}
|
||||
}
|
||||
|
||||
namespace {

// The same sequence of characters twice, in the same order:
// once as a char string in the native (execution) charset,
// and once as a UTF-16 string whose code units are the ASCII codes.
// Index i of one string corresponds to index i of the other.
// See U_CHARSET_FAMILY in unicode/platform.h.
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    " \"%&'()*+,-./:;<=>?_";
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    u"abcdefghijklmnopqrstuvwxyz"
    u"0123456789"
    u" \"%&'()*+,-./:;<=>?_";

}  // namespace
|
||||
|
||||
void
|
||||
StringTest::TestUpperOrdinal() {
|
||||
for (int32_t i = 0;; ++i) {
|
||||
char ic = nativeInvChars[i];
|
||||
uint8_t ac = asciiInvChars[i];
|
||||
int32_t expected = ac - 'A';
|
||||
int32_t actual = uprv_upperOrdinal(ic);
|
||||
if (0 <= expected && expected <= 25) {
|
||||
if (actual != expected) {
|
||||
errln("uprv_upperOrdinal('%c')=%d != expected %d",
|
||||
ic, (int)actual, (int)expected);
|
||||
}
|
||||
} else {
|
||||
if (0 <= actual && actual <= 25) {
|
||||
errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
|
||||
ic, (int)actual);
|
||||
}
|
||||
}
|
||||
if (ic == 0) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StringTest::TestLowerOrdinal() {
|
||||
for (int32_t i = 0;; ++i) {
|
||||
char ic = nativeInvChars[i];
|
||||
uint8_t ac = asciiInvChars[i];
|
||||
int32_t expected = ac - 'a';
|
||||
int32_t actual = uprv_lowerOrdinal(ic);
|
||||
if (0 <= expected && expected <= 25) {
|
||||
if (actual != expected) {
|
||||
errln("uprv_lowerOrdinal('%c')=%d != expected %d",
|
||||
ic, (int)actual, (int)expected);
|
||||
}
|
||||
} else {
|
||||
if (0 <= actual && actual <= 25) {
|
||||
errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
|
||||
ic, (int)actual);
|
||||
}
|
||||
}
|
||||
if (ic == 0) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
|
||||
#if !U_HIDE_OBSOLETE_UTF_OLD_H
|
||||
|
@ -178,6 +237,8 @@ void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, ch
|
|||
TESTCASE_AUTO(Test_U_STRING);
|
||||
TESTCASE_AUTO(Test_UNICODE_STRING);
|
||||
TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
|
||||
TESTCASE_AUTO(TestUpperOrdinal);
|
||||
TESTCASE_AUTO(TestLowerOrdinal);
|
||||
TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
|
||||
TESTCASE_AUTO(TestSTLCompatibility);
|
||||
TESTCASE_AUTO(TestStringPiece);
|
||||
|
|
|
@ -39,6 +39,8 @@ private:
|
|||
void Test_U_STRING();
|
||||
void Test_UNICODE_STRING();
|
||||
void Test_UNICODE_STRING_SIMPLE();
|
||||
void TestUpperOrdinal();
|
||||
void TestLowerOrdinal();
|
||||
void Test_UTF8_COUNT_TRAIL_BYTES();
|
||||
void TestStringPiece();
|
||||
void TestStringPieceComparisons();
|
||||
|
|
1959
icu4c/source/test/testdata/localeMatcherTest.txt
vendored
Normal file
1959
icu4c/source/test/testdata/localeMatcherTest.txt
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -19,10 +19,15 @@ import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
|
|||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Off-line-built data for LocaleMatcher.
|
||||
* Offline-built data for LocaleMatcher.
|
||||
* Mostly but not only the data for mapping locales to their maximized forms.
|
||||
*/
|
||||
public class LocaleDistance {
|
||||
/**
|
||||
* Bit flag used on the last character of a subtag in the trie.
|
||||
* Must be set consistently by the builder and the lookup code.
|
||||
*/
|
||||
public static final int END_OF_SUBTAG = 0x80;
|
||||
/** Distance value bit flag, set by the builder. */
|
||||
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
|
||||
/** Distance value bit flag, set by trieNext(). */
|
||||
|
@ -148,15 +153,21 @@ public class LocaleDistance {
|
|||
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
|
||||
|
||||
private LocaleDistance(Data data) {
|
||||
this.trie = new BytesTrie(data.trie, 0);
|
||||
this.regionToPartitionsIndex = data.regionToPartitionsIndex;
|
||||
this.partitionArrays = data.partitionArrays;
|
||||
this.paradigmLSRs = data.paradigmLSRs;
|
||||
trie = new BytesTrie(data.trie, 0);
|
||||
regionToPartitionsIndex = data.regionToPartitionsIndex;
|
||||
partitionArrays = data.partitionArrays;
|
||||
paradigmLSRs = data.paradigmLSRs;
|
||||
defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
|
||||
defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
|
||||
defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
|
||||
this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
|
||||
minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
|
||||
|
||||
// For the default demotion value, use the
|
||||
// default region distance between unrelated Englishes.
|
||||
// Thus, unless demotion is turned off,
|
||||
// a mere region difference for one desired locale
|
||||
// is as good as a perfect match for the next following desired locale.
|
||||
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
|
||||
LSR en = new LSR("en", "Latn", "US");
|
||||
LSR enGB = new LSR("en", "Latn", "GB");
|
||||
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
|
||||
|
@ -188,18 +199,18 @@ public class LocaleDistance {
|
|||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
*/
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
int desLangDistance = trieNext(iter, desired.language, false);
|
||||
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
|
||||
long desLangState = desLangDistance >= 0 && supportedLSRs.length > 1 ? iter.getState64() : 0;
|
||||
// Index of the supported LSR with the lowest distance.
|
||||
int bestIndex = -1;
|
||||
for (int slIndex = 0; slIndex < supportedLsrs.length; ++slIndex) {
|
||||
LSR supported = supportedLsrs[slIndex];
|
||||
for (int slIndex = 0; slIndex < supportedLSRs.length; ++slIndex) {
|
||||
LSR supported = supportedLSRs[slIndex];
|
||||
boolean star = false;
|
||||
int distance = desLangDistance;
|
||||
if (distance >= 0) {
|
||||
|
@ -227,6 +238,11 @@ public class LocaleDistance {
|
|||
star = true;
|
||||
}
|
||||
assert 0 <= distance && distance <= 100;
|
||||
// We implement "favor subtag" by reducing the language subtag distance
|
||||
// (unscientifically reducing it to a quarter of the normal value),
|
||||
// so that the script distance is relatively more important.
|
||||
// For example, given a default language distance of 80, we reduce it to 20,
|
||||
// which is below the default threshold of 50, which is the default script distance.
|
||||
if (favorSubtag == FavorSubtag.SCRIPT) {
|
||||
distance >>= 2;
|
||||
}
|
||||
|
@ -312,9 +328,10 @@ public class LocaleDistance {
|
|||
int desLength = desiredPartitions.length();
|
||||
int suppLength = supportedPartitions.length();
|
||||
if (desLength == 1 && suppLength == 1) {
|
||||
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
|
||||
// Fastpath for single desired/supported partitions.
|
||||
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | END_OF_SUBTAG);
|
||||
if (result.hasNext()) {
|
||||
result = iter.next(supportedPartitions.charAt(0) | 0x80);
|
||||
result = iter.next(supportedPartitions.charAt(0) | END_OF_SUBTAG);
|
||||
if (result.hasValue()) {
|
||||
return iter.getValue();
|
||||
}
|
||||
|
@ -328,11 +345,11 @@ public class LocaleDistance {
|
|||
for (int di = 0;;) {
|
||||
// Look up each desired-partition string only once,
|
||||
// not for each (desired, supported) pair.
|
||||
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
|
||||
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | END_OF_SUBTAG);
|
||||
if (result.hasNext()) {
|
||||
long desState = suppLength > 1 ? iter.getState64() : 0;
|
||||
for (int si = 0;;) {
|
||||
result = iter.next(supportedPartitions.charAt(si++) | 0x80);
|
||||
result = iter.next(supportedPartitions.charAt(si++) | END_OF_SUBTAG);
|
||||
int d;
|
||||
if (result.hasValue()) {
|
||||
d = iter.getValue();
|
||||
|
@ -391,7 +408,7 @@ public class LocaleDistance {
|
|||
}
|
||||
} else {
|
||||
// last character of this subtag
|
||||
BytesTrie.Result result = iter.next(c | 0x80);
|
||||
BytesTrie.Result result = iter.next(c | END_OF_SUBTAG);
|
||||
if (wantValue) {
|
||||
if (result.hasValue()) {
|
||||
int value = iter.getValue();
|
||||
|
|
|
@ -180,10 +180,12 @@ public final class XLikelySubtags {
|
|||
|
||||
// VisibleForTesting
|
||||
public LSR makeMaximizedLsrFrom(ULocale locale) {
|
||||
String name = locale.getName();
|
||||
String name = locale.getName(); // Faster than .toLanguageTag().
|
||||
if (name.startsWith("@x=")) {
|
||||
String tag = locale.toLanguageTag();
|
||||
assert tag.startsWith("x-");
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return new LSR(name, "", "");
|
||||
return new LSR(tag, "", "");
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
|
@ -238,7 +240,7 @@ public final class XLikelySubtags {
|
|||
language = getCanonical(languageAliases, language);
|
||||
// (We have no script mappings.)
|
||||
region = getCanonical(regionAliases, region);
|
||||
return INSTANCE.maximize(language, script, region);
|
||||
return maximize(language, script, region);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -23,7 +23,7 @@ import com.ibm.icu.impl.locale.XLikelySubtags;
|
|||
|
||||
/**
|
||||
* Immutable class that picks the best match between a user's desired locales and
|
||||
* and application's supported locales.
|
||||
* an application's supported locales.
|
||||
*
|
||||
* <p>Example:
|
||||
* <pre>
|
||||
|
@ -54,18 +54,22 @@ import com.ibm.icu.impl.locale.XLikelySubtags;
|
|||
* 3. other supported locales.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>All classes implementing this interface should be immutable. Often a
|
||||
* product will just need one static instance, built with the languages
|
||||
* <p>Often a product will just need one matcher instance, built with the languages
|
||||
* that it supports. However, it may want multiple instances with different
|
||||
* default languages based on additional information, such as the domain.
|
||||
*
|
||||
* <p>This class is not intended for public subclassing.
|
||||
*
|
||||
* @author markdavis@google.com
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public class LocaleMatcher {
|
||||
public final class LocaleMatcher {
|
||||
private static final LSR UND_LSR = new LSR("und","","");
|
||||
// In ULocale, "und" and "" make the same object.
|
||||
private static final ULocale UND_ULOCALE = new ULocale("und");
|
||||
// In Locale, "und" and "" make different objects.
|
||||
private static final Locale UND_LOCALE = new Locale("und");
|
||||
private static final Locale EMPTY_LOCALE = new Locale("");
|
||||
|
||||
// Activates debugging output to stderr with details of GetBestMatch.
|
||||
private static final boolean TRACE_MATCHER = false;
|
||||
|
@ -253,43 +257,44 @@ public class LocaleMatcher {
|
|||
* best-matching desired locale, such as the -t- and -u- extensions.
|
||||
* May replace some fields of the supported locale.
|
||||
* The result is the locale that should be used for date and number formatting, collation, etc.
|
||||
* Returns null if getSupportedLocale() returns null.
|
||||
*
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
|
||||
*
|
||||
* @return the service locale, combining the best-matching desired and supported locales.
|
||||
* @return a locale combining the best-matching desired and supported locales.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public ULocale makeServiceULocale() {
|
||||
public ULocale makeResolvedULocale() {
|
||||
ULocale bestDesired = getDesiredULocale();
|
||||
ULocale serviceLocale = supportedULocale;
|
||||
if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
|
||||
ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
|
||||
|
||||
// Copy the region from bestDesired, if there is one.
|
||||
String region = bestDesired.getCountry();
|
||||
if (!region.isEmpty()) {
|
||||
b.setRegion(region);
|
||||
}
|
||||
|
||||
// Copy the variants from bestDesired, if there are any.
|
||||
// Note that this will override any serviceLocale variants.
|
||||
// For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
|
||||
String variants = bestDesired.getVariant();
|
||||
if (!variants.isEmpty()) {
|
||||
b.setVariant(variants);
|
||||
}
|
||||
|
||||
// Copy the extensions from bestDesired, if there are any.
|
||||
// Note that this will override any serviceLocale extensions.
|
||||
// For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
|
||||
// (replacing calendar).
|
||||
for (char extensionKey : bestDesired.getExtensionKeys()) {
|
||||
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
|
||||
}
|
||||
serviceLocale = b.build();
|
||||
if (supportedULocale == null || bestDesired == null ||
|
||||
supportedULocale.equals(bestDesired)) {
|
||||
return supportedULocale;
|
||||
}
|
||||
return serviceLocale;
|
||||
ULocale.Builder b = new ULocale.Builder().setLocale(supportedULocale);
|
||||
|
||||
// Copy the region from bestDesired, if there is one.
|
||||
String region = bestDesired.getCountry();
|
||||
if (!region.isEmpty()) {
|
||||
b.setRegion(region);
|
||||
}
|
||||
|
||||
// Copy the variants from bestDesired, if there are any.
|
||||
// Note that this will override any supportedULocale variants.
|
||||
// For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
|
||||
String variants = bestDesired.getVariant();
|
||||
if (!variants.isEmpty()) {
|
||||
b.setVariant(variants);
|
||||
}
|
||||
|
||||
// Copy the extensions from bestDesired, if there are any.
|
||||
// Note that this will override any supportedULocale extensions.
|
||||
// For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
|
||||
// (replacing calendar).
|
||||
for (char extensionKey : bestDesired.getExtensionKeys()) {
|
||||
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
|
||||
}
|
||||
return b.build();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -298,15 +303,17 @@ public class LocaleMatcher {
|
|||
* May replace some fields of the supported locale.
|
||||
* The result is the locale that should be used for
|
||||
* date and number formatting, collation, etc.
|
||||
* Returns null if getSupportedLocale() returns null.
|
||||
*
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
|
||||
*
|
||||
* @return the service locale, combining the best-matching desired and supported locales.
|
||||
* @return a locale combining the best-matching desired and supported locales.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Locale makeServiceLocale() {
|
||||
return makeServiceULocale().toLocale();
|
||||
public Locale makeResolvedLocale() {
|
||||
ULocale resolved = makeResolvedULocale();
|
||||
return resolved != null ? resolved.toLocale() : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -320,8 +327,8 @@ public class LocaleMatcher {
|
|||
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
|
||||
private final Map<LSR, Integer> supportedLsrToIndex;
|
||||
// Array versions of the supportedLsrToIndex keys and values.
|
||||
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
|
||||
private final LSR[] supportedLsrs;
|
||||
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
|
||||
private final LSR[] supportedLSRs;
|
||||
private final int[] supportedIndexes;
|
||||
private final ULocale defaultULocale;
|
||||
private final Locale defaultLocale;
|
||||
|
@ -334,7 +341,7 @@ public class LocaleMatcher {
|
|||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static class Builder {
|
||||
public static final class Builder {
|
||||
private List<ULocale> supportedLocales;
|
||||
private int thresholdDistance = -1;
|
||||
private Demotion demotion;
|
||||
|
@ -394,7 +401,7 @@ public class LocaleMatcher {
|
|||
* Adds another supported locale.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locale the list of locale
|
||||
* @param locale another locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
|
@ -411,7 +418,7 @@ public class LocaleMatcher {
|
|||
* Adds another supported locale.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locale the list of locale
|
||||
* @param locale another locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
|
@ -514,7 +521,7 @@ public class LocaleMatcher {
|
|||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{LocaleMatcher.Builder");
|
||||
if (!supportedLocales.isEmpty()) {
|
||||
if (supportedLocales != null && !supportedLocales.isEmpty()) {
|
||||
s.append(" supported={").append(supportedLocales.toString()).append('}');
|
||||
}
|
||||
if (defaultLocale != null) {
|
||||
|
@ -572,50 +579,62 @@ public class LocaleMatcher {
|
|||
private LocaleMatcher(Builder builder) {
|
||||
thresholdDistance = builder.thresholdDistance < 0 ?
|
||||
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
|
||||
int supportedLocalesLength = builder.supportedLocales != null ?
|
||||
builder.supportedLocales.size() : 0;
|
||||
ULocale udef = builder.defaultLocale;
|
||||
Locale def = null;
|
||||
int idef = -1;
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., java.util.Locale)
|
||||
// we can return those by parallel index.
|
||||
int supportedLocalesLength = builder.supportedLocales.size();
|
||||
supportedULocales = new ULocale[supportedLocalesLength];
|
||||
supportedLocales = new Locale[supportedLocalesLength];
|
||||
// Supported LSRs in input order.
|
||||
LSR lsrs[] = new LSR[supportedLocalesLength];
|
||||
// Also find the first supported locale whose LSR is
|
||||
// the same as that for the default locale.
|
||||
ULocale udef = builder.defaultLocale;
|
||||
Locale def = null;
|
||||
LSR defLSR = null;
|
||||
int idef = -1;
|
||||
if (udef != null) {
|
||||
def = udef.toLocale();
|
||||
defLSR = getMaximalLsrOrUnd(udef);
|
||||
}
|
||||
int i = 0;
|
||||
for (ULocale locale : builder.supportedLocales) {
|
||||
supportedULocales[i] = locale;
|
||||
supportedLocales[i] = locale.toLocale();
|
||||
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
|
||||
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
|
||||
idef = i;
|
||||
if (supportedLocalesLength > 0) {
|
||||
for (ULocale locale : builder.supportedLocales) {
|
||||
supportedULocales[i] = locale;
|
||||
supportedLocales[i] = locale.toLocale();
|
||||
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
|
||||
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
|
||||
idef = i;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
|
||||
// We need an unordered map from LSR to first supported locale with that LSR,
|
||||
// and an ordered list of (LSR, Indexes).
|
||||
// and an ordered list of (LSR, supported index).
|
||||
// We use a LinkedHashMap for both,
|
||||
// and insert the supported locales in the following order:
|
||||
// 1. Default locale, if it is supported.
|
||||
// 2. Priority locales in builder order.
|
||||
// 2. Priority locales (aka "paradigm locales") in builder order.
|
||||
// 3. Remaining locales in builder order.
|
||||
supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
|
||||
// Note: We could work with a single LinkedHashMap by storing ~i (the binary-not index)
|
||||
// for the default and paradigm locales, counting the number of those locales,
|
||||
// and keeping two indexes to fill the LSR and index arrays with
|
||||
// priority vs. normal locales. In that loop we would need to entry.setValue(~i)
|
||||
// to restore non-negative indexes in the map.
|
||||
// Probably saves little but less readable.
|
||||
Map<LSR, Integer> otherLsrToIndex = null;
|
||||
if (idef >= 0) {
|
||||
supportedLsrToIndex.put(defLSR, idef);
|
||||
}
|
||||
i = 0;
|
||||
for (ULocale locale : supportedULocales) {
|
||||
if (i == idef) { continue; }
|
||||
if (i == idef) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
LSR lsr = lsrs[i];
|
||||
if (defLSR == null) {
|
||||
assert i == 0;
|
||||
|
@ -624,7 +643,15 @@ public class LocaleMatcher {
|
|||
defLSR = lsr;
|
||||
idef = 0;
|
||||
supportedLsrToIndex.put(lsr, 0);
|
||||
} else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
|
||||
} else if (idef >= 0 && lsr.equals(defLSR)) {
|
||||
// lsr.equals(defLSR) means that this supported locale is
|
||||
// a duplicate of the default locale.
|
||||
// Either an explicit default locale is supported, and we added it before the loop,
|
||||
// or there is no explicit default locale, and this is
|
||||
// a duplicate of the first supported locale.
|
||||
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
|
||||
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
|
||||
} else if (LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
|
||||
putIfAbsent(supportedLsrToIndex, lsr, i);
|
||||
} else {
|
||||
if (otherLsrToIndex == null) {
|
||||
|
@ -637,12 +664,12 @@ public class LocaleMatcher {
|
|||
if (otherLsrToIndex != null) {
|
||||
supportedLsrToIndex.putAll(otherLsrToIndex);
|
||||
}
|
||||
int numSuppLsrs = supportedLsrToIndex.size();
|
||||
supportedLsrs = new LSR[numSuppLsrs];
|
||||
supportedIndexes = new int[numSuppLsrs];
|
||||
int supportedLSRsLength = supportedLsrToIndex.size();
|
||||
supportedLSRs = new LSR[supportedLSRsLength];
|
||||
supportedIndexes = new int[supportedLSRsLength];
|
||||
i = 0;
|
||||
for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
|
||||
supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()]
|
||||
supportedLSRs[i] = entry.getKey(); // = lsrs[entry.getValue()]
|
||||
supportedIndexes[i++] = entry.getValue();
|
||||
}
|
||||
|
||||
|
@ -671,7 +698,7 @@ public class LocaleMatcher {
|
|||
}
|
||||
|
||||
private static final LSR getMaximalLsrOrUnd(Locale locale) {
|
||||
if (locale.equals(UND_LOCALE)) {
|
||||
if (locale.equals(UND_LOCALE) || locale.equals(EMPTY_LOCALE)) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
|
@ -766,7 +793,7 @@ public class LocaleMatcher {
|
|||
* Parses the string like {@link LocalePriorityList} does and
|
||||
* returns the supported locale which best matches one of the desired locales.
|
||||
*
|
||||
* @param desiredLocaleList Typically a user's languages, in order of preference (descending),
|
||||
* @param desiredLocaleList Typically a user's languages,
|
||||
* as a string which is to be parsed like LocalePriorityList does.
|
||||
* @return the best-matching supported locale.
|
||||
* @stable ICU 4.4
|
||||
|
@ -808,9 +835,13 @@ public class LocaleMatcher {
|
|||
return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
|
||||
}
|
||||
|
||||
private Result defaultResult() {
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
|
||||
}
|
||||
|
||||
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
|
||||
if (suppIndex < 0) {
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
|
||||
return defaultResult();
|
||||
} else if (desiredLocale != null) {
|
||||
return new Result(desiredLocale, supportedULocales[suppIndex],
|
||||
null, supportedLocales[suppIndex], 0, suppIndex);
|
||||
|
@ -822,7 +853,7 @@ public class LocaleMatcher {
|
|||
|
||||
private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
|
||||
if (suppIndex < 0) {
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
|
||||
return defaultResult();
|
||||
} else if (desiredLocale != null) {
|
||||
return new Result(null, supportedULocales[suppIndex],
|
||||
desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
|
||||
|
@ -858,7 +889,7 @@ public class LocaleMatcher {
|
|||
public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
|
||||
Iterator<ULocale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
return makeResult(UND_ULOCALE, null, -1);
|
||||
return defaultResult();
|
||||
}
|
||||
ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
|
||||
LSR desiredLSR = lsrIter.next();
|
||||
|
@ -891,7 +922,7 @@ public class LocaleMatcher {
|
|||
public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
|
||||
Iterator<Locale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
return makeResult(UND_LOCALE, null, -1);
|
||||
return defaultResult();
|
||||
}
|
||||
LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
|
||||
LSR desiredLSR = lsrIter.next();
|
||||
|
@ -920,7 +951,7 @@ public class LocaleMatcher {
|
|||
return suppIndex;
|
||||
}
|
||||
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLsrs, bestDistance, favorSubtag);
|
||||
desiredLSR, supportedLSRs, bestDistance, favorSubtag);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestDistance = bestIndexAndDistance & 0xff;
|
||||
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
|
||||
|
@ -933,6 +964,7 @@ public class LocaleMatcher {
|
|||
break;
|
||||
}
|
||||
desiredLSR = remainingIter.next();
|
||||
++desiredIndex;
|
||||
}
|
||||
if (bestSupportedLsrIndex < 0) {
|
||||
if (TRACE_MATCHER) {
|
||||
|
@ -969,8 +1001,8 @@ public class LocaleMatcher {
|
|||
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
|
||||
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
|
||||
int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
|
||||
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
|
||||
getMaximalLsrOrUnd(desired),
|
||||
new LSR[] { getMaximalLsrOrUnd(supported) },
|
||||
thresholdDistance, favorSubtag) & 0xff;
|
||||
return (100 - distance) / 100.0;
|
||||
}
|
||||
|
|
|
@ -13,7 +13,8 @@ import java.util.Collections;
|
|||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
@ -24,10 +25,10 @@ import java.util.regex.Pattern;
|
|||
/**
|
||||
* Provides an immutable list of languages/locales in priority order.
|
||||
* The string format is based on the Accept-Language format
|
||||
* <a href="http://www.ietf.org/rfc/rfc2616.txt">http://www.ietf.org/rfc/rfc2616.txt</a>, such as
|
||||
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), such as
|
||||
* "af, en, fr;q=0.9". Syntactically it is slightly
|
||||
* more lenient, in allowing extra whitespace between elements, extra commas,
|
||||
* and more than 3 decimals (on input), and pins between 0 and 1.
|
||||
* and more than 3 decimals (on input). The qvalues must be between 0 and 1.
|
||||
*
|
||||
* <p>In theory, Accept-Language indicates the relative 'quality' of each item,
|
||||
* but in practice, all of the browsers just take an ordered list, like
|
||||
|
@ -70,7 +71,6 @@ import java.util.regex.Pattern;
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public class LocalePriorityList implements Iterable<ULocale> {
|
||||
private static final double D0 = 0.0d;
|
||||
private static final Double D1 = 1.0d;
|
||||
|
||||
private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*");
|
||||
|
@ -91,6 +91,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
|
||||
/**
|
||||
* Creates a Builder and adds a locale with a specified weight.
|
||||
* A zero or negative weight leads to removing the locale.
|
||||
* A weight greater than 1 is pinned to 1.
|
||||
*
|
||||
* @param locale locale/language to be added
|
||||
* @param weight value from 0.0 to 1.0
|
||||
|
@ -109,7 +111,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public static Builder add(LocalePriorityList list) {
|
||||
return new Builder().add(list);
|
||||
return new Builder(list);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -154,13 +156,14 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder result = new StringBuilder();
|
||||
for (final ULocale language : languagesAndWeights.keySet()) {
|
||||
for (Entry<ULocale, Double> entry : languagesAndWeights.entrySet()) {
|
||||
ULocale language = entry.getKey();
|
||||
double weight = entry.getValue();
|
||||
if (result.length() != 0) {
|
||||
result.append(", ");
|
||||
}
|
||||
result.append(language);
|
||||
double weight = languagesAndWeights.get(language);
|
||||
if (weight != D1) {
|
||||
if (weight != 1.0) {
|
||||
result.append(";q=").append(weight);
|
||||
}
|
||||
}
|
||||
|
@ -221,13 +224,31 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
* These store the input languages and weights, in chronological order,
|
||||
* where later additions override previous ones.
|
||||
*/
|
||||
private final Map<ULocale, Double> languageToWeight
|
||||
= new LinkedHashMap<>();
|
||||
private Map<ULocale, Double> languageToWeight;
|
||||
/**
|
||||
* The builder is reusable but rarely reused. Avoid cloning the map when not needed.
|
||||
* Exactly one of languageToWeight and built is null.
|
||||
*/
|
||||
private LocalePriorityList built;
|
||||
private boolean hasWeights = false; // other than 1.0
|
||||
|
||||
/*
|
||||
/**
|
||||
* Private constructor, only used by LocalePriorityList
|
||||
*/
|
||||
private Builder() {
|
||||
languageToWeight = new LinkedHashMap<>();
|
||||
}
|
||||
|
||||
private Builder(LocalePriorityList list) {
|
||||
built = list;
|
||||
for (Double value : list.languagesAndWeights.values()) {
|
||||
double weight = value;
|
||||
assert 0.0 < weight && weight <= 1.0;
|
||||
if (weight != 1.0) {
|
||||
hasWeights = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -249,27 +270,48 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public LocalePriorityList build(boolean preserveWeights) {
|
||||
// Walk through the input list, collecting the items with the same weights.
|
||||
final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<>(
|
||||
myDescendingDouble);
|
||||
for (final ULocale lang : languageToWeight.keySet()) {
|
||||
Double weight = languageToWeight.get(lang);
|
||||
Set<ULocale> s = doubleCheck.get(weight);
|
||||
if (s == null) {
|
||||
doubleCheck.put(weight, s = new LinkedHashSet<>());
|
||||
}
|
||||
s.add(lang);
|
||||
if (built != null) {
|
||||
// Calling build() again without changing anything in between.
|
||||
// Just return the same immutable list.
|
||||
return built;
|
||||
}
|
||||
// We now have a bunch of items sorted by weight, then chronologically.
|
||||
// We can now create a list in the right order
|
||||
final Map<ULocale, Double> temp = new LinkedHashMap<>();
|
||||
for (Entry<Double, Set<ULocale>> langEntry : doubleCheck.entrySet()) {
|
||||
final Double weight = langEntry.getKey();
|
||||
for (final ULocale lang : langEntry.getValue()) {
|
||||
temp.put(lang, preserveWeights ? weight : D1);
|
||||
Map<ULocale, Double> temp;
|
||||
if (hasWeights) {
|
||||
// Walk through the input list, collecting the items with the same weights.
|
||||
final TreeMap<Double, List<ULocale>> weightToLanguages =
|
||||
new TreeMap<>(myDescendingDouble);
|
||||
for (Entry<ULocale, Double> entry : languageToWeight.entrySet()) {
|
||||
ULocale lang = entry.getKey();
|
||||
Double weight = entry.getValue();
|
||||
List<ULocale> s = weightToLanguages.get(weight);
|
||||
if (s == null) {
|
||||
weightToLanguages.put(weight, s = new LinkedList<>());
|
||||
}
|
||||
s.add(lang);
|
||||
}
|
||||
// We now have a bunch of items sorted by weight, then chronologically.
|
||||
// We can now create a list in the right order.
|
||||
if (weightToLanguages.size() <= 1) {
|
||||
// There is at most one weight.
|
||||
temp = languageToWeight;
|
||||
if (weightToLanguages.isEmpty() || weightToLanguages.firstKey() == 1.0) {
|
||||
hasWeights = false;
|
||||
}
|
||||
} else {
|
||||
temp = new LinkedHashMap<>();
|
||||
for (Entry<Double, List<ULocale>> langEntry : weightToLanguages.entrySet()) {
|
||||
final Double weight = preserveWeights ? langEntry.getKey() : D1;
|
||||
for (final ULocale lang : langEntry.getValue()) {
|
||||
temp.put(lang, weight);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Nothing to sort.
|
||||
temp = languageToWeight;
|
||||
}
|
||||
return new LocalePriorityList(Collections.unmodifiableMap(temp));
|
||||
languageToWeight = null;
|
||||
return built = new LocalePriorityList(Collections.unmodifiableMap(temp));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -280,9 +322,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public Builder add(final LocalePriorityList list) {
|
||||
for (final ULocale language : list.languagesAndWeights
|
||||
.keySet()) {
|
||||
add(language, list.languagesAndWeights.get(language));
|
||||
for (Entry<ULocale, Double> entry : list.languagesAndWeights.entrySet()) {
|
||||
add(entry.getKey(), entry.getValue());
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -295,7 +336,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public Builder add(final ULocale locale) {
|
||||
return add(locale, D1);
|
||||
return add(locale, 1.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -307,7 +348,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
*/
|
||||
public Builder add(ULocale... locales) {
|
||||
for (final ULocale languageCode : locales) {
|
||||
add(languageCode, D1);
|
||||
add(languageCode, 1.0);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -315,7 +356,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
/**
|
||||
* Adds a locale with a specified weight.
|
||||
* Overrides any previous weight for the locale.
|
||||
* Removes a locale if the weight is zero.
|
||||
* A zero or negative weight leads to removing the locale.
|
||||
* A weight greater than 1 is pinned to 1.
|
||||
*
|
||||
* @param locale language/locale to add
|
||||
* @param weight value between 0.0 and 1.0
|
||||
|
@ -323,15 +365,24 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public Builder add(final ULocale locale, double weight) {
|
||||
if (languageToWeight == null) {
|
||||
// Builder reuse after build().
|
||||
languageToWeight = new LinkedHashMap<>(built.languagesAndWeights);
|
||||
built = null;
|
||||
}
|
||||
if (languageToWeight.containsKey(locale)) {
|
||||
languageToWeight.remove(locale);
|
||||
}
|
||||
if (weight <= D0) {
|
||||
Double value;
|
||||
if (weight <= 0.0) {
|
||||
return this; // skip zeros
|
||||
} else if (weight > D1) {
|
||||
weight = D1;
|
||||
} else if (weight >= 1.0) {
|
||||
value = D1;
|
||||
} else {
|
||||
value = weight;
|
||||
hasWeights = true;
|
||||
}
|
||||
languageToWeight.put(locale, weight);
|
||||
languageToWeight.put(locale, value);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -349,9 +400,9 @@ public class LocalePriorityList implements Iterable<ULocale> {
|
|||
if (itemMatcher.reset(item).matches()) {
|
||||
final ULocale language = new ULocale(itemMatcher.group(1));
|
||||
final double weight = Double.parseDouble(itemMatcher.group(2));
|
||||
if (!(weight >= D0 && weight <= D1)) { // do ! for NaN
|
||||
throw new IllegalArgumentException("Illegal weight, must be 0..1: "
|
||||
+ weight);
|
||||
if (!(0.0 <= weight && weight <= 1.0)) { // do ! for NaN
|
||||
throw new IllegalArgumentException(
|
||||
"Illegal weight, must be 0..1: " + weight);
|
||||
}
|
||||
add(language, weight);
|
||||
} else if (item.length() != 0) {
|
||||
|
|
|
@ -11,8 +11,11 @@ package com.ibm.icu.dev.test.util;
|
|||
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
@ -111,14 +114,111 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void testBasics() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
|
||||
.add(ULocale.ENGLISH).build());
|
||||
LocaleMatcher matcher = newLocaleMatcher(
|
||||
LocalePriorityList.
|
||||
add(ULocale.FRENCH).add(ULocale.UK).add(ULocale.ENGLISH).
|
||||
build());
|
||||
logln(matcher.toString());
|
||||
|
||||
assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
|
||||
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
|
||||
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
|
||||
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.JAPAN));
|
||||
|
||||
// API coverage
|
||||
List<Locale> locales = new ArrayList<>();
|
||||
locales.add(Locale.FRENCH);
|
||||
locales.add(Locale.UK);
|
||||
matcher = LocaleMatcher.builder().
|
||||
setSupportedLocales(locales).addSupportedLocale(Locale.ENGLISH).
|
||||
setDefaultLocale(Locale.GERMAN).build();
|
||||
assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
|
||||
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
|
||||
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
|
||||
assertEquals(ULocale.GERMAN, matcher.getBestMatch(ULocale.JAPAN));
|
||||
|
||||
ULocale udesired = new ULocale("en_GB"); // distinct object from ULocale.UK
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(udesired);
|
||||
assertTrue("exactly desired en-GB object", udesired == result.getDesiredULocale());
|
||||
assertEquals(Locale.UK, result.getDesiredLocale());
|
||||
assertEquals(0, result.getDesiredIndex());
|
||||
assertEquals(ULocale.UK, result.getSupportedULocale());
|
||||
assertEquals(Locale.UK, result.getSupportedLocale());
|
||||
assertEquals(1, result.getSupportedIndex());
|
||||
|
||||
LocalePriorityList list = LocalePriorityList.add(ULocale.JAPAN, ULocale.US).build();
|
||||
result = matcher.getBestMatchResult(list);
|
||||
assertEquals(1, result.getDesiredIndex());
|
||||
assertEquals(Locale.US, result.getDesiredLocale());
|
||||
|
||||
Locale desired = new Locale("en", "US"); // distinct object from Locale.US
|
||||
result = matcher.getBestLocaleResult(desired);
|
||||
assertEquals(ULocale.US, result.getDesiredULocale());
|
||||
assertTrue("exactly desired en-US object", desired == result.getDesiredLocale());
|
||||
assertEquals(0, result.getDesiredIndex());
|
||||
assertEquals(ULocale.ENGLISH, result.getSupportedULocale());
|
||||
assertEquals(Locale.ENGLISH, result.getSupportedLocale());
|
||||
assertEquals(2, result.getSupportedIndex());
|
||||
|
||||
result = matcher.getBestMatchResult(ULocale.JAPAN);
|
||||
assertNull(result.getDesiredLocale());
|
||||
assertNull(result.getDesiredULocale());
|
||||
assertEquals(-1, result.getDesiredIndex());
|
||||
assertEquals(ULocale.GERMAN, result.getSupportedULocale());
|
||||
assertEquals(Locale.GERMAN, result.getSupportedLocale());
|
||||
assertEquals(-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
private static final String locString(ULocale loc) {
|
||||
return loc != null ? loc.getName() : "(null)";
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSupportedDefault() {
|
||||
// The default locale is one of the supported locales.
|
||||
List<ULocale> locales = Arrays.asList(
|
||||
new ULocale("fr"), new ULocale("en_GB"), new ULocale("en"));
|
||||
LocaleMatcher matcher = LocaleMatcher.builder().
|
||||
setSupportedULocales(locales).
|
||||
setDefaultULocale(locales.get(1)).
|
||||
build();
|
||||
ULocale best = matcher.getBestMatch("en_GB");
|
||||
assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US");
|
||||
assertEquals("getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR");
|
||||
assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP");
|
||||
assertEquals("getBestMatch(ja_JP)", "en_GB", locString(best));
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(new ULocale("ja_JP"));
|
||||
assertEquals("getBestMatchResult(ja_JP).supp",
|
||||
"en_GB", locString(result.getSupportedULocale()));
|
||||
assertEquals("getBestMatchResult(ja_JP).suppIndex",
|
||||
1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnsupportedDefault() {
|
||||
// The default locale does not match any of the supported locales.
|
||||
List<ULocale> locales = Arrays.asList(
|
||||
new ULocale("fr"), new ULocale("en_GB"), new ULocale("en"));
|
||||
LocaleMatcher matcher = LocaleMatcher.builder().
|
||||
setSupportedULocales(locales).
|
||||
setDefaultULocale(new ULocale("de")).
|
||||
build();
|
||||
ULocale best = matcher.getBestMatch("en_GB");
|
||||
assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
|
||||
best = matcher.getBestMatch("en_US");
|
||||
assertEquals("getBestMatch(en_US)", "en", locString(best));
|
||||
best = matcher.getBestMatch("fr_FR");
|
||||
assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
|
||||
best = matcher.getBestMatch("ja_JP");
|
||||
assertEquals("getBestMatch(ja_JP)", "de", locString(best));
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(new ULocale("ja_JP"));
|
||||
assertEquals("getBestMatchResult(ja_JP).supp",
|
||||
"de", locString(result.getSupportedULocale()));
|
||||
assertEquals("getBestMatchResult(ja_JP).suppIndex",
|
||||
-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -178,8 +278,15 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void testEmpty() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher("");
|
||||
final LocaleMatcher matcher = LocaleMatcher.builder().build();
|
||||
assertNull(matcher.getBestMatch(ULocale.FRENCH));
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(ULocale.FRENCH);
|
||||
assertNull(result.getDesiredULocale());
|
||||
assertNull(result.getDesiredLocale());
|
||||
assertEquals(-1, result.getDesiredIndex());
|
||||
assertNull(result.getSupportedULocale());
|
||||
assertNull(result.getSupportedLocale());
|
||||
assertEquals(-1, result.getSupportedIndex());
|
||||
}
|
||||
|
||||
static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
|
||||
|
@ -197,12 +304,12 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
@Test
|
||||
public void testMatch_none() {
|
||||
double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
|
||||
assertTrue("Actual < 0: " + match, 0 <= match);
|
||||
assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
|
||||
assertTrue("Actual >= 0: " + match, 0 <= match);
|
||||
assertTrue("Actual < 0.2 (~ language + script distance): " + match, 0.2 > match);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatch_matchOnMazimized() {
|
||||
public void testMatch_matchOnMaximized() {
|
||||
ULocale undTw = new ULocale("und_TW");
|
||||
ULocale zhHant = new ULocale("zh_Hant");
|
||||
double matchZh = match(undTw, new ULocale("zh"));
|
||||
|
@ -219,6 +326,20 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
matchEnHantTw < matchZh);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResolvedLocale() {
|
||||
LocaleMatcher matcher = LocaleMatcher.builder().
|
||||
addSupportedULocale(new ULocale("ar-EG")).
|
||||
build();
|
||||
ULocale desired = new ULocale("ar-SA-u-nu-latn");
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
|
||||
assertEquals("best", "ar_EG", result.getSupportedLocale().toString());
|
||||
ULocale resolved = result.makeResolvedULocale();
|
||||
assertEquals("ar-EG + ar-SA-u-nu-latn = ar-SA-u-nu-latn",
|
||||
"ar-SA-u-nu-latn",
|
||||
resolved.toLanguageTag());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatchGrandfatheredCode() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
|
||||
|
@ -517,6 +638,14 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCanonicalize() {
|
||||
LocaleMatcher matcher = LocaleMatcher.builder().build();
|
||||
assertEquals("bh --> bho", new ULocale("bho"), matcher.canonicalize(new ULocale("bh")));
|
||||
assertEquals("mo-200 --> ro-CZ", new ULocale("ro_CZ"),
|
||||
matcher.canonicalize(new ULocale("mo_200")));
|
||||
}
|
||||
|
||||
private static final class PerfCase {
|
||||
ULocale desired;
|
||||
ULocale expectedShort;
|
||||
|
@ -850,6 +979,18 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
private static Locale toLocale(ULocale ulocale) {
|
||||
return ulocale != null ? ulocale.toLocale() : null;
|
||||
}
|
||||
|
||||
private static Iterable<Locale> localesFromULocales(Collection<ULocale> ulocales) {
|
||||
List<Locale> locales = new ArrayList<>(ulocales.size());
|
||||
for (ULocale ulocale : ulocales) {
|
||||
locales.add(ulocale.toLocale());
|
||||
}
|
||||
return locales;
|
||||
}
|
||||
|
||||
@Test
|
||||
@Parameters(method = "readTestCases")
|
||||
public void dataDriven(TestCase test) {
|
||||
|
@ -886,19 +1027,73 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
ULocale expMatch = getULocaleOrNull(test.expMatch);
|
||||
if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
|
||||
ULocale bestSupported = matcher.getBestMatch(test.desired);
|
||||
assertEquals("bestSupported", expMatch, bestSupported);
|
||||
assertEquals("bestSupported ULocale from string", expMatch, bestSupported);
|
||||
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
|
||||
if (desired.getULocales().size() == 1) {
|
||||
ULocale desiredULocale = desired.iterator().next();
|
||||
bestSupported = matcher.getBestMatch(desiredULocale);
|
||||
assertEquals("bestSupported ULocale from ULocale", expMatch, bestSupported);
|
||||
Locale desiredLocale = desiredULocale.toLocale();
|
||||
Locale bestSupportedLocale = matcher.getBestLocale(desiredLocale);
|
||||
assertEquals("bestSupported Locale from Locale",
|
||||
toLocale(expMatch), bestSupportedLocale);
|
||||
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(desiredULocale);
|
||||
assertEquals("result.getSupportedULocale from ULocale",
|
||||
expMatch, result.getSupportedULocale());
|
||||
assertEquals("result.getSupportedLocale from ULocale",
|
||||
toLocale(expMatch), result.getSupportedLocale());
|
||||
|
||||
result = matcher.getBestLocaleResult(desiredLocale);
|
||||
assertEquals("result.getSupportedULocale from Locale",
|
||||
expMatch, result.getSupportedULocale());
|
||||
assertEquals("result.getSupportedLocale from Locale",
|
||||
toLocale(expMatch), result.getSupportedLocale());
|
||||
} else {
|
||||
bestSupported = matcher.getBestMatch(desired);
|
||||
assertEquals("bestSupported ULocale from ULocale iterator",
|
||||
expMatch, bestSupported);
|
||||
Locale bestSupportedLocale = matcher.getBestLocale(
|
||||
localesFromULocales(desired.getULocales()));
|
||||
assertEquals("bestSupported Locale from Locale iterator",
|
||||
toLocale(expMatch), bestSupportedLocale);
|
||||
}
|
||||
} else {
|
||||
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
|
||||
assertEquals("bestSupported", expMatch, result.getSupportedULocale());
|
||||
assertEquals("result.getSupportedULocale from ULocales",
|
||||
expMatch, result.getSupportedULocale());
|
||||
assertEquals("result.getSupportedLocale from ULocales",
|
||||
toLocale(expMatch), result.getSupportedLocale());
|
||||
if (!test.expDesired.isEmpty()) {
|
||||
ULocale expDesired = getULocaleOrNull(test.expDesired);
|
||||
assertEquals("bestDesired", expDesired, result.getDesiredULocale());
|
||||
assertEquals("result.getDesiredULocale from ULocales",
|
||||
expDesired, result.getDesiredULocale());
|
||||
assertEquals("result.getDesiredLocale from ULocales",
|
||||
toLocale(expDesired), result.getDesiredLocale());
|
||||
}
|
||||
if (!test.expCombined.isEmpty()) {
|
||||
ULocale expCombined = getULocaleOrNull(test.expCombined);
|
||||
ULocale combined = result.makeServiceULocale();
|
||||
assertEquals("combined", expCombined, combined);
|
||||
assertEquals("combined ULocale from ULocales", expCombined, result.makeResolvedULocale());
|
||||
assertEquals("combined Locale from ULocales", toLocale(expCombined), result.makeResolvedLocale());
|
||||
}
|
||||
|
||||
result = matcher.getBestLocaleResult(localesFromULocales(desired.getULocales()));
|
||||
assertEquals("result.getSupportedULocale from Locales",
|
||||
expMatch, result.getSupportedULocale());
|
||||
assertEquals("result.getSupportedLocale from Locales",
|
||||
toLocale(expMatch), result.getSupportedLocale());
|
||||
if (!test.expDesired.isEmpty()) {
|
||||
ULocale expDesired = getULocaleOrNull(test.expDesired);
|
||||
assertEquals("result.getDesiredULocale from Locales",
|
||||
expDesired, result.getDesiredULocale());
|
||||
assertEquals("result.getDesiredLocale from Locales",
|
||||
toLocale(expDesired), result.getDesiredLocale());
|
||||
}
|
||||
if (!test.expCombined.isEmpty()) {
|
||||
ULocale expCombined = getULocaleOrNull(test.expCombined);
|
||||
assertEquals("combined ULocale from Locales", expCombined, result.makeResolvedULocale());
|
||||
assertEquals("combined Locale from Locales", toLocale(expCombined), result.makeResolvedLocale());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
@ -23,37 +25,106 @@ import com.ibm.icu.util.ULocale;
|
|||
*/
|
||||
@RunWith(JUnit4.class)
|
||||
public class LocalePriorityListTest extends TestFmwk {
|
||||
@Test
|
||||
public void testLanguagePriorityList() {
|
||||
final String expected = "af, en, fr";
|
||||
@Test
|
||||
public void testLanguagePriorityList() {
|
||||
final String expected = "af, en, fr";
|
||||
|
||||
LocalePriorityList list = LocalePriorityList.add("af, en, fr;q=0.9").build();
|
||||
assertEquals(expected, list.toString());
|
||||
LocalePriorityList list = LocalePriorityList.add("af, en, fr;q=0.9").build();
|
||||
assertEquals(expected, list.toString());
|
||||
|
||||
// check looseness, and that later values win
|
||||
LocalePriorityList list2 = LocalePriorityList.add(
|
||||
", fr ; q = 0.9 , en;q=0.1 , af, en, de;q=0, ").build();
|
||||
assertEquals(expected, list2.toString());
|
||||
assertEquals(list, list2);
|
||||
// check looseness, and that later values win
|
||||
LocalePriorityList list2 = LocalePriorityList.add(
|
||||
", fr ; q = 0.9 , en;q=0.1 , af, en, de;q=0, ").build();
|
||||
assertEquals(expected, list2.toString());
|
||||
assertEquals(list, list2);
|
||||
|
||||
LocalePriorityList list3 = LocalePriorityList
|
||||
.add(new ULocale("af"))
|
||||
.add(ULocale.FRENCH, 0.9d)
|
||||
.add(ULocale.ENGLISH)
|
||||
.build();
|
||||
assertEquals(expected, list3.toString());
|
||||
assertEquals(list, list3);
|
||||
LocalePriorityList list3 = LocalePriorityList
|
||||
.add(new ULocale("af"))
|
||||
.add(ULocale.FRENCH, 0.9d)
|
||||
.add(ULocale.ENGLISH)
|
||||
.build();
|
||||
assertEquals(expected, list3.toString());
|
||||
assertEquals(list, list3);
|
||||
|
||||
LocalePriorityList list4 = LocalePriorityList
|
||||
.add(list).build();
|
||||
assertEquals(expected, list4.toString());
|
||||
assertEquals(list, list4);
|
||||
LocalePriorityList list4 = LocalePriorityList.add(list).build();
|
||||
assertEquals(expected, list4.toString());
|
||||
assertEquals(list, list4);
|
||||
|
||||
LocalePriorityList list5 = LocalePriorityList.add("af, fr;q=0.9, en").build(true);
|
||||
assertEquals("af, en, fr;q=0.9", list5.toString());
|
||||
}
|
||||
LocalePriorityList list5 = LocalePriorityList.add("af, fr;q=0.9, en").build(true);
|
||||
assertEquals("af, en, fr;q=0.9", list5.toString());
|
||||
}
|
||||
|
||||
private void assertEquals(Object expected, Object string) {
|
||||
assertEquals("", expected, string);
|
||||
}
|
||||
@Test
|
||||
public void testGetULocales() {
|
||||
LocalePriorityList list = LocalePriorityList.add("af, en, fr").build();
|
||||
Set<ULocale> locales = list.getULocales();
|
||||
assertEquals("number of locales", 3, locales.size());
|
||||
assertTrue("fr", locales.contains(ULocale.FRENCH));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIterator() {
|
||||
LocalePriorityList list = LocalePriorityList.add("af, en, fr").build();
|
||||
ULocale af = new ULocale("af");
|
||||
int count = 0;
|
||||
for (ULocale locale : list) {
|
||||
assertTrue("expected locale",
|
||||
locale.equals(af) || locale.equals(ULocale.ENGLISH) ||
|
||||
locale.equals(ULocale.FRENCH));
|
||||
++count;
|
||||
}
|
||||
assertEquals("number of locales", 3, count);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQValue() {
|
||||
try {
|
||||
LocalePriorityList.add("de;q=-0.1");
|
||||
errln("negative accept-language qvalue should fail");
|
||||
} catch(IllegalArgumentException expected) {
|
||||
// good
|
||||
}
|
||||
try {
|
||||
LocalePriorityList.add("de;q=1.001");
|
||||
errln("accept-language qvalue > 1 should fail");
|
||||
} catch(IllegalArgumentException expected) {
|
||||
// good
|
||||
}
|
||||
LocalePriorityList list = LocalePriorityList.add("de;q=0.555555555").build(true);
|
||||
double weight = list.getWeight(ULocale.GERMAN);
|
||||
assertTrue("many decimals", 0.555 <= weight && weight <= 0.556);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReuse() {
|
||||
// Test reusing a Builder after build(), and some other code coverage.
|
||||
LocalePriorityList.Builder builder =
|
||||
LocalePriorityList.add("el;q=0.5, de, fr;q=0.2, el;q=0");
|
||||
LocalePriorityList list = builder.build(true);
|
||||
assertEquals("initial list", "de, fr;q=0.2", list.toString());
|
||||
list = builder.add(ULocale.FRENCH, 1.0).build(true);
|
||||
assertEquals("upgrade French", "de, fr", list.toString());
|
||||
list = builder.add(ULocale.ITALIAN, 0.1).build(true);
|
||||
assertEquals("add Italian", "de, fr, it;q=0.1", list.toString());
|
||||
builder = LocalePriorityList.add(list);
|
||||
list = builder.build(true);
|
||||
assertEquals("cloned Builder", "de, fr, it;q=0.1", list.toString());
|
||||
list = builder.add(ULocale.ITALIAN).build(true);
|
||||
assertEquals("upgrage Italian", "de, fr, it", list.toString());
|
||||
// Start over with all 1.0 weights.
|
||||
builder = LocalePriorityList.add("de, fr");
|
||||
list = builder.build(true);
|
||||
assertEquals("simple", "de, fr", list.toString());
|
||||
// Add another list.
|
||||
LocalePriorityList list2 = LocalePriorityList.add(ULocale.ITALIAN, 0.2).build(true);
|
||||
assertEquals("list2", "it;q=0.2", list2.toString());
|
||||
list = builder.add(list2).build(true);
|
||||
assertEquals("list+list2", "de, fr, it;q=0.2", list.toString());
|
||||
list = builder.add(ULocale.JAPANESE).build(true);
|
||||
assertEquals("list+list2+ja", "de, fr, ja, it;q=0.2", list.toString());
|
||||
}
|
||||
|
||||
private void assertEquals(Object expected, Object string) {
|
||||
assertEquals("", expected, string);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -153,7 +153,7 @@ public final class LocaleDistanceBuilder {
|
|||
bytes[length++] = (byte) c;
|
||||
} else {
|
||||
// Mark the last character as a terminator to avoid overlap matches.
|
||||
bytes[length++] = (byte) (c | 0x80);
|
||||
bytes[length++] = (byte) (c | LocaleDistance.END_OF_SUBTAG);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue