ICU-9695 port LocaleMatcher to C++

This commit is contained in:
Markus Scherer 2019-08-16 23:35:49 +00:00
parent 7942b58b81
commit 41c24b6c00
48 changed files with 6951 additions and 344 deletions

View file

@ -88,8 +88,9 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucurr.o \
localebuilder.o \
localebuilder.o localeprioritylist.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
lsr.o loclikelysubtags.o locdistance.o localematcher.o \
bytestream.o stringpiece.o bytesinkutil.o \
stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \

View file

@ -35,6 +35,17 @@ CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
return *this;
}
// Returns a newly uprv_malloc()ed copy of this string's bytes, including the byte
// that follows the last character (len + 1 bytes in total).
// Returns nullptr if errorCode already indicates a failure, or if the allocation
// fails (in which case errorCode is set to U_MEMORY_ALLOCATION_ERROR).
char *CharString::cloneData(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    const int32_t byteCount = len + 1;
    char *copy = static_cast<char *>(uprv_malloc(byteCount));
    if (copy != nullptr) {
        uprv_memcpy(copy, buffer.getAlias(), byteCount);
    } else {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
    }
    return copy;
}
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
len=s.len;
@ -52,6 +63,18 @@ int32_t CharString::lastIndexOf(char c) const {
return -1;
}
// Returns true if the bytes of s occur somewhere inside this string.
// An empty s is never considered to be contained.
bool CharString::contains(StringPiece s) const {
    if (s.empty()) {
        return false;
    }
    const int32_t needleLength = s.length();
    const char *haystack = buffer.getAlias();
    // Try every start offset at which the needle still fits into the string.
    for (int32_t start = 0; start + needleLength <= len; ++start) {
        if (uprv_memcmp(haystack + start, s.data(), needleLength) == 0) {
            return true;
        }
    }
    return false;
}
CharString &CharString::truncate(int32_t newLength) {
if(newLength<0) {
newLength=0;

View file

@ -82,10 +82,24 @@ public:
const char *data() const { return buffer.getAlias(); }
char *data() { return buffer.getAlias(); }
/**
* Allocates length()+1 chars and copies the NUL-terminated data().
* The caller must uprv_free() the result.
*/
char *cloneData(UErrorCode &errorCode) const;
/** @return true if this string has the same length and the same bytes as other */
bool operator==(StringPiece other) const {
    if (len != other.length()) {
        return false;
    }
    // Two empty strings are equal; skip the byte comparison in that case.
    return len == 0 || uprv_memcmp(data(), other.data(), len) == 0;
}
/** @return the negation of operator==(other) */
bool operator!=(StringPiece other) const {
    const bool same = operator==(other);
    return !same;
}
/** @return last index of c, or -1 if c is not in this string */
int32_t lastIndexOf(char c) const;
bool contains(StringPiece s) const;
CharString &clear() { len=0; buffer[0]=0; return *this; }
CharString &truncate(int32_t newLength);

View file

@ -239,14 +239,20 @@
<ClCompile Include="punycode.cpp" />
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
<ClCompile Include="localebuilder.cpp" />
<ClCompile Include="localematcher.cpp" />
<ClCompile Include="localeprioritylist.cpp" />
<ClCompile Include="locavailable.cpp" />
<ClCompile Include="locbased.cpp" />
<ClCompile Include="locdispnames.cpp" />
<ClCompile Include="locdistance.cpp" />
<ClCompile Include="locdspnm.cpp" />
<ClCompile Include="locid.cpp" />
<ClCompile Include="loclikely.cpp" />
<ClCompile Include="loclikelysubtags.cpp" />
<ClCompile Include="locresdata.cpp" />
<ClCompile Include="locutil.cpp" />
<ClCompile Include="lsr.cpp" />
<ClCompile Include="resbund.cpp" />
<ClCompile Include="resbund_cnv.cpp" />
<ClCompile Include="ucat.cpp" />
@ -257,7 +263,6 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
<ClCompile Include="localebuilder.cpp" />
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
@ -408,8 +413,12 @@
<ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" />
<ClInclude Include="punycode.h" />
<ClInclude Include="localeprioritylist.h" />
<ClInclude Include="locbased.h" />
<ClInclude Include="locdistance.h" />
<ClInclude Include="loclikelysubtags.h" />
<ClInclude Include="locutil.h" />
<ClInclude Include="lsr.h" />
<ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" />
<ClInclude Include="ulocimp.h" />
@ -449,7 +458,6 @@
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="static_unicode_sets.h" />
<ClInclude Include="capi_helper.h" />
<ClInclude Include="unicode\localebuilder.h" />
<ClInclude Include="restrace.h" />
</ItemGroup>
<ItemGroup>

View file

@ -313,6 +313,15 @@
<ClCompile Include="uts46.cpp">
<Filter>idna</Filter>
</ClCompile>
<ClCompile Include="localebuilder.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="localematcher.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="localeprioritylist.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="locavailable.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
@ -322,18 +331,27 @@
<ClCompile Include="locdispnames.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="locdistance.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="locid.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="loclikely.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="loclikelysubtags.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="locresdata.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="locutil.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="lsr.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="resbund.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
@ -361,9 +379,6 @@
<ClCompile Include="resource.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="localebuilder.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="caniter.cpp">
<Filter>normalization</Filter>
</ClCompile>
@ -816,12 +831,24 @@
<ClInclude Include="punycode.h">
<Filter>idna</Filter>
</ClInclude>
<ClInclude Include="localeprioritylist.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="locbased.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="locdistance.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="loclikelysubtags.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="locutil.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="lsr.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
<ClInclude Include="ulocimp.h">
<Filter>locales &amp; resources</Filter>
</ClInclude>
@ -1078,6 +1105,12 @@
<CustomBuild Include="unicode\uidna.h">
<Filter>idna</Filter>
</CustomBuild>
<CustomBuild Include="unicode\localebuilder.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
<CustomBuild Include="unicode\localematcher.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
<CustomBuild Include="unicode\locid.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
@ -1237,8 +1270,5 @@
<CustomBuild Include="unicode\stringoptions.h">
<Filter>strings</Filter>
</CustomBuild>
<CustomBuild Include="unicode\localebuilder.h">
<Filter>locales &amp; resources</Filter>
</CustomBuild>
</ItemGroup>
</Project>

View file

@ -430,14 +430,20 @@
<ClCompile Include="punycode.cpp" />
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
<ClCompile Include="localebuilder.cpp" />
<ClCompile Include="localematcher.cpp" />
<ClCompile Include="localeprioritylist.cpp" />
<ClCompile Include="locavailable.cpp" />
<ClCompile Include="locbased.cpp" />
<ClCompile Include="locdispnames.cpp" />
<ClCompile Include="locdistance.cpp" />
<ClCompile Include="locdspnm.cpp" />
<ClCompile Include="locid.cpp" />
<ClCompile Include="loclikely.cpp" />
<ClCompile Include="loclikelysubtags.cpp" />
<ClCompile Include="locresdata.cpp" />
<ClCompile Include="locutil.cpp" />
<ClCompile Include="lsr.cpp" />
<ClCompile Include="resbund.cpp" />
<ClCompile Include="resbund_cnv.cpp" />
<ClCompile Include="ucat.cpp" />
@ -448,7 +454,6 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
<ClCompile Include="localebuilder.cpp" />
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
@ -600,8 +605,12 @@
<ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" />
<ClInclude Include="punycode.h" />
<ClInclude Include="localeprioritylist.h" />
<ClInclude Include="locbased.h" />
<ClInclude Include="locdistance.h" />
<ClInclude Include="loclikelysubtags.h" />
<ClInclude Include="locutil.h" />
<ClInclude Include="lsr.h" />
<ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" />
<ClInclude Include="ulocimp.h" />
@ -640,7 +649,6 @@
<ClInclude Include="ustr_imp.h" />
<ClInclude Include="static_unicode_sets.h" />
<ClInclude Include="capi_helper.h" />
<ClInclude Include="unicode\localebuilder.h" />
<ClInclude Include="restrace.h" />
</ItemGroup>
<ItemGroup>

View file

@ -157,13 +157,18 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
}
static void
_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
Locale& to, bool validate, UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) { return; }
LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
LocalPointer<icu::StringEnumeration> ownedKeywords;
if (keywords == nullptr) {
ownedKeywords.adoptInstead(from.createKeywords(errorCode));
if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
keywords = ownedKeywords.getAlias();
}
const char* key;
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
CharString value;
CharStringByteSink sink(&value);
from.getKeywordValue(key, sink, errorCode);
@ -176,34 +181,34 @@ _copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& error
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
to->setKeywordValue(key, value.data(), errorCode);
to.setKeywordValue(key, value.data(), errorCode);
if (U_FAILURE(errorCode)) { return; }
}
}
void static
_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
{
// Clear Unicode attributes
locale->setKeywordValue(kAttributeKey, "", errorCode);
locale.setKeywordValue(kAttributeKey, "", errorCode);
// Clear all Unicode keyword values
LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode));
LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
const char* key;
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
locale->setUnicodeKeywordValue(key, nullptr, errorCode);
locale.setUnicodeKeywordValue(key, nullptr, errorCode);
}
}
static void
_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
{
// Add the unicode extensions to extensions_
CharString locale_str("und-u-", errorCode);
locale_str.append(value, errorCode);
_copyExtensions(
Locale::forLanguageTag(locale_str.data(), errorCode),
Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
locale, false, errorCode);
}
@ -235,10 +240,10 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
status_);
return *this;
}
_clearUAttributesAndKeyType(extensions_, status_);
_clearUAttributesAndKeyType(*extensions_, status_);
if (U_FAILURE(status_)) { return *this; }
if (!value.empty()) {
_setUnicodeExtensions(extensions_, value_str, status_);
_setUnicodeExtensions(*extensions_, value_str, status_);
}
return *this;
}
@ -401,6 +406,24 @@ Locale makeBogusLocale() {
return bogus;
}
void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) { return; }
LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
// Error, or no extensions to copy.
return;
}
if (extensions_ == nullptr) {
extensions_ = new Locale();
if (extensions_ == nullptr) {
status_ = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
_copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
}
Locale LocaleBuilder::build(UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) {
@ -425,7 +448,7 @@ Locale LocaleBuilder::build(UErrorCode& errorCode)
}
Locale product(locale_str.data());
if (extensions_ != nullptr) {
_copyExtensions(*extensions_, &product, true, errorCode);
_copyExtensions(*extensions_, nullptr, product, true, errorCode);
}
if (U_FAILURE(errorCode)) {
return makeBogusLocale();

View file

@ -0,0 +1,720 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// localematcher.cpp
// created: 2019may08 Markus W. Scherer
#ifndef __LOCMATCHER_H__
#define __LOCMATCHER_H__
#include "unicode/utypes.h"
#include "unicode/localebuilder.h"
#include "unicode/localematcher.h"
#include "unicode/locid.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
#include "cstring.h"
#include "localeprioritylist.h"
#include "loclikelysubtags.h"
#include "locdistance.h"
#include "lsr.h"
#include "uassert.h"
#include "uhash.h"
#include "uvector.h"
#define UND_LSR LSR("und", "", "")
/**
* Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
*
* @draft ICU 65
*/
// In this file the value is handed to LocaleLsrIterator, which uses it to decide
// whether rememberCurrent() clones the current desired locale or only keeps a pointer to it.
enum ULocMatchLifetime {
/**
* Locale objects are temporary.
* The matcher will make a copy of a locale that will be used beyond one function call.
*
* @draft ICU 65
*/
ULOCMATCH_TEMPORARY_LOCALES,
/**
* Locale objects are stored at least as long as the matcher is used.
* The matcher will keep only a pointer to a locale that will be used beyond one function call,
* avoiding a copy.
*
* @draft ICU 65
*/
ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone?
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchLifetime ULocMatchLifetime;
#endif
U_NAMESPACE_BEGIN
// Move constructor: takes over all fields of src.
// If src owned its desired locale, that ownership moves to the new object
// and src is reset so that its destructor will not delete the locale.
LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT
        : desiredLocale(src.desiredLocale),
          supportedLocale(src.supportedLocale),
          desiredIndex(src.desiredIndex),
          supportedIndex(src.supportedIndex),
          desiredIsOwned(src.desiredIsOwned) {
    if (!desiredIsOwned) {
        // Borrowed desired locale: src keeps its (non-owning) pointer.
        return;
    }
    src.desiredIsOwned = FALSE;
    src.desiredIndex = -1;
    src.desiredLocale = nullptr;
}
// Deletes the desired locale only when this Result owns it.
LocaleMatcher::Result::~Result() {
    if (!desiredIsOwned) {
        return;
    }
    delete desiredLocale;
}
// Move assignment: releases what this Result currently owns, then takes over
// all fields of src. If src owned its desired locale, ownership moves here
// and src is reset. Self-assignment is a no-op.
LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
    if (this == &src) {
        // Without this guard a self-move would delete the owned desired locale
        // and then clear the pointer to it.
        return *this;
    }
    // Release the owned locale directly instead of calling this->~Result():
    // an explicit destructor call ends the object's lifetime, after which
    // assigning to its members is not well-defined.
    if (desiredIsOwned) {
        delete desiredLocale;
    }
    desiredLocale = src.desiredLocale;
    supportedLocale = src.supportedLocale;
    desiredIndex = src.desiredIndex;
    supportedIndex = src.supportedIndex;
    desiredIsOwned = src.desiredIsOwned;
    if (desiredIsOwned) {
        src.desiredLocale = nullptr;
        src.desiredIndex = -1;
        src.desiredIsOwned = FALSE;
    }
    return *this;
}
// Combines the best supported locale with the region, variants and extensions
// of the best desired locale.
// Returns the root locale if errorCode indicates a failure or there is no supported locale,
// and the supported locale itself if there is no desired locale or both are equal.
Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return Locale::getRoot();
    }
    if (supportedLocale == nullptr) {
        return Locale::getRoot();
    }
    const Locale *desired = getDesiredLocale();
    if (desired == nullptr) {
        return *supportedLocale;
    }
    if (*supportedLocale == *desired) {
        return *supportedLocale;
    }

    LocaleBuilder resolved;
    resolved.setLocale(*supportedLocale);

    // Take the region from the desired locale when it has one.
    const char *desiredRegion = desired->getCountry();
    if (desiredRegion[0] != 0) {
        resolved.setRegion(desiredRegion);
    }

    // Take the variants from the desired locale when it has any.
    // These replace any variants of the supported locale.
    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
    const char *desiredVariants = desired->getVariant();
    if (desiredVariants[0] != 0) {
        resolved.setVariant(desiredVariants);
    }

    // Take the extensions from the desired locale when it has any.
    // C++ note: The following note, copied from Java, may not be true,
    // as long as C++ copies by legacy ICU keyword, not by extension singleton.
    // Note that this will override any supportedLocale extensions.
    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
    // (replacing calendar).
    resolved.copyExtensionsFrom(*desired, errorCode);
    return resolved.build(errorCode);
}
// Move constructor: copies the plain settings and takes over the two owned
// pointers (supported-locales vector and default locale) from src.
LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT
        : errorCode_(src.errorCode_),
          supportedLocales_(src.supportedLocales_),
          thresholdDistance_(src.thresholdDistance_),
          demotion_(src.demotion_),
          defaultLocale_(src.defaultLocale_),
          favor_(src.favor_) {
    // src must no longer delete what this builder now owns.
    src.defaultLocale_ = nullptr;
    src.supportedLocales_ = nullptr;
}
// Deletes the two objects that the builder holds by pointer:
// the vector of supported locales and the default locale (either may be nullptr).
LocaleMatcher::Builder::~Builder() {
delete supportedLocales_;
delete defaultLocale_;
}
// Move assignment: deletes what this builder currently owns, then takes over
// the settings and the owned pointers of src. Self-assignment is a no-op.
LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
    if (this == &src) {
        // Without this guard a self-move would delete the supported locales
        // and the default locale and leave the builder emptied.
        return *this;
    }
    // Release the owned objects directly instead of calling this->~Builder():
    // an explicit destructor call ends the object's lifetime, after which
    // assigning to its members is not well-defined.
    delete supportedLocales_;
    delete defaultLocale_;
    errorCode_ = src.errorCode_;
    supportedLocales_ = src.supportedLocales_;
    thresholdDistance_ = src.thresholdDistance_;
    demotion_ = src.demotion_;
    defaultLocale_ = src.defaultLocale_;
    favor_ = src.favor_;
    src.supportedLocales_ = nullptr;
    src.defaultLocale_ = nullptr;
    return *this;
}
// Empties the supported-locales vector if it has been created; keeps the vector itself.
void LocaleMatcher::Builder::clearSupportedLocales() {
    if (supportedLocales_ == nullptr) {
        return;
    }
    supportedLocales_->removeAllElements();
}
bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
if (U_FAILURE(errorCode_)) { return false; }
if (supportedLocales_ != nullptr) { return true; }
supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
if (U_FAILURE(errorCode_)) { return false; }
if (supportedLocales_ == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return false;
}
return true;
}
// Replaces the supported locales with the ones parsed from a locale list string.
// Parsing happens first; the previous supported locales are only cleared
// if parsing succeeded. Errors are recorded in errorCode_.
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
        StringPiece locales) {
    LocalePriorityList parsed(locales, errorCode_);
    if (U_FAILURE(errorCode_)) {
        return *this;
    }
    clearSupportedLocales();
    if (!ensureSupportedLocaleVector()) {
        return *this;
    }
    const int32_t slotCount = parsed.getLengthIncludingRemoved();
    for (int32_t slot = 0; slot < slotCount; ++slot) {
        Locale *orphan = parsed.orphanLocaleAt(slot);
        if (orphan != nullptr) {
            supportedLocales_->addElement(orphan, errorCode_);
            if (U_FAILURE(errorCode_)) {
                // The vector did not take the locale; do not leak it.
                delete orphan;
                return *this;
            }
        }
    }
    return *this;
}
// Replaces the supported locales with clones of the locales produced by the iterator.
// Stops at the first failure, which is recorded in errorCode_.
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
    if (U_FAILURE(errorCode_)) {
        return *this;
    }
    clearSupportedLocales();
    if (!ensureSupportedLocaleVector()) {
        return *this;
    }
    for (; locales.hasNext();) {
        Locale *copy = locales.next().clone();
        if (copy == nullptr) {
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        supportedLocales_->addElement(copy, errorCode_);
        if (U_FAILURE(errorCode_)) {
            // The vector did not take the clone; do not leak it.
            delete copy;
            return *this;
        }
    }
    return *this;
}
// Appends a clone of locale to the supported locales.
// Does nothing if the builder is in an error state; failures are recorded in errorCode_.
LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
    if (ensureSupportedLocaleVector()) {
        Locale *copy = locale.clone();
        if (copy != nullptr) {
            supportedLocales_->addElement(copy, errorCode_);
            if (U_FAILURE(errorCode_)) {
                // The vector did not take the clone; do not leak it.
                delete copy;
            }
        } else {
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return *this;
}
// Sets the default locale to a clone of *defaultLocale, or to "none" if nullptr is passed.
// The previous default locale is deleted only after the clone has been made successfully.
LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
    if (U_FAILURE(errorCode_)) {
        return *this;
    }
    Locale *replacement = nullptr;
    if (defaultLocale != nullptr) {
        replacement = defaultLocale->clone();
    }
    if (defaultLocale != nullptr && replacement == nullptr) {
        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    delete defaultLocale_;
    defaultLocale_ = replacement;
    return *this;
}
// Stores the favor-subtag setting unless the builder is already in an error state.
LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
    if (U_SUCCESS(errorCode_)) {
        favor_ = subtag;
    }
    return *this;
}
// Stores the demotion setting unless the builder is already in an error state.
LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
    if (U_SUCCESS(errorCode_)) {
        demotion_ = demotion;
    }
    return *this;
}
// Disabled code: everything up to the matching #endif is excluded from compilation.
// Note that "@Deprecated" below is a Java annotation, not C++;
// it would have to be removed before this block could be enabled.
#if 0
/**
* <i>Internal only!</i>
*
* @param thresholdDistance the thresholdDistance to set, with -1 = default
* @return this Builder object
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
if (U_FAILURE(errorCode_)) { return *this; }
if (thresholdDistance > 100) {
thresholdDistance = 100;
}
thresholdDistance_ = thresholdDistance;
return *this;
}
#endif
// Propagates the builder's stored error into outErrorCode.
// Returns TRUE if outErrorCode indicates a failure afterwards
// (either it already did, or the builder's error was copied into it), otherwise FALSE.
// An existing failure in outErrorCode is never overwritten.
UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
    if (U_SUCCESS(outErrorCode)) {
        if (U_SUCCESS(errorCode_)) {
            return FALSE;
        }
        outErrorCode = errorCode_;
    }
    return TRUE;
}
// Creates a LocaleMatcher from the current builder state.
// A failure stored in the builder is passed on through errorCode,
// unless errorCode already indicates a failure.
LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
    // copyErrorTo() writes errorCode_ into errorCode exactly when errorCode is
    // still a success code and errorCode_ is a failure.
    copyErrorTo(errorCode);
    return LocaleMatcher(*this, errorCode);
}
namespace {

// Returns the maximized LSR for the locale, or the "und" LSR if errorCode
// indicates a failure, the locale is bogus, or its name is empty.
LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
                       UErrorCode &errorCode) {
    const bool canMaximize =
        U_SUCCESS(errorCode) && !locale.isBogus() && *locale.getName() != 0 /* not "und" */;
    if (canMaximize) {
        return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
    }
    return UND_LSR;
}

// UHashtable key hash function: the key is a pointer to an LSR with a precomputed hashCode.
int32_t hashLSR(const UHashTok token) {
    return static_cast<const LSR *>(token.pointer)->hashCode;
}

// UHashtable key comparator: compares the two LSRs that the keys point to.
UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
    const LSR *left = static_cast<const LSR *>(t1.pointer);
    const LSR *right = static_cast<const LSR *>(t2.pointer);
    return *left == *right;
}

// Maps lsr to i unless lsr already has a (non-zero) value in the table.
// Returns true only if a new entry was added successfully.
// The table stores a pointer to lsr, not a copy. i must be positive
// because a stored value of 0 is indistinguishable from "not found".
bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return false;
    }
    U_ASSERT(i > 0);
    if (uhash_geti(lsrToIndex, &lsr) != 0) {
        return false;
    }
    uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
    return U_SUCCESS(errorCode);
}

}  // namespace
// Builds a matcher from the builder's settings.
// Clones the builder's supported locales, computes a maximized LSR for each of them,
// and indexes the distinct LSRs: the default locale first (if it is supported),
// then paradigm locales, then all others, each group in builder order.
// On failure, errorCode is set and the constructor returns early; all pointer members
// start out as nullptr so that the destructor can clean up a partially built object.
// NOTE(review): if the uprv_malloc() for supportedLocales fails, supportedLocalesLength is
// already non-zero while supportedLocales is nullptr — confirm that the destructor tolerates this.
LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
localeDistance(*LocaleDistance::getSingleton(errorCode)),
thresholdDistance(builder.thresholdDistance_),
demotionPerDesiredLocale(0),
favorSubtag(builder.favor_),
supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
supportedLsrToIndex(nullptr),
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
if (U_FAILURE(errorCode)) { return; }
// A negative builder threshold means "use the default".
if (thresholdDistance < 0) {
thresholdDistance = localeDistance.getDefaultScriptDistance();
}
supportedLocalesLength = builder.supportedLocales_ != nullptr ?
builder.supportedLocales_->size() : 0;
// def/idef track the default locale and its index among the supported locales (-1 = none).
const Locale *def = builder.defaultLocale_;
int32_t idef = -1;
if (supportedLocalesLength > 0) {
// Store the supported locales in input order,
// so that when different types are used (e.g., language tag strings)
// we can return those by parallel index.
supportedLocales = static_cast<const Locale **>(
uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
// Supported LSRs in input order.
// In C++, we store these permanently to simplify ownership management
// in the hash tables. Duplicate LSRs (if any) are unused overhead.
lsrs = new LSR[supportedLocalesLength];
if (supportedLocales == nullptr || lsrs == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// If the constructor fails partway, we need null pointers for destructibility.
uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
// Also find the first supported locale whose LSR is
// the same as that for the default locale.
LSR builderDefaultLSR;
const LSR *defLSR = nullptr;
if (def != nullptr) {
builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
if (U_FAILURE(errorCode)) { return; }
defLSR = &builderDefaultLSR;
}
// First pass: clone each supported locale and compute its LSR and hash code.
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
supportedLocales[i] = locale.clone();
if (supportedLocales[i] == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
const Locale &supportedLocale = *supportedLocales[i];
LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
lsr.setHashCode();
if (U_FAILURE(errorCode)) { return; }
if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
idef = i;
defLSR = &lsr; // owned pointer to put into supportedLsrToIndex
if (*def == supportedLocale) {
def = &supportedLocale; // owned pointer to keep
}
}
}
// We need an unordered map from LSR to first supported locale with that LSR,
// and an ordered list of (LSR, supported index).
// We insert the supported locales in the following order:
// 1. Default locale, if it is supported.
// 2. Priority locales (aka "paradigm locales") in builder order.
// 3. Remaining locales in builder order.
// In Java, we use a LinkedHashMap for both map & ordered lists.
// In C++, we use separate structures.
// We over-allocate arrays of LSRs and indexes for simplicity.
// We reserve slots at the array starts for the default and paradigm locales,
// plus enough for all supported locales.
// If there are few paradigm locales and few duplicate supported LSRs,
// then the amount of wasted space is small.
supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
supportedLocalesLength, &errorCode);
if (U_FAILURE(errorCode)) { return; }
int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
supportedLSRs = static_cast<const LSR **>(
uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
supportedIndexes = static_cast<int32_t *>(
uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// paradigmIndex fills the reserved front slots; otherIndex fills the slots after them.
int32_t paradigmIndex = 0;
int32_t otherIndex = paradigmLimit;
if (idef >= 0) {
// The hash table stores index + 1 so that 0 can mean "not found".
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
supportedLSRs[0] = defLSR;
supportedIndexes[0] = idef;
paradigmIndex = 1;
}
// Second pass: add the remaining distinct LSRs to the map and the ordered arrays.
for (int32_t i = 0; i < supportedLocalesLength; ++i) {
if (i == idef) { continue; }
const Locale &locale = *supportedLocales[i];
const LSR &lsr = lsrs[i];
if (defLSR == nullptr) {
// No explicit default locale: the first supported locale becomes the default.
U_ASSERT(i == 0);
def = &locale;
defLSR = &lsr;
idef = 0;
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
supportedLSRs[0] = &lsr;
supportedIndexes[0] = 0;
paradigmIndex = 1;
} else if (idef >= 0 && lsr == *defLSR) {
// lsr == *defLSR means that this supported locale is
// a duplicate of the default locale.
// Either an explicit default locale is supported, and we added it before the loop,
// or there is no explicit default locale, and this is
// a duplicate of the first supported locale.
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
} else {
if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
if (localeDistance.isParadigmLSR(lsr)) {
supportedLSRs[paradigmIndex] = &lsr;
supportedIndexes[paradigmIndex++] = i;
} else {
supportedLSRs[otherIndex] = &lsr;
supportedIndexes[otherIndex++] = i;
}
}
}
if (U_FAILURE(errorCode)) { return; }
}
// Squeeze out unused array slots.
if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
(otherIndex - paradigmLimit) * sizeof(const LSR *));
uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
(otherIndex - paradigmLimit) * sizeof(int32_t));
}
supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
}
// If the default locale is not one of our own supported-locale clones,
// keep a separately owned clone of it.
if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
ownedDefaultLocale = def->clone();
if (ownedDefaultLocale == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
def = ownedDefaultLocale;
}
defaultLocale = def;
defaultLocaleIndex = idef;
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
}
}
// Move constructor: takes over all data of src and leaves src empty
// (null pointers, zero lengths, default locale index -1) so that it can be destroyed safely.
LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT
        : likelySubtags(src.likelySubtags),
          localeDistance(src.localeDistance),
          thresholdDistance(src.thresholdDistance),
          demotionPerDesiredLocale(src.demotionPerDesiredLocale),
          favorSubtag(src.favorSubtag),
          supportedLocales(src.supportedLocales),
          lsrs(src.lsrs),
          supportedLocalesLength(src.supportedLocalesLength),
          supportedLsrToIndex(src.supportedLsrToIndex),
          supportedLSRs(src.supportedLSRs),
          supportedIndexes(src.supportedIndexes),
          supportedLSRsLength(src.supportedLSRsLength),
          ownedDefaultLocale(src.ownedDefaultLocale),
          defaultLocale(src.defaultLocale),
          defaultLocaleIndex(src.defaultLocaleIndex) {
    // Supported locales and their LSRs.
    src.supportedLocalesLength = 0;
    src.supportedLocales = nullptr;
    src.lsrs = nullptr;

    // LSR lookup structures.
    src.supportedLSRsLength = 0;
    src.supportedLsrToIndex = nullptr;
    src.supportedLSRs = nullptr;
    src.supportedIndexes = nullptr;

    // Default locale.
    src.defaultLocaleIndex = -1;
    src.ownedDefaultLocale = nullptr;
    src.defaultLocale = nullptr;
}
// Releases everything the matcher owns: the cloned supported locales and their array,
// the LSR array, the LSR-to-index hash table, the ordered LSR/index arrays,
// and the separately owned default locale (if any).
LocaleMatcher::~LocaleMatcher() {
    // The constructor sets supportedLocalesLength before it allocates the array,
    // so after an allocation failure the length can be positive while the array
    // pointer is still nullptr. Only walk the array if it exists.
    if (supportedLocales != nullptr) {
        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
            delete supportedLocales[i];
        }
        uprv_free(supportedLocales);
    }
    delete[] lsrs;
    uhash_close(supportedLsrToIndex);
    uprv_free(supportedLSRs);
    uprv_free(supportedIndexes);
    delete ownedDefaultLocale;
}
// Move assignment: releases this matcher's data, takes over all data of src,
// and leaves src empty so that it can be destroyed safely.
// The likelySubtags and localeDistance members are not assigned here.
// Self-assignment is a no-op.
LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
    if (this == &src) {
        // Without this guard a self-move would free all of the matcher's data
        // and then reset every field to "empty".
        return *this;
    }
    // Reuse the destructor to release everything this matcher currently owns.
    this->~LocaleMatcher();

    thresholdDistance = src.thresholdDistance;
    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
    favorSubtag = src.favorSubtag;
    supportedLocales = src.supportedLocales;
    lsrs = src.lsrs;
    supportedLocalesLength = src.supportedLocalesLength;
    supportedLsrToIndex = src.supportedLsrToIndex;
    supportedLSRs = src.supportedLSRs;
    supportedIndexes = src.supportedIndexes;
    supportedLSRsLength = src.supportedLSRsLength;
    ownedDefaultLocale = src.ownedDefaultLocale;
    defaultLocale = src.defaultLocale;
    defaultLocaleIndex = src.defaultLocaleIndex;

    src.supportedLocales = nullptr;
    src.lsrs = nullptr;
    src.supportedLocalesLength = 0;
    src.supportedLsrToIndex = nullptr;
    src.supportedLSRs = nullptr;
    src.supportedIndexes = nullptr;
    src.supportedLSRsLength = 0;
    src.ownedDefaultLocale = nullptr;
    src.defaultLocale = nullptr;
    src.defaultLocaleIndex = -1;
    return *this;
}
/**
 * Wraps a Locale::Iterator and yields the maximized LSR of each locale.
 * It can also "remember" the current locale together with its index,
 * so that the best desired locale is still available after the iteration.
 *
 * With ULOCMATCH_TEMPORARY_LOCALES the remembered locale is a copy owned by this object
 * (until orphanRemembered() is called); with ULOCMATCH_STORED_LOCALES it is only
 * a pointer to the iterator's locale.
 */
class LocaleLsrIterator {
public:
    LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
                      ULocMatchLifetime lifetime) :
            likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}

    ~LocaleLsrIterator() {
        // Only in temporary mode is the remembered locale a copy that we own.
        if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
            delete remembered;
        }
    }

    // Not copyable: in temporary mode a copy would share the owned `remembered`
    // pointer and both destructors would delete it.
    LocaleLsrIterator(const LocaleLsrIterator &) = delete;
    LocaleLsrIterator &operator=(const LocaleLsrIterator &) = delete;

    /** @return whether the wrapped iterator has more locales */
    bool hasNext() const {
        return locales.hasNext();
    }

    /** Advances to the next locale and returns its maximized LSR (or the "und" LSR). */
    LSR next(UErrorCode &errorCode) {
        current = &locales.next();
        return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
    }

    /**
     * Records the current locale as the best desired one, along with its index.
     * In temporary mode this replaces any previously remembered copy with a new copy;
     * sets U_MEMORY_ALLOCATION_ERROR if that copy cannot be allocated.
     */
    void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
        if (U_FAILURE(errorCode)) { return; }
        bestDesiredIndex = desiredIndex;
        if (lifetime == ULOCMATCH_STORED_LOCALES) {
            remembered = current;
        } else {
            // ULOCMATCH_TEMPORARY_LOCALES
            delete remembered;
            remembered = new Locale(*current);
            if (remembered == nullptr) {
                errorCode = U_MEMORY_ALLOCATION_ERROR;
            }
        }
    }

    /** Returns the remembered locale and forgets it; this object will no longer delete it. */
    const Locale *orphanRemembered() {
        const Locale *rem = remembered;
        remembered = nullptr;
        return rem;
    }

    /** @return the index passed to the last successful rememberCurrent(), or -1 */
    int32_t getBestDesiredIndex() const {
        return bestDesiredIndex;
    }

private:
    const XLikelySubtags &likelySubtags;
    Locale::Iterator &locales;
    ULocMatchLifetime lifetime;
    const Locale *current = nullptr, *remembered = nullptr;
    int32_t bestDesiredIndex = -1;
};
// Returns the supported locale that best matches desiredLocale,
// or the default locale if there is no match or the lookup fails.
// Returns nullptr if errorCode already indicates a failure on entry.
const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    const int32_t bestIndex = getBestSuppIndex(
        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
        nullptr, errorCode);
    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
        return supportedLocales[bestIndex];
    }
    return defaultLocale;
}
/**
 * Returns the supported locale that best matches the iterated desired locales.
 * Returns nullptr if errorCode is already a failure on entry; returns the default
 * locale if the iterator is empty, matching fails, or nothing is within the threshold.
 */
const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
                                          UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return nullptr; }
    if (!desiredLocales.hasNext()) { return defaultLocale; }
    LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
    const int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
    if (U_FAILURE(errorCode) || suppIndex < 0) {
        return defaultLocale;
    }
    return supportedLocales[suppIndex];
}
/**
 * Parses an accept-language style list string and returns the best-matching
 * supported locale for it. Parse failures are reported through errorCode;
 * the iterator overload of getBestMatch() then returns nullptr.
 */
const Locale *LocaleMatcher::getBestMatchForListString(
        StringPiece desiredLocaleList, UErrorCode &errorCode) const {
    LocalePriorityList priorityList(desiredLocaleList, errorCode);
    auto desiredIter = priorityList.iterator();
    return getBestMatch(desiredIter, errorCode);
}
/**
 * Matches one desired locale and returns the outcome as a Result.
 * On any failure, or when no supported locale is close enough, the Result carries
 * no desired locale, the default locale, desired index -1 and the default locale index.
 */
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
        const Locale &desiredLocale, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode)) {
        const int32_t suppIndex = getBestSuppIndex(
            getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
            nullptr, errorCode);
        if (U_SUCCESS(errorCode) && suppIndex >= 0) {
            // The caller's Locale is referenced directly, so the Result does not own it.
            return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
        }
    }
    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
}
/**
 * Matches a sequence of desired locales and returns the outcome as a Result.
 * On any failure, an empty iterator, or when no supported locale is close enough,
 * the Result carries no desired locale, the default locale, desired index -1
 * and the default locale index.
 */
LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode) && desiredLocales.hasNext()) {
        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
        const int32_t suppIndex =
            getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
        if (U_SUCCESS(errorCode) && suppIndex >= 0) {
            // The remembered desired locale is a copy made by lsrIter;
            // it is handed over to the Result (last argument TRUE).
            return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
                          lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
        }
    }
    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
}
/**
 * Core matching loop: finds the index of the best supported locale for one or more
 * desired LSRs.
 *
 * @param desiredLSR    The first desired LSR.
 * @param remainingIter Source of further desired LSRs, or nullptr when there is only one.
 *                      When non-null, it is told to remember the desired locale
 *                      that produced the current best match.
 * @param errorCode     In/out ICU error code.
 * @return An index into supportedLocales, or -1 if there is a failure
 *         or no supported locale is within the distance threshold.
 */
int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
                                        UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return -1; }
    int32_t desiredIndex = 0;
    int32_t bestSupportedLsrIndex = -1;
    // bestDistance starts at the matcher's threshold and shrinks with every better match
    // and with the per-desired-locale demotion below.
    for (int32_t bestDistance = thresholdDistance;;) {
        // Quick check for exact maximized LSR.
        // Returns suppIndex+1 where 0 means not found.
        if (supportedLsrToIndex != nullptr) {
            desiredLSR.setHashCode();
            int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
            if (index != 0) {
                int32_t suppIndex = index - 1;
                if (remainingIter != nullptr) {
                    // A failure here is left in errorCode for the caller to check.
                    remainingIter->rememberCurrent(desiredIndex, errorCode);
                }
                return suppIndex;
            }
        }
        // The result packs the supported-LSR index into bits 31..8 and the distance into
        // bits 7..0; it is negative if nothing is below bestDistance.
        int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
                desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
        if (bestIndexAndDistance >= 0) {
            bestDistance = bestIndexAndDistance & 0xff;
            if (remainingIter != nullptr) {
                remainingIter->rememberCurrent(desiredIndex, errorCode);
                if (U_FAILURE(errorCode)) { return -1; }
            }
            // Note: inside this branch the value is known to be non-negative,
            // so the -1 alternative is never taken.
            bestSupportedLsrIndex = bestIndexAndDistance >= 0 ? bestIndexAndDistance >> 8 : -1;
        }
        // Later desired locales must beat the current best by the demotion amount;
        // stop once no distance could still qualify.
        if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
            break;
        }
        if (remainingIter == nullptr || !remainingIter->hasNext()) {
            break;
        }
        desiredLSR = remainingIter->next(errorCode);
        if (U_FAILURE(errorCode)) { return -1; }
        ++desiredIndex;
    }
    if (bestSupportedLsrIndex < 0) {
        // no good match
        return -1;
    }
    // Map from the index in supportedLSRs back to the index in supportedLocales.
    return supportedIndexes[bestSupportedLsrIndex];
}
double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
if (U_FAILURE(errorCode)) { return 0; }
const LSR *pSuppLSR = &suppLSR;
int32_t distance = localeDistance.getBestIndexAndDistance(
getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
&pSuppLSR, 1,
thresholdDistance, favorSubtag) & 0xff;
return (100 - distance) / 100.0;
}
U_NAMESPACE_END
#endif // __LOCMATCHER_H__

View file

@ -0,0 +1,239 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// localeprioritylist.cpp
// created: 2019jul11 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/locid.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
#include "charstr.h"
#include "cmemory.h"
#include "localeprioritylist.h"
#include "uarrsort.h"
#include "uassert.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
namespace {
int32_t hashLocale(const UHashTok token) {
auto *locale = static_cast<const Locale *>(token.pointer);
return locale->hashCode();
}
/** UHashtable key comparator: both key pointers are Locales, compared with operator==. */
UBool compareLocales(const UHashTok t1, const UHashTok t2) {
    const Locale &first = *static_cast<const Locale *>(t1.pointer);
    const Locale &second = *static_cast<const Locale *>(t2.pointer);
    return first == second;
}
/** The integer weight for q=1.0; weights are stored in thousandths (0..1000 = 0.0..1.0). */
constexpr int32_t WEIGHT_ONE = 1000;
/** One list entry: a locale, its qvalue weight, and its original insertion position. */
struct LocaleAndWeight {
    Locale *locale;
    int32_t weight;  // 0..1000 = 0.0..1.0
    int32_t index;  // force stable sort

    /**
     * Sort order: higher weight first; equal weights keep ascending insertion index.
     * Returns <0, 0 or >0 like a three-way comparison.
     */
    int32_t compare(const LocaleAndWeight &other) const {
        if (weight != other.weight) {
            return other.weight - weight;  // descending: other-this
        }
        return index - other.index;
    }
};
/** uprv_sortArray() comparator: forwards to LocaleAndWeight::compare(); the context is unused. */
int32_t U_CALLCONV
compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
    const LocaleAndWeight &lhs = *static_cast<const LocaleAndWeight *>(left);
    const LocaleAndWeight &rhs = *static_cast<const LocaleAndWeight *>(right);
    return lhs.compare(rhs);
}
/**
 * Returns the first position in [p, limit) that is not an ASCII space,
 * or limit if only spaces remain.
 */
const char *skipSpaces(const char *p, const char *limit) {
    for (; p < limit; ++p) {
        if (*p != ' ') { break; }
    }
    return p;
}
/**
 * Returns the number of chars from p up to (not including) the next
 * accept-language delimiter (space, comma or semicolon) or limit.
 * Any further validation is left to the Locale constructor.
 */
int32_t findTagLength(const char *p, const char *limit) {
    const char *end = p;
    while (end < limit && *end != ' ' && *end != ',' && *end != ';') {
        ++end;
    }
    return static_cast<int32_t>(end - p);
}
/**
 * Parses a qvalue ("0", "1", or either followed by '.' and fraction digits)
 * after optional leading spaces and returns it in thousandths.
 * Advances p to just after the parsed substring.
 * The first three fraction digits are used, the fourth one rounds up if it is >= 5,
 * and any further digits are consumed but ignored.
 * Returns a negative value if parsing fails or the value is greater than 1.0.
 */
int32_t parseWeight(const char *&p, const char *limit) {
    p = skipSpaces(p, limit);
    if (p == limit) { return -1; }
    const char first = *p;
    if (first != '0' && first != '1') { return -1; }
    int32_t millis = (first - '0') * 1000;
    ++p;
    if (p == limit || *p != '.') { return millis; }
    ++p;  // skip the decimal point
    // scale > 0: value of the next fraction digit in thousandths;
    // scale == 0: the next digit only rounds; scale < 0: ignore the digit.
    for (int32_t scale = 100; p != limit && '0' <= *p && *p <= '9'; ++p) {
        const int32_t digit = *p - '0';
        if (scale > 0) {
            millis += digit * scale;
            scale /= 10;
        } else if (scale == 0) {
            // round up
            if (digit >= 5) { ++millis; }
            scale = -1;
        }
    }
    if (millis > WEIGHT_ONE) { return -1; }  // bad if > 1.0
    return millis;
}
} // namespace
/**
 * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
 *
 * This wrapper exists (and is not in an anonymous namespace)
 * so that we can forward-declare it in the header file and
 * don't have to expose the MaybeStackArray specialization and
 * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
 * Also, otherwise we would have to do a platform-specific
 * template export declaration of some kind for the MaybeStackArray specialization
 * to be properly exported from the common DLL.
 */
struct LocaleAndWeightArray : public UMemory {
    // Stack capacity of 20 items; LocalePriorityList::add() resizes it when it is full.
    MaybeStackArray<LocaleAndWeight, 20> array;
};
/**
 * Parses an accept-language style string such as "da, en-gb;q=0.8, en;q=0.7"
 * into a list of locales, then sorts the list by descending weight.
 *
 * Leniencies: extra spaces and empty range fields (",,") are accepted.
 * Entries with q=0 are not added (and remove an earlier duplicate), see add().
 *
 * @param s         The list string; it need not be NUL-terminated.
 * @param errorCode In/out ICU error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                  malformed input (missing tag, a tag that does not yield a valid Locale,
 *                  bad ";q=" syntax or weight, trailing junk),
 *                  or to U_MEMORY_ALLOCATION_ERROR if an allocation fails.
 */
LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return; }
    list = new LocaleAndWeightArray();
    if (list == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    const char *p = s.data();
    const char *limit = p + s.length();
    while ((p = skipSpaces(p, limit)) != limit) {
        if (*p == ',') {  // empty range field
            ++p;
            continue;
        }
        int32_t tagLength = findTagLength(p, limit);
        if (tagLength == 0) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        // Copy the tag so that it is NUL-terminated for the Locale constructor.
        CharString tag(p, tagLength, errorCode);
        if (U_FAILURE(errorCode)) { return; }
        Locale locale = Locale(tag.data());
        if (locale.isBogus()) {
            // The tag text could not be turned into a Locale: this is a problem with
            // the input, reported like every other malformed-input case in this parser.
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        int32_t weight = WEIGHT_ONE;
        if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
            // Expect: ';' [spaces] 'q' [spaces] '=' [spaces] qvalue
            if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
                    (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
                    (++p, (weight = parseWeight(p, limit)) < 0)) {
                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                return;
            }
            p = skipSpaces(p, limit);
        }
        if (p != limit && *p != ',') {  // trailing junk
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        add(locale, weight, errorCode);
        // Stop right away so that a later syntax error cannot overwrite this failure.
        if (U_FAILURE(errorCode)) { return; }
        if (p == limit) { break; }
        ++p;  // skip the comma
    }
    sort(errorCode);
}
/**
 * Deletes every Locale still held by the list (removed or orphaned slots are nullptr),
 * then the array wrapper, and finally closes the lookup table.
 */
LocalePriorityList::~LocalePriorityList() {
    if (list != nullptr) {
        int32_t i = 0;
        while (i < listLength) {
            delete list->array[i++].locale;
        }
        delete list;
    }
    uhash_close(map);
}
/**
 * Returns the locale stored in slot i; nullptr if that slot was removed or orphaned.
 * The index is not range-checked, and the list must have been allocated.
 */
const Locale *LocalePriorityList::localeAt(int32_t i) const {
    const LocaleAndWeight &item = list->array[i];
    return item.locale;
}
/**
 * Hands the Locale in slot i over to the caller and clears the slot,
 * so that the destructor will not delete it.
 * Returns nullptr if the list was never allocated. The index is not range-checked.
 */
Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
    if (list == nullptr) { return nullptr; }
    Locale *&slot = list->array[i].locale;
    Locale *orphan = slot;
    slot = nullptr;
    return orphan;
}
/**
 * Appends a locale with the given weight, replacing any earlier entry for an equal locale.
 * A weight <= 0 (q=0) adds nothing, but still removes an earlier duplicate.
 *
 * @param locale    The locale to add; the list stores its own copy.
 * @param weight    The weight in thousandths (WEIGHT_ONE == 1.0).
 * @param errorCode In/out ICU error code.
 * @return true on success (including "nothing added"), false on failure.
 */
bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return false; }
    if (map == nullptr) {
        // The lookup table is created lazily, and not at all for a leading q=0 item.
        if (weight <= 0) { return true; }  // do not add q=0
        map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
        if (U_FAILURE(errorCode)) { return false; }
    }
    // Owns the Locale object until it is stored in the array;
    // deletes it on every early return.
    LocalPointer<Locale> clone;
    // The map stores list index + 1, so that 0 means "not found".
    int32_t index = uhash_geti(map, &locale);
    if (index != 0) {
        // Duplicate: Remove the old item and append it anew.
        // The old slot stays in the array as a nullptr hole counted in numRemoved;
        // its Locale object is reused for the new entry.
        LocaleAndWeight &lw = list->array[index - 1];
        clone.adoptInstead(lw.locale);
        lw.locale = nullptr;
        lw.weight = 0;
        ++numRemoved;
    }
    if (weight <= 0) {  // do not add q=0
        if (index != 0) {
            // Not strictly necessary but cleaner.
            uhash_removei(map, &locale);
        }
        return true;
    }
    if (clone.isNull()) {
        clone.adoptInstead(locale.clone());
        // A clone that is bogus although the original is not is treated as a failed allocation.
        if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return false;
        }
    }
    if (listLength == list->array.getCapacity()) {
        // Grow the array: to 100 items at first, then by a factor of 4.
        int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
        if (list->array.resize(newCapacity, listLength) == nullptr) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return false;
        }
    }
    uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
    if (U_FAILURE(errorCode)) { return false; }
    LocaleAndWeight &lw = list->array[listLength];
    lw.locale = clone.orphan();
    lw.weight = weight;
    lw.index = listLength++;
    if (weight < WEIGHT_ONE) { hasWeights = true; }
    U_ASSERT(uhash_count(map) == getLength());
    return true;
}
/**
 * Sorts the items by descending weight. The comparator falls back to the
 * insertion index, which keeps the order of equal-weight items stable.
 * Does nothing after a failure, for fewer than two items,
 * or when no item has a weight other than 1.0.
 */
void LocalePriorityList::sort(UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return; }
    const bool needsSorting = getLength() > 1 && hasWeights;
    if (!needsSorting) { return; }
    uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
                   compareLocaleAndWeight, nullptr, FALSE, &errorCode);
}
U_NAMESPACE_END

View file

@ -0,0 +1,115 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// localeprioritylist.h
// created: 2019jul11 Markus W. Scherer
#ifndef __LOCALEPRIORITYLIST_H__
#define __LOCALEPRIORITYLIST_H__
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
struct UHashtable;
U_NAMESPACE_BEGIN
struct LocaleAndWeightArray;
/**
* Parses a list of locales from an accept-language string.
* We are a bit more lenient than the spec:
* We accept extra whitespace in more places, empty range fields,
* and any number of qvalue fraction digits.
*
* https://tools.ietf.org/html/rfc2616#section-14.4
* 14.4 Accept-Language
*
* Accept-Language = "Accept-Language" ":"
* 1#( language-range [ ";" "q" "=" qvalue ] )
* language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
*
* Each language-range MAY be given an associated quality value which
* represents an estimate of the user's preference for the languages
* specified by that range. The quality value defaults to "q=1". For
* example,
*
* Accept-Language: da, en-gb;q=0.8, en;q=0.7
*
* https://tools.ietf.org/html/rfc2616#section-3.9
* 3.9 Quality Values
*
* HTTP content negotiation (section 12) uses short "floating point"
* numbers to indicate the relative importance ("weight") of various
* negotiable parameters. A weight is normalized to a real number in
* the range 0 through 1, where 0 is the minimum and 1 the maximum
* value. If a parameter has a quality value of 0, then content with
* this parameter is `not acceptable' for the client. HTTP/1.1
* applications MUST NOT generate more than three digits after the
* decimal point. User configuration of these values SHOULD also be
* limited in this fashion.
*
* qvalue = ( "0" [ "." 0*3DIGIT ] )
* | ( "1" [ "." 0*3("0") ] )
*/
class U_COMMON_API LocalePriorityList : public UMemory {
public:
    /**
     * Iterates over the locales that are still in the list, skipping nullptr slots.
     * The number of items is captured when the iterator is created.
     */
    class Iterator : public Locale::Iterator {
    public:
        /** True while fewer locales have been returned than the list held at construction. */
        UBool hasNext() const override { return count < length; }

        /**
         * Returns the next non-null locale.
         * Must only be called while hasNext() is true: the loop has no other bounds check.
         */
        const Locale &next() override {
            for(;;) {
                const Locale *locale = list.localeAt(index++);
                if (locale != nullptr) {
                    ++count;
                    return *locale;
                }
            }
        }

    private:
        friend class LocalePriorityList;

        Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}

        const LocalePriorityList &list;
        // Next array slot to look at, including nullptr slots.
        int32_t index = 0;
        // Number of locales returned so far.
        int32_t count = 0;
        const int32_t length;
    };

    /** Parses the list string; problems are reported via errorCode. */
    LocalePriorityList(StringPiece s, UErrorCode &errorCode);

    ~LocalePriorityList();

    /** Number of locales currently in the list, not counting removed slots. */
    int32_t getLength() const { return listLength - numRemoved; }

    /** Number of array slots in use, including slots whose locale was removed. */
    int32_t getLengthIncludingRemoved() const { return listLength; }

    Iterator iterator() const { return Iterator(*this); }

    /** Returns the locale in slot i, which may be nullptr. */
    const Locale *localeAt(int32_t i) const;

    /** Returns the locale in slot i and sets the slot to nullptr. */
    Locale *orphanLocaleAt(int32_t i);

private:
    LocalePriorityList(const LocalePriorityList &) = delete;
    LocalePriorityList &operator=(const LocalePriorityList &) = delete;

    bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);

    void sort(UErrorCode &errorCode);

    LocaleAndWeightArray *list = nullptr;
    // Number of array slots in use, including removed ones.
    int32_t listLength = 0;
    // Number of slots whose locale was removed as a duplicate.
    int32_t numRemoved = 0;
    bool hasWeights = false; // other than 1.0
    // Lookup table used by add() to find an earlier entry for an equal locale.
    UHashtable *map = nullptr;
};
U_NAMESPACE_END
#endif // __LOCALEPRIORITYLIST_H__

View file

@ -0,0 +1,364 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// locdistance.cpp
// created: 2019may08 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/localematcher.h"
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
#include "cstring.h"
#include "locdistance.h"
#include "loclikelysubtags.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uinvchar.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
namespace {
/**
 * Bit flag used on the last character of a subtag in the trie.
 * Must be set consistently by the builder and the lookup code.
 */
constexpr int32_t END_OF_SUBTAG = 0x80;
/** Distance value bit flag, set by the builder. */
constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
constexpr int32_t DISTANCE_IS_FINAL = 0x100;
/** Mask of both flag bits, used to separate the flags from the plain distance. */
constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
/** Distance reported by getBestIndexAndDistance() when no supported LSR is below the threshold. */
constexpr int32_t ABOVE_THRESHOLD = 100;
// Indexes into array of distances.
enum {
    IX_DEF_LANG_DISTANCE,
    IX_DEF_SCRIPT_DISTANCE,
    IX_DEF_REGION_DISTANCE,
    IX_MIN_REGION_DISTANCE,
    IX_LIMIT
};
// The singleton and its init-once guard; created by initLocaleDistance(), deleted by cleanup().
LocaleDistance *gLocaleDistance = nullptr;
UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
/**
 * Library cleanup callback (registered in initLocaleDistance()):
 * deletes the singleton and resets the init-once guard so that it can be created again.
 */
UBool U_CALLCONV cleanup() {
    delete gLocaleDistance;
    gLocaleDistance = nullptr;
    gInitOnce.reset();
    return TRUE;
}
} // namespace
/**
 * Creates the LocaleDistance singleton from the distance data held by the
 * XLikelySubtags singleton, and registers the cleanup function.
 *
 * @param errorCode In/out ICU error code. Set to U_MISSING_RESOURCE_ERROR if required
 *                  distance data is absent, or U_MEMORY_ALLOCATION_ERROR if the
 *                  singleton cannot be allocated.
 */
void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(gLocaleDistance == nullptr);
    // Keep the pointer and check for failure before dereferencing it:
    // the getter may return nullptr when it fails (as getSingleton() in this file does).
    const XLikelySubtags *likely = XLikelySubtags::getSingleton(errorCode);
    if (U_FAILURE(errorCode)) { return; }
    const LocaleDistanceData &data = likely->getDistanceData();
    if (data.distanceTrieBytes == nullptr ||
            data.regionToPartitions == nullptr || data.partitions == nullptr ||
            // ok if no paradigms
            data.distances == nullptr) {
        errorCode = U_MISSING_RESOURCE_ERROR;
        return;
    }
    gLocaleDistance = new LocaleDistance(data);
    if (gLocaleDistance == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);
}
/**
 * Returns the shared LocaleDistance instance, creating it on first use via umtx_initOnce().
 * Returns nullptr if errorCode is already a failure on entry; otherwise returns
 * gLocaleDistance, which is still nullptr if initialization did not create the instance.
 */
const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
    return gLocaleDistance;
}
/**
 * Sets up the distance lookup from the given data.
 * The pointers in data are stored as-is (not copied), and the default distances
 * are read from data.distances at the IX_* indexes.
 */
LocaleDistance::LocaleDistance(const LocaleDistanceData &data) :
        trie(data.distanceTrieBytes),
        regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
        paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
        defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
        defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
        defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
        minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
    // For the default demotion value, use the
    // default region distance between unrelated Englishes.
    // Thus, unless demotion is turned off,
    // a mere region difference for one desired locale
    // is as good as a perfect match for the next following desired locale.
    // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
    LSR en("en", "Latn", "US");
    LSR enGB("en", "Latn", "GB");
    const LSR *p_enGB = &enGB;
    // Look up the en-US vs. en-GB distance with a threshold of 50; keep only the distance bits.
    defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, &p_enGB, 1,
                                                              50, ULOCMATCH_FAVOR_LANGUAGE) & 0xff;
}
/**
 * Finds the supported LSR with the smallest distance from the desired one.
 *
 * The total distance is the sum of the language, script and region distances.
 * A candidate is dropped as soon as its partial sum reaches the threshold,
 * and the threshold is lowered to the best distance found so far.
 *
 * @param desired             The desired LSR.
 * @param supportedLSRs       Array of pointers to the supported LSRs.
 * @param supportedLSRsLength Number of entries in supportedLSRs.
 * @param threshold           Only distances strictly below this value count as a match.
 * @param favorSubtag         With ULOCMATCH_FAVOR_SCRIPT the language distance is quartered.
 * @return (index << 8) | distance for the best supported LSR, or a negative value
 *         (0xffffff00 | ABOVE_THRESHOLD) if none is below the threshold.
 *         A distance of 0 returns immediately with that index.
 */
int32_t LocaleDistance::getBestIndexAndDistance(
        const LSR &desired,
        const LSR **supportedLSRs, int32_t supportedLSRsLength,
        int32_t threshold, ULocMatchFavorSubtag favorSubtag) const {
    BytesTrie iter(trie);
    // Look up the desired language only once for all supported LSRs.
    // Its "distance" is either a match point value of 0, or a non-match negative value.
    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
    int32_t desLangDistance = trieNext(iter, desired.language, false);
    // The trie state after the desired language is saved only if it will be needed again.
    uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
    // Index of the supported LSR with the lowest distance.
    int32_t bestIndex = -1;
    for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
        const LSR &supported = *supportedLSRs[slIndex];
        // star: the language pair has no rule of its own, so default distances apply.
        bool star = false;
        int32_t distance = desLangDistance;
        if (distance >= 0) {
            U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
            if (slIndex != 0) {
                iter.resetToState64(desLangState);
            }
            distance = trieNext(iter, supported.language, true);
        }
        // Note: The data builder verifies that there are no rules with "any" (*) language and
        // real (non *) script or region subtags.
        // This means that if the lookup for either language fails we can use
        // the default distances without further lookups.
        int32_t flags;
        if (distance >= 0) {
            // Split the trie value into its flag bits and the plain distance.
            flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
            distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
        } else {  // <*, *>
            if (uprv_strcmp(desired.language, supported.language) == 0) {
                distance = 0;
            } else {
                distance = defaultLanguageDistance;
            }
            flags = 0;
            star = true;
        }
        U_ASSERT(0 <= distance && distance <= 100);
        // We implement "favor subtag" by reducing the language subtag distance
        // (unscientifically reducing it to a quarter of the normal value),
        // so that the script distance is relatively more important.
        // For example, given a default language distance of 80, we reduce it to 20,
        // which is below the default threshold of 50, which is the default script distance.
        if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
            distance >>= 2;
        }
        if (distance >= threshold) {
            continue;
        }

        int32_t scriptDistance;
        if (star || flags != 0) {
            // No script-level trie lookup for this language pair: compare the scripts directly.
            if (uprv_strcmp(desired.script, supported.script) == 0) {
                scriptDistance = 0;
            } else {
                scriptDistance = defaultScriptDistance;
            }
        } else {
            scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
                                                      desired.script, supported.script);
            flags = scriptDistance & DISTANCE_IS_FINAL;
            scriptDistance &= ~DISTANCE_IS_FINAL;
        }
        distance += scriptDistance;
        if (distance >= threshold) {
            continue;
        }

        if (uprv_strcmp(desired.region, supported.region) == 0) {
            // regionDistance = 0
        } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
            distance += defaultRegionDistance;
        } else {
            int32_t remainingThreshold = threshold - distance;
            // Skip the region lookup if even the minimum region distance would be too much.
            if (minRegionDistance >= remainingThreshold) {
                continue;
            }

            // From here on we know the regions are not equal.
            // Map each region to zero or more partitions. (zero = one non-matching string)
            // (Each array of single-character partition strings is encoded as one string.)
            // If either side has more than one, then we find the maximum distance.
            // This could be optimized by adding some more structure, but probably not worth it.
            distance += getRegionPartitionsDistance(
                    iter, iter.getState64(),
                    partitionsForRegion(desired),
                    partitionsForRegion(supported),
                    remainingThreshold);
        }
        if (distance < threshold) {
            if (distance == 0) {
                // Perfect match: nothing can be better. The distance bits are 0.
                return slIndex << 8;
            }
            bestIndex = slIndex;
            // Later candidates must be strictly better than this one.
            threshold = distance;
        }
    }
    // When a best index exists, threshold holds its distance.
    return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
}
/**
 * Looks up the distance for a (desired, supported) script pair below the given trie state.
 * If there is no rule for the pair, falls back to the '*' entry:
 * 0 for equal scripts, otherwise the '*' value.
 *
 * @param iter       Trie iterator; its state is changed by the lookup.
 * @param startState Trie state to return to for the '*' fallback.
 * @param desired    The desired script subtag.
 * @param supported  The supported script subtag.
 * @return The script distance, with DISTANCE_IS_FINAL set if the matched trie value was final.
 */
int32_t LocaleDistance::getDesSuppScriptDistance(
        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
    int32_t distance = trieNext(iter, desired, false);
    if (distance >= 0) {
        distance = trieNext(iter, supported, true);
    }
    if (distance < 0) {
        UStringTrieResult result = iter.resetToState64(startState).next(u'*');  // <*, *>
        U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
        if (uprv_strcmp(desired, supported) == 0) {
            distance = 0;  // same script
        } else {
            distance = iter.getValue();
            U_ASSERT(distance >= 0);
        }
        if (result == USTRINGTRIE_FINAL_VALUE) {
            distance |= DISTANCE_IS_FINAL;
        }
    }
    return distance;
}
/**
 * Computes the region distance from the partitions of the desired and supported regions.
 * Each partitions string is NUL-terminated with one character per partition.
 * The result is the maximum distance over all (desired, supported) partition pairs,
 * except that the first pair distance that reaches the threshold is returned right away.
 *
 * @param iter                Trie iterator; its state is changed by the lookups.
 * @param startState          Trie state from which each desired partition is looked up,
 *                            and which is used for the '*' fallback.
 * @param desiredPartitions   Non-empty partitions string of the desired region.
 * @param supportedPartitions Non-empty partitions string of the supported region.
 * @param threshold           Distance at or above which the search stops early.
 * @return The region distance.
 */
int32_t LocaleDistance::getRegionPartitionsDistance(
        BytesTrie &iter, uint64_t startState,
        const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
    char desired = *desiredPartitions++;
    char supported = *supportedPartitions++;
    U_ASSERT(desired != 0 && supported != 0);
    // See if we have single desired/supported partitions, from NUL-terminated
    // partition strings without explicit length.
    bool suppLengthGt1 = *supportedPartitions != 0;  // gt1: more than 1 character
    // equivalent to: if (desLength == 1 && suppLength == 1)
    if (*desiredPartitions == 0 && !suppLengthGt1) {
        // Fastpath for single desired/supported partitions.
        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
        if (USTRINGTRIE_HAS_NEXT(result)) {
            result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
            if (USTRINGTRIE_HAS_VALUE(result)) {
                return iter.getValue();
            }
        }
        return getFallbackRegionDistance(iter, startState);
    }

    const char *supportedStart = supportedPartitions - 1;  // for restart of inner loop
    int32_t regionDistance = 0;
    // Fall back to * only once, not for each pair of partition strings.
    bool star = false;
    for (;;) {
        // Look up each desired-partition string only once,
        // not for each (desired, supported) pair.
        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
        if (USTRINGTRIE_HAS_NEXT(result)) {
            // Saved only if the inner loop will need to come back to it.
            uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;
            for (;;) {
                result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
                int32_t d;
                if (USTRINGTRIE_HAS_VALUE(result)) {
                    d = iter.getValue();
                } else if (star) {
                    // The fallback distance has already been taken into account.
                    d = 0;
                } else {
                    d = getFallbackRegionDistance(iter, startState);
                    star = true;
                }
                if (d >= threshold) {
                    return d;
                } else if (regionDistance < d) {
                    regionDistance = d;
                }
                if ((supported = *supportedPartitions++) != 0) {
                    iter.resetToState64(desState);
                } else {
                    break;
                }
            }
        } else if (!star) {
            // No rule starts with this desired partition: use the fallback once.
            int32_t d = getFallbackRegionDistance(iter, startState);
            if (d >= threshold) {
                return d;
            } else if (regionDistance < d) {
                regionDistance = d;
            }
            star = true;
        }
        if ((desired = *desiredPartitions++) != 0) {
            // Next desired partition: restart the trie and the supported partitions.
            iter.resetToState64(startState);
            supportedPartitions = supportedStart;
            supported = *supportedPartitions++;
        } else {
            break;
        }
    }
    return regionDistance;
}
/**
 * Returns the value of the '*' entry directly below startState,
 * which serves as the region distance when there is no more specific rule.
 * The iterator is reset to startState and advanced by '*'.
 */
int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {
// The lookup result is kept in a variable only in debug builds, where U_ASSERT() reads it.
#if U_DEBUG
    UStringTrieResult result =
#endif
    iter.resetToState64(startState).next(u'*');  // <*, *>
    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
    int32_t distance = iter.getValue();
    U_ASSERT(distance >= 0);
    return distance;
}
/**
 * Advances the trie iterator over one subtag. Each character is converted with
 * uprv_invCharToAscii(), and the last one is marked with END_OF_SUBTAG.
 *
 * @param iter      Trie iterator to advance.
 * @param s         NUL-terminated subtag.
 * @param wantValue If true, the subtag must end on a trie value, which is returned
 *                  (with DISTANCE_IS_FINAL set if it is a final value).
 *                  If false, the trie must continue after the subtag, and 0 is returned.
 * @return The value or 0 as described, or -1 if the subtag is empty or does not match.
 */
int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {
    uint8_t c;
    if ((c = *s) == 0) {
        return -1;  // no empty subtags in the distance data
    }
    for (;;) {
        c = uprv_invCharToAscii(c);
        // EBCDIC: If *s is not an invariant character,
        // then c is now 0 and will simply not match anything, which is harmless.
        uint8_t next = *++s;
        if (next != 0) {
            // Not the last character: the trie must have more bytes after it.
            if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
                return -1;
            }
        } else {
            // last character of this subtag
            UStringTrieResult result = iter.next(c | END_OF_SUBTAG);
            if (wantValue) {
                if (USTRINGTRIE_HAS_VALUE(result)) {
                    int32_t value = iter.getValue();
                    if (result == USTRINGTRIE_FINAL_VALUE) {
                        value |= DISTANCE_IS_FINAL;
                    }
                    return value;
                }
            } else {
                if (USTRINGTRIE_HAS_NEXT(result)) {
                    return 0;
                }
            }
            return -1;
        }
        c = next;
    }
}
/**
 * Returns true if lsr is equal to one of the paradigm LSRs.
 * This is a linear scan because the list is very short (length 6 as of 2019);
 * a hash set would be the better choice for a long list.
 */
UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
    U_ASSERT(paradigmLSRsLength <= 15);
    int32_t i = 0;
    while (i < paradigmLSRsLength) {
        if (lsr == paradigmLSRs[i]) {
            return true;
        }
        ++i;
    }
    return false;
}
U_NAMESPACE_END

View file

@ -0,0 +1,109 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// locdistance.h
// created: 2019may08 Markus W. Scherer
#ifndef __LOCDISTANCE_H__
#define __LOCDISTANCE_H__
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/localematcher.h"
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "lsr.h"
U_NAMESPACE_BEGIN
struct LocaleDistanceData;
/**
 * Computes distances between desired and supported LSRs (language-script-region triples)
 * for LocaleMatcher, based on offline-built data:
 * a trie of distance rules, region-to-partition mappings, and default distances.
 * The shared instance is available via getSingleton().
 */
class LocaleDistance final : public UMemory {
public:
    /** Returns the shared instance, or nullptr on failure. */
    static const LocaleDistance *getSingleton(UErrorCode &errorCode);

    /**
     * Finds the supported LSR with the smallest distance from the desired one.
     * Equivalent LSR subtags must be normalized into a canonical form.
     *
     * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
     * (negative if none has a distance below the threshold),
     * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
     */
    int32_t getBestIndexAndDistance(const LSR &desired,
                                    const LSR **supportedLSRs, int32_t supportedLSRsLength,
                                    int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;

    int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }

    /** Returns true if lsr equals one of the paradigm LSRs. */
    UBool isParadigmLSR(const LSR &lsr) const;

    int32_t getDefaultScriptDistance() const {
        return defaultScriptDistance;
    }

    /** The distance between en-Latn-US and en-Latn-GB, computed in the constructor. */
    int32_t getDefaultDemotionPerDesiredLocale() const {
        return defaultDemotionPerDesiredLocale;
    }

private:
    LocaleDistance(const LocaleDistanceData &data);
    LocaleDistance(const LocaleDistance &other) = delete;
    LocaleDistance &operator=(const LocaleDistance &other) = delete;

    /** Creates the singleton; invoked only via umtx_initOnce(). */
    static void initLocaleDistance(UErrorCode &errorCode);

    /** Script distance for a (desired, supported) pair, with '*' fallback. */
    static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
                                            const char *desired, const char *supported);

    /** Region distance based on the partitions of the desired and supported regions. */
    static int32_t getRegionPartitionsDistance(
        BytesTrie &iter, uint64_t startState,
        const char *desiredPartitions, const char *supportedPartitions,
        int32_t threshold);

    /** Value of the '*' entry below startState. */
    static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);

    /** Advances the trie over one subtag; returns -1 if it does not match. */
    static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);

    /** Returns the partitions string for the LSR's region, looked up via lsr.regionIndex. */
    const char *partitionsForRegion(const LSR &lsr) const {
        // ill-formed region -> one non-matching string
        int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
        return partitionArrays[pIndex];
    }

    int32_t getDefaultRegionDistance() const {
        return defaultRegionDistance;
    }

    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
    // There is also a trie value for each subsequence of whole subtags.
    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
    BytesTrie trie;

    /**
     * Maps each region to zero or more single-character partitions.
     */
    const uint8_t *regionToPartitionsIndex;
    const char **partitionArrays;

    /**
     * Used to get the paradigm region for a cluster, if there is one.
     */
    const LSR *paradigmLSRs;
    int32_t paradigmLSRsLength;

    // Default distances, read from the data's distances array in the constructor.
    int32_t defaultLanguageDistance;
    int32_t defaultScriptDistance;
    int32_t defaultRegionDistance;
    int32_t minRegionDistance;
    int32_t defaultDemotionPerDesiredLocale;
};
U_NAMESPACE_END
#endif // __LOCDISTANCE_H__

View file

@ -1396,5 +1396,7 @@ Locale::getBaseName() const {
return baseName;
}
// Out-of-line definition of the Locale::Iterator destructor, with the compiler-generated body.
Locale::Iterator::~Iterator() = default;
//eof
U_NAMESPACE_END

View file

@ -0,0 +1,638 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// loclikelysubtags.cpp
// created: 2019may08 Markus W. Scherer
#include <utility>
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/localpointer.h"
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
#include "charstr.h"
#include "cstring.h"
#include "loclikelysubtags.h"
#include "lsr.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uhash.h"
#include "uinvchar.h"
#include "umutex.h"
#include "uresdata.h"
#include "uresimp.h"
U_NAMESPACE_BEGIN
namespace {
// Single-character prefixes for the pseudo-locale variants named in the trailing comments.
// NOTE(review): their use is not visible in this part of the file; presumably they
// mark pseudo-localized language subtags during LSR lookup. Confirm against the code below.
constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
/**
 * Stores NUL-terminated strings with duplicate elimination.
 * Checks for unique UTF-16 string pointers and converts to invariant characters.
 *
 * All strings are appended to one CharString. The number returned for a string
 * is its offset in that CharString, which is always greater than 0.
 * Strings can be added until freeze(); string pointers are available only afterwards.
 */
class UniqueCharStrings {
public:
    /** Initializes the lookup table and allocates the string storage; sets errorCode on failure. */
    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
        if (U_FAILURE(errorCode)) { return; }
        strings = new CharString();
        if (strings == nullptr) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    ~UniqueCharStrings() {
        uhash_close(&map);
        delete strings;
    }

    /** Returns/orphans the CharString that contains all strings. */
    CharString *orphanCharStrings() {
        CharString *result = strings;
        strings = nullptr;
        return result;
    }

    /** Adds a string and returns a unique number for it. */
    int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
        if (U_FAILURE(errorCode)) { return 0; }
        if (isFrozen) {
            errorCode = U_NO_WRITE_PERMISSION;
            return 0;
        }
        // The string points into the resource bundle.
        // The map key is this buffer pointer, so the buffer must stay valid
        // for as long as the map is used.
        const char16_t *p = s.getBuffer();
        int32_t oldIndex = uhash_geti(&map, p);
        if (oldIndex != 0) {  // found duplicate
            return oldIndex;
        }
        // Explicit NUL terminator for the previous string.
        // The strings object is also terminated with one implicit NUL.
        // Because of this leading NUL, the first string starts at offset 1,
        // and 0 never identifies a string.
        strings->append(0, errorCode);
        int32_t newIndex = strings->length();
        strings->appendInvariantChars(s, errorCode);
        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
        return newIndex;
    }

    /** Disallows further add() calls and enables get(). */
    void freeze() { isFrozen = true; }

    /**
     * Returns a string pointer for its unique number, if this object is frozen.
     * Otherwise nullptr.
     */
    const char *get(int32_t i) const {
        U_ASSERT(isFrozen);
        return isFrozen && i > 0 ? strings->data() + i : nullptr;
    }

private:
    // Maps UTF-16 strings to their offsets in *strings.
    UHashtable map;
    // Owned unless orphaned via orphanCharStrings().
    CharString *strings;
    bool isFrozen = false;
};
} // namespace
/**
 * Move constructor: copies all fields and takes over the two pointers
 * that the destructor releases (partitions and paradigms).
 */
LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
        distanceTrieBytes(data.distanceTrieBytes),
        regionToPartitions(data.regionToPartitions),
        partitions(data.partitions),
        paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
        distances(data.distances) {
    // Clear them in the source so that its destructor does not release them as well.
    data.partitions = nullptr;
    data.paradigms = nullptr;
}
/**
 * Releases the partitions array (with uprv_free()) and the paradigms array (with delete[]).
 * The other pointers are not released here.
 */
LocaleDistanceData::~LocaleDistanceData() {
    uprv_free(partitions);
    delete[] paradigms;
}
// TODO(ICU-20777): Rename to just LikelySubtagsData.
/**
 * Temporary holder for the data read from the "langInfo" resource bundle.
 * load() fills the fields; the XLikelySubtags constructor then takes over the
 * bundle, the string storage, the alias maps, the LSR array and the distance data.
 * Whatever has not been taken over is released by the destructors.
 */
struct XLikelySubtagsData {
UResourceBundle *langInfoBundle = nullptr;
// De-duplicated invariant-character copies of all strings read from the bundle.
UniqueCharStrings strings;
CharStringMap languageAliases;
CharStringMap regionAliases;
const uint8_t *trieBytes = nullptr;
LSR *lsrs = nullptr;
int32_t lsrsLength = 0;
LocaleDistanceData distanceData;
XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
~XLikelySubtagsData() {
ures_close(langInfoBundle);
delete[] lsrs;
}
/**
 * Opens the "langInfo" bundle and reads its "likely" table (required) and
 * its "match" table (optional). On any failure errorCode is set and the
 * function returns early; partially loaded data is cleaned up by the destructors.
 */
void load(UErrorCode &errorCode) {
langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
if (U_FAILURE(errorCode)) { return; }
StackUResourceBundle stackTempBundle;
ResourceDataValue value;
ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
value, errorCode);
ResourceTable likelyTable = value.getTable(errorCode);
if (U_FAILURE(errorCode)) { return; }
// Read all strings in the resource bundle and convert them to invariant char *.
// At this stage only the numbers returned by strings.add() are recorded.
LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
if (!readStrings(likelyTable, "languageAliases", value,
languageIndexes, languagesLength, errorCode) ||
!readStrings(likelyTable, "regionAliases", value,
regionIndexes, regionsLength, errorCode) ||
!readStrings(likelyTable, "lsrs", value,
lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
return;
}
// Aliases come in (alias, canonical) pairs, LSRs in (language, script, region) triples.
if ((languagesLength & 1) != 0 ||
(regionsLength & 1) != 0 ||
(lsrSubtagsLength % 3) != 0) {
errorCode = U_INVALID_FORMAT_ERROR;
return;
}
if (lsrSubtagsLength == 0) {
errorCode = U_MISSING_RESOURCE_ERROR;
return;
}
if (!likelyTable.findValue("trie", value)) {
errorCode = U_MISSING_RESOURCE_ERROR;
return;
}
int32_t length;
trieBytes = value.getBinary(length, errorCode);
if (U_FAILURE(errorCode)) { return; }
// Also read distance/matcher data if available,
// to open & keep only one resource bundle pointer
// and to use one single UniqueCharStrings.
// A separate error code is used so that a missing "match" table is not fatal.
UErrorCode matchErrorCode = U_ZERO_ERROR;
ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
value, matchErrorCode);
LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
if (U_SUCCESS(matchErrorCode)) {
ResourceTable matchTable = value.getTable(errorCode);
if (U_FAILURE(errorCode)) { return; }
if (matchTable.findValue("trie", value)) {
distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
if (U_FAILURE(errorCode)) { return; }
}
if (matchTable.findValue("regionToPartitions", value)) {
distanceData.regionToPartitions = value.getBinary(length, errorCode);
if (U_FAILURE(errorCode)) { return; }
// The binary must provide at least LSR::REGION_INDEX_LIMIT bytes.
if (length < LSR::REGION_INDEX_LIMIT) {
errorCode = U_INVALID_FORMAT_ERROR;
return;
}
}
if (!readStrings(matchTable, "partitions", value,
partitionIndexes, partitionsLength, errorCode) ||
!readStrings(matchTable, "paradigms", value,
paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
return;
}
if ((paradigmSubtagsLength % 3) != 0) {
errorCode = U_INVALID_FORMAT_ERROR;
return;
}
if (matchTable.findValue("distances", value)) {
distanceData.distances = value.getIntVector(length, errorCode);
if (U_FAILURE(errorCode)) { return; }
if (length < 4) { // LocaleDistance IX_LIMIT
errorCode = U_INVALID_FORMAT_ERROR;
return;
}
}
} else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
// ok for likely subtags
} else { // error other than missing resource
errorCode = matchErrorCode;
return;
}
// Fetch & store invariant-character versions of strings
// only after we have collected and de-duplicated all of them.
// (strings.get() returns pointers only once the collection is frozen.)
strings.freeze();
languageAliases = CharStringMap(languagesLength / 2, errorCode);
for (int32_t i = 0; i < languagesLength; i += 2) {
languageAliases.put(strings.get(languageIndexes[i]),
strings.get(languageIndexes[i + 1]), errorCode);
}
regionAliases = CharStringMap(regionsLength / 2, errorCode);
for (int32_t i = 0; i < regionsLength; i += 2) {
regionAliases.put(strings.get(regionIndexes[i]),
strings.get(regionIndexes[i + 1]), errorCode);
}
if (U_FAILURE(errorCode)) { return; }
// Build the LSR array; each LSR aliases three strings from the shared storage.
lsrsLength = lsrSubtagsLength / 3;
lsrs = new LSR[lsrsLength];
if (lsrs == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
strings.get(lsrSubtagIndexes[i + 1]),
strings.get(lsrSubtagIndexes[i + 2]));
}
// The partitions array is allocated with uprv_malloc(); ~LocaleDistanceData() uprv_free()s it.
if (partitionsLength > 0) {
distanceData.partitions = static_cast<const char **>(
uprv_malloc(partitionsLength * sizeof(const char *)));
if (distanceData.partitions == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
for (int32_t i = 0; i < partitionsLength; ++i) {
distanceData.partitions[i] = strings.get(partitionIndexes[i]);
}
}
// The paradigms array is allocated with new[]; ~LocaleDistanceData() delete[]s it.
if (paradigmSubtagsLength > 0) {
distanceData.paradigmsLength = paradigmSubtagsLength / 3;
LSR *paradigms = new LSR[distanceData.paradigmsLength];
if (paradigms == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
strings.get(paradigmSubtagIndexes[i + 1]),
strings.get(paradigmSubtagIndexes[i + 2]));
}
distanceData.paradigms = paradigms;
}
}
private:
/**
 * Reads the string array stored under key in table, adds each string to
 * `strings`, and stores the resulting numbers in indexes.
 * A missing key is not an error: the function returns true and leaves
 * indexes and length untouched. An empty array sets length to 0 and allocates nothing.
 * Returns false (with errorCode set) if the value is not an array,
 * if memory allocation fails, or if adding a string fails.
 * Note: value is used as scratch space and is overwritten.
 */
bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
if (table.findValue(key, value)) {
ResourceArray stringArray = value.getArray(errorCode);
if (U_FAILURE(errorCode)) { return false; }
length = stringArray.getSize();
if (length == 0) { return true; }
int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
if (rawIndexes == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return false;
}
for (int i = 0; i < length; ++i) {
stringArray.getValue(i, value); // returns TRUE because i < length
rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
}
return true;
}
};
namespace {
// The singleton created by XLikelySubtags::initLikelySubtags(), or nullptr.
XLikelySubtags *gLikelySubtags = nullptr;
// Guards the one-time initialization of gLikelySubtags.
UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
// Cleanup callback registered in initLikelySubtags(): deletes the singleton
// and resets the init-once flag so that the singleton can be created again.
UBool U_CALLCONV cleanup() {
delete gLikelySubtags;
gLikelySubtags = nullptr;
gInitOnce.reset();
return TRUE;
}
} // namespace
// Loads the resource data and creates the singleton.
// On failure gLikelySubtags stays nullptr and errorCode reports the reason.
void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(gLikelySubtags == nullptr);
    XLikelySubtagsData loaded(errorCode);
    loaded.load(errorCode);
    if (U_SUCCESS(errorCode)) {
        gLikelySubtags = new XLikelySubtags(loaded);
        if (gLikelySubtags != nullptr) {
            ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
        } else {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
// Returns the shared instance, creating it on first use.
// Returns nullptr if errorCode is already a failure or if initialization failed.
const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
        return gLikelySubtags;
    }
    return nullptr;
}
/**
 * Takes over the loaded data: the resource bundle, the string storage,
 * the alias maps, the LSR array and the distance data.
 * The pointers that this object now owns are cleared in data so that
 * data's destructor does not release them.
 * Also pre-computes trie states that maximize() uses as shortcuts.
 */
XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
        langInfoBundle(data.langInfoBundle),
        strings(data.strings.orphanCharStrings()),
        languageAliases(std::move(data.languageAliases)),
        regionAliases(std::move(data.regionAliases)),
        trie(data.trieBytes),
        lsrs(data.lsrs),
#if U_DEBUG
        lsrsLength(data.lsrsLength),
#endif
        distanceData(std::move(data.distanceData)) {
    data.langInfoBundle = nullptr;
    data.lsrs = nullptr;

    // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
    UStringTrieResult result = trie.next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
    trieUndState = trie.getState64();
    result = trie.next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
    trieUndZzzzState = trie.getState64();
    result = trie.next(u'*');
    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
    defaultLsrIndex = trie.getValue();
    trie.reset();

    // Cache the trie state after each possible first language letter.
    // maximize() treats 0 as "no cached state", so every entry must be written:
    // the array has no initializer, and leaving an entry untouched for a letter
    // that does not yield USTRINGTRIE_NO_VALUE would make maximize() read an
    // indeterminate value.
    for (char16_t c = u'a'; c <= u'z'; ++c) {
        result = trie.next(c);
        trieFirstLetterStates[c - u'a'] =
            (result == USTRINGTRIE_NO_VALUE) ? trie.getState64() : 0;
        trie.reset();
    }
}
// Releases everything taken over from XLikelySubtagsData in the constructor:
// the resource bundle, the string storage and the LSR array.
XLikelySubtags::~XLikelySubtags() {
ures_close(langInfoBundle);
delete strings;
delete[] lsrs;
}
// Builds the maximized LSR for a Locale.
// A locale whose name starts with "@x=" is a private use tag and is returned
// unchanged as the language, with empty script and region.
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
    const char *localeName = locale.getName();
    const bool isPrivateUse =
        uprv_isAtSign(localeName[0]) && localeName[1] == 'x' && localeName[2] == '=';
    if (!isPrivateUse) {
        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
                                locale.getVariant(), errorCode);
    }
    // Private use language tag x-subtag-subtag...
    return LSR(localeName, "", "");
}
namespace {
// Maps an alias to its canonical form; returns the input itself if the map has no entry.
const char *getCanonical(const CharStringMap &aliases, const char *alias) {
    const char *replacement = aliases.get(alias);
    if (replacement != nullptr) {
        return replacement;
    }
    return alias;
}
} // namespace
// Handles pseudolocales, then canonicalizes language and region aliases
// and maximizes the result.
LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
                                     const char *variant, UErrorCode &errorCode) const {
    // Pseudolocales like en-XA, ar-XB, fr-PSCRACK should match only themselves,
    // not other locales with what looks like the same language and script subtags.
    // They get a prefix on language and script instead of being maximized.

    // Case 1: a two-character region XA, XB or XC.
    const bool isTwoCharXRegion = region[0] == 'X' && region[1] != 0 && region[2] == 0;
    if (isTwoCharXRegion) {
        const char second = region[1];
        if (second == 'A') {
            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, errorCode);
        }
        if (second == 'B') {
            return LSR(PSEUDO_BIDI_PREFIX, language, script, region, errorCode);
        }
        if (second == 'C') {
            return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, errorCode);
        }
        // Any other X? region: normal locale.
    }

    // Case 2: a pseudolocale variant; an empty region is replaced with the matching X? code.
    if (variant[0] == 'P' && variant[1] == 'S') {
        if (uprv_strcmp(variant, "PSACCENT") == 0) {
            const char *pseudoRegion = *region == 0 ? "XA" : region;
            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, pseudoRegion, errorCode);
        }
        if (uprv_strcmp(variant, "PSBIDI") == 0) {
            const char *pseudoRegion = *region == 0 ? "XB" : region;
            return LSR(PSEUDO_BIDI_PREFIX, language, script, pseudoRegion, errorCode);
        }
        if (uprv_strcmp(variant, "PSCRACK") == 0) {
            const char *pseudoRegion = *region == 0 ? "XC" : region;
            return LSR(PSEUDO_CRACKED_PREFIX, language, script, pseudoRegion, errorCode);
        }
        // Any other PS... variant: normal locale.
    }

    // Normal locale. (We have no script mappings.)
    const char *canonicalLanguage = getCanonical(languageAliases, language);
    const char *canonicalRegion = getCanonical(regionAliases, region);
    return maximize(canonicalLanguage, script, canonicalRegion);
}
// Fills in missing subtags by walking the trie with language, script and region in turn.
// Subtags that the caller supplied are kept; only missing ones are taken from the
// looked-up LSR. The returned LSR aliases its input strings and/or the strings of
// an entry in lsrs; it does not allocate.
LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
// Normalize the "unknown" codes to empty strings.
if (uprv_strcmp(language, "und") == 0) {
language = "";
}
if (uprv_strcmp(script, "Zzzz") == 0) {
script = "";
}
if (uprv_strcmp(region, "ZZ") == 0) {
region = "";
}
if (*script != 0 && *region != 0 && *language != 0) {
return LSR(language, script, region); // already maximized
}
// Bit set of input subtags to keep in the result: 4=language, 2=script, 1=region.
uint32_t retainOldMask = 0;
BytesTrie iter(trie);
// Trie state after the language (and later the script) matched; 0 if the language did not match.
uint64_t state;
int32_t value;
// Small optimization: Array lookup for first language letter.
int32_t c0;
if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
language[1] != 0 && // language.length() >= 2
(state = trieFirstLetterStates[c0]) != 0) {
value = trieNext(iter.resetToState64(state), language, 1);
} else {
value = trieNext(iter, language, 0);
}
if (value >= 0) {
if (*language != 0) {
retainOldMask |= 4;
}
state = iter.getState64();
} else {
// Unknown language: keep it, and continue the lookup as if it were "und".
retainOldMask |= 4;
iter.resetToState64(trieUndState); // "und" ("*")
state = 0;
}
if (value > 0) {
// Intermediate or final value from just language.
if (value == SKIP_SCRIPT) {
value = 0;
}
if (*script != 0) {
retainOldMask |= 2;
}
} else {
value = trieNext(iter, script, 0);
if (value >= 0) {
if (*script != 0) {
retainOldMask |= 2;
}
state = iter.getState64();
} else {
// Script not found after this language: keep it, and fall back to the empty script.
retainOldMask |= 2;
if (state == 0) {
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
U_ASSERT(value >= 0);
state = iter.getState64();
}
}
}
if (value > 0) {
// Final value from just language or language+script.
if (*region != 0) {
retainOldMask |= 1;
}
} else {
value = trieNext(iter, region, 0);
if (value >= 0) {
if (*region != 0) {
retainOldMask |= 1;
}
} else {
// Region not found: keep it, and fall back to the empty region.
retainOldMask |= 1;
if (state == 0) {
value = defaultLsrIndex;
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
U_ASSERT(value > 0);
}
}
}
// value is now an index into lsrs.
U_ASSERT(value < lsrsLength);
const LSR &result = lsrs[value];
if (*language == 0) {
language = "und";
}
if (retainOldMask == 0) {
// Quickly return a copy of the lookup-result LSR
// without new allocation of the subtags.
return LSR(result.language, result.script, result.region);
}
// Replace each subtag that is not marked for retention with the looked-up one.
if ((retainOldMask & 4) == 0) {
language = result.language;
}
if ((retainOldMask & 2) == 0) {
script = result.script;
}
if ((retainOldMask & 1) == 0) {
region = result.region;
}
return LSR(language, script, region);
}
// Feeds one subtag, starting at s[i], into the trie iterator.
// An empty subtag is fed as '*'. Otherwise each character is converted to ASCII,
// and the last character of the subtag is fed with its 0x80 bit set.
// Returns -1 for no match, 0 for a match without a value,
// SKIP_SCRIPT for an intermediate value, or the final trie value.
int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
    UStringTrieResult result;
    uint8_t current = s[i];
    if (current == 0) {
        result = iter.next(u'*');
    } else {
        while (true) {
            current = uprv_invCharToAscii(current);
            // EBCDIC: If s[i] is not an invariant character,
            // then current is now 0 and will simply not match anything, which is harmless.
            const uint8_t following = s[++i];
            if (following == 0) {
                // last character of this subtag
                result = iter.next(current | 0x80);
                break;
            }
            if (!USTRINGTRIE_HAS_NEXT(iter.next(current))) {
                return -1;
            }
            current = following;
        }
    }
    if (result == USTRINGTRIE_NO_VALUE) {
        return 0;
    }
    if (result == USTRINGTRIE_INTERMEDIATE_VALUE) {
        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
        return SKIP_SCRIPT;
    }
    if (result == USTRINGTRIE_FINAL_VALUE) {
        return iter.getValue();
    }
    // USTRINGTRIE_NO_MATCH, or any unexpected result.
    return -1;
}
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
// in loclikely.cpp to this new code, including activating this
// minimizeSubtags() function. The LocaleMatcher does not minimize.
#if 0
// NOTE: This disabled function is not valid C++ yet. Its body still uses Java
// syntax (ULocale.Minimize, "new" producing values, "boolean", String.equals())
// and must be ported before the surrounding #if 0 can be removed.
LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
const char *regionIn, ULocale.Minimize fieldToFavor,
UErrorCode &errorCode) const {
LSR result = maximize(languageIn, scriptIn, regionIn);
// We could try just a series of checks, like:
// LSR result2 = addLikelySubtags(languageIn, "", "");
// if result.equals(result2) return result2;
// However, we can optimize 2 of the cases:
// (languageIn, "", "")
// (languageIn, "", regionIn)
// value00 = lookup(result.language, "", "")
BytesTrie iter = new BytesTrie(trie);
int value = trieNext(iter, result.language, 0);
U_ASSERT(value >= 0);
if (value == 0) {
value = trieNext(iter, "", 0);
U_ASSERT(value >= 0);
if (value == 0) {
value = trieNext(iter, "", 0);
}
}
U_ASSERT(value > 0);
LSR value00 = lsrs[value];
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
if (result.region.equals(value00.region)) {
return new LSR(result.language, "", "");
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
return new LSR(result.language, "", result.region);
} else {
favorRegionOk = true;
}
}
// The last case is not as easy to optimize.
// Maybe do later, but for now use the straightforward code.
LSR result2 = maximize(languageIn, scriptIn, "");
if (result2.equals(result)) {
return new LSR(result.language, result.script, "");
} else if (favorRegionOk) {
return new LSR(result.language, "", result.region);
}
return result;
}
#endif
U_NAMESPACE_END

View file

@ -0,0 +1,143 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// loclikelysubtags.h
// created: 2019may08 Markus W. Scherer
#ifndef __LOCLIKELYSUBTAGS_H__
#define __LOCLIKELYSUBTAGS_H__
#include <utility>
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "unicode/ures.h"
#include "lsr.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
struct XLikelySubtagsData;
/**
* Map of const char * keys & values.
* Stores pointers as is: Does not own/copy/adopt/release strings.
*/
class CharStringMap final : public UMemory {
public:
    /** Constructs an unusable non-map. */
    CharStringMap() : map(nullptr) {}
    /**
     * Opens a hash table sized for `size` entries.
     * On failure errorCode is set by uhash_openSize().
     */
    CharStringMap(int32_t size, UErrorCode &errorCode) {
        map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
                             size, &errorCode);
    }
    /** Takes over the hash table of other and leaves other as an unusable non-map. */
    CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
        other.map = nullptr;
    }
    CharStringMap(const CharStringMap &other) = delete;
    ~CharStringMap() {
        uhash_close(map);
    }

    /**
     * Takes over the hash table of other and leaves other as an unusable non-map.
     * The table previously held by this object is closed first so that it is not
     * leaked. Self-assignment is a no-op.
     */
    CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
        if (this != &other) {
            // Same call, and same tolerance for a null map, as in the destructor.
            uhash_close(map);
            map = other.map;
            other.map = nullptr;
        }
        return *this;
    }
    CharStringMap &operator=(const CharStringMap &other) = delete;

    /** Returns the value stored for key, as returned by uhash_get(). */
    const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
    /** Stores the key and value pointers as is; the strings are not copied. */
    void put(const char *key, const char *value, UErrorCode &errorCode) {
        uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
    }

private:
    UHashtable *map;
};
/**
 * Bundle of pointers to the distance/matcher data.
 * Movable but not copy-assignable.
 * The destructor releases partitions (uprv_free) and paradigms (delete[]);
 * the other pointers are not released by this struct.
 */
struct LocaleDistanceData {
LocaleDistanceData() = default;
LocaleDistanceData(LocaleDistanceData &&data);
~LocaleDistanceData();
const uint8_t *distanceTrieBytes = nullptr;
const uint8_t *regionToPartitions = nullptr;
// Released with uprv_free() in the destructor.
const char **partitions = nullptr;
// Released with delete[] in the destructor.
const LSR *paradigms = nullptr;
int32_t paradigmsLength = 0;
const int32_t *distances = nullptr;
private:
LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
};
// TODO(ICU-20777): Rename to just LikelySubtags.
/**
 * Likely-subtags lookup: turns a Locale into a maximized LSR
 * (language, script, region) using trie data from the langInfo resource bundle.
 * Instances are obtained via getSingleton(); the constructor is private
 * and the class is neither copyable nor assignable.
 */
class XLikelySubtags final : public UMemory {
public:
~XLikelySubtags();
// Trie value reported by trieNext() for an intermediate value (see maximize()).
static constexpr int32_t SKIP_SCRIPT = 1;
// VisibleForTesting
static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
// VisibleForTesting
LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
// in loclikely.cpp to this new code, including activating this
// minimizeSubtags() function. The LocaleMatcher does not minimize.
#if 0
LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
#endif
// visible for LocaleDistance
const LocaleDistanceData &getDistanceData() const { return distanceData; }
private:
XLikelySubtags(XLikelySubtagsData &data);
XLikelySubtags(const XLikelySubtags &other) = delete;
XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
// Init-once callback that creates the singleton.
static void initLikelySubtags(UErrorCode &errorCode);
LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
const char *variant, UErrorCode &errorCode) const;
/**
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
*/
LSR maximize(const char *language, const char *script, const char *region) const;
// Feeds one subtag into the trie iterator; returns -1 if there is no match.
static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
// Owned; closed in the destructor.
UResourceBundle *langInfoBundle;
// We could store the strings by value, except that if there were few enough strings,
// moving the contents could copy it to a different array,
// invalidating the pointers stored in the maps.
CharString *strings;
CharStringMap languageAliases;
CharStringMap regionAliases;
// The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
// There is also a trie value for each intermediate lang and lang+script.
// '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
BytesTrie trie;
// Trie state after "*" (language "und").
uint64_t trieUndState;
// Trie state after "**" ("und-Zzzz").
uint64_t trieUndZzzzState;
// Trie value for "***", used as the fallback index into lsrs.
int32_t defaultLsrIndex;
// Trie state after each first language letter a..z; maximize() treats 0 as "none".
uint64_t trieFirstLetterStates[26];
// Owned; deleted in the destructor.
const LSR *lsrs;
#if U_DEBUG
int32_t lsrsLength;
#endif
// distance/matcher data: see comment in XLikelySubtagsData::load()
LocaleDistanceData distanceData;
};
U_NAMESPACE_END
#endif // __LOCLIKELYSUBTAGS_H__

101
icu4c/source/common/lsr.cpp Normal file
View file

@ -0,0 +1,101 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// lsr.cpp
// created: 2019may08 Markus W. Scherer
#include "unicode/utypes.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "lsr.h"
#include "uinvchar.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
// Builds "<prefix><lang>\0<prefix><scr>" in one owned allocation and points
// language and script into it. The region pointer is aliased, not copied.
// On failure language and script remain nullptr.
LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) :
        language(nullptr), script(nullptr), region(r),
        regionIndex(indexForRegion(region)) {
    if (U_FAILURE(errorCode)) { return; }
    CharString combined;
    combined.append(prefix, errorCode);
    combined.append(lang, errorCode);
    combined.append('\0', errorCode);
    // The script part starts right after the NUL that terminates the language part.
    const int32_t scriptStart = combined.length();
    combined.append(prefix, errorCode);
    combined.append(scr, errorCode);
    owned = combined.cloneData(errorCode);
    if (U_FAILURE(errorCode)) { return; }
    language = owned;
    script = owned + scriptStart;
}
// Move constructor: copies all fields from other.
LSR::LSR(LSR &&other) U_NOEXCEPT :
language(other.language), script(other.script), region(other.region), owned(other.owned),
regionIndex(other.regionIndex), hashCode(other.hashCode) {
// If other owned its language/script memory, that memory now belongs to this object:
// detach other from it so that it is not freed twice.
// If other only aliased its strings, it is left unchanged.
if (owned != nullptr) {
other.language = other.script = "";
other.owned = nullptr;
other.hashCode = 0;
}
}
// Frees the owned language/script memory. Does not reset the pointer.
void LSR::deleteOwned() {
uprv_free(owned);
}
// Move assignment: releases the memory owned by this object, then takes over
// all fields of other.
LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
    // Self-assignment must not free the memory that is about to be "taken over".
    if (this == &other) {
        return *this;
    }
    // Release the owned memory directly rather than running the destructor on an
    // object that remains in use afterwards.
    if (owned != nullptr) {
        deleteOwned();
    }
    language = other.language;
    script = other.script;
    region = other.region;
    regionIndex = other.regionIndex;
    owned = other.owned;
    hashCode = other.hashCode;
    // If other owned its language/script memory, detach other from it
    // so that it is not freed twice.
    if (owned != nullptr) {
        other.language = other.script = "";
        other.owned = nullptr;
        other.hashCode = 0;
    }
    return *this;
}
// Two LSRs are equal if language, script and region index all match.
UBool LSR::operator==(const LSR &other) const {
    if (uprv_strcmp(language, other.language) != 0) { return false; }
    if (uprv_strcmp(script, other.script) != 0) { return false; }
    if (regionIndex != other.regionIndex) { return false; }
    if (regionIndex > 0) { return true; }
    // Compare regions if both are ill-formed (and their indexes are 0).
    return uprv_strcmp(region, other.region) == 0;
}
// Maps a region code to a positive index, or to 0 if the code is ill-formed.
// Three digits ("419") map to 1..1000, two uppercase letters ("DE") to 1001..1676.
int32_t LSR::indexForRegion(const char *region) {
    const int32_t first = region[0];
    const int32_t d0 = first - '0';
    if (d0 < 0 || 9 < d0) {
        // letters: "DE"
        const int32_t l0 = uprv_upperOrdinal(first);
        if (l0 < 0 || 25 < l0) { return 0; }
        const int32_t l1 = uprv_upperOrdinal(region[1]);
        if (l1 < 0 || 25 < l1 || region[2] != 0) { return 0; }
        return 26 * l0 + l1 + 1001;
    }
    // digits: "419"
    const int32_t d1 = region[1] - '0';
    if (d1 < 0 || 9 < d1) { return 0; }
    const int32_t d2 = region[2] - '0';
    if (d2 < 0 || 9 < d2 || region[3] != 0) { return 0; }
    return (10 * d0 + d1) * 10 + d2 + 1;
}
// Computes the hash code from language, script and region index, unless a
// non-zero hash code is already stored.
LSR &LSR::setHashCode() {
    if (hashCode != 0) {
        return *this;
    }
    const auto languageHash =
        ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
    const auto scriptHash =
        ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));
    hashCode = (languageHash * 37 + scriptHash) * 37 + regionIndex;
    return *this;
}
U_NAMESPACE_END

72
icu4c/source/common/lsr.h Normal file
View file

@ -0,0 +1,72 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// lsr.h
// created: 2019may08 Markus W. Scherer
#ifndef __LSR_H__
#define __LSR_H__
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "cstring.h"
U_NAMESPACE_BEGIN
/**
 * Language, script and region subtags, plus a numeric index for the region.
 * The subtag pointers normally alias strings owned elsewhere; only the
 * prefix constructor allocates memory (tracked in `owned`).
 * Movable but not copyable.
 */
struct LSR final : public UMemory {
// One more than the largest value returned by indexForRegion() (1001 + 26 * 26 - 1).
static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
const char *language;
const char *script;
const char *region;
// Memory holding language and script if this LSR allocated them, otherwise nullptr.
// Freed in the destructor.
char *owned = nullptr;
/** Index for region, 0 if ill-formed. @see indexForRegion */
int32_t regionIndex = 0;
/** Only set for LSRs that will be used in a hash table. */
int32_t hashCode = 0;
/** Default constructor: language "und", empty script and region. */
LSR() : language("und"), script(""), region("") {}
/** Constructor which aliases all subtag pointers. */
LSR(const char *lang, const char *scr, const char *r) :
language(lang), script(scr), region(r),
regionIndex(indexForRegion(region)) {}
/**
* Constructor which prepends the prefix to the language and script,
* copies those into owned memory, and aliases the region.
*/
LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode);
LSR(LSR &&other) U_NOEXCEPT;
LSR(const LSR &other) = delete;
inline ~LSR() {
// Pure inline code for almost all instances.
if (owned != nullptr) {
deleteOwned();
}
}
LSR &operator=(LSR &&other) U_NOEXCEPT;
LSR &operator=(const LSR &other) = delete;
/**
* Returns a positive index (>0) for a well-formed region code.
* Do not rely on a particular region->index mapping; it may change.
* Returns 0 for ill-formed strings.
*/
static int32_t indexForRegion(const char *region);
UBool operator==(const LSR &other) const;
inline UBool operator!=(const LSR &other) const {
return !operator==(other);
}
/** Computes and stores the hash code if it is still 0; returns *this. */
LSR &setHashCode();
private:
// Out-of-line part of the destructor: frees `owned`.
void deleteOwned();
};
U_NAMESPACE_END
#endif // __LSR_H__

View file

@ -94,13 +94,20 @@ public:
*/
int32_t getSize() const { return length; }
/**
* @param i Array item index.
* @param i Table item index.
* @param key Output-only, receives the key of the i'th item.
* @param value Output-only, receives the value of the i'th item.
* @return TRUE if i is non-negative and less than getSize().
*/
UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;
/**
* @param key Key string to find in the table.
* @param value Output-only, receives the value of the item with that key.
* @return TRUE if the table contains the key.
*/
UBool findValue(const char *key, ResourceValue &value) const;
private:
const uint16_t *keys16;
const int32_t *keys32;

View file

@ -39,6 +39,8 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
UCLN_COMMON_LIKELY_SUBTAGS,
UCLN_COMMON_LOCALE_DISTANCE,
UCLN_COMMON_ULOC,
UCLN_COMMON_CURRENCY,
UCLN_COMMON_LOADED_NORMALIZER2,

View file

@ -445,6 +445,13 @@ uprv_copyEbcdic(const UDataSwapper *ds,
return length;
}
/* Returns true if c is one of the listed EBCDIC byte values; NUL never matches. */
U_CFUNC UBool
uprv_isEbcdicAtSign(char c) {
    static const uint8_t ebcdicAtSigns[] = {
        0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    if (c == 0) {
        return false;
    }
    /* The list is terminated by its 0x00 entry. */
    for (const uint8_t *candidate = ebcdicAtSigns; *candidate != 0; ++candidate) {
        if (static_cast<char>(*candidate) == c) {
            return true;
        }
    }
    return false;
}
/* compare invariant strings; variant characters compare less than others and unlike each other */
U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper *ds,
@ -561,6 +568,11 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
}
}
/* Looks up c, taken as an unsigned byte value, in the asciiFromEbcdic table. */
U_CAPI char U_EXPORT2
uprv_ebcdicToAscii(char c) {
return (char)asciiFromEbcdic[(uint8_t)c];
}
U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c) {
return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];

View file

@ -68,6 +68,75 @@ uprv_isInvariantUString(const UChar *s, int32_t length);
# error Unknown charset family!
#endif
#ifdef __cplusplus
U_NAMESPACE_BEGIN
/**
 * Validating variant of U_UPPER_ORDINAL(x).
 * Maps A..Z to 0..25; any other input yields a value outside 0..25.
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'A';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    // The ranges are checked from the highest one down; the gaps between them yield -1.
    if ('S' <= c) { return c - 'S' + 18; }  // S-Z --> 18..25
    if ('R' < c) { return -1; }
    if ('J' <= c) { return c - 'J' + 9; }  // J-R --> 9..17
    if ('I' < c) { return -1; }
    return c - 'A';  // A-I --> 0-8
#else
#   error Unknown charset family!
#endif
}
/**
 * Lowercase counterpart of U_UPPER_ORDINAL(x), with validation.
 * Maps a..z to 0..25; any other input yields a value outside 0..25.
 */
inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'a';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    // The ranges are checked from the highest one down; the gaps between them yield -1.
    if ('s' <= c) { return c - 's' + 18; }  // s-z --> 18..25
    if ('r' < c) { return -1; }
    if ('j' <= c) { return c - 'j' + 9; }  // j-r --> 9..17
    if ('i' < c) { return -1; }
    return c - 'a';  // a-i --> 0-8
#else
#   error Unknown charset family!
#endif
}
U_NAMESPACE_END
#endif
/**
* Returns true if c == '@' is possible.
* The @ sign is variant, and the @ sign used on one
* EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
* @internal
*/
U_CFUNC UBool
uprv_isEbcdicAtSign(char c);
/**
* \def uprv_isAtSign
* Returns true if c == '@' is possible.
* For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
* @internal
*/
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_isAtSign(c) ((c)=='@')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
#else
# error Unknown charset family!
#endif
/**
* Compare two EBCDIC invariant-character strings in ASCII order.
* @internal
@ -88,6 +157,26 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
# error Unknown charset family!
#endif
/**
* Converts an EBCDIC invariant character to ASCII.
* @internal
*/
U_INTERNAL char U_EXPORT2
uprv_ebcdicToAscii(char c);
/**
* \def uprv_invCharToAscii
* Converts an invariant character to ASCII.
* @internal
*/
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_invCharToAscii(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
#else
# error Unknown charset family!
#endif
/**
* Converts an EBCDIC invariant character to lowercase ASCII.
* @internal

View file

@ -8,10 +8,10 @@
#if U_SHOW_CPLUSPLUS_API
#include "unicode/locid.h"
#include "unicode/localematcher.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
#ifndef U_HIDE_DRAFT_API
/**
* \file
@ -291,6 +291,10 @@ public:
UBool copyErrorTo(UErrorCode &outErrorCode) const;
private:
friend class LocaleMatcher::Result;
void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
UErrorCode status_;
char language_[9];
char script_[5];

View file

@ -0,0 +1,605 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// localematcher.h
// created: 2019may08 Markus W. Scherer
#ifndef __LOCALEMATCHER_H__
#define __LOCALEMATCHER_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/locid.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
*/
#ifndef U_HIDE_DRAFT_API
/**
* Builder option for whether the language subtag or the script subtag is most important.
*
* @see LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag)
* @draft ICU 65
*/
enum ULocMatchFavorSubtag {
/**
* Language differences are most important, then script differences, then region differences.
* (This is the default behavior.)
*
* @draft ICU 65
*/
ULOCMATCH_FAVOR_LANGUAGE,
/**
* Makes script differences matter relatively more than language differences.
*
* @draft ICU 65
*/
ULOCMATCH_FAVOR_SCRIPT
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
#endif
/**
* Builder option for whether all desired locales are treated equally or
* earlier ones are preferred.
*
* @see LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion)
* @draft ICU 65
*/
enum ULocMatchDemotion {
/**
* All desired locales are treated equally.
*
* @draft ICU 65
*/
ULOCMATCH_DEMOTION_NONE,
/**
* Earlier desired locales are preferred.
*
* <p>From each desired locale to the next,
* the distance to any supported locale is increased by an additional amount
* which is at least as large as most region mismatches.
* A later desired locale has to have a better match with some supported locale
* due to more than merely having the same region subtag.
*
* <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
* yields <code>Result(en-GB, en)</code> because
* with the demotion of sv its perfect match is no better than
* the region distance between the earlier desired locale en-GB and en=en-US.
*
* <p>Notes:
* <ul>
* <li>In some cases, language and/or script differences can be as small as
* the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
* <li>It is possible for certain region differences to be larger than usual,
* and larger than the demotion.
* (As of CLDR 35 there is no such case, but
* this is possible in future versions of the data.)
* </ul>
*
* @draft ICU 65
*/
ULOCMATCH_DEMOTION_REGION
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif
struct UHashtable;
U_NAMESPACE_BEGIN
struct LSR;
class LocaleDistance;
class LocaleLsrIterator;
class UVector;
class XLikelySubtags;
/**
* Immutable class that picks the best match between a user's desired locales and
* an application's supported locales.
* Movable but not copyable.
*
* <p>Example:
* <pre>
* UErrorCode errorCode = U_ZERO_ERROR;
* LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
* const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en"
* </pre>
*
* <p>A matcher takes into account when languages are close to one another,
* such as Danish and Norwegian,
* and when regional variants are close, like en-GB and en-AU as opposed to en-US.
*
* <p>If there are multiple supported locales with the same (language, script, region)
* likely subtags, then the current implementation returns the first of those locales.
* It ignores variant subtags (except for pseudolocale variants) and extensions.
* This may change in future versions.
*
* <p>For example, the current implementation does not distinguish between
* de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
*
* <p>If you prefer one equivalent locale over another, then provide only the preferred one,
* or place it earlier in the list of supported locales.
*
* <p>Otherwise, the order of supported locales may have no effect on the best-match results.
* The current implementation compares each desired locale with supported locales
* in the following order:
* 1. Default locale, if supported;
* 2. CLDR "paradigm locales" like en-GB and es-419;
* 3. other supported locales.
* This may change in future versions.
*
* <p>Often a product will just need one matcher instance, built with the languages
* that it supports. However, it may want multiple instances with different
* default languages based on additional information, such as the domain.
*
* <p>This class is not intended for public subclassing.
*
* @draft ICU 65
*/
class U_COMMON_API LocaleMatcher : public UMemory {
public:
/**
* Data for the best-matching pair of a desired and a supported locale.
* Movable but not copyable.
* Instances are created only by LocaleMatcher (the sole friend of this class);
* there is no public constructor other than the move constructor.
*
* @draft ICU 65
*/
class U_COMMON_API Result : public UMemory {
public:
/**
* Move constructor; might modify the source.
* This object will have the same contents that the source object had.
*
* @param src Result to move contents from.
* @draft ICU 65
*/
Result(Result &&src) U_NOEXCEPT;
/**
* Destructor.
*
* @draft ICU 65
*/
~Result();
/**
* Move assignment; might modify the source.
* This object will have the same contents that the source object had.
*
* @param src Result to move contents from.
* @return *this
* @draft ICU 65
*/
Result &operator=(Result &&src) U_NOEXCEPT;
/**
* Returns the best-matching desired locale.
* nullptr if the list of desired locales is empty or if none matched well enough.
*
* @return the best-matching desired locale, or nullptr.
* @draft ICU 65
*/
inline const Locale *getDesiredLocale() const { return desiredLocale; }
/**
* Returns the best-matching supported locale.
* If none matched well enough, this is the default locale.
* The default locale is nullptr if the list of supported locales is empty and
* no explicit default locale is set.
*
* @return the best-matching supported locale, or nullptr.
* @draft ICU 65
*/
inline const Locale *getSupportedLocale() const { return supportedLocale; }
/**
* Returns the index of the best-matching desired locale in the input iteration order.
* -1 if the list of desired locales is empty or if none matched well enough.
*
* @return the index of the best-matching desired locale, or -1.
* @draft ICU 65
*/
inline int32_t getDesiredIndex() const { return desiredIndex; }
/**
* Returns the index of the best-matching supported locale in the
* builder's input order (the locales that were set, plus added locales).
* If the matcher was built from a locale list string, then the iteration order is that
* of a LocalePriorityList built from the same string.
* -1 if the list of supported locales is empty or if none matched well enough.
*
* @return the index of the best-matching supported locale, or -1.
* @draft ICU 65
*/
inline int32_t getSupportedIndex() const { return supportedIndex; }
/**
* Takes the best-matching supported locale and adds relevant fields of the
* best-matching desired locale, such as the -t- and -u- extensions.
* May replace some fields of the supported locale.
* The result is the locale that should be used for date and number formatting, collation, etc.
* Returns the root locale if getSupportedLocale() returns nullptr.
*
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
*
* @param errorCode ICU error code (in/out).
* NOTE(review): presumably follows the usual ICU convention of
* returning immediately if it is already a failure; the
* implementation is not visible here -- confirm.
* @return a locale combining the best-matching desired and supported locales.
* @draft ICU 65
*/
Locale makeResolvedLocale(UErrorCode &errorCode) const;
private:
// Stores the arguments into the corresponding fields without further processing.
// Private: only the friend class LocaleMatcher creates Result objects.
Result(const Locale *desired, const Locale *supported,
int32_t desIndex, int32_t suppIndex, UBool owned) :
desiredLocale(desired), supportedLocale(supported),
desiredIndex(desIndex), supportedIndex(suppIndex),
desiredIsOwned(owned) {}
// Not copyable; use the move constructor / move assignment instead.
Result(const Result &other) = delete;
Result &operator=(const Result &other) = delete;
// Best-matching desired locale, or nullptr; see getDesiredLocale().
const Locale *desiredLocale;
// Best-matching supported locale, or nullptr; see getSupportedLocale().
const Locale *supportedLocale;
// Index of desiredLocale in the input order, or -1; see getDesiredIndex().
int32_t desiredIndex;
// Index of supportedLocale in the input order, or -1; see getSupportedIndex().
int32_t supportedIndex;
// Presumably TRUE when this object owns (and must delete) desiredLocale,
// as when the desired locale was cloned from an iterator; the destructor
// that would act on this flag is defined elsewhere -- confirm.
UBool desiredIsOwned;
friend class LocaleMatcher;
};
/**
* LocaleMatcher builder.
* Movable but not copyable.
*
* <p>The setters return *this so that calls can be chained; finish with build().
* Errors that occur while setting parameters are remembered inside the builder
* and can be retrieved with copyErrorTo().
*
* @see LocaleMatcher
* @draft ICU 65
*/
class U_COMMON_API Builder : public UMemory {
public:
/**
* Constructs a builder used in chaining parameters for building a LocaleMatcher.
* All settings start with their default member initializers.
*
* @draft ICU 65
*/
Builder() {}
/**
* Move constructor; might modify the source.
* This builder will have the same contents that the source builder had.
*
* @param src Builder to move contents from.
* @draft ICU 65
*/
Builder(Builder &&src) U_NOEXCEPT;
/**
* Destructor.
*
* @draft ICU 65
*/
~Builder();
/**
* Move assignment; might modify the source.
* This builder will have the same contents that the source builder had.
*
* @param src Builder to move contents from.
* @return *this
* @draft ICU 65
*/
Builder &operator=(Builder &&src) U_NOEXCEPT;
/**
* Parses an Accept-Language string
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
* such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
* Allows whitespace in more places but does not allow "*".
* Clears any previously set/added supported locales first.
*
* @param locales the Accept-Language string of locales to set
* @return this Builder object
* @draft ICU 65
*/
Builder &setSupportedLocalesFromListString(StringPiece locales);
/**
* Copies the supported locales, preserving iteration order.
* Clears any previously set/added supported locales first.
* Duplicates are allowed, and are not removed.
*
* @param locales the list of locales
* @return this Builder object
* @draft ICU 65
*/
Builder &setSupportedLocales(Locale::Iterator &locales);
/**
* Copies the supported locales from the begin/end range, preserving iteration order.
* Clears any previously set/added supported locales first.
* Duplicates are allowed, and are not removed.
*
* Each of the iterator parameter values must be an
* input iterator whose value is convertible to const Locale &.
*
* Does nothing if an earlier setter already recorded an error in this builder.
*
* @param begin Start of range.
* @param end Exclusive end of range.
* @return this Builder object
* @draft ICU 65
*/
template<typename Iter>
Builder &setSupportedLocales(Iter begin, Iter end) {
// An earlier failure makes this call a no-op; the existing list is left untouched.
if (U_FAILURE(errorCode_)) { return *this; }
clearSupportedLocales();
while (begin != end) {
addSupportedLocale(*begin++);
}
return *this;
}
/**
* Copies the supported locales from the begin/end range, preserving iteration order.
* Calls the converter to convert each *begin to a Locale or const Locale &.
* Clears any previously set/added supported locales first.
* Duplicates are allowed, and are not removed.
*
* Each of the iterator parameter values must be an
* input iterator whose value the converter converts to a Locale or const Locale &.
*
* Does nothing if an earlier setter already recorded an error in this builder.
*
* @param begin Start of range.
* @param end Exclusive end of range.
* @param converter Converter from *begin to const Locale & or compatible.
* @return this Builder object
* @draft ICU 65
*/
template<typename Iter, typename Conv>
Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
// An earlier failure makes this call a no-op; the existing list is left untouched.
if (U_FAILURE(errorCode_)) { return *this; }
clearSupportedLocales();
while (begin != end) {
addSupportedLocale(converter(*begin++));
}
return *this;
}
/**
* Adds another supported locale.
* Duplicates are allowed, and are not removed.
*
* @param locale another locale
* @return this Builder object
* @draft ICU 65
*/
Builder &addSupportedLocale(const Locale &locale);
/**
* Sets the default locale; if nullptr, or if it is not set explicitly,
* then the first supported locale is used as the default locale.
*
* @param defaultLocale the default locale (will be copied)
* @return this Builder object
* @draft ICU 65
*/
Builder &setDefaultLocale(const Locale *defaultLocale);
/**
* If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
* differences.
* This is used in situations (such as maps) where
* it is better to fall back to the same script than a similar language.
* The initial setting is ULOCMATCH_FAVOR_LANGUAGE.
*
* @param subtag the subtag to favor
* @return this Builder object
* @draft ICU 65
*/
Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
/**
* Option for whether all desired locales are treated equally or
* earlier ones are preferred (this is the default).
* The initial setting is ULOCMATCH_DEMOTION_REGION.
*
* @param demotion the demotion per desired locale to set.
* @return this Builder object
* @draft ICU 65
*/
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
/**
* Sets the UErrorCode if an error occurred while setting parameters.
* Preserves older error codes in the outErrorCode.
*
* @param outErrorCode Set to an error code if it does not contain one already
* and an error occurred while setting parameters.
* Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 65
*/
UBool copyErrorTo(UErrorCode &outErrorCode) const;
/**
* Builds and returns a new locale matcher.
* This builder can continue to be used.
*
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return new LocaleMatcher.
* @draft ICU 65
*/
LocaleMatcher build(UErrorCode &errorCode) const;
private:
// LocaleMatcher's private constructor takes a const Builder & and reads these fields.
friend class LocaleMatcher;
// Not copyable; use the move constructor / move assignment instead.
Builder(const Builder &other) = delete;
Builder &operator=(const Builder &other) = delete;
// Helpers used by the setters above; defined elsewhere.
void clearSupportedLocales();
bool ensureSupportedLocaleVector();
// First error recorded by a setter; checked by the template setters and
// reported through copyErrorTo().
UErrorCode errorCode_ = U_ZERO_ERROR;
// Supported locales in input order; nullptr until needed
// (presumably created by ensureSupportedLocaleVector() -- confirm).
UVector *supportedLocales_ = nullptr;
// NOTE(review): no public setter is visible in this class; -1 presumably
// means "use the default threshold" -- confirm in the implementation.
int32_t thresholdDistance_ = -1;
// See setDemotionPerDesiredLocale().
ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
// Copy of the locale passed to setDefaultLocale(), or nullptr if none was set.
Locale *defaultLocale_ = nullptr;
// See setFavorSubtag().
ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
};
// FYI No public LocaleMatcher constructors in C++; use the Builder.
/**
* Move constructor; might modify the source.
* This matcher will have the same settings that the source matcher had.
* @param src source matcher
* @draft ICU 65
*/
LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
/**
* Destructor.
* @draft ICU 65
*/
~LocaleMatcher();
/**
* Move assignment operator; might modify the source.
* This matcher will have the same settings that the source matcher had.
* The behavior is undefined if *this and src are the same object.
* @param src source matcher
* @return *this
* @draft ICU 65
*/
LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
/**
* Returns the supported locale which best matches the desired locale.
*
* @param desiredLocale Typically a user's language.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return the best-matching supported locale.
* @draft ICU 65
*/
const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
/**
* Returns the supported locale which best matches one of the desired locales.
*
* @param desiredLocales Typically a user's languages, in order of preference (descending).
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return the best-matching supported locale.
* @draft ICU 65
*/
const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
/**
* Parses an Accept-Language string
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
* such as "af, en, fr;q=0.9",
* and returns the supported locale which best matches one of the desired locales.
* Allows whitespace in more places but does not allow "*".
*
* @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return the best-matching supported locale.
* @draft ICU 65
*/
const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
/**
* Returns the best match between the desired locale and the supported locales.
* If the result's desired locale is not nullptr, then it is the address of the input locale.
* It has not been cloned.
*
* @param desiredLocale Typically a user's language.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return the best-matching pair of the desired and a supported locale.
* @draft ICU 65
*/
Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
/**
* Returns the best match between the desired and supported locales.
* If the result's desired locale is not nullptr, then it is a clone of
* the best-matching desired locale. The Result object owns the clone.
*
* @param desiredLocales Typically a user's languages, in order of preference (descending).
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return the best-matching pair of a desired and a supported locale.
* @draft ICU 65
*/
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
#ifndef U_HIDE_INTERNAL_API
/**
* Returns a fraction between 0 and 1, where 1 means that the languages are a
* perfect match, and 0 means that they are completely different.
*
* <p>This is mostly an implementation detail, and the precise values may change over time.
* The implementation may use either the maximized forms or the other ones, or both.
* The implementation may or may not rely on the forms to be consistent with each other.
*
* <p>Callers should construct and use a matcher rather than match pairs of locales directly.
*
* @param desired Desired locale.
* @param supported Supported locale.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return value between 0 and 1, inclusive.
* @internal (has a known user)
*/
double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
#endif // U_HIDE_INTERNAL_API
private:
LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
LocaleMatcher(const LocaleMatcher &other) = delete;
LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
const XLikelySubtags &likelySubtags;
const LocaleDistance &localeDistance;
int32_t thresholdDistance;
int32_t demotionPerDesiredLocale;
ULocMatchFavorSubtag favorSubtag;
// These are in input order.
const Locale ** supportedLocales;
LSR *lsrs;
int32_t supportedLocalesLength;
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
// Array versions of the supportedLsrToIndex keys and values.
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
const LSR **supportedLSRs;
int32_t *supportedIndexes;
int32_t supportedLSRsLength;
Locale *ownedDefaultLocale;
const Locale *defaultLocale;
int32_t defaultLocaleIndex;
};
U_NAMESPACE_END
#endif // U_HIDE_DRAFT_API
#endif // U_SHOW_CPLUSPLUS_API
#endif // __LOCALEMATCHER_H__

View file

@ -1011,6 +1011,104 @@ public:
*/
virtual UClassID getDynamicClassID() const;
#ifndef U_HIDE_DRAFT_API
/**
* A Locale iterator interface similar to a Java Iterator<Locale>.
* Abstract: hasNext() and next() are pure virtual and must be provided by
* an implementation class.
* @draft ICU 65
*/
class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ {
public:
/** @draft ICU 65 */
virtual ~Iterator();
/**
* @return TRUE if next() can be called again.
* @draft ICU 65
*/
virtual UBool hasNext() const = 0;
/**
* Returns the next locale by reference; the interface itself does not say
* how long the referenced object stays valid, that depends on the implementation.
* @return the next locale.
* @draft ICU 65
*/
virtual const Locale &next() = 0;
};
/**
* A generic Locale iterator implementation over Locale input iterators.
* Stores copies of the begin and end iterators; it does not copy the locales.
* @draft ICU 65
*/
template<typename Iter>
class RangeIterator : public Iterator, public UMemory {
public:
/**
* Constructs an iterator from a begin/end range.
* Each of the iterator parameter values must be an
* input iterator whose value is convertible to const Locale &.
*
* @param begin Start of range.
* @param end Exclusive end of range.
* @draft ICU 65
*/
RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {}
/**
* @return TRUE if next() can be called again.
* @draft ICU 65
*/
UBool hasNext() const override { return it_ != end_; }
/**
* Dereferences the current position and then advances it.
* There is no range check here: call only while hasNext() returns TRUE.
* @return the next locale.
* @draft ICU 65
*/
const Locale &next() override { return *it_++; }
private:
// Current position; advanced by next().
Iter it_;
// Exclusive end of the range.
const Iter end_;
};
/**
* A generic Locale iterator implementation over Locale input iterators.
* Calls the converter to convert each *begin to a const Locale &.
* Stores copies of the begin and end iterators and of the converter.
* @draft ICU 65
*/
template<typename Iter, typename Conv>
class ConvertingIterator : public Iterator, public UMemory {
public:
/**
* Constructs an iterator from a begin/end range.
* Each of the iterator parameter values must be an
* input iterator whose value the converter converts to const Locale &.
*
* @param begin Start of range.
* @param end Exclusive end of range.
* @param converter Converter from *begin to const Locale & or compatible.
* @draft ICU 65
*/
ConvertingIterator(Iter begin, Iter end, Conv converter) :
it_(begin), end_(end), converter_(converter) {}
/**
* @return TRUE if next() can be called again.
* @draft ICU 65
*/
UBool hasNext() const override { return it_ != end_; }
/**
* Passes the current element to the converter and then advances the position.
* There is no range check here: call only while hasNext() returns TRUE.
* NOTE(review): the converter's result is returned by reference, so the
* converter should yield a reference to a Locale that outlives this call;
* a converter returning a temporary would presumably dangle -- confirm.
* @return the next locale.
* @draft ICU 65
*/
const Locale &next() override { return converter_(*it_++); }
private:
// Current position; advanced by next().
Iter it_;
// Exclusive end of the range.
const Iter end_;
// Applied to each element in next().
Conv converter_;
};
#endif // U_HIDE_DRAFT_API
protected: /* only protected for testing purposes. DO NOT USE. */
#ifndef U_HIDE_INTERNAL_API
/**

View file

@ -39,6 +39,7 @@
#include "umutex.h"
#include "putilimp.h"
#include "uassert.h"
#include "uresdata.h"
using namespace icu;
@ -1952,7 +1953,7 @@ void getAllItemsWithFallback(
// When the sink sees the no-fallback/no-inheritance marker,
// then it would remove the parent's item.
// We would deserialize parent values even though they are overridden in a child bundle.
value.pResData = &bundle->fResData;
value.setData(&bundle->fResData);
UResourceDataEntry *parentEntry = bundle->fData->fParent;
UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus);
value.setResource(bundle->fRes, ResourceTracer(bundle));
@ -2000,31 +2001,60 @@ void getAllItemsWithFallback(
} // namespace
// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue.
// Unfortunately, the caller must know which subclass to make and pass in.
// Alternatively, we could make it as polymorphic as in Java by
// returning a ResourceValue pointer (possibly wrapped into a LocalPointer)
// that the caller then owns.
//
// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer
// can point to a non-local bundle.
// Without tracing, the child bundle could be a function-local object.
U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
icu::ResourceSink &sink, UErrorCode &errorCode) {
ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
UResourceBundle *tempFillIn,
ResourceDataValue &value, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
if (path == NULL) {
if (path == nullptr) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
UResourceBundle stackBundle;
ures_initStackObject(&stackBundle);
const UResourceBundle *rb;
if (*path == 0) {
// empty path
rb = bundle;
} else {
rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode);
rb = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode);
if (U_FAILURE(errorCode)) {
return;
}
}
value.setData(&rb->fResData);
value.setResource(rb->fRes, ResourceTracer(rb));
}
U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
icu::ResourceSink &sink, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
if (path == nullptr) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
StackUResourceBundle stackBundle;
const UResourceBundle *rb;
if (*path == 0) {
// empty path
rb = bundle;
} else {
rb = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode);
if (U_FAILURE(errorCode)) {
ures_close(&stackBundle);
return;
}
}
// Get all table items with fallback.
ResourceDataValue value;
getAllItemsWithFallback(rb, value, sink, errorCode);
ures_close(&stackBundle);
}
U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) {

View file

@ -509,7 +509,7 @@ const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode
if(U_FAILURE(errorCode)) {
return NULL;
}
const UChar *s = res_getString(fTraceInfo, pResData, res, &length);
const UChar *s = res_getString(fTraceInfo, &getData(), res, &length);
if(s == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@ -520,7 +520,7 @@ const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &erro
if(U_FAILURE(errorCode)) {
return NULL;
}
const UChar *s = res_getAlias(pResData, res, &length);
const UChar *s = res_getAlias(&getData(), res, &length);
if(s == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@ -551,7 +551,7 @@ const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &erro
if(U_FAILURE(errorCode)) {
return NULL;
}
const int32_t *iv = res_getIntVector(fTraceInfo, pResData, res, &length);
const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length);
if(iv == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@ -562,7 +562,7 @@ const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCo
if(U_FAILURE(errorCode)) {
return NULL;
}
const uint8_t *b = res_getBinary(fTraceInfo, pResData, res, &length);
const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length);
if(b == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@ -580,12 +580,12 @@ ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const {
switch(RES_GET_TYPE(res)) {
case URES_ARRAY:
if (offset!=0) { // empty if offset==0
items32 = (const Resource *)pResData->pRoot+offset;
items32 = (const Resource *)getData().pRoot+offset;
length = *items32++;
}
break;
case URES_ARRAY16:
items16 = pResData->p16BitUnits+offset;
items16 = getData().p16BitUnits+offset;
length = *items16++;
break;
default:
@ -608,19 +608,19 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
switch(RES_GET_TYPE(res)) {
case URES_TABLE:
if (offset != 0) { // empty if offset==0
keys16 = (const uint16_t *)(pResData->pRoot+offset);
keys16 = (const uint16_t *)(getData().pRoot+offset);
length = *keys16++;
items32 = (const Resource *)(keys16+length+(~length&1));
}
break;
case URES_TABLE16:
keys16 = pResData->p16BitUnits+offset;
keys16 = getData().p16BitUnits+offset;
length = *keys16++;
items16 = keys16 + length;
break;
case URES_TABLE32:
if (offset != 0) { // empty if offset==0
keys32 = pResData->pRoot+offset;
keys32 = getData().pRoot+offset;
length = *keys32++;
items32 = (const Resource *)keys32 + length;
}
@ -633,18 +633,18 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
}
UBool ResourceDataValue::isNoInheritanceMarker() const {
return ::isNoInheritanceMarker(pResData, res);
return ::isNoInheritanceMarker(&getData(), res);
}
int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity,
UErrorCode &errorCode) const {
return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
}
int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
UErrorCode &errorCode) const {
if(URES_IS_ARRAY(res)) {
return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
}
if(U_FAILURE(errorCode)) {
return 0;
@ -658,7 +658,7 @@ int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, in
return 1;
}
int32_t sLength;
const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
if(s != NULL) {
dest[0].setTo(TRUE, s, sLength);
return 1;
@ -673,7 +673,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode)
return us;
}
int32_t sLength;
const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
if(s != NULL) {
us.setTo(TRUE, s, sLength);
return us;
@ -684,7 +684,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode)
}
if(array.getSize() > 0) {
// Tracing is already performed above (unimportant for trace that this is an array)
s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, 0), &sLength);
s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength);
if(s != NULL) {
us.setTo(TRUE, s, sLength);
return us;
@ -821,14 +821,14 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i,
const char *&key, icu::ResourceValue &value) const {
if(0 <= i && i < length) {
icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
if (keys16 != NULL) {
key = RES_GET_KEY16(rdValue.pResData, keys16[i]);
if (keys16 != nullptr) {
key = RES_GET_KEY16(&rdValue.getData(), keys16[i]);
} else {
key = RES_GET_KEY32(rdValue.pResData, keys32[i]);
key = RES_GET_KEY32(&rdValue.getData(), keys32[i]);
}
Resource res;
if (items16 != NULL) {
res = makeResourceFrom16(rdValue.pResData, items16[i]);
if (items16 != nullptr) {
res = makeResourceFrom16(&rdValue.getData(), items16[i]);
} else {
res = items32[i];
}
@ -842,6 +842,29 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i,
return FALSE;
}
// Looks up the table item with the given key.
// On success, points `value` at the item's resource and returns TRUE;
// otherwise leaves `value` alone and returns FALSE.
// `value` must actually be a ResourceDataValue: it is downcast without a check,
// and its ResourceData is what the key and item offsets are resolved against.
UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const {
    icu::ResourceDataValue &dataValue = static_cast<icu::ResourceDataValue &>(value);
    const char *matchedKey = nullptr;
    // The table stores its keys as either 16-bit or 32-bit offsets.
    const int32_t index = (keys16 != nullptr)
        ? _res_findTableItem(&dataValue.getData(), keys16, length, key, &matchedKey)
        : _res_findTable32Item(&dataValue.getData(), keys32, length, key, &matchedKey);
    if (index < 0) {
        return FALSE;
    }
    // Likewise, the items are either compact 16-bit units or full 32-bit resources.
    Resource found;
    if (items16 == nullptr) {
        found = items32[index];
    } else {
        found = makeResourceFrom16(&dataValue.getData(), items16[index]);
    }
    // Same note about lifetime as in getKeyAndValue().
    dataValue.setResource(found, ResourceTracer(fTraceInfo, key));
    return TRUE;
}
U_CAPI Resource U_EXPORT2
res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) {
uint32_t offset=RES_GET_OFFSET(array);
@ -887,7 +910,7 @@ UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const {
// alive for the duration that fields are being read from it
// (including nested fields).
rdValue.setResource(
internalGetResource(rdValue.pResData, i),
internalGetResource(&rdValue.getData(), i),
ResourceTracer(fTraceInfo, i));
return TRUE;
}

View file

@ -511,13 +511,12 @@ inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) {
class ResourceDataValue : public ResourceValue {
public:
ResourceDataValue() :
pResData(NULL),
res(static_cast<Resource>(URES_NONE)),
fTraceInfo() {}
virtual ~ResourceDataValue();
void setData(const ResourceData *data) {
pResData = data;
resData = *data;
}
void setResource(Resource r, ResourceTracer&& traceInfo) {
@ -525,6 +524,7 @@ public:
fTraceInfo = traceInfo;
}
const ResourceData &getData() const { return resData; }
virtual UResType getType() const;
virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
@ -541,9 +541,10 @@ public:
UErrorCode &errorCode) const;
virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;
const ResourceData *pResData;
private:
// TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
// then remove this value field again and just store a pResData pointer.
ResourceData resData;
Resource res;
ResourceTracer fTraceInfo;
};

View file

@ -67,6 +67,9 @@ struct UResourceBundle {
char *fVersion;
UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
// TODO(ICU-20769): Try to change the by-value fResData into a pointer,
// with the struct in only one place for each bundle.
// Also replace class ResourceDataValue.resData with a pResData pointer again.
ResourceData fResData;
char fResBuf[RES_BUFSIZE];
int32_t fResPathLen;
@ -281,6 +284,11 @@ ures_getStringByKeyWithFallback(const UResourceBundle *resB,
#ifdef __cplusplus
U_CAPI void U_EXPORT2
ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
UResourceBundle *tempFillIn,
icu::ResourceDataValue &value, UErrorCode &errorCode);
U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
icu::ResourceSink &sink, UErrorCode &errorCode);

View file

@ -27,11 +27,17 @@ void addCStringTest(TestNode** root);
static void TestInvariant(void);
static void TestCompareInvEbcdicAsAscii(void);
static void TestLocaleAtSign(void);
static void TestNoInvariantAtSign(void);
static void TestInvCharToAscii(void);
void addCStringTest(TestNode** root) {
addTest(root, &TestAPI, "tsutil/cstrtest/TestAPI");
addTest(root, &TestInvariant, "tsutil/cstrtest/TestInvariant");
addTest(root, &TestAPI, "tsutil/cstrtest/TestAPI");
addTest(root, &TestInvariant, "tsutil/cstrtest/TestInvariant");
addTest(root, &TestCompareInvEbcdicAsAscii, "tsutil/cstrtest/TestCompareInvEbcdicAsAscii");
addTest(root, &TestLocaleAtSign, "tsutil/cstrtest/TestLocaleAtSign");
addTest(root, &TestNoInvariantAtSign, "tsutil/cstrtest/TestNoInvariantAtSign");
addTest(root, &TestInvCharToAscii, "tsutil/cstrtest/TestInvCharToAscii");
}
static void TestAPI(void)
@ -339,3 +345,53 @@ TestCompareInvEbcdicAsAscii() {
}
}
}
// See U_CHARSET_FAMILY in unicode/platform.h.
// The test characters as ordinary char literals, i.e. in whatever
// charset the compiler uses for narrow strings on this platform.
// Note that the list does not contain the at sign '@'.
static const char *nativeInvChars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789 \"%&'()*+,-./:;<=>?_";
// The same characters in the same order as UTF-16 literals;
// for these characters the code unit values equal the ASCII codes.
// Must be kept parallel to nativeInvChars: the tests index both with the same i.
static const UChar *asciiInvChars =
u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
u"abcdefghijklmnopqrstuvwxyz"
u"0123456789 \"%&'()*+,-./:;<=>?_";
// Checks that uprv_isAtSign() is true for the '@' in a sample locale ID
// and false for every other character of that ID.
static void
TestLocaleAtSign() {
    static const char *invLocale = "de-Latn_DE@PHONEBOOK";
    // Position of the single '@' in invLocale.
    const int32_t atSignIndex = 10;
    int32_t pos = 0;
    char ch;
    while ((ch = invLocale[pos]) != 0) {
        UBool expected = pos == atSignIndex;
        UBool actual = uprv_isAtSign(ch);
        if (actual != expected) {
            log_err("uprv_isAtSign('%c')=%d is wrong\n", ch, (int)actual);
        }
        ++pos;
    }
}
// The at sign is not an invariant character:
// uprv_isAtSign() must return false for every entry of nativeInvChars,
// and also for the terminating NUL, which is checked as the last element.
static void
TestNoInvariantAtSign() {
    int32_t pos = 0;
    char ch;
    do {
        ch = nativeInvChars[pos++];
        if (uprv_isAtSign(ch)) {
            log_err("uprv_isAtSign(invariant '%c')=TRUE is wrong\n", ch);
        }
    } while (ch != 0);
}
// Converts each native invariant character with uprv_invCharToAscii() and
// compares the result with the parallel entry of asciiInvChars.
// The terminating NUL is converted and compared as well.
static void
TestInvCharToAscii() {
    int32_t pos = 0;
    char nativeChar;
    do {
        nativeChar = nativeInvChars[pos];
        uint8_t expectedAscii = asciiInvChars[pos];
        uint8_t converted = uprv_invCharToAscii(nativeChar);
        if (converted != expectedAscii) {
            log_err("uprv_invCharToAscii('%c') did not convert to ASCII 0x%02x\n",
                    nativeChar, (int)expectedAscii);
        }
        ++pos;
    } while (nativeChar != 0);
}

View file

@ -187,7 +187,8 @@ static void U_CALLCONV testTraceData(const void *context, int32_t fnNumber, int3
/* printf(" %s() %s\n", fnName, buf); */
}
static UConverter * psuedo_ucnv_open(const char *name, UErrorCode * err)
#if !ENABLE_TRACING_ORIG_VAL
static UConverter * pseudo_ucnv_open(const char *name, UErrorCode * err)
{
UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
@ -196,13 +197,13 @@ static UConverter * psuedo_ucnv_open(const char *name, UErrorCode * err)
UTRACE_EXIT_PTR_STATUS(NULL, *err);
return NULL;
}
static void psuedo_ucnv_close(UConverter * cnv)
static void pseudo_ucnv_close(UConverter * cnv)
{
UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
UTRACE_DATA1(UTRACE_OPEN_CLOSE, "unload converter %p", cnv);
UTRACE_EXIT_VALUE((int32_t)TRUE);
}
#endif
/*
* TestTraceAPI
@ -282,9 +283,9 @@ static void TestTraceAPI() {
TEST_ASSERT(U_SUCCESS(status));
ucnv_close(cnv);
#else
cnv = psuedo_ucnv_open(NULL, &status);
cnv = pseudo_ucnv_open(NULL, &status);
TEST_ASSERT(U_SUCCESS(status));
psuedo_ucnv_close(cnv);
pseudo_ucnv_close(cnv);
#endif
TEST_ASSERT(gTraceEntryCount > 0);
TEST_ASSERT(gTraceExitCount > 0);

View file

@ -184,7 +184,7 @@ library: common
uinit utypes errorcode
icuplug
platform
localebuilder
localebuilder localematcher
group: pluralmap
# TODO: Move to i18n library, ticket #11926.
@ -631,7 +631,7 @@ group: resourcebundle
# We can probably only disentangle basic locale ID handling from resource bundle code
# by hardcoding all of the locale ID data.
locid.o locmap.o wintz.o
# Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
# Do we need class LocaleBased? https://unicode-org.atlassian.net/browse/ICU-8608
locbased.o
loclikely.o
deps
@ -646,6 +646,31 @@ group: localebuilder
deps
resourcebundle
group: localematcher
localematcher.o
deps
localebuilder localeprioritylist loclikelysubtags locdistance lsr
group: localeprioritylist
localeprioritylist.o
deps
resourcebundle
group: locdistance
locdistance.o
deps
loclikelysubtags
group: loclikelysubtags
loclikelysubtags.o
deps
lsr resourcebundle
group: lsr
lsr.o
deps
platform
group: udata
udata.o ucmndata.o udatamem.o restrace.o
umapfile.o

View file

@ -44,7 +44,8 @@ caltztst.o canittst.o citrtest.o colldata.o convtest.o currcoll.o collationtest.
fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts.o dtfmttst.o \
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
loctest.o localebuildertest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
loctest.o localebuildertest.o localematchertest.o \
miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \

View file

@ -366,6 +366,7 @@
<ClCompile Include="listformattertest.cpp" />
<ClCompile Include="formattedvaluetest.cpp" />
<ClCompile Include="localebuildertest.cpp" />
<ClCompile Include="localematchertest.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="colldata.h" />

View file

@ -547,6 +547,9 @@
<ClCompile Include="localebuildertest.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
<ClCompile Include="localematchertest.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="itrbbi.h">

View file

@ -35,6 +35,7 @@
#include "usettest.h"
extern IntlTest *createBytesTrieTest();
extern IntlTest *createLocaleMatcherTest();
static IntlTest *createLocalPointerTest();
extern IntlTest *createUCharsTrieTest();
static IntlTest *createEnumSetTest();
@ -46,113 +47,40 @@ extern IntlTest *createPluralMapTest();
extern IntlTest *createStaticUnicodeSetsTest();
#endif
#define CASE(id, test) case id: \
name = #test; \
if (exec) { \
logln(#test "---"); logln(); \
test t; \
callTest(t, par); \
} \
break
void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
{
if (exec) logln("TestSuite Utilities: ");
switch (index) {
CASE(0, MultithreadTest);
CASE(1, StringTest);
CASE(2, UnicodeStringTest);
CASE(3, LocaleTest);
CASE(4, CharIterTest);
CASE(5, UObjectTest);
CASE(6, UnicodeTest);
CASE(7, ResourceBundleTest);
CASE(8, NewResourceBundleTest);
CASE(9, PUtilTest);
CASE(10, UVector32Test);
CASE(11, UVectorTest);
CASE(12, UTextTest);
CASE(13, LocaleAliasTest);
CASE(14, UnicodeSetTest);
CASE(15, ErrorCodeTest);
case 16:
name = "LocalPointerTest";
if (exec) {
logln("TestSuite LocalPointerTest---"); logln();
LocalPointer<IntlTest> test(createLocalPointerTest());
callTest(*test, par);
}
break;
case 17:
name = "BytesTrieTest";
if (exec) {
logln("TestSuite BytesTrieTest---"); logln();
LocalPointer<IntlTest> test(createBytesTrieTest());
callTest(*test, par);
}
break;
case 18:
name = "UCharsTrieTest";
if (exec) {
logln("TestSuite UCharsTrieTest---"); logln();
LocalPointer<IntlTest> test(createUCharsTrieTest());
callTest(*test, par);
}
break;
case 19:
name = "EnumSetTest";
if (exec) {
logln("TestSuite EnumSetTest---"); logln();
LocalPointer<IntlTest> test(createEnumSetTest());
callTest(*test, par);
}
break;
case 20:
name = "SimpleFormatterTest";
if (exec) {
logln("TestSuite SimpleFormatterTest---"); logln();
LocalPointer<IntlTest> test(createSimpleFormatterTest());
callTest(*test, par);
}
break;
case 21:
name = "UnifiedCacheTest";
if (exec) {
logln("TestSuite UnifiedCacheTest---"); logln();
LocalPointer<IntlTest> test(createUnifiedCacheTest());
callTest(*test, par);
}
break;
case 22:
name = "QuantityFormatterTest";
if (exec) {
logln("TestSuite QuantityFormatterTest---"); logln();
LocalPointer<IntlTest> test(createQuantityFormatterTest());
callTest(*test, par);
}
break;
case 23:
name = "PluralMapTest";
if (exec) {
logln("TestSuite PluralMapTest---"); logln();
LocalPointer<IntlTest> test(createPluralMapTest());
callTest(*test, par);
}
break;
case 24:
name = "StaticUnicodeSetsTest";
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO_CLASS(MultithreadTest);
TESTCASE_AUTO_CLASS(StringTest);
TESTCASE_AUTO_CLASS(UnicodeStringTest);
TESTCASE_AUTO_CLASS(LocaleTest);
TESTCASE_AUTO_CLASS(CharIterTest);
TESTCASE_AUTO_CLASS(UObjectTest);
TESTCASE_AUTO_CLASS(UnicodeTest);
TESTCASE_AUTO_CLASS(ResourceBundleTest);
TESTCASE_AUTO_CLASS(NewResourceBundleTest);
TESTCASE_AUTO_CLASS(PUtilTest);
TESTCASE_AUTO_CLASS(UVector32Test);
TESTCASE_AUTO_CLASS(UVectorTest);
TESTCASE_AUTO_CLASS(UTextTest);
TESTCASE_AUTO_CLASS(LocaleAliasTest);
TESTCASE_AUTO_CLASS(UnicodeSetTest);
TESTCASE_AUTO_CLASS(ErrorCodeTest);
TESTCASE_AUTO_CREATE_CLASS(LocalPointerTest);
TESTCASE_AUTO_CREATE_CLASS(BytesTrieTest);
TESTCASE_AUTO_CREATE_CLASS(UCharsTrieTest);
TESTCASE_AUTO_CREATE_CLASS(EnumSetTest);
TESTCASE_AUTO_CREATE_CLASS(SimpleFormatterTest);
TESTCASE_AUTO_CREATE_CLASS(UnifiedCacheTest);
TESTCASE_AUTO_CREATE_CLASS(QuantityFormatterTest);
TESTCASE_AUTO_CREATE_CLASS(PluralMapTest);
#if !UCONFIG_NO_FORMATTING
if (exec) {
logln("TestSuite StaticUnicodeSetsTest---"); logln();
LocalPointer<IntlTest> test(createStaticUnicodeSetsTest());
callTest(*test, par);
}
TESTCASE_AUTO_CREATE_CLASS(StaticUnicodeSetsTest);
#endif
break;
CASE(25, LocaleBuilderTest);
default: name = ""; break; //needed to end loop
}
TESTCASE_AUTO_CLASS(LocaleBuilderTest);
TESTCASE_AUTO_CREATE_CLASS(LocaleMatcherTest);
TESTCASE_AUTO_END;
}
void ErrorCodeTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) {

View file

@ -0,0 +1,589 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// localematchertest.cpp
// created: 2019jul04 Markus W. Scherer
#include <string>
#include <vector>
#include "unicode/utypes.h"
#include "unicode/localematcher.h"
#include "unicode/locid.h"
#include "charstr.h"
#include "cmemory.h"
#include "intltest.h"
#include "localeprioritylist.h"
#include "ucbuf.h"
#define ARRAY_RANGE(array) (array), ((array) + UPRV_LENGTHOF(array))
namespace {
// Returns the locale's name for use in assertion messages and comparisons,
// or the placeholder "(null)" when there is no locale.
const char *locString(const Locale *loc) {
    if (loc == nullptr) {
        return "(null)";
    }
    return loc->getName();
}
// State of one data-driven test case, filled in line by line by readTestCase().
struct TestCase {
// Number of lines read so far; incremented once per readTestCase() call.
int32_t lineNr = 0;
// Value of the most recent "@supported=" line.
CharString supported;
// Value of the most recent "@default=" line.
CharString def;
// Value of the most recent "@favor=" line.
UnicodeString favor;
// Value of the most recent "@threshold=" line.
UnicodeString threshold;
// Text before ">>" on a match line: the desired locales.
CharString desired;
// First '|'-separated field after ">>": the expected matching supported locale.
CharString expMatch;
// Optional second field after ">>": the expected desired locale.
CharString expDesired;
// Optional third field after ">>": the expected combined (resolved) locale.
CharString expCombined;
// Clears the "@..." settings; readTestCase() calls this on a "** test: " line.
// The per-line fields (desired, expMatch, expDesired, expCombined) are not touched here.
void reset() {
supported.clear();
def.clear();
favor.remove();
threshold.remove();
}
};
} // namespace
// intltest suite for LocaleMatcher and its Builder and Result classes.
class LocaleMatcherTest : public IntlTest {
public:
LocaleMatcherTest() {}
// Dispatches to the test functions below by index; see the definition.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
// Matcher built without supported locales or a default locale.
void testEmpty();
// Builder::copyErrorTo() with a success code and with a failure code.
void testCopyErrorTo();
// The various ways of setting supported locales, plus Result accessors.
void testBasics();
// Default locale that is one of the supported locales.
void testSupportedDefault();
// Default locale that is not among the supported locales.
void testUnsupportedDefault();
// ULOCMATCH_DEMOTION_NONE versus ULOCMATCH_DEMOTION_REGION.
void testDemotion();
// LocaleMatcher::internalMatch() score comparisons.
void testMatch();
// Result::makeResolvedLocale().
void testResolvedLocale();
// Runs the test cases from localeMatcherTest.txt.
void testDataDriven();
private:
// Runs one parsed test case; returns FALSE if an assertion failed.
UBool dataDriven(const TestCase &test, IcuTestErrorCode &errorCode);
};
// Factory with external linkage so that the test suite can be created
// without exposing the class definition outside this file.
// Returns a heap-allocated object; presumably the caller takes ownership — confirm at the call site.
extern IntlTest *createLocaleMatcherTest() {
return new LocaleMatcherTest();
}
// Test dispatcher: announces the suite when executing and lists the test functions
// via the TESTCASE_AUTO macros.
// NOTE(review): the macros presumably assign consecutive indexes in the order listed,
// so reordering these lines would renumber the tests — confirm in intltest.h.
void LocaleMatcherTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) {
logln("TestSuite LocaleMatcherTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testEmpty);
TESTCASE_AUTO(testCopyErrorTo);
TESTCASE_AUTO(testBasics);
TESTCASE_AUTO(testSupportedDefault);
TESTCASE_AUTO(testUnsupportedDefault);
TESTCASE_AUTO(testDemotion);
TESTCASE_AUTO(testMatch);
TESTCASE_AUTO(testResolvedLocale);
TESTCASE_AUTO(testDataDriven);
TESTCASE_AUTO_END;
}
void LocaleMatcherTest::testEmpty() {
IcuTestErrorCode errorCode(*this, "testEmpty");
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
const Locale *best = matcher.getBestMatch(Locale::getFrench(), errorCode);
assertEquals("getBestMatch(fr)", "(null)", locString(best));
LocaleMatcher::Result result = matcher.getBestMatchResult("fr", errorCode);
assertEquals("getBestMatchResult(fr).des", "(null)", locString(result.getDesiredLocale()));
assertEquals("getBestMatchResult(fr).desIndex", -1, result.getDesiredIndex());
assertEquals("getBestMatchResult(fr).supp",
"(null)", locString(result.getSupportedLocale()));
assertEquals("getBestMatchResult(fr).suppIndex",
-1, result.getSupportedIndex());
}
void LocaleMatcherTest::testCopyErrorTo() {
IcuTestErrorCode errorCode(*this, "testCopyErrorTo");
// The builder does not set any errors except out-of-memory.
// Test what we can.
LocaleMatcher::Builder builder;
UErrorCode success = U_ZERO_ERROR;
assertFalse("no error", builder.copyErrorTo(success));
assertTrue("still success", U_SUCCESS(success));
UErrorCode failure = U_INVALID_FORMAT_ERROR;
assertTrue("failure passed in", builder.copyErrorTo(failure));
assertEquals("same failure", U_INVALID_FORMAT_ERROR, failure);
}
// Builds matchers for the supported locales fr, en_GB, en in every way the Builder offers
// and checks the same four lookups each time:
// en_GB -> en_GB, en_US -> en, fr_FR -> fr, and ja_JP -> fr
// (ja_JP matches nothing; the expected result is the first supported locale unless
// a default locale was set — presumably the implicit default, confirm in LocaleMatcher docs).
// The last section also covers an explicit default locale and the Result accessors.
void LocaleMatcherTest::testBasics() {
IcuTestErrorCode errorCode(*this, "testBasics");
Locale locales[] = { "fr", "en_GB", "en" };
// Supported locales from a pointer range over a C array.
{
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocales(ARRAY_RANGE(locales)).build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("fromRange.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("fromRange.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("fromRange.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("fromRange.getBestMatch(ja_JP)", "fr", locString(best));
}
// Code coverage: Variations of setting supported locales.
// Supported locales from std::vector iterators.
// This local vector shadows the outer locales array inside this scope;
// the assertion labels reuse the "fromRange" prefix of the previous section.
{
std::vector<Locale> locales{ "fr", "en_GB", "en" };
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocales(locales.begin(), locales.end()).build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("fromRange.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("fromRange.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("fromRange.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("fromRange.getBestMatch(ja_JP)", "fr", locString(best));
}
// Supported locales from a Locale::RangeIterator object.
{
Locale::RangeIterator<Locale *> iter(ARRAY_RANGE(locales));
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocales(iter).build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("fromIter.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("fromIter.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("fromIter.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("fromIter.getBestMatch(ja_JP)", "fr", locString(best));
}
// Supported locales from an array of Locale pointers, dereferenced by a converter.
{
Locale *pointers[] = { locales, locales + 1, locales + 2 };
// Lambda with explicit reference return type to prevent copy-constructing a temporary
// which would be destructed right away.
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocalesViaConverter(
ARRAY_RANGE(pointers), [](const Locale *p) -> const Locale & { return *p; }).
build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("viaConverter.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("viaConverter.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("viaConverter.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("viaConverter.getBestMatch(ja_JP)", "fr", locString(best));
}
// Supported locales added one at a time.
{
LocaleMatcher matcher = LocaleMatcher::Builder().
addSupportedLocale(locales[0]).
addSupportedLocale(locales[1]).
addSupportedLocale(locales[2]).
build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("added.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("added.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("added.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("added.getBestMatch(ja_JP)", "fr", locString(best));
}
// Supported locales parsed from a list string with q values, irregular white space
// and repeated entries. The first lookup also passes the desired locales as a list string.
{
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocalesFromListString(
" el, fr;q=0.555555, en-GB ; q = 0.88 , el; q =0, en;q=0.88 , fr ").
build(errorCode);
const Locale *best = matcher.getBestMatchForListString("el, fr, fr;q=0, en-GB", errorCode);
assertEquals("fromList.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("fromList.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("fromList.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("fromList.getBestMatch(ja_JP)", "fr", locString(best));
}
// more API coverage
// Supported locales from a LocalePriorityList iterator plus one added locale,
// with German as an explicit default that is not in the supported list:
// here ja_JP is expected to yield "de" with supported index -1.
{
LocalePriorityList list("fr, en-GB", errorCode);
LocalePriorityList::Iterator iter(list.iterator());
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocales(iter).
addSupportedLocale(Locale::getEnglish()).
setDefaultLocale(&Locale::getGerman()).
build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("withDefault.getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("withDefault.getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("withDefault.getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("withDefault.getBestMatch(ja_JP)", "de", locString(best));
// A named local object, so that the test can check that the Result
// points back at this very object rather than at a copy.
// ("Locale.UK" in the original wording is the constant of the Java test this was ported from.)
Locale desired("en_GB"); // distinct object from Locale.UK
LocaleMatcher::Result result = matcher.getBestMatchResult(desired, errorCode);
assertTrue("withDefault: exactly desired en-GB object",
&desired == result.getDesiredLocale());
assertEquals("withDefault: en-GB desired index", 0, result.getDesiredIndex());
assertEquals("withDefault: en-GB supported",
"en_GB", locString(result.getSupportedLocale()));
assertEquals("withDefault: en-GB supported index", 1, result.getSupportedIndex());
// Several desired locales: the second one (index 1) is expected to be the one that matched.
LocalePriorityList list2("ja-JP, en-US", errorCode);
LocalePriorityList::Iterator iter2(list2.iterator());
result = matcher.getBestMatchResult(iter2, errorCode);
assertEquals("withDefault: ja-JP, en-US desired index", 1, result.getDesiredIndex());
assertEquals("withDefault: ja-JP, en-US desired",
"en_US", locString(result.getDesiredLocale()));
desired = Locale("en", "US"); // distinct object from Locale.US
result = matcher.getBestMatchResult(desired, errorCode);
assertTrue("withDefault: exactly desired en-US object",
&desired == result.getDesiredLocale());
assertEquals("withDefault: en-US desired index", 0, result.getDesiredIndex());
assertEquals("withDefault: en-US supported", "en", locString(result.getSupportedLocale()));
assertEquals("withDefault: en-US supported index", 2, result.getSupportedIndex());
// No match: the expected Result has no desired locale (index -1) and reports
// the default locale "de" with supported index -1.
result = matcher.getBestMatchResult("ja_JP", errorCode);
assertEquals("withDefault: ja-JP desired", "(null)", locString(result.getDesiredLocale()));
assertEquals("withDefault: ja-JP desired index", -1, result.getDesiredIndex());
assertEquals("withDefault: ja-JP supported", "de", locString(result.getSupportedLocale()));
assertEquals("withDefault: ja-JP supported index", -1, result.getSupportedIndex());
}
}
void LocaleMatcherTest::testSupportedDefault() {
// The default locale is one of the supported locales.
IcuTestErrorCode errorCode(*this, "testSupportedDefault");
Locale locales[] = { "fr", "en_GB", "en" };
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocales(ARRAY_RANGE(locales)).
setDefaultLocale(&locales[1]).
build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("getBestMatch(ja_JP)", "en_GB", locString(best));
LocaleMatcher::Result result = matcher.getBestMatchResult("ja_JP", errorCode);
assertEquals("getBestMatchResult(ja_JP).supp",
"en_GB", locString(result.getSupportedLocale()));
assertEquals("getBestMatchResult(ja_JP).suppIndex",
1, result.getSupportedIndex());
}
void LocaleMatcherTest::testUnsupportedDefault() {
// The default locale does not match any of the supported locales.
IcuTestErrorCode errorCode(*this, "testUnsupportedDefault");
Locale locales[] = { "fr", "en_GB", "en" };
Locale def("de");
LocaleMatcher matcher = LocaleMatcher::Builder().
setSupportedLocales(ARRAY_RANGE(locales)).
setDefaultLocale(&def).
build(errorCode);
const Locale *best = matcher.getBestMatch("en_GB", errorCode);
assertEquals("getBestMatch(en_GB)", "en_GB", locString(best));
best = matcher.getBestMatch("en_US", errorCode);
assertEquals("getBestMatch(en_US)", "en", locString(best));
best = matcher.getBestMatch("fr_FR", errorCode);
assertEquals("getBestMatch(fr_FR)", "fr", locString(best));
best = matcher.getBestMatch("ja_JP", errorCode);
assertEquals("getBestMatch(ja_JP)", "de", locString(best));
LocaleMatcher::Result result = matcher.getBestMatchResult("ja_JP", errorCode);
assertEquals("getBestMatchResult(ja_JP).supp",
"de", locString(result.getSupportedLocale()));
assertEquals("getBestMatchResult(ja_JP).suppIndex",
-1, result.getSupportedIndex());
}
void LocaleMatcherTest::testDemotion() {
IcuTestErrorCode errorCode(*this, "testDemotion");
Locale supported[] = { "fr", "de-CH", "it" };
Locale desired[] = { "fr-CH", "de-CH", "it" };
{
LocaleMatcher noDemotion = LocaleMatcher::Builder().
setSupportedLocales(ARRAY_RANGE(supported)).
setDemotionPerDesiredLocale(ULOCMATCH_DEMOTION_NONE).build(errorCode);
Locale::RangeIterator<Locale *> desiredIter(ARRAY_RANGE(desired));
assertEquals("no demotion",
"de_CH", locString(noDemotion.getBestMatch(desiredIter, errorCode)));
}
{
LocaleMatcher regionDemotion = LocaleMatcher::Builder().
setSupportedLocales(ARRAY_RANGE(supported)).
setDemotionPerDesiredLocale(ULOCMATCH_DEMOTION_REGION).build(errorCode);
Locale::RangeIterator<Locale *> desiredIter(ARRAY_RANGE(desired));
assertEquals("region demotion",
"fr", locString(regionDemotion.getBestMatch(desiredIter, errorCode)));
}
}
void LocaleMatcherTest::testMatch() {
IcuTestErrorCode errorCode(*this, "testMatch");
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
// Java test function testMatch_exact()
Locale en_CA("en_CA");
assertEquals("exact match", 1.0, matcher.internalMatch(en_CA, en_CA, errorCode));
// testMatch_none
Locale ar_MK("ar_MK");
double match = matcher.internalMatch(ar_MK, en_CA, errorCode);
assertTrue("mismatch: 0<=match<0.2", 0 <= match && match < 0.2);
// testMatch_matchOnMaximized
Locale und_TW("und_TW");
Locale zh("zh");
Locale zh_Hant("zh_Hant");
double matchZh = matcher.internalMatch(und_TW, zh, errorCode);
double matchZhHant = matcher.internalMatch(und_TW, zh_Hant, errorCode);
assertTrue("und_TW should be closer to zh_Hant than to zh",
matchZh < matchZhHant);
Locale en_Hant_TW("en_Hant_TW");
double matchEnHantTw = matcher.internalMatch(en_Hant_TW, zh_Hant, errorCode);
assertTrue("zh_Hant should be closer to und_TW than to en_Hant_TW",
matchEnHantTw < matchZhHant);
assertTrue("zh should be closer to und_TW than to en_Hant_TW",
matchEnHantTw < matchZh);
}
void LocaleMatcherTest::testResolvedLocale() {
IcuTestErrorCode errorCode(*this, "testResolvedLocale");
LocaleMatcher matcher = LocaleMatcher::Builder().
addSupportedLocale("ar-EG").
build(errorCode);
Locale desired("ar-SA-u-nu-latn");
LocaleMatcher::Result result = matcher.getBestMatchResult(desired, errorCode);
assertEquals("best", "ar_EG", locString(result.getSupportedLocale()));
Locale resolved = result.makeResolvedLocale(errorCode);
assertEquals("ar-EG + ar-SA-u-nu-latn = ar-SA-u-nu-latn",
"ar-SA-u-nu-latn",
resolved.toLanguageTag<std::string>(errorCode).data());
}
namespace {
bool toInvariant(const UnicodeString &s, CharString &inv, ErrorCode &errorCode) {
if (errorCode.isSuccess()) {
inv.clear().appendInvariantChars(s, errorCode);
return errorCode.isSuccess();
}
return false;
}
// If s starts with prefix and the prefix fits within the first limit code units,
// sets suffix to the part of s between the prefix and limit and returns true.
// Otherwise returns false and leaves suffix unchanged.
bool getSuffixAfterPrefix(const UnicodeString &s, int32_t limit,
                          const UnicodeString &prefix, UnicodeString &suffix) {
    const int32_t prefixLength = prefix.length();
    if (prefixLength > limit || !s.startsWith(prefix)) {
        return false;
    }
    suffix.setTo(s, prefixLength, limit - prefixLength);
    return true;
}
bool getInvariantSuffixAfterPrefix(const UnicodeString &s, int32_t limit,
const UnicodeString &prefix, CharString &suffix,
ErrorCode &errorCode) {
UnicodeString u_suffix;
return getSuffixAfterPrefix(s, limit, prefix, u_suffix) &&
toInvariant(u_suffix, suffix, errorCode);
}
// Parses one line of the test data file into test.
//
// Recognized lines:
//   "** test: ..."            resets the @-settings of the test case
//   "@supported=", "@default=", "@favor=", "@threshold="   store a setting
//   "desired >> expMatch [| expDesired [| expCombined]]"     a test to run
// A '#' starts a comment; empty lines and comment-only lines are ignored.
//
// Returns true only for a successfully parsed ">>" line, that is,
// when the caller should run the test case.
// Returns false for all other lines; a line that matches none of the forms
// sets errorCode to U_INVALID_FORMAT_ERROR.
// Always increments test.lineNr unless errorCode is already a failure on entry.
bool readTestCase(const UnicodeString &line, TestCase &test, IcuTestErrorCode &errorCode) {
    if (errorCode.isFailure()) { return false; }
    ++test.lineNr;
    // Start of comment, or end of line, minus trailing spaces.
    int32_t limit = line.indexOf(u'#');
    if (limit < 0) {
        limit = line.length();
        // Remove trailing CR LF.
        char16_t c;
        while (limit > 0 && ((c = line.charAt(limit - 1)) == u'\n' || c == u'\r')) {
            --limit;
        }
    }
    // Remove spaces before comment or at the end of the line.
    char16_t c;
    while (limit > 0 && ((c = line.charAt(limit - 1)) == u' ' || c == u'\t')) {
        --limit;
    }
    if (limit == 0) {  // empty line
        return false;
    }
    if (line.startsWith(u"** test: ")) {
        test.reset();
    } else if (getInvariantSuffixAfterPrefix(line, limit, u"@supported=",
                                             test.supported, errorCode)) {
    } else if (getInvariantSuffixAfterPrefix(line, limit, u"@default=",
                                             test.def, errorCode)) {
    } else if (getSuffixAfterPrefix(line, limit, u"@favor=", test.favor)) {
    } else if (getSuffixAfterPrefix(line, limit, u"@threshold=", test.threshold)) {
    } else {
        int32_t matchSep = line.indexOf(u">>");
        // >> before an inline comment, and followed by more than white space.
        if (0 <= matchSep && (matchSep + 2) < limit) {
            toInvariant(line.tempSubStringBetween(0, matchSep).trim(), test.desired, errorCode);
            test.expDesired.clear();
            test.expCombined.clear();
            int32_t start = matchSep + 2;
            // Look for field separators only before limit, so that a '|'
            // inside a trailing comment is not mistaken for a separator.
            int32_t expLimit = line.indexOf(u'|', start, limit - start);
            if (expLimit < 0) {
                toInvariant(line.tempSubStringBetween(start, limit).trim(),
                            test.expMatch, errorCode);
            } else {
                toInvariant(line.tempSubStringBetween(start, expLimit).trim(),
                            test.expMatch, errorCode);
                start = expLimit + 1;
                expLimit = line.indexOf(u'|', start, limit - start);
                if (expLimit < 0) {
                    toInvariant(line.tempSubStringBetween(start, limit).trim(),
                                test.expDesired, errorCode);
                } else {
                    toInvariant(line.tempSubStringBetween(start, expLimit).trim(),
                                test.expDesired, errorCode);
                    toInvariant(line.tempSubStringBetween(expLimit + 1, limit).trim(),
                                test.expCombined, errorCode);
                }
            }
            return errorCode.isSuccess();
        } else {
            errorCode.set(U_INVALID_FORMAT_ERROR);
        }
    }
    return false;
}
// Interprets s as a locale ID from the test data.
// The literal text "null" stands for "no locale" and yields nullptr.
// Otherwise the parsed locale is stored in the caller-provided locale object
// and a pointer to that object is returned.
Locale *getLocaleOrNull(const CharString &s, Locale &locale) {
    if (s == "null") {
        return nullptr;
    }
    locale = Locale(s.data());
    return &locale;
}
} // namespace
// Runs one test case that readTestCase() parsed from the data file.
// Builds a matcher from the @-settings, then checks the expected results
// through several API entry points.
// Returns TRUE if all assertions passed or if the test case was skipped;
// returns FALSE on an assertion failure, an unsupported setting, or a build error.
UBool LocaleMatcherTest::dataDriven(const TestCase &test, IcuTestErrorCode &errorCode) {
LocaleMatcher::Builder builder;
builder.setSupportedLocalesFromListString(test.supported.toStringPiece());
if (!test.def.isEmpty()) {
// NOTE(review): defaultLocale goes out of scope before build() is called;
// this relies on the builder keeping its own copy — confirm in the Builder docs.
Locale defaultLocale(test.def.data());
builder.setDefaultLocale(&defaultLocale);
}
if (!test.favor.isEmpty()) {
// Map the data file keywords to the API enum; anything else is a test error.
ULocMatchFavorSubtag favor;
if (test.favor == u"normal") {
favor = ULOCMATCH_FAVOR_LANGUAGE;
} else if (test.favor == u"script") {
favor = ULOCMATCH_FAVOR_SCRIPT;
} else {
errln(UnicodeString(u"unsupported FavorSubtag value ") + test.favor);
return FALSE;
}
builder.setFavorSubtag(favor);
}
if (!test.threshold.isEmpty()) {
// Skipped test cases are reported as passing (TRUE).
infoln("skipping test case on line %d with non-default threshold: not exposed via API",
(int)test.lineNr);
return TRUE;
// Unreachable remnant of the Java original, kept for reference:
// int32_t threshold = Integer.valueOf(test.threshold);
// builder.internalSetThresholdDistance(threshold);
}
LocaleMatcher matcher = builder.build(errorCode);
if (errorCode.errIfFailureAndReset("LocaleMatcher::Builder::build()")) {
return FALSE;
}
// expMatch is nullptr when the data file says "null".
Locale expMatchLocale("");
Locale *expMatch = getLocaleOrNull(test.expMatch, expMatchLocale);
if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
// Only the best supported locale is specified:
// check it via the list-string API and via the Locale / iterator APIs.
StringPiece desiredSP = test.desired.toStringPiece();
const Locale *bestSupported = matcher.getBestMatchForListString(desiredSP, errorCode);
if (!assertEquals("bestSupported from string",
locString(expMatch), locString(bestSupported))) {
return FALSE;
}
LocalePriorityList desired(test.desired.toStringPiece(), errorCode);
LocalePriorityList::Iterator desiredIter = desired.iterator();
if (desired.getLength() == 1) {
// A single desired locale can also go through the single-Locale overloads.
const Locale &desiredLocale = desiredIter.next();
bestSupported = matcher.getBestMatch(desiredLocale, errorCode);
UBool ok = assertEquals("bestSupported from Locale",
locString(expMatch), locString(bestSupported));
LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocale, errorCode);
// Non-short-circuit & so that the second assertion always runs and reports.
return ok & assertEquals("result.getSupportedLocale from Locale",
locString(expMatch), locString(result.getSupportedLocale()));
} else {
bestSupported = matcher.getBestMatch(desiredIter, errorCode);
return assertEquals("bestSupported from Locale iterator",
locString(expMatch), locString(bestSupported));
}
} else {
// The expected desired and/or combined locale is specified as well:
// these need the full Result.
LocalePriorityList desired(test.desired.toStringPiece(), errorCode);
LocalePriorityList::Iterator desiredIter = desired.iterator();
LocaleMatcher::Result result = matcher.getBestMatchResult(desiredIter, errorCode);
UBool ok = assertEquals("result.getSupportedLocale from Locales",
locString(expMatch), locString(result.getSupportedLocale()));
if (!test.expDesired.isEmpty()) {
Locale expDesiredLocale("");
Locale *expDesired = getLocaleOrNull(test.expDesired, expDesiredLocale);
ok &= assertEquals("result.getDesiredLocale from Locales",
locString(expDesired), locString(result.getDesiredLocale()));
}
if (!test.expCombined.isEmpty()) {
// Expected matches with a "-u-" extension are skipped under known issue 20727;
// the result of the assertions so far is returned.
if (test.expMatch.contains("-u-")) {
logKnownIssue("20727",
UnicodeString(u"ignoring makeResolvedLocale() line ") + test.lineNr);
return ok;
}
Locale expCombinedLocale("");
Locale *expCombined = getLocaleOrNull(test.expCombined, expCombinedLocale);
Locale combined = result.makeResolvedLocale(errorCode);
ok &= assertEquals("combined Locale from Locales",
locString(expCombined), locString(&combined));
}
return ok;
}
}
// Reads localeMatcherTest.txt from the source test data directory line by line,
// runs every complete test case through dataDriven(),
// and reports syntax errors, test errors and failures with their line numbers.
void LocaleMatcherTest::testDataDriven() {
    IcuTestErrorCode errorCode(*this, "testDataDriven");
    CharString path(getSourceTestData(errorCode), errorCode);
    path.appendPathPart("localeMatcherTest.txt", errorCode);
    const char *codePage = "UTF-8";
    LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, errorCode));
    if(errorCode.errIfFailureAndReset("ucbuf_open(localeMatcherTest.txt)")) {
        return;
    }

    UnicodeString line;
    TestCase test;
    int32_t numPassed = 0;
    for (;;) {
        int32_t lineLength = 0;
        const UChar *p = ucbuf_readline(f.getAlias(), &lineLength, errorCode);
        // Stop at the end of the file or on a read error.
        if (p == nullptr || !errorCode.isSuccess()) {
            break;
        }
        // Read-only alias of the buffer's line; no copy.
        line.setTo(FALSE, p, lineLength);

        if (readTestCase(line, test, errorCode)) {
            // A complete test case: run it.
            UBool ok = dataDriven(test, errorCode);
            if (errorCode.errIfFailureAndReset("test error on line %d", (int)test.lineNr)) {
                infoln(line);
            } else if (!ok) {
                infoln("test failure on line %d", (int)test.lineNr);
                infoln(line);
            } else {
                ++numPassed;
            }
        } else if (errorCode.errIfFailureAndReset(
                       "test data syntax error on line %d", (int)test.lineNr)) {
            // Not a runnable line, and parsing it failed.
            infoln(line);
        }
    }
    infoln("number of passing test cases: %d", (int)numPassed);
}

View file

@ -6,6 +6,7 @@
* others. All Rights Reserved.
********************************************************************/
#include <functional>
#include <iterator>
#include <set>
#include <utility>
@ -266,6 +267,10 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestUndScript);
TESTCASE_AUTO(TestUndRegion);
TESTCASE_AUTO(TestUndCAPI);
TESTCASE_AUTO(TestRangeIterator);
TESTCASE_AUTO(TestPointerConvertingIterator);
TESTCASE_AUTO(TestTagConvertingIterator);
TESTCASE_AUTO(TestCapturingTagConvertingIterator);
TESTCASE_AUTO_END;
}
@ -3832,3 +3837,118 @@ void LocaleTest::TestUndCAPI() {
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
}
#define ARRAY_RANGE(array) (array), ((array) + UPRV_LENGTHOF(array))
void LocaleTest::TestRangeIterator() {
IcuTestErrorCode status(*this, "TestRangeIterator");
Locale locales[] = { "fr", "en_GB", "en" };
Locale::RangeIterator<Locale *> iter(ARRAY_RANGE(locales));
assertTrue("0.hasNext()", iter.hasNext());
const Locale &l0 = iter.next();
assertEquals("0.next()", "fr", l0.getName());
assertTrue("&0.next()", &l0 == &locales[0]);
assertTrue("1.hasNext()", iter.hasNext());
const Locale &l1 = iter.next();
assertEquals("1.next()", "en_GB", l1.getName());
assertTrue("&1.next()", &l1 == &locales[1]);
assertTrue("2.hasNext()", iter.hasNext());
const Locale &l2 = iter.next();
assertEquals("2.next()", "en", l2.getName());
assertTrue("&2.next()", &l2 == &locales[2]);
assertFalse("3.hasNext()", iter.hasNext());
}
void LocaleTest::TestPointerConvertingIterator() {
IcuTestErrorCode status(*this, "TestPointerConvertingIterator");
Locale locales[] = { "fr", "en_GB", "en" };
Locale *pointers[] = { locales, locales + 1, locales + 2 };
// Lambda with explicit reference return type to prevent copy-constructing a temporary
// which would be destructed right away.
Locale::ConvertingIterator<Locale **, std::function<const Locale &(const Locale *)>> iter(
ARRAY_RANGE(pointers), [](const Locale *p) -> const Locale & { return *p; });
assertTrue("0.hasNext()", iter.hasNext());
const Locale &l0 = iter.next();
assertEquals("0.next()", "fr", l0.getName());
assertTrue("&0.next()", &l0 == pointers[0]);
assertTrue("1.hasNext()", iter.hasNext());
const Locale &l1 = iter.next();
assertEquals("1.next()", "en_GB", l1.getName());
assertTrue("&1.next()", &l1 == pointers[1]);
assertTrue("2.hasNext()", iter.hasNext());
const Locale &l2 = iter.next();
assertEquals("2.next()", "en", l2.getName());
assertTrue("&2.next()", &l2 == pointers[2]);
assertFalse("3.hasNext()", iter.hasNext());
}
namespace {
class LocaleFromTag {
public:
LocaleFromTag() : locale(Locale::getRoot()) {}
const Locale &operator()(const char *tag) { return locale = Locale(tag); }
private:
// Store the locale in the converter, rather than return a reference to a temporary,
// or a value which could go out of scope with the caller's reference to it.
Locale locale;
};
} // namespace
void LocaleTest::TestTagConvertingIterator() {
IcuTestErrorCode status(*this, "TestTagConvertingIterator");
const char *tags[] = { "fr", "en_GB", "en" };
LocaleFromTag converter;
Locale::ConvertingIterator<const char **, LocaleFromTag> iter(ARRAY_RANGE(tags), converter);
assertTrue("0.hasNext()", iter.hasNext());
const Locale &l0 = iter.next();
assertEquals("0.next()", "fr", l0.getName());
assertTrue("1.hasNext()", iter.hasNext());
const Locale &l1 = iter.next();
assertEquals("1.next()", "en_GB", l1.getName());
assertTrue("2.hasNext()", iter.hasNext());
const Locale &l2 = iter.next();
assertEquals("2.next()", "en", l2.getName());
assertFalse("3.hasNext()", iter.hasNext());
}
void LocaleTest::TestCapturingTagConvertingIterator() {
IcuTestErrorCode status(*this, "TestCapturingTagConvertingIterator");
const char *tags[] = { "fr", "en_GB", "en" };
// Store the converted locale in a locale variable,
// rather than return a reference to a temporary,
// or a value which could go out of scope with the caller's reference to it.
Locale locale;
// Lambda with explicit reference return type to prevent copy-constructing a temporary
// which would be destructed right away.
Locale::ConvertingIterator<const char **, std::function<const Locale &(const char *)>> iter(
ARRAY_RANGE(tags), [&](const char *tag) -> const Locale & { return locale = Locale(tag); });
assertTrue("0.hasNext()", iter.hasNext());
const Locale &l0 = iter.next();
assertEquals("0.next()", "fr", l0.getName());
assertTrue("1.hasNext()", iter.hasNext());
const Locale &l1 = iter.next();
assertEquals("1.next()", "en_GB", l1.getName());
assertTrue("2.hasNext()", iter.hasNext());
const Locale &l2 = iter.next();
assertEquals("2.next()", "en", l2.getName());
assertFalse("3.hasNext()", iter.hasNext());
}

View file

@ -141,6 +141,10 @@ public:
void TestUndScript();
void TestUndRegion();
void TestUndCAPI();
void TestRangeIterator();
void TestPointerConvertingIterator();
void TestTagConvertingIterator();
void TestCapturingTagConvertingIterator();
private:
void _checklocs(const char* label,

View file

@ -33,6 +33,7 @@
#include "cstr.h"
#include "intltest.h"
#include "strtest.h"
#include "uinvchar.h"
StringTest::~StringTest() {}
@ -147,6 +148,64 @@ StringTest::Test_UNICODE_STRING_SIMPLE() {
}
}
namespace {
// See U_CHARSET_FAMILY in unicode/platform.h.
// Two parallel tables listing the same characters in the same order:
// index i in one table corresponds to index i in the other,
// including the terminating NUL.
// nativeInvChars holds ordinary char literals, so its byte values are whatever
// the compiler's execution character set assigns to these characters.
const char *nativeInvChars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789 \"%&'()*+,-./:;<=>?_";
// asciiInvChars holds char16_t (u"...") literals of the same characters.
// For this character repertoire the code unit values equal the ASCII codes,
// which lets a test derive the expected result independently of the native charset.
const char16_t *asciiInvChars =
u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
u"abcdefghijklmnopqrstuvwxyz"
u"0123456789 \"%&'()*+,-./:;<=>?_";
} // namespace
/**
 * Checks uprv_upperOrdinal() for every character in nativeInvChars,
 * including the terminating NUL.
 *
 * The expected ordinal is derived from the parallel asciiInvChars table:
 * for 'A'..'Z' it is the distance from ASCII 'A' (0..25), and in that case
 * the function must return exactly that value. For every other character
 * the function must return something outside 0..25.
 */
void
StringTest::TestUpperOrdinal() {
    for (int32_t i = 0;; ++i) {
        char ic = nativeInvChars[i];
        uint8_t ac = asciiInvChars[i];
        // ac is always an ASCII code, so subtract the ASCII code of 'A' (u'A' == 0x41).
        // A plain 'A' literal would be the native code, which differs from 0x41
        // on a non-ASCII charset family (e.g. EBCDIC) and would make
        // the expected ordinal wrong there.
        int32_t expected = ac - u'A';
        int32_t actual = uprv_upperOrdinal(ic);
        if (0 <= expected && expected <= 25) {
            // Uppercase letter: the ordinal must match exactly.
            if (actual != expected) {
                errln("uprv_upperOrdinal('%c')=%d != expected %d",
                      ic, (int)actual, (int)expected);
            }
        } else {
            // Not an uppercase letter: any result outside 0..25 is acceptable.
            if (0 <= actual && actual <= 25) {
                errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
                      ic, (int)actual);
            }
        }
        // Stop only after the terminating NUL itself has been tested.
        if (ic == 0) { break; }
    }
}
/**
 * Checks uprv_lowerOrdinal() for every character in nativeInvChars,
 * including the terminating NUL.
 *
 * The expected ordinal is derived from the parallel asciiInvChars table:
 * for 'a'..'z' it is the distance from ASCII 'a' (0..25), and in that case
 * the function must return exactly that value. For every other character
 * the function must return something outside 0..25.
 */
void
StringTest::TestLowerOrdinal() {
    for (int32_t i = 0;; ++i) {
        char ic = nativeInvChars[i];
        uint8_t ac = asciiInvChars[i];
        // ac is always an ASCII code, so subtract the ASCII code of 'a' (u'a' == 0x61).
        // A plain 'a' literal would be the native code, which differs from 0x61
        // on a non-ASCII charset family (e.g. EBCDIC) and would make
        // the expected ordinal wrong there.
        int32_t expected = ac - u'a';
        int32_t actual = uprv_lowerOrdinal(ic);
        if (0 <= expected && expected <= 25) {
            // Lowercase letter: the ordinal must match exactly.
            if (actual != expected) {
                errln("uprv_lowerOrdinal('%c')=%d != expected %d",
                      ic, (int)actual, (int)expected);
            }
        } else {
            // Not a lowercase letter: any result outside 0..25 is acceptable.
            if (0 <= actual && actual <= 25) {
                errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
                      ic, (int)actual);
            }
        }
        // Stop only after the terminating NUL itself has been tested.
        if (ic == 0) { break; }
    }
}
void
StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
#if !U_HIDE_OBSOLETE_UTF_OLD_H
@ -178,6 +237,8 @@ void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, ch
TESTCASE_AUTO(Test_U_STRING);
TESTCASE_AUTO(Test_UNICODE_STRING);
TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
TESTCASE_AUTO(TestUpperOrdinal);
TESTCASE_AUTO(TestLowerOrdinal);
TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
TESTCASE_AUTO(TestSTLCompatibility);
TESTCASE_AUTO(TestStringPiece);

View file

@ -39,6 +39,8 @@ private:
void Test_U_STRING();
void Test_UNICODE_STRING();
void Test_UNICODE_STRING_SIMPLE();
void TestUpperOrdinal();
void TestLowerOrdinal();
void Test_UTF8_COUNT_TRAIL_BYTES();
void TestStringPiece();
void TestStringPieceComparisons();

File diff suppressed because it is too large Load diff

View file

@ -19,10 +19,15 @@ import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.ULocale;
/**
* Off-line-built data for LocaleMatcher.
* Offline-built data for LocaleMatcher.
* Mostly but not only the data for mapping locales to their maximized forms.
*/
public class LocaleDistance {
/**
* Bit flag used on the last character of a subtag in the trie.
* Must be set consistently by the builder and the lookup code.
*/
public static final int END_OF_SUBTAG = 0x80;
/** Distance value bit flag, set by the builder. */
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
@ -148,15 +153,21 @@ public class LocaleDistance {
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
private LocaleDistance(Data data) {
this.trie = new BytesTrie(data.trie, 0);
this.regionToPartitionsIndex = data.regionToPartitionsIndex;
this.partitionArrays = data.partitionArrays;
this.paradigmLSRs = data.paradigmLSRs;
trie = new BytesTrie(data.trie, 0);
regionToPartitionsIndex = data.regionToPartitionsIndex;
partitionArrays = data.partitionArrays;
paradigmLSRs = data.paradigmLSRs;
defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
// For the default demotion value, use the
// default region distance between unrelated Englishes.
// Thus, unless demotion is turned off,
// a mere region difference for one desired locale
// is as good as a perfect match for the next following desired locale.
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
LSR en = new LSR("en", "Latn", "US");
LSR enGB = new LSR("en", "Latn", "GB");
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
@ -188,18 +199,18 @@ public class LocaleDistance {
* (negative if none has a distance below the threshold),
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
int desLangDistance = trieNext(iter, desired.language, false);
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
long desLangState = desLangDistance >= 0 && supportedLSRs.length > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int bestIndex = -1;
for (int slIndex = 0; slIndex < supportedLsrs.length; ++slIndex) {
LSR supported = supportedLsrs[slIndex];
for (int slIndex = 0; slIndex < supportedLSRs.length; ++slIndex) {
LSR supported = supportedLSRs[slIndex];
boolean star = false;
int distance = desLangDistance;
if (distance >= 0) {
@ -227,6 +238,11 @@ public class LocaleDistance {
star = true;
}
assert 0 <= distance && distance <= 100;
// We implement "favor subtag" by reducing the language subtag distance
// (unscientifically reducing it to a quarter of the normal value),
// so that the script distance is relatively more important.
// For example, given a default language distance of 80, we reduce it to 20,
// which is below the default threshold of 50, which is the default script distance.
if (favorSubtag == FavorSubtag.SCRIPT) {
distance >>= 2;
}
@ -312,9 +328,10 @@ public class LocaleDistance {
int desLength = desiredPartitions.length();
int suppLength = supportedPartitions.length();
if (desLength == 1 && suppLength == 1) {
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
// Fastpath for single desired/supported partitions.
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | END_OF_SUBTAG);
if (result.hasNext()) {
result = iter.next(supportedPartitions.charAt(0) | 0x80);
result = iter.next(supportedPartitions.charAt(0) | END_OF_SUBTAG);
if (result.hasValue()) {
return iter.getValue();
}
@ -328,11 +345,11 @@ public class LocaleDistance {
for (int di = 0;;) {
// Look up each desired-partition string only once,
// not for each (desired, supported) pair.
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | END_OF_SUBTAG);
if (result.hasNext()) {
long desState = suppLength > 1 ? iter.getState64() : 0;
for (int si = 0;;) {
result = iter.next(supportedPartitions.charAt(si++) | 0x80);
result = iter.next(supportedPartitions.charAt(si++) | END_OF_SUBTAG);
int d;
if (result.hasValue()) {
d = iter.getValue();
@ -391,7 +408,7 @@ public class LocaleDistance {
}
} else {
// last character of this subtag
BytesTrie.Result result = iter.next(c | 0x80);
BytesTrie.Result result = iter.next(c | END_OF_SUBTAG);
if (wantValue) {
if (result.hasValue()) {
int value = iter.getValue();

View file

@ -180,10 +180,12 @@ public final class XLikelySubtags {
// VisibleForTesting
public LSR makeMaximizedLsrFrom(ULocale locale) {
String name = locale.getName();
String name = locale.getName(); // Faster than .toLanguageTag().
if (name.startsWith("@x=")) {
String tag = locale.toLanguageTag();
assert tag.startsWith("x-");
// Private use language tag x-subtag-subtag...
return new LSR(name, "", "");
return new LSR(tag, "", "");
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant());
@ -238,7 +240,7 @@ public final class XLikelySubtags {
language = getCanonical(languageAliases, language);
// (We have no script mappings.)
region = getCanonical(regionAliases, region);
return INSTANCE.maximize(language, script, region);
return maximize(language, script, region);
}
/**

View file

@ -23,7 +23,7 @@ import com.ibm.icu.impl.locale.XLikelySubtags;
/**
* Immutable class that picks the best match between a user's desired locales and
* and application's supported locales.
* an application's supported locales.
*
* <p>Example:
* <pre>
@ -54,18 +54,22 @@ import com.ibm.icu.impl.locale.XLikelySubtags;
* 3. other supported locales.
* This may change in future versions.
*
* <p>All classes implementing this interface should be immutable. Often a
* product will just need one static instance, built with the languages
* <p>Often a product will just need one matcher instance, built with the languages
* that it supports. However, it may want multiple instances with different
* default languages based on additional information, such as the domain.
*
* <p>This class is not intended for public subclassing.
*
* @author markdavis@google.com
* @stable ICU 4.4
*/
public class LocaleMatcher {
public final class LocaleMatcher {
private static final LSR UND_LSR = new LSR("und","","");
// In ULocale, "und" and "" make the same object.
private static final ULocale UND_ULOCALE = new ULocale("und");
// In Locale, "und" and "" make different objects.
private static final Locale UND_LOCALE = new Locale("und");
private static final Locale EMPTY_LOCALE = new Locale("");
// Activates debugging output to stderr with details of GetBestMatch.
private static final boolean TRACE_MATCHER = false;
@ -253,43 +257,44 @@ public class LocaleMatcher {
* best-matching desired locale, such as the -t- and -u- extensions.
* May replace some fields of the supported locale.
* The result is the locale that should be used for date and number formatting, collation, etc.
* Returns null if getSupportedLocale() returns null.
*
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
*
* @return the service locale, combining the best-matching desired and supported locales.
* @return a locale combining the best-matching desired and supported locales.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public ULocale makeServiceULocale() {
public ULocale makeResolvedULocale() {
ULocale bestDesired = getDesiredULocale();
ULocale serviceLocale = supportedULocale;
if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
// Copy the region from bestDesired, if there is one.
String region = bestDesired.getCountry();
if (!region.isEmpty()) {
b.setRegion(region);
}
// Copy the variants from bestDesired, if there are any.
// Note that this will override any serviceLocale variants.
// For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
String variants = bestDesired.getVariant();
if (!variants.isEmpty()) {
b.setVariant(variants);
}
// Copy the extensions from bestDesired, if there are any.
// Note that this will override any serviceLocale extensions.
// For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
// (replacing calendar).
for (char extensionKey : bestDesired.getExtensionKeys()) {
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
}
serviceLocale = b.build();
if (supportedULocale == null || bestDesired == null ||
supportedULocale.equals(bestDesired)) {
return supportedULocale;
}
return serviceLocale;
ULocale.Builder b = new ULocale.Builder().setLocale(supportedULocale);
// Copy the region from bestDesired, if there is one.
String region = bestDesired.getCountry();
if (!region.isEmpty()) {
b.setRegion(region);
}
// Copy the variants from bestDesired, if there are any.
// Note that this will override any supportedULocale variants.
// For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
String variants = bestDesired.getVariant();
if (!variants.isEmpty()) {
b.setVariant(variants);
}
// Copy the extensions from bestDesired, if there are any.
// Note that this will override any supportedULocale extensions.
// For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
// (replacing calendar).
for (char extensionKey : bestDesired.getExtensionKeys()) {
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
}
return b.build();
}
/**
@ -298,15 +303,17 @@ public class LocaleMatcher {
* May replace some fields of the supported locale.
* The result is the locale that should be used for
* date and number formatting, collation, etc.
* Returns null if getSupportedLocale() returns null.
*
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
*
* @return the service locale, combining the best-matching desired and supported locales.
* @return a locale combining the best-matching desired and supported locales.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public Locale makeServiceLocale() {
return makeServiceULocale().toLocale();
public Locale makeResolvedLocale() {
ULocale resolved = makeResolvedULocale();
return resolved != null ? resolved.toLocale() : null;
}
}
@ -320,8 +327,8 @@ public class LocaleMatcher {
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
private final Map<LSR, Integer> supportedLsrToIndex;
// Array versions of the supportedLsrToIndex keys and values.
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
private final LSR[] supportedLsrs;
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
private final LSR[] supportedLSRs;
private final int[] supportedIndexes;
private final ULocale defaultULocale;
private final Locale defaultLocale;
@ -334,7 +341,7 @@ public class LocaleMatcher {
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public static class Builder {
public static final class Builder {
private List<ULocale> supportedLocales;
private int thresholdDistance = -1;
private Demotion demotion;
@ -394,7 +401,7 @@ public class LocaleMatcher {
* Adds another supported locale.
* Duplicates are allowed, and are not removed.
*
* @param locale the list of locale
* @param locale another locale
* @return this Builder object
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
@ -411,7 +418,7 @@ public class LocaleMatcher {
* Adds another supported locale.
* Duplicates are allowed, and are not removed.
*
* @param locale the list of locale
* @param locale another locale
* @return this Builder object
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
@ -514,7 +521,7 @@ public class LocaleMatcher {
@Override
public String toString() {
StringBuilder s = new StringBuilder().append("{LocaleMatcher.Builder");
if (!supportedLocales.isEmpty()) {
if (supportedLocales != null && !supportedLocales.isEmpty()) {
s.append(" supported={").append(supportedLocales.toString()).append('}');
}
if (defaultLocale != null) {
@ -572,50 +579,62 @@ public class LocaleMatcher {
private LocaleMatcher(Builder builder) {
thresholdDistance = builder.thresholdDistance < 0 ?
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
int supportedLocalesLength = builder.supportedLocales != null ?
builder.supportedLocales.size() : 0;
ULocale udef = builder.defaultLocale;
Locale def = null;
int idef = -1;
// Store the supported locales in input order,
// so that when different types are used (e.g., java.util.Locale)
// we can return those by parallel index.
int supportedLocalesLength = builder.supportedLocales.size();
supportedULocales = new ULocale[supportedLocalesLength];
supportedLocales = new Locale[supportedLocalesLength];
// Supported LSRs in input order.
LSR lsrs[] = new LSR[supportedLocalesLength];
// Also find the first supported locale whose LSR is
// the same as that for the default locale.
ULocale udef = builder.defaultLocale;
Locale def = null;
LSR defLSR = null;
int idef = -1;
if (udef != null) {
def = udef.toLocale();
defLSR = getMaximalLsrOrUnd(udef);
}
int i = 0;
for (ULocale locale : builder.supportedLocales) {
supportedULocales[i] = locale;
supportedLocales[i] = locale.toLocale();
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
idef = i;
if (supportedLocalesLength > 0) {
for (ULocale locale : builder.supportedLocales) {
supportedULocales[i] = locale;
supportedLocales[i] = locale.toLocale();
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
idef = i;
}
++i;
}
++i;
}
// We need an unordered map from LSR to first supported locale with that LSR,
// and an ordered list of (LSR, Indexes).
// and an ordered list of (LSR, supported index).
// We use a LinkedHashMap for both,
// and insert the supported locales in the following order:
// 1. Default locale, if it is supported.
// 2. Priority locales in builder order.
// 2. Priority locales (aka "paradigm locales") in builder order.
// 3. Remaining locales in builder order.
supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
// Note: We could work with a single LinkedHashMap by storing ~i (the binary-not index)
// for the default and paradigm locales, counting the number of those locales,
// and keeping two indexes to fill the LSR and index arrays with
// priority vs. normal locales. In that loop we would need to entry.setValue(~i)
// to restore non-negative indexes in the map.
// Probably saves little but less readable.
Map<LSR, Integer> otherLsrToIndex = null;
if (idef >= 0) {
supportedLsrToIndex.put(defLSR, idef);
}
i = 0;
for (ULocale locale : supportedULocales) {
if (i == idef) { continue; }
if (i == idef) {
++i;
continue;
}
LSR lsr = lsrs[i];
if (defLSR == null) {
assert i == 0;
@ -624,7 +643,15 @@ public class LocaleMatcher {
defLSR = lsr;
idef = 0;
supportedLsrToIndex.put(lsr, 0);
} else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
} else if (idef >= 0 && lsr.equals(defLSR)) {
// lsr.equals(defLSR) means that this supported locale is
// a duplicate of the default locale.
// Either an explicit default locale is supported, and we added it before the loop,
// or there is no explicit default locale, and this is
// a duplicate of the first supported locale.
// In both cases, idef >= 0 now, so otherwise we can skip the comparison.
// For a duplicate, putIfAbsent() is a no-op, so nothing to do.
} else if (LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
putIfAbsent(supportedLsrToIndex, lsr, i);
} else {
if (otherLsrToIndex == null) {
@ -637,12 +664,12 @@ public class LocaleMatcher {
if (otherLsrToIndex != null) {
supportedLsrToIndex.putAll(otherLsrToIndex);
}
int numSuppLsrs = supportedLsrToIndex.size();
supportedLsrs = new LSR[numSuppLsrs];
supportedIndexes = new int[numSuppLsrs];
int supportedLSRsLength = supportedLsrToIndex.size();
supportedLSRs = new LSR[supportedLSRsLength];
supportedIndexes = new int[supportedLSRsLength];
i = 0;
for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()]
supportedLSRs[i] = entry.getKey(); // = lsrs[entry.getValue()]
supportedIndexes[i++] = entry.getValue();
}
@ -671,7 +698,7 @@ public class LocaleMatcher {
}
private static final LSR getMaximalLsrOrUnd(Locale locale) {
if (locale.equals(UND_LOCALE)) {
if (locale.equals(UND_LOCALE) || locale.equals(EMPTY_LOCALE)) {
return UND_LSR;
} else {
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
@ -766,7 +793,7 @@ public class LocaleMatcher {
* Parses the string like {@link LocalePriorityList} does and
* returns the supported locale which best matches one of the desired locales.
*
* @param desiredLocaleList Typically a user's languages, in order of preference (descending),
* @param desiredLocaleList Typically a user's languages,
* as a string which is to be parsed like LocalePriorityList does.
* @return the best-matching supported locale.
* @stable ICU 4.4
@ -808,9 +835,13 @@ public class LocaleMatcher {
return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
}
private Result defaultResult() {
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
}
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
if (suppIndex < 0) {
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
return defaultResult();
} else if (desiredLocale != null) {
return new Result(desiredLocale, supportedULocales[suppIndex],
null, supportedLocales[suppIndex], 0, suppIndex);
@ -822,7 +853,7 @@ public class LocaleMatcher {
private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
if (suppIndex < 0) {
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
return defaultResult();
} else if (desiredLocale != null) {
return new Result(null, supportedULocales[suppIndex],
desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
@ -858,7 +889,7 @@ public class LocaleMatcher {
public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
Iterator<ULocale> desiredIter = desiredLocales.iterator();
if (!desiredIter.hasNext()) {
return makeResult(UND_ULOCALE, null, -1);
return defaultResult();
}
ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
LSR desiredLSR = lsrIter.next();
@ -891,7 +922,7 @@ public class LocaleMatcher {
public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
Iterator<Locale> desiredIter = desiredLocales.iterator();
if (!desiredIter.hasNext()) {
return makeResult(UND_LOCALE, null, -1);
return defaultResult();
}
LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
LSR desiredLSR = lsrIter.next();
@ -920,7 +951,7 @@ public class LocaleMatcher {
return suppIndex;
}
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
desiredLSR, supportedLsrs, bestDistance, favorSubtag);
desiredLSR, supportedLSRs, bestDistance, favorSubtag);
if (bestIndexAndDistance >= 0) {
bestDistance = bestIndexAndDistance & 0xff;
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
@ -933,6 +964,7 @@ public class LocaleMatcher {
break;
}
desiredLSR = remainingIter.next();
++desiredIndex;
}
if (bestSupportedLsrIndex < 0) {
if (TRACE_MATCHER) {
@ -969,8 +1001,8 @@ public class LocaleMatcher {
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
getMaximalLsrOrUnd(desired),
new LSR[] { getMaximalLsrOrUnd(supported) },
thresholdDistance, favorSubtag) & 0xff;
return (100 - distance) / 100.0;
}

View file

@ -13,7 +13,8 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@ -24,10 +25,10 @@ import java.util.regex.Pattern;
/**
* Provides an immutable list of languages/locales in priority order.
* The string format is based on the Accept-Language format
* <a href="http://www.ietf.org/rfc/rfc2616.txt">http://www.ietf.org/rfc/rfc2616.txt</a>, such as
* (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), such as
* "af, en, fr;q=0.9". Syntactically it is slightly
* more lenient, in allowing extra whitespace between elements, extra commas,
* and more than 3 decimals (on input), and pins between 0 and 1.
* and more than 3 decimals (on input). The qvalues must be between 0 and 1.
*
* <p>In theory, Accept-Language indicates the relative 'quality' of each item,
* but in practice, all of the browsers just take an ordered list, like
@ -70,7 +71,6 @@ import java.util.regex.Pattern;
* @stable ICU 4.4
*/
public class LocalePriorityList implements Iterable<ULocale> {
private static final double D0 = 0.0d;
private static final Double D1 = 1.0d;
private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*");
@ -91,6 +91,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
/**
* Creates a Builder and adds a locale with a specified weight.
* A zero or negative weight leads to removing the locale.
* A weight greater than 1 is pinned to 1.
*
* @param locale locale/language to be added
* @param weight value from 0.0 to 1.0
@ -109,7 +111,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
* @stable ICU 4.4
*/
public static Builder add(LocalePriorityList list) {
return new Builder().add(list);
return new Builder(list);
}
/**
@ -154,13 +156,14 @@ public class LocalePriorityList implements Iterable<ULocale> {
@Override
public String toString() {
final StringBuilder result = new StringBuilder();
for (final ULocale language : languagesAndWeights.keySet()) {
for (Entry<ULocale, Double> entry : languagesAndWeights.entrySet()) {
ULocale language = entry.getKey();
double weight = entry.getValue();
if (result.length() != 0) {
result.append(", ");
}
result.append(language);
double weight = languagesAndWeights.get(language);
if (weight != D1) {
if (weight != 1.0) {
result.append(";q=").append(weight);
}
}
@ -221,13 +224,31 @@ public class LocalePriorityList implements Iterable<ULocale> {
* These store the input languages and weights, in chronological order,
* where later additions override previous ones.
*/
private final Map<ULocale, Double> languageToWeight
= new LinkedHashMap<>();
private Map<ULocale, Double> languageToWeight;
/**
* The builder is reusable but rarely reused. Avoid cloning the map when not needed.
* Exactly one of languageToWeight and built is null.
*/
private LocalePriorityList built;
private boolean hasWeights = false; // other than 1.0
/*
/**
* Private constructor, only used by LocalePriorityList
*/
private Builder() {
languageToWeight = new LinkedHashMap<>();
}
private Builder(LocalePriorityList list) {
built = list;
for (Double value : list.languagesAndWeights.values()) {
double weight = value;
assert 0.0 < weight && weight <= 1.0;
if (weight != 1.0) {
hasWeights = true;
break;
}
}
}
/**
@ -249,27 +270,48 @@ public class LocalePriorityList implements Iterable<ULocale> {
* @stable ICU 4.4
*/
public LocalePriorityList build(boolean preserveWeights) {
// Walk through the input list, collecting the items with the same weights.
final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<>(
myDescendingDouble);
for (final ULocale lang : languageToWeight.keySet()) {
Double weight = languageToWeight.get(lang);
Set<ULocale> s = doubleCheck.get(weight);
if (s == null) {
doubleCheck.put(weight, s = new LinkedHashSet<>());
}
s.add(lang);
if (built != null) {
// Calling build() again without changing anything in between.
// Just return the same immutable list.
return built;
}
// We now have a bunch of items sorted by weight, then chronologically.
// We can now create a list in the right order
final Map<ULocale, Double> temp = new LinkedHashMap<>();
for (Entry<Double, Set<ULocale>> langEntry : doubleCheck.entrySet()) {
final Double weight = langEntry.getKey();
for (final ULocale lang : langEntry.getValue()) {
temp.put(lang, preserveWeights ? weight : D1);
Map<ULocale, Double> temp;
if (hasWeights) {
// Walk through the input list, collecting the items with the same weights.
final TreeMap<Double, List<ULocale>> weightToLanguages =
new TreeMap<>(myDescendingDouble);
for (Entry<ULocale, Double> entry : languageToWeight.entrySet()) {
ULocale lang = entry.getKey();
Double weight = entry.getValue();
List<ULocale> s = weightToLanguages.get(weight);
if (s == null) {
weightToLanguages.put(weight, s = new LinkedList<>());
}
s.add(lang);
}
// We now have a bunch of items sorted by weight, then chronologically.
// We can now create a list in the right order.
if (weightToLanguages.size() <= 1) {
// There is at most one weight.
temp = languageToWeight;
if (weightToLanguages.isEmpty() || weightToLanguages.firstKey() == 1.0) {
hasWeights = false;
}
} else {
temp = new LinkedHashMap<>();
for (Entry<Double, List<ULocale>> langEntry : weightToLanguages.entrySet()) {
final Double weight = preserveWeights ? langEntry.getKey() : D1;
for (final ULocale lang : langEntry.getValue()) {
temp.put(lang, weight);
}
}
}
} else {
// Nothing to sort.
temp = languageToWeight;
}
return new LocalePriorityList(Collections.unmodifiableMap(temp));
languageToWeight = null;
return built = new LocalePriorityList(Collections.unmodifiableMap(temp));
}
/**
@ -280,9 +322,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
* @stable ICU 4.4
*/
public Builder add(final LocalePriorityList list) {
for (final ULocale language : list.languagesAndWeights
.keySet()) {
add(language, list.languagesAndWeights.get(language));
for (Entry<ULocale, Double> entry : list.languagesAndWeights.entrySet()) {
add(entry.getKey(), entry.getValue());
}
return this;
}
@ -295,7 +336,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
* @stable ICU 4.4
*/
public Builder add(final ULocale locale) {
return add(locale, D1);
return add(locale, 1.0);
}
/**
@ -307,7 +348,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
*/
public Builder add(ULocale... locales) {
for (final ULocale languageCode : locales) {
add(languageCode, D1);
add(languageCode, 1.0);
}
return this;
}
@ -315,7 +356,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
/**
* Adds a locale with a specified weight.
* Overrides any previous weight for the locale.
* Removes a locale if the weight is zero.
* A zero or negative weight leads to removing the locale.
* A weight greater than 1 is pinned to 1.
*
* @param locale language/locale to add
* @param weight value between 0.0 and 1.0
@ -323,15 +365,24 @@ public class LocalePriorityList implements Iterable<ULocale> {
* @stable ICU 4.4
*/
public Builder add(final ULocale locale, double weight) {
if (languageToWeight == null) {
// Builder reuse after build().
languageToWeight = new LinkedHashMap<>(built.languagesAndWeights);
built = null;
}
if (languageToWeight.containsKey(locale)) {
languageToWeight.remove(locale);
}
if (weight <= D0) {
Double value;
if (weight <= 0.0) {
return this; // skip zeros
} else if (weight > D1) {
weight = D1;
} else if (weight >= 1.0) {
value = D1;
} else {
value = weight;
hasWeights = true;
}
languageToWeight.put(locale, weight);
languageToWeight.put(locale, value);
return this;
}
@ -349,9 +400,9 @@ public class LocalePriorityList implements Iterable<ULocale> {
if (itemMatcher.reset(item).matches()) {
final ULocale language = new ULocale(itemMatcher.group(1));
final double weight = Double.parseDouble(itemMatcher.group(2));
if (!(weight >= D0 && weight <= D1)) { // do ! for NaN
throw new IllegalArgumentException("Illegal weight, must be 0..1: "
+ weight);
if (!(0.0 <= weight && weight <= 1.0)) { // do ! for NaN
throw new IllegalArgumentException(
"Illegal weight, must be 0..1: " + weight);
}
add(language, weight);
} else if (item.length() != 0) {

View file

@ -11,8 +11,11 @@ package com.ibm.icu.dev.test.util;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@ -111,14 +114,111 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testBasics() {
final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
.add(ULocale.ENGLISH).build());
LocaleMatcher matcher = newLocaleMatcher(
LocalePriorityList.
add(ULocale.FRENCH).add(ULocale.UK).add(ULocale.ENGLISH).
build());
logln(matcher.toString());
assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.JAPAN));
// API coverage
List<Locale> locales = new ArrayList<>();
locales.add(Locale.FRENCH);
locales.add(Locale.UK);
matcher = LocaleMatcher.builder().
setSupportedLocales(locales).addSupportedLocale(Locale.ENGLISH).
setDefaultLocale(Locale.GERMAN).build();
assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
assertEquals(ULocale.GERMAN, matcher.getBestMatch(ULocale.JAPAN));
ULocale udesired = new ULocale("en_GB"); // distinct object from ULocale.UK
LocaleMatcher.Result result = matcher.getBestMatchResult(udesired);
assertTrue("exactly desired en-GB object", udesired == result.getDesiredULocale());
assertEquals(Locale.UK, result.getDesiredLocale());
assertEquals(0, result.getDesiredIndex());
assertEquals(ULocale.UK, result.getSupportedULocale());
assertEquals(Locale.UK, result.getSupportedLocale());
assertEquals(1, result.getSupportedIndex());
LocalePriorityList list = LocalePriorityList.add(ULocale.JAPAN, ULocale.US).build();
result = matcher.getBestMatchResult(list);
assertEquals(1, result.getDesiredIndex());
assertEquals(Locale.US, result.getDesiredLocale());
Locale desired = new Locale("en", "US"); // distinct object from Locale.US
result = matcher.getBestLocaleResult(desired);
assertEquals(ULocale.US, result.getDesiredULocale());
assertTrue("exactly desired en-US object", desired == result.getDesiredLocale());
assertEquals(0, result.getDesiredIndex());
assertEquals(ULocale.ENGLISH, result.getSupportedULocale());
assertEquals(Locale.ENGLISH, result.getSupportedLocale());
assertEquals(2, result.getSupportedIndex());
result = matcher.getBestMatchResult(ULocale.JAPAN);
assertNull(result.getDesiredLocale());
assertNull(result.getDesiredULocale());
assertEquals(-1, result.getDesiredIndex());
assertEquals(ULocale.GERMAN, result.getSupportedULocale());
assertEquals(Locale.GERMAN, result.getSupportedLocale());
assertEquals(-1, result.getSupportedIndex());
}
/** Returns the name of the locale, or the placeholder "(null)" when there is no locale. */
private static final String locString(ULocale loc) {
    if (loc == null) {
        return "(null)";
    }
    return loc.getName();
}
@Test
public void testSupportedDefault() {
    // Here the default locale (en_GB) is itself a member of the supported list, at index 1.
    ULocale britishEnglish = new ULocale("en_GB");
    List<ULocale> supported = Arrays.asList(
            new ULocale("fr"), britishEnglish, new ULocale("en"));
    LocaleMatcher matcher = LocaleMatcher.builder()
            .setSupportedULocales(supported)
            .setDefaultULocale(britishEnglish)
            .build();

    assertEquals("getBestMatch(en_GB)", "en_GB", locString(matcher.getBestMatch("en_GB")));
    assertEquals("getBestMatch(en_US)", "en", locString(matcher.getBestMatch("en_US")));
    assertEquals("getBestMatch(fr_FR)", "fr", locString(matcher.getBestMatch("fr_FR")));
    // Japanese is expected to fall back to the default locale.
    assertEquals("getBestMatch(ja_JP)", "en_GB", locString(matcher.getBestMatch("ja_JP")));

    // The Result for the fallback should report the default together with its supported index.
    LocaleMatcher.Result fallback = matcher.getBestMatchResult(new ULocale("ja_JP"));
    assertEquals("getBestMatchResult(ja_JP).supp",
            "en_GB", locString(fallback.getSupportedULocale()));
    assertEquals("getBestMatchResult(ja_JP).suppIndex",
            1, fallback.getSupportedIndex());
}
@Test
public void testUnsupportedDefault() {
    // Here the default locale (de) is not among the supported locales.
    List<ULocale> supported = Arrays.asList(
            new ULocale("fr"), new ULocale("en_GB"), new ULocale("en"));
    LocaleMatcher matcher = LocaleMatcher.builder()
            .setSupportedULocales(supported)
            .setDefaultULocale(new ULocale("de"))
            .build();

    assertEquals("getBestMatch(en_GB)", "en_GB", locString(matcher.getBestMatch("en_GB")));
    assertEquals("getBestMatch(en_US)", "en", locString(matcher.getBestMatch("en_US")));
    assertEquals("getBestMatch(fr_FR)", "fr", locString(matcher.getBestMatch("fr_FR")));
    // Japanese is expected to fall back to the default locale.
    assertEquals("getBestMatch(ja_JP)", "de", locString(matcher.getBestMatch("ja_JP")));

    // The Result for the fallback should report the default with a supported index of -1.
    LocaleMatcher.Result fallback = matcher.getBestMatchResult(new ULocale("ja_JP"));
    assertEquals("getBestMatchResult(ja_JP).supp",
            "de", locString(fallback.getSupportedULocale()));
    assertEquals("getBestMatchResult(ja_JP).suppIndex",
            -1, fallback.getSupportedIndex());
}
@Test
@ -178,8 +278,15 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testEmpty() {
final LocaleMatcher matcher = newLocaleMatcher("");
final LocaleMatcher matcher = LocaleMatcher.builder().build();
assertNull(matcher.getBestMatch(ULocale.FRENCH));
LocaleMatcher.Result result = matcher.getBestMatchResult(ULocale.FRENCH);
assertNull(result.getDesiredULocale());
assertNull(result.getDesiredLocale());
assertEquals(-1, result.getDesiredIndex());
assertNull(result.getSupportedULocale());
assertNull(result.getSupportedLocale());
assertEquals(-1, result.getSupportedIndex());
}
static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
@ -197,12 +304,12 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testMatch_none() {
double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
assertTrue("Actual < 0: " + match, 0 <= match);
assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
assertTrue("Actual >= 0: " + match, 0 <= match);
assertTrue("Actual < 0.2 (~ language + script distance): " + match, 0.2 > match);
}
@Test
public void testMatch_matchOnMazimized() {
public void testMatch_matchOnMaximized() {
ULocale undTw = new ULocale("und_TW");
ULocale zhHant = new ULocale("zh_Hant");
double matchZh = match(undTw, new ULocale("zh"));
@ -219,6 +326,20 @@ public class LocaleMatcherTest extends TestFmwk {
matchEnHantTw < matchZh);
}
@Test
public void testResolvedLocale() {
    // Only ar-EG is supported; the desired locale has a different region plus a -u- extension.
    LocaleMatcher matcher = LocaleMatcher.builder()
            .addSupportedULocale(new ULocale("ar-EG"))
            .build();
    LocaleMatcher.Result result =
            matcher.getBestMatchResult(new ULocale("ar-SA-u-nu-latn"));

    String bestSupported = result.getSupportedLocale().toString();
    assertEquals("best", "ar_EG", bestSupported);

    // The resolved locale is expected to carry the desired region and extension.
    String resolvedTag = result.makeResolvedULocale().toLanguageTag();
    assertEquals("ar-EG + ar-SA-u-nu-latn = ar-SA-u-nu-latn",
            "ar-SA-u-nu-latn",
            resolvedTag);
}
@Test
public void testMatchGrandfatheredCode() {
final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
@ -517,6 +638,14 @@ public class LocaleMatcherTest extends TestFmwk {
assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
}
@Test
public void testCanonicalize() {
    // A matcher built without any supported locales is used just for canonicalize().
    LocaleMatcher matcher = LocaleMatcher.builder().build();

    ULocale expectedBho = new ULocale("bho");
    assertEquals("bh --> bho", expectedBho, matcher.canonicalize(new ULocale("bh")));

    ULocale expectedRoCz = new ULocale("ro_CZ");
    assertEquals("mo-200 --> ro-CZ", expectedRoCz,
            matcher.canonicalize(new ULocale("mo_200")));
}
private static final class PerfCase {
ULocale desired;
ULocale expectedShort;
@ -850,6 +979,18 @@ public class LocaleMatcherTest extends TestFmwk {
}
}
/** Converts a ULocale to a java.util.Locale; a null input yields null. */
private static Locale toLocale(ULocale ulocale) {
    if (ulocale == null) {
        return null;
    }
    return ulocale.toLocale();
}
/**
 * Builds a list with the java.util.Locale equivalent of each given ULocale,
 * in the iteration order of the input collection.
 */
private static Iterable<Locale> localesFromULocales(Collection<ULocale> ulocales) {
    final int count = ulocales.size();
    final List<Locale> converted = new ArrayList<>(count);
    for (final ULocale source : ulocales) {
        final Locale target = source.toLocale();
        converted.add(target);
    }
    return converted;
}
@Test
@Parameters(method = "readTestCases")
public void dataDriven(TestCase test) {
@ -886,19 +1027,73 @@ public class LocaleMatcherTest extends TestFmwk {
ULocale expMatch = getULocaleOrNull(test.expMatch);
if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
ULocale bestSupported = matcher.getBestMatch(test.desired);
assertEquals("bestSupported", expMatch, bestSupported);
assertEquals("bestSupported ULocale from string", expMatch, bestSupported);
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
if (desired.getULocales().size() == 1) {
ULocale desiredULocale = desired.iterator().next();
bestSupported = matcher.getBestMatch(desiredULocale);
assertEquals("bestSupported ULocale from ULocale", expMatch, bestSupported);
Locale desiredLocale = desiredULocale.toLocale();
Locale bestSupportedLocale = matcher.getBestLocale(desiredLocale);
assertEquals("bestSupported Locale from Locale",
toLocale(expMatch), bestSupportedLocale);
LocaleMatcher.Result result = matcher.getBestMatchResult(desiredULocale);
assertEquals("result.getSupportedULocale from ULocale",
expMatch, result.getSupportedULocale());
assertEquals("result.getSupportedLocale from ULocale",
toLocale(expMatch), result.getSupportedLocale());
result = matcher.getBestLocaleResult(desiredLocale);
assertEquals("result.getSupportedULocale from Locale",
expMatch, result.getSupportedULocale());
assertEquals("result.getSupportedLocale from Locale",
toLocale(expMatch), result.getSupportedLocale());
} else {
bestSupported = matcher.getBestMatch(desired);
assertEquals("bestSupported ULocale from ULocale iterator",
expMatch, bestSupported);
Locale bestSupportedLocale = matcher.getBestLocale(
localesFromULocales(desired.getULocales()));
assertEquals("bestSupported Locale from Locale iterator",
toLocale(expMatch), bestSupportedLocale);
}
} else {
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
assertEquals("bestSupported", expMatch, result.getSupportedULocale());
assertEquals("result.getSupportedULocale from ULocales",
expMatch, result.getSupportedULocale());
assertEquals("result.getSupportedLocale from ULocales",
toLocale(expMatch), result.getSupportedLocale());
if (!test.expDesired.isEmpty()) {
ULocale expDesired = getULocaleOrNull(test.expDesired);
assertEquals("bestDesired", expDesired, result.getDesiredULocale());
assertEquals("result.getDesiredULocale from ULocales",
expDesired, result.getDesiredULocale());
assertEquals("result.getDesiredLocale from ULocales",
toLocale(expDesired), result.getDesiredLocale());
}
if (!test.expCombined.isEmpty()) {
ULocale expCombined = getULocaleOrNull(test.expCombined);
ULocale combined = result.makeServiceULocale();
assertEquals("combined", expCombined, combined);
assertEquals("combined ULocale from ULocales", expCombined, result.makeResolvedULocale());
assertEquals("combined Locale from ULocales", toLocale(expCombined), result.makeResolvedLocale());
}
result = matcher.getBestLocaleResult(localesFromULocales(desired.getULocales()));
assertEquals("result.getSupportedULocale from Locales",
expMatch, result.getSupportedULocale());
assertEquals("result.getSupportedLocale from Locales",
toLocale(expMatch), result.getSupportedLocale());
if (!test.expDesired.isEmpty()) {
ULocale expDesired = getULocaleOrNull(test.expDesired);
assertEquals("result.getDesiredULocale from Locales",
expDesired, result.getDesiredULocale());
assertEquals("result.getDesiredLocale from Locales",
toLocale(expDesired), result.getDesiredLocale());
}
if (!test.expCombined.isEmpty()) {
ULocale expCombined = getULocaleOrNull(test.expCombined);
assertEquals("combined ULocale from Locales", expCombined, result.makeResolvedULocale());
assertEquals("combined Locale from Locales", toLocale(expCombined), result.makeResolvedLocale());
}
}
}

View file

@ -9,6 +9,8 @@
package com.ibm.icu.dev.test.util;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@ -23,37 +25,106 @@ import com.ibm.icu.util.ULocale;
*/
@RunWith(JUnit4.class)
public class LocalePriorityListTest extends TestFmwk {
@Test
public void testLanguagePriorityList() {
final String expected = "af, en, fr";
@Test
public void testLanguagePriorityList() {
final String expected = "af, en, fr";
LocalePriorityList list = LocalePriorityList.add("af, en, fr;q=0.9").build();
assertEquals(expected, list.toString());
LocalePriorityList list = LocalePriorityList.add("af, en, fr;q=0.9").build();
assertEquals(expected, list.toString());
// check looseness, and that later values win
LocalePriorityList list2 = LocalePriorityList.add(
", fr ; q = 0.9 , en;q=0.1 , af, en, de;q=0, ").build();
assertEquals(expected, list2.toString());
assertEquals(list, list2);
// check looseness, and that later values win
LocalePriorityList list2 = LocalePriorityList.add(
", fr ; q = 0.9 , en;q=0.1 , af, en, de;q=0, ").build();
assertEquals(expected, list2.toString());
assertEquals(list, list2);
LocalePriorityList list3 = LocalePriorityList
.add(new ULocale("af"))
.add(ULocale.FRENCH, 0.9d)
.add(ULocale.ENGLISH)
.build();
assertEquals(expected, list3.toString());
assertEquals(list, list3);
LocalePriorityList list3 = LocalePriorityList
.add(new ULocale("af"))
.add(ULocale.FRENCH, 0.9d)
.add(ULocale.ENGLISH)
.build();
assertEquals(expected, list3.toString());
assertEquals(list, list3);
LocalePriorityList list4 = LocalePriorityList
.add(list).build();
assertEquals(expected, list4.toString());
assertEquals(list, list4);
LocalePriorityList list4 = LocalePriorityList.add(list).build();
assertEquals(expected, list4.toString());
assertEquals(list, list4);
LocalePriorityList list5 = LocalePriorityList.add("af, fr;q=0.9, en").build(true);
assertEquals("af, en, fr;q=0.9", list5.toString());
}
LocalePriorityList list5 = LocalePriorityList.add("af, fr;q=0.9, en").build(true);
assertEquals("af, en, fr;q=0.9", list5.toString());
}
private void assertEquals(Object expected, Object string) {
assertEquals("", expected, string);
}
@Test
public void testGetULocales() {
    // Three unweighted locales should all be exposed through getULocales().
    Set<ULocale> locales = LocalePriorityList.add("af, en, fr").build().getULocales();
    assertEquals("number of locales", 3, locales.size());
    boolean hasFrench = locales.contains(ULocale.FRENCH);
    assertTrue("fr", hasFrench);
}
@Test
public void testIterator() {
    // Iterating the list must yield exactly the three locales that were added.
    final LocalePriorityList list = LocalePriorityList.add("af, en, fr").build();
    final ULocale afrikaans = new ULocale("af");
    int seen = 0;
    for (ULocale locale : list) {
        boolean expected = locale.equals(afrikaans)
                || locale.equals(ULocale.ENGLISH)
                || locale.equals(ULocale.FRENCH);
        assertTrue("expected locale", expected);
        seen++;
    }
    assertEquals("number of locales", 3, seen);
}
@Test
public void testQValue() {
    // A q-value below 0 is expected to be rejected with an IllegalArgumentException.
    try {
        LocalePriorityList.add("de;q=-0.1");
        errln("negative accept-language qvalue should fail");
    } catch (IllegalArgumentException e) {
        // expected
    }

    // Likewise for a q-value above 1.
    try {
        LocalePriorityList.add("de;q=1.001");
        errln("accept-language qvalue > 1 should fail");
    } catch (IllegalArgumentException e) {
        // expected
    }

    // A q-value with many decimals is accepted; only check that it lands near 0.5555.
    double weight = LocalePriorityList.add("de;q=0.555555555")
            .build(true)
            .getWeight(ULocale.GERMAN);
    boolean nearExpected = 0.555 <= weight && weight <= 0.556;
    assertTrue("many decimals", nearExpected);
}
@Test
public void testReuse() {
    // Test reusing a Builder after build(), and some other code coverage.
    // "el" is added with q=0.5 and then removed again by the later q=0 entry.
    LocalePriorityList.Builder builder =
            LocalePriorityList.add("el;q=0.5, de, fr;q=0.2, el;q=0");
    LocalePriorityList list = builder.build(true);
    assertEquals("initial list", "de, fr;q=0.2", list.toString());

    // Keep adding to the same Builder after each build().
    list = builder.add(ULocale.FRENCH, 1.0).build(true);
    assertEquals("upgrade French", "de, fr", list.toString());
    list = builder.add(ULocale.ITALIAN, 0.1).build(true);
    assertEquals("add Italian", "de, fr, it;q=0.1", list.toString());

    // A Builder created from an existing list starts out with that list's contents.
    builder = LocalePriorityList.add(list);
    list = builder.build(true);
    assertEquals("cloned Builder", "de, fr, it;q=0.1", list.toString());
    list = builder.add(ULocale.ITALIAN).build(true);
    assertEquals("upgrade Italian", "de, fr, it", list.toString());

    // Start over with all 1.0 weights.
    builder = LocalePriorityList.add("de, fr");
    list = builder.build(true);
    assertEquals("simple", "de, fr", list.toString());

    // Add another list.
    LocalePriorityList list2 = LocalePriorityList.add(ULocale.ITALIAN, 0.2).build(true);
    assertEquals("list2", "it;q=0.2", list2.toString());
    list = builder.add(list2).build(true);
    assertEquals("list+list2", "de, fr, it;q=0.2", list.toString());

    // A newly added locale with the default weight is expected to sort before the q=0.2 entry.
    list = builder.add(ULocale.JAPANESE).build(true);
    assertEquals("list+list2+ja", "de, fr, ja, it;q=0.2", list.toString());
}
/**
 * Convenience overload without a message: forwards to the three-argument
 * assertEquals with an empty message string.
 *
 * @param expected the expected value
 * @param string the actual value (despite the name, any Object is accepted)
 */
private void assertEquals(Object expected, Object string) {
assertEquals("", expected, string);
}
}

View file

@ -153,7 +153,7 @@ public final class LocaleDistanceBuilder {
bytes[length++] = (byte) c;
} else {
// Mark the last character as a terminator to avoid overlap matches.
bytes[length++] = (byte) (c | 0x80);
bytes[length++] = (byte) (c | LocaleDistance.END_OF_SUBTAG);
break;
}
}