From 3edff033933593975170ce0fc9a2e57e6c3dd1fb Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 5 Mar 2020 15:03:42 -0800 Subject: [PATCH] ICU-20915 LocaleMatcher no match: always getSupportedIndex()=-1; remove defaultLocaleIndex field; constructor check if locales are equivalent to default, not just equal; simplify locale sorting; minor builder & test deflaking --- icu4c/source/common/localematcher.cpp | 177 +++++++----------- icu4c/source/common/unicode/localematcher.h | 3 +- .../test/intltest/localematchertest.cpp | 2 +- .../ibm/icu/impl/locale/LocaleDistance.java | 15 +- .../src/com/ibm/icu/util/LocaleMatcher.java | 125 ++++++------- .../icu/dev/test/util/LocaleMatcherTest.java | 2 +- .../tool/locale/LocaleDistanceBuilder.java | 4 +- 7 files changed, 137 insertions(+), 191 deletions(-) diff --git a/icu4c/source/common/localematcher.cpp b/icu4c/source/common/localematcher.cpp index 0723bc1d459..381df212655 100644 --- a/icu4c/source/common/localematcher.cpp +++ b/icu4c/source/common/localematcher.cpp @@ -308,20 +308,22 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) { return *lsr1 == *lsr2; } -bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return false; } - U_ASSERT(i > 0); - int32_t index = uhash_geti(lsrToIndex, &lsr); - if (index != 0) { - return false; - } else { - uhash_puti(lsrToIndex, const_cast(&lsr), i, &errorCode); - return U_SUCCESS(errorCode); - } -} - } // namespace +int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return suppLength; } + int32_t index = uhash_geti(supportedLsrToIndex, &lsr); + if (index == 0) { + uhash_puti(supportedLsrToIndex, const_cast(&lsr), i + 1, &errorCode); + if (U_SUCCESS(errorCode)) { + supportedLSRs[suppLength] = &lsr; + supportedIndexes[suppLength++] = i; + } + } + return suppLength; +} + LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : likelySubtags(*XLikelySubtags::getSingleton(errorCode)), localeDistance(*LocaleDistance::getSingleton(errorCode)), @@ -331,15 +333,27 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0), supportedLsrToIndex(nullptr), supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0), - ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) { + ownedDefaultLocale(nullptr), defaultLocale(nullptr) { if (U_FAILURE(errorCode)) { return; } if (thresholdDistance < 0) { thresholdDistance = localeDistance.getDefaultScriptDistance(); } + const Locale *def = builder.defaultLocale_; + LSR builderDefaultLSR; + const LSR *defLSR = nullptr; + if (def != nullptr) { + ownedDefaultLocale = def->clone(); + if (ownedDefaultLocale == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + def = ownedDefaultLocale; + builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode); + if (U_FAILURE(errorCode)) { return; } + defLSR = &builderDefaultLSR; + } supportedLocalesLength = builder.supportedLocales_ != nullptr ? builder.supportedLocales_->size() : 0; - const Locale *def = builder.defaultLocale_; - int32_t idef = -1; if (supportedLocalesLength > 0) { // Store the supported locales in input order, // so that when different types are used (e.g., language tag strings) @@ -356,15 +370,6 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : } // If the constructor fails partway, we need null pointers for destructibility. uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *)); - // Also find the first supported locale whose LSR is - // the same as that for the default locale. - LSR builderDefaultLSR; - const LSR *defLSR = nullptr; - if (def != nullptr) { - builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode); - if (U_FAILURE(errorCode)) { return; } - defLSR = &builderDefaultLSR; - } for (int32_t i = 0; i < supportedLocalesLength; ++i) { const Locale &locale = *static_cast(builder.supportedLocales_->elementAt(i)); supportedLocales[i] = locale.clone(); @@ -376,31 +381,14 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode); lsr.setHashCode(); if (U_FAILURE(errorCode)) { return; } - if (idef < 0 && defLSR != nullptr && lsr == *defLSR) { - idef = i; - defLSR = &lsr; // owned pointer to put into supportedLsrToIndex - if (*def == supportedLocale) { - def = &supportedLocale; // owned pointer to keep - } - } } // We need an unordered map from LSR to first supported locale with that LSR, - // and an ordered list of (LSR, supported index). - // We insert the supported locales in the following order: + // and an ordered list of (LSR, supported index) for + // the supported locales in the following order: // 1. Default locale, if it is supported. // 2. Priority locales (aka "paradigm locales") in builder order. // 3. Remaining locales in builder order. - // In Java, we use a LinkedHashMap for both map & ordered lists. - // In C++, we use separate structures. - // - // We allocate arrays of LSRs and indexes, - // with as many slots as supported locales, for simplicity. - // We write the default and paradigm LSRs starting from the front of the arrays, - // and others starting from the back. - // At the end we reverse the non-paradigm LSRs. - // We end up wasting as many array slots as there are duplicate supported LSRs, - // but the amount of wasted space is small as long as there are few duplicates. supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong, supportedLocalesLength, &errorCode); if (U_FAILURE(errorCode)) { return; } @@ -412,79 +400,53 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : errorCode = U_MEMORY_ALLOCATION_ERROR; return; } - int32_t paradigmIndex = 0; - int32_t otherIndex = supportedLocalesLength; - if (idef >= 0) { - uhash_puti(supportedLsrToIndex, const_cast(defLSR), idef + 1, &errorCode); - supportedLSRs[0] = defLSR; - supportedIndexes[0] = idef; - paradigmIndex = 1; + int32_t suppLength = 0; + // Determine insertion order. + // Add locales immediately that are equivalent to the default. + MaybeStackArray order(supportedLocalesLength); + if (order.getAlias() == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; } + int32_t numParadigms = 0; for (int32_t i = 0; i < supportedLocalesLength; ++i) { - if (i == idef) { continue; } const Locale &locale = *supportedLocales[i]; const LSR &lsr = lsrs[i]; if (defLSR == nullptr) { U_ASSERT(i == 0); def = &locale; defLSR = &lsr; - idef = 0; - uhash_puti(supportedLsrToIndex, const_cast(&lsr), 0 + 1, &errorCode); - supportedLSRs[0] = &lsr; - supportedIndexes[0] = 0; - paradigmIndex = 1; - } else if (idef >= 0 && lsr == *defLSR) { - // lsr == *defLSR means that this supported locale is - // a duplicate of the default locale. - // Either an explicit default locale is supported, and we added it before the loop, - // or there is no explicit default locale, and this is - // a duplicate of the first supported locale. - // In both cases, idef >= 0 now, so otherwise we can skip the comparison. - // For a duplicate, putIfAbsent() is a no-op, so nothing to do. + suppLength = putIfAbsent(lsr, 0, suppLength, errorCode); + } else if (lsr.isEquivalentTo(*defLSR)) { + suppLength = putIfAbsent(lsr, i, suppLength, errorCode); + } else if (localeDistance.isParadigmLSR(lsr)) { + order[i] = 2; + ++numParadigms; } else { - if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) { - if (localeDistance.isParadigmLSR(lsr)) { - supportedLSRs[paradigmIndex] = &lsr; - supportedIndexes[paradigmIndex++] = i; - } else { - supportedLSRs[--otherIndex] = &lsr; - supportedIndexes[otherIndex] = i; - } - } + order[i] = 3; } if (U_FAILURE(errorCode)) { return; } } - // Reverse the non-paradigm LSRs to be in order, right after the paradigm LSRs. - // First fill the unused slots between paradigm LSRs and other LSRs. - // This gap is as large as the number of locales with duplicate LSRs. - int32_t i = paradigmIndex; - int32_t j = supportedLocalesLength - 1; - while (i < otherIndex && otherIndex <= j) { - supportedLSRs[i] = supportedLSRs[j]; - supportedIndexes[i++] = supportedIndexes[j--]; + // Add supported paradigm locales. + int32_t paradigmLimit = suppLength + numParadigms; + for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) { + if (order[i] == 2) { + suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode); + } } - // Swap remaining non-paradigm LSRs in place. - while (i < j) { - const LSR *tempLSR = supportedLSRs[i]; - supportedLSRs[i] = supportedLSRs[j]; - supportedLSRs[j] = tempLSR; - int32_t tempIndex = supportedIndexes[i]; - supportedIndexes[i++] = supportedIndexes[j]; - supportedIndexes[j--] = tempIndex; + // Add remaining supported locales. + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + if (order[i] == 3) { + suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode); + } } - supportedLSRsLength = supportedLocalesLength - (otherIndex - paradigmIndex); + supportedLSRsLength = suppLength; + // If supportedLSRsLength < supportedLocalesLength then + // we waste as many array slots as there are duplicate supported LSRs, + // but the amount of wasted space is small as long as there are few duplicates. } - if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) { - ownedDefaultLocale = def->clone(); - if (ownedDefaultLocale == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - def = ownedDefaultLocale; - } defaultLocale = def; - defaultLocaleIndex = idef; if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) { demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale(); @@ -503,8 +465,7 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT : supportedLSRs(src.supportedLSRs), supportedIndexes(src.supportedIndexes), supportedLSRsLength(src.supportedLSRsLength), - ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale), - defaultLocaleIndex(src.defaultLocaleIndex) { + ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) { src.supportedLocales = nullptr; src.lsrs = nullptr; src.supportedLocalesLength = 0; @@ -514,7 +475,6 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT : src.supportedLSRsLength = 0; src.ownedDefaultLocale = nullptr; src.defaultLocale = nullptr; - src.defaultLocaleIndex = -1; } LocaleMatcher::~LocaleMatcher() { @@ -544,7 +504,6 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT { supportedLSRsLength = src.supportedLSRsLength; ownedDefaultLocale = src.ownedDefaultLocale; defaultLocale = src.defaultLocale; - defaultLocaleIndex = src.defaultLocaleIndex; src.supportedLocales = nullptr; src.lsrs = nullptr; @@ -555,7 +514,6 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT { src.supportedLSRsLength = 0; src.ownedDefaultLocale = nullptr; src.defaultLocale = nullptr; - src.defaultLocaleIndex = -1; return *this; } @@ -642,13 +600,13 @@ const Locale *LocaleMatcher::getBestMatchForListString( LocaleMatcher::Result LocaleMatcher::getBestMatchResult( const Locale &desiredLocale, UErrorCode &errorCode) const { if (U_FAILURE(errorCode)) { - return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + return Result(nullptr, defaultLocale, -1, -1, FALSE); } int32_t suppIndex = getBestSuppIndex( getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), nullptr, errorCode); if (U_FAILURE(errorCode) || suppIndex < 0) { - return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + return Result(nullptr, defaultLocale, -1, -1, FALSE); } else { return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE); } @@ -657,12 +615,12 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult( LocaleMatcher::Result LocaleMatcher::getBestMatchResult( Locale::Iterator &desiredLocales, UErrorCode &errorCode) const { if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) { - return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + return Result(nullptr, defaultLocale, -1, -1, FALSE); } LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES); int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode); if (U_FAILURE(errorCode) || suppIndex < 0) { - return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + return Result(nullptr, defaultLocale, -1, -1, FALSE); } else { return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex], lsrIter.getBestDesiredIndex(), suppIndex, TRUE); @@ -696,8 +654,7 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai remainingIter->rememberCurrent(desiredIndex, errorCode); if (U_FAILURE(errorCode)) { return -1; } } - bestSupportedLsrIndex = bestIndexAndDistance >= 0 ? - LocaleDistance::getIndex(bestIndexAndDistance) : -1; + bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance); } if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) { break; diff --git a/icu4c/source/common/unicode/localematcher.h b/icu4c/source/common/unicode/localematcher.h index 701123f750b..1bab23ef8ca 100644 --- a/icu4c/source/common/unicode/localematcher.h +++ b/icu4c/source/common/unicode/localematcher.h @@ -574,6 +574,8 @@ private: LocaleMatcher(const LocaleMatcher &other) = delete; LocaleMatcher &operator=(const LocaleMatcher &other) = delete; + int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode); + int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const; const XLikelySubtags &likelySubtags; @@ -595,7 +597,6 @@ private: int32_t supportedLSRsLength; Locale *ownedDefaultLocale; const Locale *defaultLocale; - int32_t defaultLocaleIndex; }; U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/localematchertest.cpp b/icu4c/source/test/intltest/localematchertest.cpp index f8cb7a311d5..60278077662 100644 --- a/icu4c/source/test/intltest/localematchertest.cpp +++ b/icu4c/source/test/intltest/localematchertest.cpp @@ -272,7 +272,7 @@ void LocaleMatcherTest::testSupportedDefault() { assertEquals("getBestMatchResult(ja_JP).supp", "en_GB", locString(result.getSupportedLocale())); assertEquals("getBestMatchResult(ja_JP).suppIndex", - 1, result.getSupportedIndex()); + -1, result.getSupportedIndex()); } void LocaleMatcherTest::testUnsupportedDefault() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java index fce5a9c1c71..3d785c213f7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java @@ -5,7 +5,7 @@ package com.ibm.icu.impl.locale; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Map; import java.util.MissingResourceException; import java.util.Set; @@ -152,7 +152,8 @@ public class LocaleDistance { Set paradigmLSRs; if (matchTable.findValue("paradigms", value)) { String[] paradigms = value.getStringArray(); - paradigmLSRs = new HashSet<>(paradigms.length / 3); + // LinkedHashSet for stable order; otherwise a unit test is flaky. + paradigmLSRs = new LinkedHashSet<>(paradigms.length / 3); for (int i = 0; i < paradigms.length; i += 3) { paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2], LSR.DONT_CARE_FLAGS)); @@ -209,7 +210,7 @@ public class LocaleDistance { // As of CLDR 36, we have . LSR en = new LSR("en", "Latn", "US", LSR.EXPLICIT_LSR); LSR enGB = new LSR("en", "Latn", "GB", LSR.EXPLICIT_LSR); - int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB }, + int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB }, 1, shiftDistance(50), FavorSubtag.LANGUAGE); defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance); @@ -227,7 +228,7 @@ public class LocaleDistance { int threshold, FavorSubtag favorSubtag) { LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported); LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired); - int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, + int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, 1, shiftDistance(threshold), favorSubtag); return getDistanceFloor(indexAndDistance); } @@ -240,7 +241,7 @@ public class LocaleDistance { * (negative if none has a distance below the threshold), * and its distance (0..ABOVE_THRESHOLD) in the low bits. */ - public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs, + public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs, int supportedLSRsLength, int shiftedThreshold, FavorSubtag favorSubtag) { // Round up the shifted threshold (if fraction bits are not 0) // for comparison with un-shifted distances until we need fraction bits. @@ -252,12 +253,12 @@ public class LocaleDistance { // Its "distance" is either a match point value of 0, or a non-match negative value. // Note: The data builder verifies that there are no <*, supported> or rules. int desLangDistance = trieNext(iter, desired.language, false); - long desLangState = desLangDistance >= 0 && supportedLSRs.length > 1 ? iter.getState64() : 0; + long desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0; // Index of the supported LSR with the lowest distance. int bestIndex = -1; // Cached lookup info from XLikelySubtags.compareLikely(). int bestLikelyInfo = -1; - for (int slIndex = 0; slIndex < supportedLSRs.length; ++slIndex) { + for (int slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) { LSR supported = supportedLSRs[slIndex]; boolean star = false; int distance = desLangDistance; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java index f424679cbdb..e96b6478050 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java @@ -10,8 +10,8 @@ package com.ibm.icu.util; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -329,9 +329,9 @@ public final class LocaleMatcher { // The distance lookup loops over the supportedLSRs and returns the index of the best match. private final LSR[] supportedLSRs; private final int[] supportedIndexes; + private final int supportedLSRsLength; private final ULocale defaultULocale; private final Locale defaultLocale; - private final int defaultLocaleIndex; /** * LocaleMatcher Builder. @@ -578,103 +578,85 @@ public final class LocaleMatcher { private LocaleMatcher(Builder builder) { thresholdDistance = builder.thresholdDistance < 0 ? LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance; - int supportedLocalesLength = builder.supportedLocales != null ? - builder.supportedLocales.size() : 0; ULocale udef = builder.defaultLocale; Locale def = null; - int idef = -1; - // Store the supported locales in input order, - // so that when different types are used (e.g., java.util.Locale) - // we can return those by parallel index. - supportedULocales = new ULocale[supportedLocalesLength]; - supportedLocales = new Locale[supportedLocalesLength]; - // Supported LRSs in input order. - LSR lsrs[] = new LSR[supportedLocalesLength]; - // Also find the first supported locale whose LSR is - // the same as that for the default locale. LSR defLSR = null; if (udef != null) { def = udef.toLocale(); defLSR = getMaximalLsrOrUnd(udef); } + // Store the supported locales in input order, + // so that when different types are used (e.g., java.util.Locale) + // we can return those by parallel index. + int supportedLocalesLength = builder.supportedLocales != null ? + builder.supportedLocales.size() : 0; + supportedULocales = new ULocale[supportedLocalesLength]; + supportedLocales = new Locale[supportedLocalesLength]; + // Supported LRSs in input order. + LSR lsrs[] = new LSR[supportedLocalesLength]; int i = 0; if (supportedLocalesLength > 0) { for (ULocale locale : builder.supportedLocales) { supportedULocales[i] = locale; supportedLocales[i] = locale.toLocale(); - LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale); - if (idef < 0 && defLSR != null && lsr.equals(defLSR)) { - idef = i; - } + lsrs[i] = getMaximalLsrOrUnd(locale); ++i; } } // We need an unordered map from LSR to first supported locale with that LSR, - // and an ordered list of (LSR, supported index). - // We use a LinkedHashMap for both, - // and insert the supported locales in the following order: + // and an ordered list of (LSR, supported index) for + // the supported locales in the following order: // 1. Default locale, if it is supported. // 2. Priority locales (aka "paradigm locales") in builder order. // 3. Remaining locales in builder order. - supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); - // Note: We could work with a single LinkedHashMap by storing ~i (the binary-not index) - // for the default and paradigm locales, counting the number of those locales, - // and keeping two indexes to fill the LSR and index arrays with - // priority vs. normal locales. In that loop we would need to entry.setValue(~i) - // to restore non-negative indexes in the map. - // Probably saves little but less readable. - Map otherLsrToIndex = null; - if (idef >= 0) { - supportedLsrToIndex.put(defLSR, idef); - } + supportedLsrToIndex = new HashMap<>(supportedLocalesLength); + supportedLSRs = new LSR[supportedLocalesLength]; + supportedIndexes = new int[supportedLocalesLength]; + int suppLength = 0; + // Determine insertion order. + // Add locales immediately that are equivalent to the default. + byte[] order = new byte[supportedLocalesLength]; + int numParadigms = 0; i = 0; for (ULocale locale : supportedULocales) { - if (i == idef) { - ++i; - continue; - } LSR lsr = lsrs[i]; if (defLSR == null) { assert i == 0; udef = locale; def = supportedLocales[0]; defLSR = lsr; - idef = 0; - supportedLsrToIndex.put(lsr, 0); - } else if (idef >= 0 && lsr.equals(defLSR)) { - // lsr.equals(defLSR) means that this supported locale is - // a duplicate of the default locale. - // Either an explicit default locale is supported, and we added it before the loop, - // or there is no explicit default locale, and this is - // a duplicate of the first supported locale. - // In both cases, idef >= 0 now, so otherwise we can skip the comparison. - // For a duplicate, putIfAbsent() is a no-op, so nothing to do. + suppLength = putIfAbsent(lsr, 0, suppLength); + } else if (lsr.isEquivalentTo(defLSR)) { + suppLength = putIfAbsent(lsr, i, suppLength); } else if (LocaleDistance.INSTANCE.isParadigmLSR(lsr)) { - putIfAbsent(supportedLsrToIndex, lsr, i); + order[i] = 2; + ++numParadigms; } else { - if (otherLsrToIndex == null) { - otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); - } - putIfAbsent(otherLsrToIndex, lsr, i); + order[i] = 3; } ++i; } - if (otherLsrToIndex != null) { - supportedLsrToIndex.putAll(otherLsrToIndex); + // Add supported paradigm locales. + int paradigmLimit = suppLength + numParadigms; + for (i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) { + if (order[i] == 2) { + suppLength = putIfAbsent(lsrs[i], i, suppLength); + } } - int supportedLSRsLength = supportedLsrToIndex.size(); - supportedLSRs = new LSR[supportedLSRsLength]; - supportedIndexes = new int[supportedLSRsLength]; - i = 0; - for (Map.Entry entry : supportedLsrToIndex.entrySet()) { - supportedLSRs[i] = entry.getKey(); // = lsrs[entry.getValue()] - supportedIndexes[i++] = entry.getValue(); + // Add remaining supported locales. + for (i = 0; i < supportedLocalesLength; ++i) { + if (order[i] == 3) { + suppLength = putIfAbsent(lsrs[i], i, suppLength); + } } + supportedLSRsLength = suppLength; + // If supportedLSRsLength < supportedLocalesLength then + // we waste as many array slots as there are duplicate supported LSRs, + // but the amount of wasted space is small as long as there are few duplicates. defaultULocale = udef; defaultLocale = def; - defaultLocaleIndex = idef; demotionPerDesiredLocale = builder.demotion == Demotion.NONE ? 0 : LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION @@ -684,11 +666,13 @@ public final class LocaleMatcher { } } - private static final void putIfAbsent(Map lsrToIndex, LSR lsr, int i) { - Integer index = lsrToIndex.get(lsr); - if (index == null) { - lsrToIndex.put(lsr, i); + private final int putIfAbsent(LSR lsr, int i, int suppLength) { + if (!supportedLsrToIndex.containsKey(lsr)) { + supportedLsrToIndex.put(lsr, i); + supportedLSRs[suppLength] = lsr; + supportedIndexes[suppLength++] = i; } + return suppLength; } private static final LSR getMaximalLsrOrUnd(ULocale locale) { @@ -838,7 +822,7 @@ public final class LocaleMatcher { } private Result defaultResult() { - return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); + return new Result(null, defaultULocale, null, defaultLocale, -1, -1); } private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) { @@ -960,7 +944,8 @@ public final class LocaleMatcher { return suppIndex; } int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance( - desiredLSR, supportedLSRs, bestShiftedDistance, favorSubtag); + desiredLSR, supportedLSRs, supportedLSRsLength, + bestShiftedDistance, favorSubtag); if (bestIndexAndDistance >= 0) { bestShiftedDistance = LocaleDistance.getShiftedDistance(bestIndexAndDistance); if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } @@ -1012,7 +997,7 @@ public final class LocaleMatcher { // Returns the inverse of the distance: That is, 1-distance(desired, supported). int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance( getMaximalLsrOrUnd(desired), - new LSR[] { getMaximalLsrOrUnd(supported) }, + new LSR[] { getMaximalLsrOrUnd(supported) }, 1, LocaleDistance.shiftDistance(thresholdDistance), favorSubtag); double distance = LocaleDistance.getDistanceDouble(indexAndDistance); if (TRACE_MATCHER) { @@ -1048,9 +1033,9 @@ public final class LocaleMatcher { public String toString() { StringBuilder s = new StringBuilder().append("{LocaleMatcher"); // Supported languages in the order that we try to match them. - if (supportedLSRs.length > 0) { + if (supportedLSRsLength > 0) { s.append(" supportedLSRs={").append(supportedLSRs[0]); - for (int i = 1; i < supportedLSRs.length; ++i) { + for (int i = 1; i < supportedLSRsLength; ++i) { s.append(", ").append(supportedLSRs[i]); } s.append('}'); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java index f20cc6862d6..194403f8ab1 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java @@ -194,7 +194,7 @@ public class LocaleMatcherTest extends TestFmwk { assertEquals("getBestMatchResult(ja_JP).supp", "en_GB", locString(result.getSupportedULocale())); assertEquals("getBestMatchResult(ja_JP).suppIndex", - 1, result.getSupportedIndex()); + -1, result.getSupportedIndex()); } @Test diff --git a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java index 43b3cf856bc..1db81193183 100644 --- a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java +++ b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java @@ -15,6 +15,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -484,7 +485,8 @@ public final class LocaleDistanceBuilder { ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData"); String[] paradigms = supplementalData.getValueWithFallback( "languageMatchingInfo/written/paradigmLocales").getStringArray(); - Set paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable + // LinkedHashSet for stable order; otherwise a unit test is flaky. + Set paradigmLSRs = new LinkedHashSet<>(); // could be TreeSet if LSR were Comparable for (String paradigm : paradigms) { ULocale pl = new ULocale(paradigm); LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);