mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-23052 Fix addLikelySubtags
Remove the hack in the conversion tool by fixing the code. ICU-23052: Assert that the value will never be 0.
This commit is contained in:
parent
ee90520429
commit
2d1c3ed684
5 changed files with 2083 additions and 2049 deletions
|
@ -715,13 +715,29 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
|
|||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value > 0);
|
||||
U_ASSERT(value != 0);
|
||||
// For the case of und_Latn
|
||||
if (value < 0) {
|
||||
retainLanguage = !language.empty();
|
||||
retainScript = !script.empty();
|
||||
retainRegion = !region.empty();
|
||||
// Fallback to und_$region =>
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value == 0);
|
||||
int64_t trieUndEmptyState = iter.getState64();
|
||||
value = trieNext(iter, region, 0);
|
||||
// Fallback to und =>
|
||||
if (value < 0) {
|
||||
iter.resetToState64(trieUndEmptyState);
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
U_ASSERT(value < lsrsLength);
|
||||
const LSR &matched = lsrs[value];
|
||||
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
|
||||
return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching.
|
||||
|
@ -731,18 +747,23 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
|
|||
}
|
||||
|
||||
if (!(retainLanguage || retainScript || retainRegion)) {
|
||||
U_ASSERT(value >= 0);
|
||||
// Quickly return a copy of the lookup-result LSR
|
||||
// without new allocation of the subtags.
|
||||
const LSR &matched = lsrs[value];
|
||||
return LSR(matched.language, matched.script, matched.region, matched.flags);
|
||||
}
|
||||
if (!retainLanguage) {
|
||||
language = matched.language;
|
||||
U_ASSERT(value >= 0);
|
||||
language = lsrs[value].language;
|
||||
}
|
||||
if (!retainScript) {
|
||||
script = matched.script;
|
||||
U_ASSERT(value >= 0);
|
||||
script = lsrs[value].script;
|
||||
}
|
||||
if (!retainRegion) {
|
||||
region = matched.region;
|
||||
U_ASSERT(value >= 0);
|
||||
region = lsrs[value].region;
|
||||
}
|
||||
int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -355,11 +355,27 @@ public final class LikelySubtags {
|
|||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value > 0;
|
||||
assert value != 0;
|
||||
if (value < 0) {
|
||||
retainLanguage = !language.isEmpty();
|
||||
retainScript = !script.isEmpty();
|
||||
retainRegion = !region.isEmpty();
|
||||
// Fallback to und_$region =>
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value == 0;
|
||||
long trieUndEmptyState = iter.getState64();
|
||||
value = trieNext(iter, region, 0);
|
||||
// Fallback to und =>
|
||||
if (value < 0) {
|
||||
iter.resetToState64(trieUndEmptyState);
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value > 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
LSR result = lsrs[value];
|
||||
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.isEmpty())))) {
|
||||
|
@ -370,17 +386,21 @@ public final class LikelySubtags {
|
|||
}
|
||||
|
||||
if (! (retainLanguage || retainScript || retainRegion)) {
|
||||
assert result.flags == LSR.IMPLICIT_LSR;
|
||||
return result;
|
||||
assert value >= 0;
|
||||
assert lsrs[value].flags == LSR.IMPLICIT_LSR;
|
||||
return lsrs[value];
|
||||
}
|
||||
if (!retainLanguage) {
|
||||
language = result.language;
|
||||
assert value >= 0;
|
||||
language = lsrs[value].language;
|
||||
}
|
||||
if (!retainScript) {
|
||||
script = result.script;
|
||||
assert value >= 0;
|
||||
script = lsrs[value].script;
|
||||
}
|
||||
if (!retainRegion) {
|
||||
region = result.region;
|
||||
assert value >= 0;
|
||||
region = lsrs[value].region;
|
||||
}
|
||||
int retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
|
|
Binary file not shown.
|
@ -268,12 +268,6 @@ final class LikelySubtagsBuilder {
|
|||
}
|
||||
});
|
||||
|
||||
// Add the special case for "und-Latn" => "en-Latn-US" (which is a bit of a
|
||||
// hack for language matching).
|
||||
// Temporary patch. Needs an update of the ICU algorithm to match CLDR.
|
||||
// See https://unicode-org.atlassian.net/browse/ICU-23052
|
||||
set(lsrTable, "und", "Latn", "", lsr("en", "Latn", "US"));
|
||||
set(lsrTable, "und", "Latn", "RS", lsr("sr", "Latn", "RS"));
|
||||
logger.fine(lsrTable::toString);
|
||||
|
||||
// Ensure that if "und-RR" => "ll-Ssss-RR", then we also add "Ssss" => "RR".
|
||||
|
@ -293,10 +287,11 @@ final class LikelySubtagsBuilder {
|
|||
|
||||
// Check that every level has "*" (mapped from "und" or "").
|
||||
lsrTable.forEach((lang, scripts) -> {
|
||||
checkArgument(scripts.containsKey("*"), "missing likely subtag mapping for: %s", asLocale(lang));
|
||||
checkArgument(asLocale(lang).equals("und_Latn") || scripts.containsKey("*"), "missing likely subtag mapping for: %s", asLocale(lang));
|
||||
scripts.forEach(
|
||||
(script, regions) -> checkArgument(regions.containsKey("*"),
|
||||
"missing likely subtag mapping for: %s", asLocale(lang, script)));
|
||||
(script, regions) -> checkArgument(
|
||||
(asLocale(lang, script).equals("und_Latn")) || regions.containsKey("*"),
|
||||
"missing likely subtag mapping for: %s", asLocale(lang, script)));
|
||||
});
|
||||
return lsrTable;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue