mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-21135 Fix pseudo locales to filter only non-root paths and avoid aliases.
See #1157
This commit is contained in:
parent
bb7b8481bd
commit
87bbd3d067
5 changed files with 85 additions and 37 deletions
|
@ -9,7 +9,9 @@ import static com.google.common.collect.ImmutableMap.toImmutableMap;
|
|||
import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
|
||||
import static java.util.function.Function.identity;
|
||||
import static java.util.regex.Pattern.CASE_INSENSITIVE;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
|
||||
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
|
||||
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
|
@ -113,25 +115,42 @@ public final class PseudoLocales {
|
|||
private final CldrDataSupplier src;
|
||||
private final Set<String> srcIds;
|
||||
private final CldrData enData;
|
||||
private final ImmutableSet<CldrPath> pathsToProcess;
|
||||
|
||||
PseudoSupplier(CldrDataSupplier src) {
|
||||
this.src = checkNotNull(src);
|
||||
this.srcIds = src.getAvailableLocaleIds();
|
||||
// Use resolved data to ensure we get all the values (e.g. values in "en_001").
|
||||
// Start with resolved data so we can merge values from "en" and "en_001" for coverage
|
||||
// and supply the unfiltered values if someone wants the resolved version of the pseudo
|
||||
// locale data.
|
||||
this.enData = src.getDataForLocale("en", RESOLVED);
|
||||
// But since we don't want to filter paths which come from the "root" locale (such as
|
||||
// aliases) then we need to find the union of "English" paths we expect to filter.
|
||||
this.pathsToProcess = getUnresolvedPaths(src, "en", "en_001");
|
||||
// Just check that we aren't wrapping an already wrapped supplier.
|
||||
PseudoType.getLocaleIds()
|
||||
.forEach(id -> checkArgument(!srcIds.contains(id),
|
||||
"pseudo locale %s already supported by given data supplier", id));
|
||||
}
|
||||
|
||||
private static ImmutableSet<CldrPath> getUnresolvedPaths(
|
||||
CldrDataSupplier src, String... ids) {
|
||||
|
||||
ImmutableSet.Builder<CldrPath> paths = ImmutableSet.builder();
|
||||
for (String id : ids) {
|
||||
src.getDataForLocale(id, UNRESOLVED).accept(ARBITRARY, v -> paths.add(v.getPath()));
|
||||
}
|
||||
return paths.build();
|
||||
}
|
||||
|
||||
@Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
|
||||
return new PseudoSupplier(src.withDraftStatusAtLeast(draftStatus));
|
||||
}
|
||||
|
||||
@Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
|
||||
if (PseudoType.getLocaleIds().contains(localeId)) {
|
||||
return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId));
|
||||
return new PseudoLocaleData(
|
||||
enData, pathsToProcess, resolution, PseudoType.fromId(localeId));
|
||||
} else {
|
||||
return src.getDataForLocale(localeId, resolution);
|
||||
}
|
||||
|
@ -210,11 +229,18 @@ public final class PseudoLocales {
|
|||
|
||||
private final PseudoType type;
|
||||
private final boolean isResolved;
|
||||
private final ImmutableSet<CldrPath> pathsToProcess;
|
||||
|
||||
private PseudoLocaleData(
|
||||
CldrData srcData,
|
||||
ImmutableSet<CldrPath> pathsToProcess,
|
||||
CldrResolution resolution,
|
||||
PseudoType type) {
|
||||
|
||||
private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) {
|
||||
super(srcData);
|
||||
this.isResolved = checkNotNull(resolution) == RESOLVED;
|
||||
this.type = checkNotNull(type);
|
||||
this.pathsToProcess = pathsToProcess;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -236,7 +262,7 @@ public final class PseudoLocales {
|
|||
|
||||
CldrValue defaultReturnValue = isResolved ? value : null;
|
||||
// This makes it look like we have explicit values only for the included paths.
|
||||
if (!IS_PSEUDO_PATH.test(path)) {
|
||||
if (!pathsToProcess.contains(path) || !IS_PSEUDO_PATH.test(path)) {
|
||||
return defaultReturnValue;
|
||||
}
|
||||
String fullPath = value.getFullPath();
|
||||
|
|
|
@ -45,7 +45,7 @@ public class AlternateLocaleDataTest {
|
|||
|
||||
FakeDataSupplier src = new FakeDataSupplier()
|
||||
.addLocaleData("xx", target, source, other)
|
||||
.addInheritedData("xx", inherited);
|
||||
.addLocaleData("root", inherited);
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(
|
||||
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
|
||||
|
@ -97,10 +97,10 @@ public class AlternateLocaleDataTest {
|
|||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
||||
// Even though the missing target is not matched (so no change there) the source is always
|
||||
// removed from the transformed data.
|
||||
assertValuesUnordered(unresolved, other);
|
||||
assertValuesUnordered(resolved, other);
|
||||
// If there's no target the alt-path mapping is incomplete and we do nothing (this matches
|
||||
// the old CLDR tool behaviour and reasonable but can hide inconsistencies in CLDR data).
|
||||
assertValuesUnordered(unresolved, source, other);
|
||||
assertValuesUnordered(resolved, source, other);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -40,8 +40,12 @@ public class PseudoLocalesTest {
|
|||
value(excluded, "Skipped"),
|
||||
value(pattern, "'plus' HH:mm; 'minus' HH:mm"),
|
||||
value(narrow, "Skipped"))
|
||||
.addInheritedData("en",
|
||||
value(inherited, "UTC"));
|
||||
.addLocaleData("en_001", value(inherited, "UTC"))
|
||||
.setLocaleParent("en", "en_001")
|
||||
// Root is the eventual parent of everything but its value should not appear, even
|
||||
// though the expansion would apply if the paths were overridden.
|
||||
.addLocaleData("root", value(ldmlPath("delimiters/quotationStart"), "“"))
|
||||
.addLocaleData("root", value(ldmlPath("delimiters/quotationEnd"), "”"));
|
||||
|
||||
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
|
||||
assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB");
|
||||
|
@ -75,12 +79,12 @@ public class PseudoLocalesTest {
|
|||
.addLocaleData("en",
|
||||
value(included, "{Hello} {0} {World} 100x"),
|
||||
value(pattern, "'plus' HH:mm; 'minus' HH:mm"))
|
||||
.addInheritedData("en",
|
||||
value(inherited, "UTC"));
|
||||
.addLocaleData("en_001", value(inherited, "UTC"))
|
||||
.setLocaleParent("en", "en_001");
|
||||
|
||||
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
|
||||
|
||||
// The pseudo locale should combine both explicit and inherited data from 'en'.
|
||||
// The pseudo locale should combine both explicit and inherited data from 'en' and 'en_001'.
|
||||
CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
|
||||
|
||||
// These are a kind of golden data test because it's super hard to really reason about
|
||||
|
@ -101,7 +105,7 @@ public class PseudoLocalesTest {
|
|||
@Test
|
||||
public void testLatinNumbering() {
|
||||
CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn");
|
||||
FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn);
|
||||
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("root", latn);
|
||||
|
||||
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ public class LocaleMapperTest {
|
|||
ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
|
||||
simpleResult("/durationUnits/foo", "Bar"));
|
||||
//ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"),
|
||||
simpleResult("/Keys/sometype", "Value"));
|
||||
|
||||
|
@ -86,7 +86,7 @@ public class LocaleMapperTest {
|
|||
// This is included because the resource bundle path is the same as above. Note that we
|
||||
// have to use the index to distinguish results here (this corresponds to the line number
|
||||
// or the real when the real regex based config is used and determines result ordering).
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"),
|
||||
simpleResult("/Currencies/USD", 2, "US Dollar"));
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class LocaleMapperTest {
|
|||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
|
||||
|
@ -125,7 +125,7 @@ public class LocaleMapperTest {
|
|||
@Test
|
||||
public void testParentPathsNotIncludedByDefault() {
|
||||
// Same as above but swapping inherited vs explicit mappings.
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
|
||||
|
@ -154,11 +154,11 @@ public class LocaleMapperTest {
|
|||
public void testHiddenLabelsIncludeParentPaths() {
|
||||
// Testing that the existence of a child element using a hidden label *does* trigger the
|
||||
// parent element to be included.
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
|
||||
|
@ -226,13 +226,13 @@ public class LocaleMapperTest {
|
|||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
|
||||
|
||||
// This is the only explicit mapping and it triggers the sibling _and_ the parent.
|
||||
addInheritedMapping("xx",
|
||||
addRootMapping(
|
||||
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
|
||||
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
|
||||
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
|
||||
|
@ -375,8 +375,8 @@ public class LocaleMapperTest {
|
|||
transformer.addResults(value, results);
|
||||
}
|
||||
|
||||
private void addInheritedMapping(String locale, CldrValue value, Result... results) {
|
||||
src.addInheritedData(locale, value);
|
||||
private void addRootMapping(CldrValue value, Result... results) {
|
||||
src.addLocaleData("root", value);
|
||||
transformer.addResults(value, results);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu.testing;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -17,7 +17,9 @@ import org.unicode.cldr.api.CldrDraftStatus;
|
|||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.collect.Table;
|
||||
import com.google.common.collect.TreeBasedTable;
|
||||
|
||||
|
@ -27,20 +29,17 @@ import com.google.common.collect.TreeBasedTable;
|
|||
public final class FakeDataSupplier extends CldrDataSupplier {
|
||||
private final Map<CldrPath, CldrValue> nonLocaleData = new LinkedHashMap<>();
|
||||
private final Table<String, CldrPath, CldrValue> unresolvedData = TreeBasedTable.create();
|
||||
private final Table<String, CldrPath, CldrValue> resolvedData = TreeBasedTable.create();
|
||||
private final Map<String, String> parentLocales = new HashMap<>();
|
||||
|
||||
public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) {
|
||||
Arrays.stream(values).forEach(v -> {
|
||||
unresolvedData.put(localeId, v.getPath(), v);
|
||||
resolvedData.put(localeId, v.getPath(), v);
|
||||
});
|
||||
return this;
|
||||
}
|
||||
|
||||
public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) {
|
||||
Arrays.stream(values)
|
||||
.forEach(v -> checkArgument(resolvedData.put(localeId, v.getPath(), v) == null,
|
||||
"path already present in unresolved CLDR data: %s", v.getPath()));
|
||||
public FakeDataSupplier setLocaleParent(String localeId, String parentId) {
|
||||
parentLocales.put(localeId, parentId);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -50,9 +49,27 @@ public final class FakeDataSupplier extends CldrDataSupplier {
|
|||
}
|
||||
|
||||
@Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
|
||||
Table<String, CldrPath, CldrValue> data =
|
||||
resolution == CldrResolution.UNRESOLVED ? unresolvedData : resolvedData;
|
||||
return CldrDataSupplier.forValues(data.row(localeId).values());
|
||||
Collection<CldrValue> values;
|
||||
if (resolution == CldrResolution.UNRESOLVED) {
|
||||
values = unresolvedData.row(localeId).values();
|
||||
} else {
|
||||
// This is not "real" resolving since it doesn't handle aliases etc. but it's good
|
||||
// enough for tests.
|
||||
Map<CldrPath, CldrValue> resolved = new HashMap<>();
|
||||
while (true) {
|
||||
unresolvedData.row(localeId).forEach((p, v) -> {
|
||||
if (!resolved.containsKey(p)) {
|
||||
resolved.put(p, v);
|
||||
}
|
||||
});
|
||||
if (localeId.equals("root")) {
|
||||
break;
|
||||
}
|
||||
localeId = parentLocales.getOrDefault(localeId, "root");
|
||||
}
|
||||
values = resolved.values();
|
||||
}
|
||||
return CldrDataSupplier.forValues(values);
|
||||
}
|
||||
|
||||
@Override public CldrData getDataForType(CldrDataType type) {
|
||||
|
@ -61,7 +78,8 @@ public final class FakeDataSupplier extends CldrDataSupplier {
|
|||
}
|
||||
|
||||
@Override public Set<String> getAvailableLocaleIds() {
|
||||
return Collections.unmodifiableSet(resolvedData.rowKeySet());
|
||||
return Collections.unmodifiableSet(
|
||||
Sets.union(unresolvedData.rowKeySet(), ImmutableSet.of("root")));
|
||||
}
|
||||
|
||||
@Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) {
|
||||
|
|
Loading…
Add table
Reference in a new issue