ICU-21135 Fix pseudo locales to filter only non-root paths and avoid aliases.

See #1157
This commit is contained in:
David Beaumont 2020-05-26 23:12:42 +00:00 committed by David Beaumont
parent bb7b8481bd
commit 87bbd3d067
5 changed files with 85 additions and 37 deletions

View file

@ -9,7 +9,9 @@ import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
import static java.util.function.Function.identity;
import static java.util.regex.Pattern.CASE_INSENSITIVE;
import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Arrays;
import java.util.Set;
@ -113,25 +115,42 @@ public final class PseudoLocales {
private final CldrDataSupplier src;
private final Set<String> srcIds;
private final CldrData enData;
private final ImmutableSet<CldrPath> pathsToProcess;
PseudoSupplier(CldrDataSupplier src) {
this.src = checkNotNull(src);
this.srcIds = src.getAvailableLocaleIds();
// Use resolved data to ensure we get all the values (e.g. values in "en_001").
// Start with resolved data so we can merge values from "en" and "en_001" for coverage
// and supply the unfiltered values if someone wants the resolved version of the pseudo
// locale data.
this.enData = src.getDataForLocale("en", RESOLVED);
// But since we don't want to filter paths which come from the "root" locale (such as
// aliases) then we need to find the union of "English" paths we expect to filter.
this.pathsToProcess = getUnresolvedPaths(src, "en", "en_001");
// Just check that we aren't wrapping an already wrapped supplier.
PseudoType.getLocaleIds()
.forEach(id -> checkArgument(!srcIds.contains(id),
"pseudo locale %s already supported by given data supplier", id));
}
private static ImmutableSet<CldrPath> getUnresolvedPaths(
CldrDataSupplier src, String... ids) {
ImmutableSet.Builder<CldrPath> paths = ImmutableSet.builder();
for (String id : ids) {
src.getDataForLocale(id, UNRESOLVED).accept(ARBITRARY, v -> paths.add(v.getPath()));
}
return paths.build();
}
@Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
return new PseudoSupplier(src.withDraftStatusAtLeast(draftStatus));
}
@Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
if (PseudoType.getLocaleIds().contains(localeId)) {
return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId));
return new PseudoLocaleData(
enData, pathsToProcess, resolution, PseudoType.fromId(localeId));
} else {
return src.getDataForLocale(localeId, resolution);
}
@ -210,11 +229,18 @@ public final class PseudoLocales {
private final PseudoType type;
private final boolean isResolved;
private final ImmutableSet<CldrPath> pathsToProcess;
private PseudoLocaleData(
CldrData srcData,
ImmutableSet<CldrPath> pathsToProcess,
CldrResolution resolution,
PseudoType type) {
private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) {
super(srcData);
this.isResolved = checkNotNull(resolution) == RESOLVED;
this.type = checkNotNull(type);
this.pathsToProcess = pathsToProcess;
}
@Override
@ -236,7 +262,7 @@ public final class PseudoLocales {
CldrValue defaultReturnValue = isResolved ? value : null;
// This makes it look like we have explicit values only for the included paths.
if (!IS_PSEUDO_PATH.test(path)) {
if (!pathsToProcess.contains(path) || !IS_PSEUDO_PATH.test(path)) {
return defaultReturnValue;
}
String fullPath = value.getFullPath();

View file

@ -45,7 +45,7 @@ public class AlternateLocaleDataTest {
FakeDataSupplier src = new FakeDataSupplier()
.addLocaleData("xx", target, source, other)
.addInheritedData("xx", inherited);
.addLocaleData("root", inherited);
CldrDataSupplier transformed =
AlternateLocaleData.transform(
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
@ -97,10 +97,10 @@ public class AlternateLocaleDataTest {
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
// Even though the missing target is not matched (so no change there) the source is always
// removed from the transformed data.
assertValuesUnordered(unresolved, other);
assertValuesUnordered(resolved, other);
// If there's no target the alt-path mapping is incomplete and we do nothing (this matches
// the old CLDR tool behaviour and reasonable but can hide inconsistencies in CLDR data).
assertValuesUnordered(unresolved, source, other);
assertValuesUnordered(resolved, source, other);
}
@Test

View file

@ -40,8 +40,12 @@ public class PseudoLocalesTest {
value(excluded, "Skipped"),
value(pattern, "'plus' HH:mm; 'minus' HH:mm"),
value(narrow, "Skipped"))
.addInheritedData("en",
value(inherited, "UTC"));
.addLocaleData("en_001", value(inherited, "UTC"))
.setLocaleParent("en", "en_001")
// Root is the eventual parent of everything but its value should not appear, even
// though the expansion would apply if the paths were overridden.
.addLocaleData("root", value(ldmlPath("delimiters/quotationStart"), ""))
.addLocaleData("root", value(ldmlPath("delimiters/quotationEnd"), ""));
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB");
@ -75,12 +79,12 @@ public class PseudoLocalesTest {
.addLocaleData("en",
value(included, "{Hello} {0} {World} 100x"),
value(pattern, "'plus' HH:mm; 'minus' HH:mm"))
.addInheritedData("en",
value(inherited, "UTC"));
.addLocaleData("en_001", value(inherited, "UTC"))
.setLocaleParent("en", "en_001");
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
// The pseudo locale should combine both explicit and inherited data from 'en'.
// The pseudo locale should combine both explicit and inherited data from 'en' and 'en_001'.
CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
// These are a kind of golden data test because it's super hard to really reason about
@ -101,7 +105,7 @@ public class PseudoLocalesTest {
@Test
public void testLatinNumbering() {
CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn");
FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn);
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("root", latn);
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);

View file

@ -65,7 +65,7 @@ public class LocaleMapperTest {
ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
simpleResult("/durationUnits/foo", "Bar"));
//ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
addInheritedMapping("xx",
addRootMapping(
ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"),
simpleResult("/Keys/sometype", "Value"));
@ -86,7 +86,7 @@ public class LocaleMapperTest {
// This is included because the resource bundle path is the same as above. Note that we
// have to use the index to distinguish results here (this corresponds to the line number
// or the real when the real regex based config is used and determines result ordering).
addInheritedMapping("xx",
addRootMapping(
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"),
simpleResult("/Currencies/USD", 2, "US Dollar"));
@ -110,7 +110,7 @@ public class LocaleMapperTest {
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
addInheritedMapping("xx",
addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
@ -125,7 +125,7 @@ public class LocaleMapperTest {
@Test
public void testParentPathsNotIncludedByDefault() {
// Same as above but swapping inherited vs explicit mappings.
addInheritedMapping("xx",
addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"),
simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
@ -154,11 +154,11 @@ public class LocaleMapperTest {
public void testHiddenLabelsIncludeParentPaths() {
// Testing that the existence of a child element using a hidden label *does* trigger the
// parent element to be included.
addInheritedMapping("xx",
addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
addInheritedMapping("xx",
addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
@ -226,13 +226,13 @@ public class LocaleMapperTest {
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"),
simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
addInheritedMapping("xx",
addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
// This is the only explicit mapping and it triggers the sibling _and_ the parent.
addInheritedMapping("xx",
addRootMapping(
ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
@ -375,8 +375,8 @@ public class LocaleMapperTest {
transformer.addResults(value, results);
}
private void addInheritedMapping(String locale, CldrValue value, Result... results) {
src.addInheritedData(locale, value);
private void addRootMapping(CldrValue value, Result... results) {
src.addLocaleData("root", value);
transformer.addResults(value, results);
}

View file

@ -2,10 +2,10 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import static com.google.common.base.Preconditions.checkArgument;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
@ -17,7 +17,9 @@ import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.collect.TreeBasedTable;
@ -27,20 +29,17 @@ import com.google.common.collect.TreeBasedTable;
public final class FakeDataSupplier extends CldrDataSupplier {
private final Map<CldrPath, CldrValue> nonLocaleData = new LinkedHashMap<>();
private final Table<String, CldrPath, CldrValue> unresolvedData = TreeBasedTable.create();
private final Table<String, CldrPath, CldrValue> resolvedData = TreeBasedTable.create();
private final Map<String, String> parentLocales = new HashMap<>();
public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) {
Arrays.stream(values).forEach(v -> {
unresolvedData.put(localeId, v.getPath(), v);
resolvedData.put(localeId, v.getPath(), v);
});
return this;
}
public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) {
Arrays.stream(values)
.forEach(v -> checkArgument(resolvedData.put(localeId, v.getPath(), v) == null,
"path already present in unresolved CLDR data: %s", v.getPath()));
public FakeDataSupplier setLocaleParent(String localeId, String parentId) {
parentLocales.put(localeId, parentId);
return this;
}
@ -50,9 +49,27 @@ public final class FakeDataSupplier extends CldrDataSupplier {
}
@Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
Table<String, CldrPath, CldrValue> data =
resolution == CldrResolution.UNRESOLVED ? unresolvedData : resolvedData;
return CldrDataSupplier.forValues(data.row(localeId).values());
Collection<CldrValue> values;
if (resolution == CldrResolution.UNRESOLVED) {
values = unresolvedData.row(localeId).values();
} else {
// This is not "real" resolving since it doesn't handle aliases etc. but it's good
// enough for tests.
Map<CldrPath, CldrValue> resolved = new HashMap<>();
while (true) {
unresolvedData.row(localeId).forEach((p, v) -> {
if (!resolved.containsKey(p)) {
resolved.put(p, v);
}
});
if (localeId.equals("root")) {
break;
}
localeId = parentLocales.getOrDefault(localeId, "root");
}
values = resolved.values();
}
return CldrDataSupplier.forValues(values);
}
@Override public CldrData getDataForType(CldrDataType type) {
@ -61,7 +78,8 @@ public final class FakeDataSupplier extends CldrDataSupplier {
}
@Override public Set<String> getAvailableLocaleIds() {
return Collections.unmodifiableSet(resolvedData.rowKeySet());
return Collections.unmodifiableSet(
Sets.union(unresolvedData.rowKeySet(), ImmutableSet.of("root")));
}
@Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) {