diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java index 8747b1f0607..36c14e27b2d 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java @@ -9,7 +9,9 @@ import static com.google.common.collect.ImmutableMap.toImmutableMap; import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT; import static java.util.function.Function.identity; import static java.util.regex.Pattern.CASE_INSENSITIVE; +import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY; import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import java.util.Arrays; import java.util.Set; @@ -113,25 +115,42 @@ public final class PseudoLocales { private final CldrDataSupplier src; private final Set srcIds; private final CldrData enData; + private final ImmutableSet pathsToProcess; PseudoSupplier(CldrDataSupplier src) { this.src = checkNotNull(src); this.srcIds = src.getAvailableLocaleIds(); - // Use resolved data to ensure we get all the values (e.g. values in "en_001"). + // Start with resolved data so we can merge values from "en" and "en_001" for coverage + // and supply the unfiltered values if someone wants the resolved version of the pseudo + // locale data. this.enData = src.getDataForLocale("en", RESOLVED); + // But since we don't want to filter paths which come from the "root" locale (such as + // aliases) then we need to find the union of "English" paths we expect to filter. + this.pathsToProcess = getUnresolvedPaths(src, "en", "en_001"); // Just check that we aren't wrapping an already wrapped supplier. PseudoType.getLocaleIds() .forEach(id -> checkArgument(!srcIds.contains(id), "pseudo locale %s already supported by given data supplier", id)); } + private static ImmutableSet getUnresolvedPaths( + CldrDataSupplier src, String... ids) { + + ImmutableSet.Builder paths = ImmutableSet.builder(); + for (String id : ids) { + src.getDataForLocale(id, UNRESOLVED).accept(ARBITRARY, v -> paths.add(v.getPath())); + } + return paths.build(); + } + @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) { return new PseudoSupplier(src.withDraftStatusAtLeast(draftStatus)); } @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) { if (PseudoType.getLocaleIds().contains(localeId)) { - return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId)); + return new PseudoLocaleData( + enData, pathsToProcess, resolution, PseudoType.fromId(localeId)); } else { return src.getDataForLocale(localeId, resolution); } @@ -210,11 +229,18 @@ public final class PseudoLocales { private final PseudoType type; private final boolean isResolved; + private final ImmutableSet pathsToProcess; + + private PseudoLocaleData( + CldrData srcData, + ImmutableSet pathsToProcess, + CldrResolution resolution, + PseudoType type) { - private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) { super(srcData); this.isResolved = checkNotNull(resolution) == RESOLVED; this.type = checkNotNull(type); + this.pathsToProcess = pathsToProcess; } @Override @@ -236,7 +262,7 @@ public final class PseudoLocales { CldrValue defaultReturnValue = isResolved ? value : null; // This makes it look like we have explicit values only for the included paths. - if (!IS_PSEUDO_PATH.test(path)) { + if (!pathsToProcess.contains(path) || !IS_PSEUDO_PATH.test(path)) { return defaultReturnValue; } String fullPath = value.getFullPath(); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java index 494a81957d9..54b758a9dda 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java @@ -45,7 +45,7 @@ public class AlternateLocaleDataTest { FakeDataSupplier src = new FakeDataSupplier() .addLocaleData("xx", target, source, other) - .addInheritedData("xx", inherited); + .addLocaleData("root", inherited); CldrDataSupplier transformed = AlternateLocaleData.transform( src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of()); @@ -97,10 +97,10 @@ public class AlternateLocaleDataTest { CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED); CldrData resolved = transformed.getDataForLocale("xx", RESOLVED); - // Even though the missing target is not matched (so no change there) the source is always - // removed from the transformed data. - assertValuesUnordered(unresolved, other); - assertValuesUnordered(resolved, other); + // If there's no target the alt-path mapping is incomplete and we do nothing (this matches + // the old CLDR tool behaviour and reasonable but can hide inconsistencies in CLDR data). + assertValuesUnordered(unresolved, source, other); + assertValuesUnordered(resolved, source, other); } @Test diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java index 8c2322d30b2..4ffd2f396b4 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java @@ -40,8 +40,12 @@ public class PseudoLocalesTest { value(excluded, "Skipped"), value(pattern, "'plus' HH:mm; 'minus' HH:mm"), value(narrow, "Skipped")) - .addInheritedData("en", - value(inherited, "UTC")); + .addLocaleData("en_001", value(inherited, "UTC")) + .setLocaleParent("en", "en_001") + // Root is the eventual parent of everything but its value should not appear, even + // though the expansion would apply if the paths were overridden. + .addLocaleData("root", value(ldmlPath("delimiters/quotationStart"), "“")) + .addLocaleData("root", value(ldmlPath("delimiters/quotationEnd"), "”")); CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB"); @@ -75,12 +79,12 @@ public class PseudoLocalesTest { .addLocaleData("en", value(included, "{Hello} {0} {World} 100x"), value(pattern, "'plus' HH:mm; 'minus' HH:mm")) - .addInheritedData("en", - value(inherited, "UTC")); + .addLocaleData("en_001", value(inherited, "UTC")) + .setLocaleParent("en", "en_001"); CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); - // The pseudo locale should combine both explicit and inherited data from 'en'. + // The pseudo locale should combine both explicit and inherited data from 'en' and 'en_001'. CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED); // These are a kind of golden data test because it's super hard to really reason about @@ -101,7 +105,7 @@ public class PseudoLocalesTest { @Test public void testLatinNumbering() { CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn"); - FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn); + FakeDataSupplier src = new FakeDataSupplier().addLocaleData("root", latn); CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java index f218b4bc3c7..4445facc63d 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java @@ -65,7 +65,7 @@ public class LocaleMapperTest { ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"), simpleResult("/durationUnits/foo", "Bar")); //ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1 - addInheritedMapping("xx", + addRootMapping( ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"), simpleResult("/Keys/sometype", "Value")); @@ -86,7 +86,7 @@ public class LocaleMapperTest { // This is included because the resource bundle path is the same as above. Note that we // have to use the index to distinguish results here (this corresponds to the line number // or the real when the real regex based config is used and determines result ordering). - addInheritedMapping("xx", + addRootMapping( ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"), simpleResult("/Currencies/USD", 2, "US Dollar")); @@ -110,7 +110,7 @@ public class LocaleMapperTest { ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"), simpleResult("/calendar/foo/availableFormats/bar", "Foo")); - addInheritedMapping("xx", + addRootMapping( ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"), simpleResult("/calendar/foo/availableFormats/bar/one", "Bar")); @@ -125,7 +125,7 @@ public class LocaleMapperTest { @Test public void testParentPathsNotIncludedByDefault() { // Same as above but swapping inherited vs explicit mappings. - addInheritedMapping("xx", + addRootMapping( ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"), simpleResult("/calendar/foo/availableFormats/bar", "Foo")); @@ -154,11 +154,11 @@ public class LocaleMapperTest { public void testHiddenLabelsIncludeParentPaths() { // Testing that the existence of a child element using a hidden label *does* trigger the // parent element to be included. - addInheritedMapping("xx", + addRootMapping( ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"), simpleResult("/calendar/foo/availableFormats/bar", "Parent")); - addInheritedMapping("xx", + addRootMapping( ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"), simpleResult("/calendar/foo/availableFormats/bar/", 1, "Child-1")); @@ -226,13 +226,13 @@ public class LocaleMapperTest { ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"), simpleResult("/calendar/foo/availableFormats/bar", "Parent")); - addInheritedMapping("xx", + addRootMapping( ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"), simpleResult("/calendar/foo/availableFormats/bar/", 1, "Child-1")); // This is the only explicit mapping and it triggers the sibling _and_ the parent. - addInheritedMapping("xx", + addRootMapping( ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"), simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2")); @@ -375,8 +375,8 @@ public class LocaleMapperTest { transformer.addResults(value, results); } - private void addInheritedMapping(String locale, CldrValue value, Result... results) { - src.addInheritedData(locale, value); + private void addRootMapping(CldrValue value, Result... results) { + src.addLocaleData("root", value); transformer.addResults(value, results); } diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java index 287ff3590c3..cf6bb83b944 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java @@ -2,10 +2,10 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu.testing; -import static com.google.common.base.Preconditions.checkArgument; - import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; @@ -17,7 +17,9 @@ import org.unicode.cldr.api.CldrDraftStatus; import org.unicode.cldr.api.CldrPath; import org.unicode.cldr.api.CldrValue; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; import com.google.common.collect.Table; import com.google.common.collect.TreeBasedTable; @@ -27,20 +29,17 @@ import com.google.common.collect.TreeBasedTable; public final class FakeDataSupplier extends CldrDataSupplier { private final Map nonLocaleData = new LinkedHashMap<>(); private final Table unresolvedData = TreeBasedTable.create(); - private final Table resolvedData = TreeBasedTable.create(); + private final Map parentLocales = new HashMap<>(); public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) { Arrays.stream(values).forEach(v -> { unresolvedData.put(localeId, v.getPath(), v); - resolvedData.put(localeId, v.getPath(), v); }); return this; } - public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) { - Arrays.stream(values) - .forEach(v -> checkArgument(resolvedData.put(localeId, v.getPath(), v) == null, - "path already present in unresolved CLDR data: %s", v.getPath())); + public FakeDataSupplier setLocaleParent(String localeId, String parentId) { + parentLocales.put(localeId, parentId); return this; } @@ -50,9 +49,27 @@ public final class FakeDataSupplier extends CldrDataSupplier { } @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) { - Table data = - resolution == CldrResolution.UNRESOLVED ? unresolvedData : resolvedData; - return CldrDataSupplier.forValues(data.row(localeId).values()); + Collection values; + if (resolution == CldrResolution.UNRESOLVED) { + values = unresolvedData.row(localeId).values(); + } else { + // This is not "real" resolving since it doesn't handle aliases etc. but it's good + // enough for tests. + Map resolved = new HashMap<>(); + while (true) { + unresolvedData.row(localeId).forEach((p, v) -> { + if (!resolved.containsKey(p)) { + resolved.put(p, v); + } + }); + if (localeId.equals("root")) { + break; + } + localeId = parentLocales.getOrDefault(localeId, "root"); + } + values = resolved.values(); + } + return CldrDataSupplier.forValues(values); } @Override public CldrData getDataForType(CldrDataType type) { @@ -61,7 +78,8 @@ public final class FakeDataSupplier extends CldrDataSupplier { } @Override public Set getAvailableLocaleIds() { - return Collections.unmodifiableSet(resolvedData.rowKeySet()); + return Collections.unmodifiableSet( + Sets.union(unresolvedData.rowKeySet(), ImmutableSet.of("root"))); } @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) {