From ba7f1b61ecf7a9c2867355847a1a71dd2fde4791 Mon Sep 17 00:00:00 2001 From: David Beaumont Date: Wed, 23 Oct 2019 12:34:36 +0200 Subject: [PATCH] ICU-20693 Pseudo-locale "alt path" filtering support. (#869) * ICU-20693 Pseudo-locale "alt path" filtering support. --- tools/cldr/cldr-to-icu/build-icu-data.xml | 26 +- .../tool/cldrtoicu/AlternateLocaleData.java | 126 ++++++ .../icu/tool/cldrtoicu/FilteredData.java | 67 +++ .../icu/tool/cldrtoicu/LdmlConverter.java | 260 ++++++------ .../icu/tool/cldrtoicu/PseudoLocales.java | 387 ++++++++++++++++++ .../cldrtoicu/ant/ConvertIcuDataTask.java | 87 +++- .../mapper/AbstractPathValueMapper.java | 17 +- .../cldrtoicu/mapper/BreakIteratorMapper.java | 22 +- .../cldrtoicu/mapper/CollationMapper.java | 22 +- .../tool/cldrtoicu/mapper/LocaleMapper.java | 48 +-- .../icu/tool/cldrtoicu/mapper/RbnfMapper.java | 21 +- .../cldrtoicu/mapper/SupplementalMapper.java | 4 +- .../cldrtoicu/AlternateLocaleDataTest.java | 152 +++++++ .../icu/tool/cldrtoicu/FilteredDataTest.java | 101 +++++ .../icu/tool/cldrtoicu/IcuDataTest.java | 2 +- .../icu/tool/cldrtoicu/PseudoLocalesTest.java | 141 +++++++ .../mapper/AbstractPathValueMapperTest.java | 74 +--- .../cldrtoicu/mapper/Bcp47MapperTest.java | 8 +- .../mapper/BreakIteratorMapperTest.java | 12 +- .../cldrtoicu/mapper/CollationMapperTest.java | 22 +- .../cldrtoicu/mapper/LocaleMapperTest.java | 66 ++- .../tool/cldrtoicu/mapper/RbnfMapperTest.java | 12 +- 22 files changed, 1334 insertions(+), 343 deletions(-) create mode 100644 tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java create mode 100644 tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java create mode 100644 tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java create mode 100644 tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java create mode 100644 
tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java create mode 100644 tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java diff --git a/tools/cldr/cldr-to-icu/build-icu-data.xml b/tools/cldr/cldr-to-icu/build-icu-data.xml index 665d7150a9c..fa9ab719976 100644 --- a/tools/cldr/cldr-to-icu/build-icu-data.xml +++ b/tools/cldr/cldr-to-icu/build-icu-data.xml @@ -39,6 +39,15 @@ CldrDraftStatus for more details. --> + + + + + + @@ -78,7 +87,9 @@ + outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}" + localeIdFilter="${localeIdFilter}" includePseudoLocales="${includePseudoLocales}" + emitReport="${emitReport}"> + + + + diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java new file mode 100644 index 00000000000..55a3015672f --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java @@ -0,0 +1,126 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static org.unicode.cldr.api.CldrDataType.LDML; + +import java.util.Map; +import java.util.Set; + +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrDataType; +import org.unicode.cldr.api.CldrDraftStatus; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; + +import com.google.common.collect.ImmutableMap; + +/** + * A factory for data suppliers which can filter CLDR values by substituting values from one path + * to another. 
The replaced value must retain the original "target" path but will have the value + * and value attributes of the "source". A value will only be replaced if both the source and + * target paths have associated values. The replacement retains its original position in the value + * ordering. + * + *

This class DOES NOT transform supplemental or BCP-47 data, because the use of "alt" values + * is completely different for that data (it would require merging specific attributes together). + * + *

Note that this is not a general purpose transformation of CLDR data, since it is generally + * not possible to "move" values between arbitrary paths. Target and source paths must be in the + * same "namespace" (i.e. share the same element names) but attributes can differ. + * + *

Note also that the mapping is not recursive, so mapping {@code A -> B} and {@code B -> C} + * will NOT cause {@code A} to be mapped to {@code C}. + * + *

Typically this class is expected to be used for selecting alternate values of locale data + * based on the {@code "alt"} path attribute (e.g. selecting the short form of a region name). + */ +public final class AlternateLocaleData { + /** + * Returns a wrapped data supplier which will transform any {@link CldrValue}s according to the + * supplied {@link CldrPath} mapping. Keys in the path map are the "target" paths of values to + * be modified, and the values in the map are the "source" paths from which the replacement + * values are obtained. For each map entry, the target and source paths must be in the same + * namespace (i.e. have the same path element names). + */ + public static CldrDataSupplier transform(CldrDataSupplier src, Map altPaths) { + return new CldrDataFilter(src, altPaths); + } + + private static final class CldrDataFilter extends CldrDataSupplier { + private final CldrDataSupplier src; + // Mapping from target (destination) to source path. This is necessary since two targets + // could come from the same source). 
+ private final ImmutableMap altPaths; + + CldrDataFilter( + CldrDataSupplier src, Map altPaths) { + this.src = checkNotNull(src); + this.altPaths = ImmutableMap.copyOf(altPaths); + altPaths.forEach((t, s) -> checkArgument(hasSameNamespace(checkLdml(t), checkLdml(s)), + "alternate paths must have the same namespace: target=%s, source=%s", t, s)); + } + + @Override + public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) { + return new CldrDataFilter(src.withDraftStatusAtLeast(draftStatus), altPaths); + } + + @Override + public CldrData getDataForLocale(String localeId, CldrResolution resolution) { + return new AltData(src.getDataForLocale(localeId, resolution)); + } + + @Override + public Set getAvailableLocaleIds() { + return src.getAvailableLocaleIds(); + } + + @Override + public CldrData getDataForType(CldrDataType type) { + return src.getDataForType(type); + } + + private final class AltData extends FilteredData { + AltData(CldrData srcData) { + super(srcData); + } + + @Override + protected CldrValue filter(CldrValue value) { + CldrPath altPath = altPaths.get(value.getPath()); + if (altPath != null) { + CldrValue altValue = getSourceData().get(altPath); + if (altValue != null) { + return altValue.replacePath(value.getPath()); + } + } + return value; + } + } + } + + private static boolean hasSameNamespace(CldrPath x, CldrPath y) { + if (x.getLength() != y.getLength()) { + return false; + } + do { + if (!x.getName().equals(y.getName())) { + return false; + } + x = x.getParent(); + y = y.getParent(); + } while (x != null); + return true; + } + + private static CldrPath checkLdml(CldrPath path) { + checkArgument(path.getDataType() == LDML, "only locale data (LDML) is supported: %s", path); + return path; + } + + private AlternateLocaleData() {} +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java new file mode 
100644 index 00000000000..62551b0a6e0 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java @@ -0,0 +1,67 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; + +import javax.annotation.Nullable; + +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; + +/** + * A class which allows data from some underlying {@link CldrData} source to be filtered or + * removed (but not added). + */ +// TODO: Once DTD ordering is the only allowed order, this can be extended to allow adding paths. +abstract class FilteredData implements CldrData { + private final CldrData src; + + public FilteredData(CldrData src) { + this.src = checkNotNull(src); + } + + /** For sub-classes to access the underlying source data. */ + protected CldrData getSourceData() { + return src; + } + + /** + * Returns a filtered CLDR value, replacing or removing the original value during visitation. + * The filtered value can only differ in its base value or value attributes, and must have + * the same {@link CldrPath} associated with it. + * + * @return the filtered value to be substituted, or {@code null} to remove the value. + */ + @Nullable + protected abstract CldrValue filter(CldrValue value); + + @Override + public void accept(PathOrder order, ValueVisitor visitor) { + src.accept(order, v -> visitFiltered(v, visitor)); + } + + @Override + public CldrValue get(CldrPath path) { + CldrValue value = src.get(path); + return value != null ? 
checkFiltered(value) : null; + } + + private void visitFiltered(CldrValue value, ValueVisitor visitor) { + CldrValue filteredValue = checkFiltered(value); + if (filteredValue != null) { + visitor.visit(filteredValue); + } + } + + @Nullable + private CldrValue checkFiltered(CldrValue value) { + CldrValue filteredValue = filter(value); + checkArgument(filteredValue == null || filteredValue.getPath().equals(value.getPath()), + "filtering is not permitted to modify distinguishing paths: source=%s, filtered=%s", + value, filteredValue); + return filteredValue; + } +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java index e8206de9e09..0e9c3930aa7 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java @@ -4,7 +4,9 @@ package org.unicode.icu.tool.cldrtoicu; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; -import static java.util.stream.Collectors.toList; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import static org.unicode.cldr.api.CldrDataType.BCP47; import static org.unicode.cldr.api.CldrDataType.LDML; import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL; @@ -32,8 +34,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.TreeSet; -import java.util.function.Consumer; -import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -56,6 +56,7 @@ import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer; import 
com.google.common.base.CharMatcher; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.LinkedListMultimap; @@ -137,69 +138,30 @@ public final class LdmlConverter { * hide what are essentially implementation specific data splits. */ public enum OutputType { - LOCALES(LDML, LdmlConverter::processLocales), - BRKITR(LDML, LdmlConverter::processBrkitr), - COLL(LDML, LdmlConverter::processCollation), - RBNF(LDML, LdmlConverter::processRbnf), + LOCALES(LDML), + BRKITR(LDML), + COLL(LDML), + RBNF(LDML), + DAY_PERIODS(SUPPLEMENTAL), + GENDER_LIST(SUPPLEMENTAL), + LIKELY_SUBTAGS(SUPPLEMENTAL), + SUPPLEMENTAL_DATA(SUPPLEMENTAL), + CURRENCY_DATA(SUPPLEMENTAL), + METADATA(SUPPLEMENTAL), + META_ZONES(SUPPLEMENTAL), + NUMBERING_SYSTEMS(SUPPLEMENTAL), + PLURALS(SUPPLEMENTAL), + PLURAL_RANGES(SUPPLEMENTAL), + WINDOWS_ZONES(SUPPLEMENTAL), + TRANSFORMS(SUPPLEMENTAL), + KEY_TYPE_DATA(BCP47); - DAY_PERIODS( - SUPPLEMENTAL, - LdmlConverter::processDayPeriods), - GENDER_LIST( - SUPPLEMENTAL, - c -> c.processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false)), - LIKELY_SUBTAGS( - SUPPLEMENTAL, - c -> c.processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false)), - SUPPLEMENTAL_DATA( - SUPPLEMENTAL, - c -> c.processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true)), - CURRENCY_DATA( - SUPPLEMENTAL, - c -> c.processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", true)), - METADATA( - SUPPLEMENTAL, - c -> c.processSupplemental("metadata", METADATA_PATHS, "misc", false)), - META_ZONES( - SUPPLEMENTAL, - c -> c.processSupplemental("metaZones", METAZONE_PATHS, "misc", false)), - NUMBERING_SYSTEMS( - SUPPLEMENTAL, - c -> c.processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false)), - 
PLURALS( - SUPPLEMENTAL, - LdmlConverter::processPlurals), - PLURAL_RANGES( - SUPPLEMENTAL, - LdmlConverter::processPluralRanges), - WINDOWS_ZONES( - SUPPLEMENTAL, - c -> c.processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false)), - TRANSFORMS( - SUPPLEMENTAL, - LdmlConverter::processTransforms), - KEY_TYPE_DATA( - BCP47, - LdmlConverter::processKeyTypeData), - - // Batching by type. - DTD_LDML(LDML, c -> c.processAll(LDML)), - DTD_SUPPLEMENTAL(SUPPLEMENTAL, c -> c.processAll(SUPPLEMENTAL)), - DTD_BCP47(BCP47, c -> c.processAll(BCP47)); - - public static final ImmutableSet ALL = - ImmutableSet.of(DTD_BCP47, DTD_SUPPLEMENTAL, DTD_LDML); + public static final ImmutableSet ALL = ImmutableSet.copyOf(OutputType.values()); private final CldrDataType type; - private final Consumer converterFn; - OutputType(CldrDataType type, Consumer converterFn) { + OutputType(CldrDataType type) { this.type = checkNotNull(type); - this.converterFn = checkNotNull(converterFn); - } - - void convert(LdmlConverter converter) { - converterFn.accept(converter); } CldrDataType getCldrType() { @@ -207,6 +169,17 @@ public final class LdmlConverter { } } + // Map to convert the rather arbitrarily defined "output types" to the directories into which + // the data is written. This is only for "LDML" types since other mappers don't need to split + // data into multiple directories. + private static final ImmutableListMultimap TYPE_TO_DIR = + ImmutableListMultimap.builder() + .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE) + .putAll(OutputType.BRKITR, BRKITR) + .putAll(OutputType.COLL, COLL) + .putAll(OutputType.RBNF, RBNF) + .build(); + /** Converts CLDR data according to the given configuration. 
*/ public static void convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { @@ -252,15 +225,8 @@ public final class LdmlConverter { } private void convertAll() { - ListMultimap groupByType = LinkedListMultimap.create(); - for (OutputType t : config.getOutputTypes()) { - groupByType.put(t.getCldrType(), t); - } - for (CldrDataType cldrType : groupByType.keySet()) { - for (OutputType t : groupByType.get(cldrType)) { - t.convert(this); - } - } + processLdml(); + processSupplemental(); if (config.emitReport()) { System.out.println("Supplemental Data Transformer=" + supplementalTransformer); System.out.println("Locale Data Transformer=" + localeTransformer); @@ -275,24 +241,6 @@ public final class LdmlConverter { } } - private PathValueTransformer getLocaleTransformer() { - return localeTransformer; - } - - private PathValueTransformer getSupplementalTransformer() { - return supplementalTransformer; - } - - private void processAll(CldrDataType cldrType) { - List targets = Arrays.stream(OutputType.values()) - .filter(t -> t.getCldrType().equals(cldrType)) - .filter(t -> !t.name().startsWith("DTD_")) - .collect(toList()); - for (OutputType t : targets) { - t.convert(this); - } - } - private Optional loadSpecialsData(String localeId) { String expected = localeId + ".xml"; try (Stream files = Files.walk(config.getSpecialsDir())) { @@ -310,31 +258,12 @@ public final class LdmlConverter { } } - private void processLocales() { - // TODO: Pre-load specials files to avoid repeatedly re-loading them. 
- processAndSplitLocaleFiles( - id -> LocaleMapper.process( - id, src, loadSpecialsData(id), getLocaleTransformer(), supplementalData), - CURR, LANG, LOCALES, REGION, UNIT, ZONE); - } - - private void processBrkitr() { - processAndSplitLocaleFiles( - id -> BreakIteratorMapper.process(id, src, loadSpecialsData(id)), BRKITR); - } - - private void processCollation() { - processAndSplitLocaleFiles( - id -> CollationMapper.process(id, src, loadSpecialsData(id)), COLL); - } - - private void processRbnf() { - processAndSplitLocaleFiles( - id -> RbnfMapper.process(id, src, loadSpecialsData(id)), RBNF); - } - - private void processAndSplitLocaleFiles( - Function icuFn, IcuLocaleDir... splitDirs) { + private void processLdml() { + ImmutableList splitDirs = + config.getOutputTypes().stream() + .filter(t -> t.getCldrType() == LDML) + .flatMap(t -> TYPE_TO_DIR.get(t).stream()) + .collect(toImmutableList()); SetMultimap writtenLocaleIds = HashMultimap.create(); Path baseDir = config.getOutputDir(); @@ -344,7 +273,20 @@ public final class LdmlConverter { if (!availableIds.contains(id)) { continue; } - IcuData icuData = icuFn.apply(id); + + IcuData icuData = new IcuData(id, true); + + Optional specials = loadSpecialsData(id); + CldrData unresolved = src.getDataForLocale(id, UNRESOLVED); + + BreakIteratorMapper.process(icuData, unresolved, specials); + CollationMapper.process(icuData, unresolved, specials); + RbnfMapper.process(icuData, unresolved, specials); + + CldrData resolved = src.getDataForLocale(id, RESOLVED); + Optional defaultCalendar = supplementalData.getDefaultCalendar(id); + LocaleMapper.process( + icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar); ListMultimap splitPaths = LinkedListMultimap.create(); for (RbPath p : icuData.getPaths()) { @@ -399,6 +341,15 @@ public final class LdmlConverter { } } + private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%"); + + // Resource bundle paths elements can have variants (e.g. 
"Currencies%narrow) or type + // annotations (e.g. "languages:intvector"). We strip these when considering the element name. + private static String getBaseSegmentName(String segment) { + int idx = PATH_MODIFIER.indexIn(segment); + return idx == -1 ? segment : segment.substring(0, idx); + } + private Map getAliasMap(Set localeIds, IcuLocaleDir dir) { // There are four reasons for treating a locale ID as an alias. // 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS"). @@ -445,34 +396,69 @@ public final class LdmlConverter { return aliasMap; } - private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%"); + private void processSupplemental() { + for (OutputType type : config.getOutputTypes()) { + if (type.getCldrType() == LDML) { + continue; + } + switch (type) { + case DAY_PERIODS: + write(DayPeriodsMapper.process(src), "misc"); + break; - // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type - // annotations (e.g. "languages:intvector"). We strip these when considering the element name. - private static String getBaseSegmentName(String segment) { - int idx = PATH_MODIFIER.indexIn(segment); - return idx == -1 ? 
segment : segment.substring(0, idx); - } + case GENDER_LIST: + processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false); + break; - private void processDayPeriods() { - write(DayPeriodsMapper.process(src), "misc"); - } + case LIKELY_SUBTAGS: + processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false); + break; - private void processPlurals() { - write(PluralsMapper.process(src), "misc"); - } + case SUPPLEMENTAL_DATA: + processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true); + break; - private void processPluralRanges() { - write(PluralRangesMapper.process(src), "misc"); - } + case CURRENCY_DATA: + processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", true); + break; - private void processKeyTypeData() { - Bcp47Mapper.process(src).forEach(d -> write(d, "misc")); - } + case METADATA: + processSupplemental("metadata", METADATA_PATHS, "misc", false); + break; - private void processTransforms() { - Path transformDir = createDirectory(config.getOutputDir().resolve("translit")); - write(TransformsMapper.process(src, transformDir, fileHeader), transformDir); + case META_ZONES: + processSupplemental("metaZones", METAZONE_PATHS, "misc", false); + break; + + case NUMBERING_SYSTEMS: + processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false); + break; + + case PLURALS: + write(PluralsMapper.process(src), "misc"); + break; + + case PLURAL_RANGES: + write(PluralRangesMapper.process(src), "misc"); + break; + + case WINDOWS_ZONES: + processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false); + break; + + case TRANSFORMS: + Path transformDir = createDirectory(config.getOutputDir().resolve("translit")); + write(TransformsMapper.process(src, transformDir, fileHeader), transformDir); + break; + + case KEY_TYPE_DATA: + Bcp47Mapper.process(src).forEach(d -> write(d, "misc")); + break; + + default: + throw new AssertionError("Unsupported supplemental type: " + type); + } + } } private 
static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion"); @@ -480,7 +466,7 @@ public final class LdmlConverter { private void processSupplemental( String label, PathMatcher paths, String dir, boolean addCldrVersion) { IcuData icuData = - SupplementalMapper.process(src, getSupplementalTransformer(), label, paths); + SupplementalMapper.process(src, supplementalTransformer, label, paths); // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the // supplemental data XML files. if (addCldrVersion) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java new file mode 100644 index 00000000000..850d17d9aad --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java @@ -0,0 +1,387 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT; +import static java.util.function.Function.identity; +import static java.util.regex.Pattern.CASE_INSENSITIVE; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; + +import java.util.Arrays; +import java.util.Set; +import java.util.function.Function; +import java.util.function.IntUnaryOperator; +import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.IntStream; + +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrDataSupplier.CldrResolution; +import org.unicode.cldr.api.CldrDataType; +import 
org.unicode.cldr.api.CldrDraftStatus; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; + +import com.google.common.base.CharMatcher; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; + +/** + * A factory for wrapping data suppliers to add synthetic locales for debugging. The currently + * supported synthetic locales are: + *

    + *
  • {@code en_XA}: A pseudo locale which generates expanded text with many non-Latin accents. + *
  • {@code ar_XB}: A pseudo locale which generates BiDi text for debugging. + *
+ * + *

Both pseudo locales are based on {@code "en"} data, and generate values which are readable + * by English speaking developers. For example, the CLDR value "Hello World" will be turned into + * something like: + *

    + *
  • {@code en_XA}: [Ĥéļļö Ŵöŕļð one two] + *
  • {@code ar_XB}: dlroW olleH + *
+ * + *

In the case of BiDi pseudo localization, bi-directional markers are also inserted into the + * text so that, if the system using the data is configured correctly, the results will look + * "normal" (i.e. Latin text will appear displayed left-to-right because of the BiDi markers). + */ +// TODO(CLDR-13381): Move this all into the CLDR API once the dust has settled. +public final class PseudoLocales { + private enum PseudoType { + BIDI("ar_XB", PseudoLocales::bidi, "abcdefghijklmnopqrstuvwxyz"), + EXPAND("en_XA", PseudoLocales::expanding, + "a\u00e5b\u0180c\u00e7d\u00f0e\u00e9f\u0192g\u011dh\u0125i\u00eej\u0135k\u0137l\u013cm" + + "\u0271n\u00f1o\u00f6p\u00feq\u01ebr\u0155s\u0161t\u0163u\u00fbv\u1e7dw\u0175" + + "x\u1e8by\u00fdz\u017e"); + + private static final ImmutableMap ID_MAP = + Arrays.stream(values()).collect(toImmutableMap(PseudoType::getLocaleId, identity())); + + private static PseudoType fromId(String localeId) { + return checkNotNull(ID_MAP.get(localeId), "unknown pseduo locale: %s", localeId); + } + + private static ImmutableSet getLocaleIds() { + return ID_MAP.keySet(); + } + + private final String localeId; + private final Function textSupplier; + // A string whose code points form the exemplar set for the pseudo locale. + private final String exemplars; + + PseudoType(String localeId, Function textSupplier, String exemplars) { + this.localeId = localeId; + this.textSupplier = textSupplier; + this.exemplars = exemplars; + } + + String getLocaleId() { + return localeId; + } + + PseudoText getText(boolean isPattern) { + return textSupplier.apply(isPattern); + } + + String getExemplars() { + return exemplars; + } + } + + /** + * Returns a wrapped data supplier which will inject {@link CldrData} for the pseudo locales + * {@code en_XA} and {@code ar_XB}. These locales should behave in all respects like normal + * locales and can be processed accordingly. 
+ */ + public static CldrDataSupplier addPseudoLocalesTo(CldrDataSupplier src) { + return new PseudoSupplier(src); + } + + private static final class PseudoSupplier extends CldrDataSupplier { + private final CldrDataSupplier src; + private final Set srcIds; + private final CldrData enData; + + PseudoSupplier(CldrDataSupplier src) { + this.src = checkNotNull(src); + this.srcIds = src.getAvailableLocaleIds(); + // Use resolved data to ensure we get all the values (e.g. values in "en_001"). + this.enData = src.getDataForLocale("en", RESOLVED); + // Just check that we aren't wrapping an already wrapped supplier. + PseudoType.getLocaleIds() + .forEach(id -> checkArgument(!srcIds.contains(id), + "pseudo locale %s already supported by given data supplier", id)); + } + + @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) { + return new PseudoSupplier(src.withDraftStatusAtLeast(draftStatus)); + } + + @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) { + if (PseudoType.getLocaleIds().contains(localeId)) { + return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId)); + } else { + return src.getDataForLocale(localeId, resolution); + } + } + + @Override public Set getAvailableLocaleIds() { + return Sets.union(src.getAvailableLocaleIds(), PseudoType.getLocaleIds()); + } + + @Override public CldrData getDataForType(CldrDataType type) { + return src.getDataForType(type); + } + } + + private interface PseudoText { + void addFragment(String text, boolean isLocalizable); + } + + private static final class PseudoLocaleData extends FilteredData { + private static final PathMatcher AUX_EXEMPLARS = + PathMatcher.of("ldml/characters/exemplarCharacters[@type=\"auxiliary\"]"); + + private static final PathMatcher NUMBERING_SYSTEM = + PathMatcher.of("ldml/numbers/defaultNumberingSystem"); + + // These paths were mostly derived from looking at the previous implementation's behaviour + // and can be 
modified as needed. Notably there are no "units" here (but they were also + // excluded in the original code). + private static final PathMatcher PSEUDO_PATHS = PathMatcher.anyOf( + ldml("localeDisplayNames"), + ldml("delimiters"), + ldml("dates/calendars/calendar"), + ldml("dates/fields"), + ldml("dates/timeZoneNames"), + ldml("listPatterns"), + ldml("posix/messages"), + ldml("characterLabels"), + ldml("typographicNames")); + + // Paths which contain non-localizable data. It is important that these paths catch all the + // non-localizable sub-paths of the list above. This list must be accurate. + private static final PathMatcher EXCLUDE_PATHS = PathMatcher.anyOf( + ldml("localeDisplayNames/localeDisplayPattern"), + ldml("dates/timeZoneNames/fallbackFormat")); + + // The expectation is that all non-alias paths with values under these roots are "date/time + // pattern like" (such as "E h:mm:ss B") in which care must be taken to not pseudo localize + // the patterns in such as way as to break them. This list must be accurate. + private static final PathMatcher PATTERN_PATHS = PathMatcher.anyOf( + ldml("dates/calendars/calendar/timeFormats"), + ldml("dates/calendars/calendar/dateFormats"), + ldml("dates/calendars/calendar/dateTimeFormats"), + ldml("dates/timeZoneNames/hourFormat")); + + private static PathMatcher ldml(String matcherSuffix) { + return PathMatcher.of("ldml/" + matcherSuffix); + } + + // Look for any attribute in the path with "narrow" in its value. Since "narrow" values + // have strong expectations of width, we should not expand these (but might alter them + // otherwise). 
+        private static final Predicate IS_NARROW =
+            Pattern.compile("\\[@[a-z]+=\"[^\"]*narrow[^\"]*\"]", CASE_INSENSITIVE).asPredicate();
+
+        // Matches numeric placeholders such as "{0}"; used to split non-pattern text.
+        private static final Pattern NUMERIC_PLACEHOLDER = Pattern.compile("\\{\\d+\\}");
+        // Matches the shortest single-quoted run (including an empty "''"); used to split
+        // date/time pattern text.
+        private static final Pattern QUOTED_TEXT = Pattern.compile("'.*?'");
+
+        // The pseudo-localization style applied by this instance.
+        private final PseudoType type;
+        // True if this instance was created for RESOLVED data (see the constructor).
+        private final boolean isResolved;
+
+        private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) {
+            super(srcData);
+            this.isResolved = checkNotNull(resolution) == RESOLVED;
+            this.type = checkNotNull(type);
+        }
+
+        // Returns the value to use in place of the given source value:
+        // * the generated exemplar list for the AUX_EXEMPLARS path,
+        // * the unchanged value for the numbering system path of the BIDI type,
+        // * the default (the value itself if resolved, otherwise null) for paths which are
+        //   not pseudo-localized, are explicitly excluded, or carry a "narrow" attribute,
+        // * otherwise a new value with the same full path and pseudo-localized text.
+        // NOTE(review): presumably a null return makes the superclass omit the value; confirm
+        // against the FilteredData contract.
+        @Override
+        protected CldrValue filter(CldrValue value) {
+            CldrPath path = value.getPath();
+
+            // Special case(s) first...
+            // We add the exemplar character list according to the pseudo type.
+            if (AUX_EXEMPLARS.matches(path)) {
+                return getExemplarValue(path);
+            }
+            // Force "latn" for the "ar_XB" pseudo locale (since otherwise it inherits from "ar").
+            // The path we get here was from "en" so should already be "latn", but we just have
+            // to return it in order for it to take effect.
+            if (type == PseudoType.BIDI && NUMBERING_SYSTEM.matches(path)) {
+                checkArgument(value.getValue().equals("latn"));
+                return value;
+            }
+
+            CldrValue defaultReturnValue = isResolved ? value : null;
+            // This makes it look like we have explicit values only for the included paths.
+            if (!PSEUDO_PATHS.matchesPrefixOf(path) || EXCLUDE_PATHS.matchesPrefixOf(path)) {
+                return defaultReturnValue;
+            }
+            String fullPath = value.getFullPath();
+            // For now don't do anything with "narrow" data (this matches the previous behaviour).
+            // We can always add something here later if necessary.
+            if (IS_NARROW.test(fullPath)) {
+                return defaultReturnValue;
+            }
+            String text = createMessage(value.getValue(), PATTERN_PATHS.matchesPrefixOf(path));
+            return CldrValue.parseValue(fullPath, text);
+        }
+
+        // It's tempting to think that the existing exemplar list in "en" could be parsed to
+        // generate the list automatically (rather than having a hard coded list in the type) but
+        // https://unicode.org/reports/tr35/tr35-general.html#ExemplarSyntax
+        // makes it quite clear that this is infeasible, since there are many equivalent
+        // representations of the exemplar characters that could appear in the value
+        // (e.g. "[a b ... z]", "[a-z]", "[{a} {b} ... {z}]")
+        //
+        // The value built here has the form "[c1 c2 ... cN]": each code point of the type's
+        // exemplar string is followed by a space, and the final space is then overwritten
+        // with the closing bracket.
+        private CldrValue getExemplarValue(CldrPath path) {
+            StringBuilder exemplarList = new StringBuilder("[");
+            type.getExemplars().codePoints()
+                .forEach(cp -> exemplarList.appendCodePoint(cp).append(' '));
+            exemplarList.setCharAt(exemplarList.length() - 1, ']');
+            return CldrValue.parseValue(path.toString(), exemplarList.toString());
+        }
+
+        // Pseudo-localizes the given text by feeding alternating unmatched/matched sections
+        // to the PseudoText accumulator obtained from the type, and returns its string form.
+        private String createMessage(String text, boolean isPattern) {
+            // Pattern text is split by the quoted sections (which are localizable) whereas
+            // non-pattern text is split by placeholder (e.g. {0}) which are not localizable.
+            // This is why "isPattern" is used to signal "isLocalizable" in addFragment().
+            Matcher match = (isPattern ? QUOTED_TEXT : NUMERIC_PLACEHOLDER).matcher(text);
+            // Alternate between unmatched and matched sections in the text, always localizing one
+            // but not the other (depending on the type). Append the trailing section at the end.
+            PseudoText out = type.getText(isPattern);
+            int start = 0;
+            for (; match.find(); start = match.end()) {
+                out.addFragment(text.substring(start, match.start()), !isPattern);
+                out.addFragment(match.group(), isPattern);
+            }
+            out.addFragment(text.substring(start), !isPattern);
+            return out.toString();
+        }
+    }
+
+    // ---- Expanding Pseudo-localizer (e.g.
"November" --> "[Ñöṽéɱƀéŕ one two]") ---- + + // A map from a string of alternating key/value code-points; e.g. '1' -> '①'. + // Note that a subset of this is also used to form the "exemplar" set (see PseudoType). + private static final IntUnaryOperator CONVERT_CODEPOINT = toCodePointFunction( + " \u2003!\u00a1\"\u2033#\u266f$\u20ac%\u2030&\u214b*\u204e+\u207a,\u060c-\u2010.\u00b7" + + "/\u20440\u24ea1\u24602\u24613\u24624\u24635\u24646\u24657\u24668\u24679\u2468" + + ":\u2236;\u204f<\u2264=\u2242>\u2265?\u00bf@\u055eA\u00c5B\u0181C\u00c7D\u00d0" + + "E\u00c9F\u0191G\u011cH\u0124I\u00ceJ\u0134K\u0136L\u013bM\u1e40N\u00d1O\u00d6" + + "P\u00deQ\u01eaR\u0154S\u0160T\u0162U\u00dbV\u1e7cW\u0174X\u1e8aY\u00ddZ\u017d" + + "[\u2045\\\u2216]\u2046^\u02c4_\u203f`\u2035a\u00e5b\u0180c\u00e7d\u00f0e\u00e9" + + "f\u0192g\u011dh\u0125i\u00eej\u0135k\u0137l\u013cm\u0271n\u00f1o\u00f6p\u00fe" + + "q\u01ebr\u0155s\u0161t\u0163u\u00fbv\u1e7dw\u0175x\u1e8by\u00fdz\u017e|\u00a6" + + "~\u02de"); + + // Converts a source/target alternating code-points into a map. + private static IntUnaryOperator toCodePointFunction(String s) { + // Not pretty, but there's no nice way to "pair up" successive stream elements without + // extra library dependencies, so we collect them and then iterate via index. + int[] codePoints = s.codePoints().toArray(); + checkArgument((codePoints.length & 1) == 0, + "must have an even number of code points (was %s)", codePoints.length); + ImmutableMap map = + IntStream.range(0, codePoints.length / 2) + .boxed() + .collect(toImmutableMap(n -> codePoints[2 * n], n -> codePoints[(2 * n) + 1])); + return cp -> map.getOrDefault(cp, cp); + } + + // A list of words to be added to text when it is expanded. A whole number of words are + // always added (and the fact they are numeric words is irrelevant, could be Lorem Ipsum). + // So far nothing goes above "ten" in en_XA, but this can always be trivially extended. 
+    private static final String PADDING = "one two three four five six seven eight nine ten";
+
+    // Returns a PseudoText accumulator which replaces the code points of localizable
+    // fragments via CONVERT_CODEPOINT, copies other fragments unchanged, and whose string
+    // form wraps the accumulated text in "[...]" followed by a whole number of padding words.
+    // If "isPattern" is true the padding words are single-quoted so that they remain literal
+    // text in a date/time pattern.
+    private static PseudoText expanding(boolean isPattern) {
+        return new PseudoText() {
+            IntStream.Builder codePoints = IntStream.builder();
+
+            @Override
+            public void addFragment(String text, boolean isLocalizable) {
+                text.codePoints()
+                    .map(isLocalizable ? CONVERT_CODEPOINT : cp -> cp)
+                    .forEach(codePoints::add);
+            }
+
+            // Note: the underlying stream builder can only be built once, so this method
+            // must not be called more than once per instance.
+            @Override
+            public String toString() {
+                int[] cp = codePoints.build().toArray();
+                // Copy the original code and round up the 50% calculation (it's not important).
+                // The start index is clamped because indexIn() rejects a start index beyond
+                // the end of the padding, which would otherwise make long text fail here.
+                int minLength = Math.min((cp.length + 1) / 2, PADDING.length());
+                int endIndex = CharMatcher.whitespace().indexIn(PADDING, minLength);
+                // indexIn() returns -1 if no whitespace follows (i.e. we are already inside
+                // the last padding word), in which case all of the padding is used.
+                String suffix = (endIndex >= 0) ? PADDING.substring(0, endIndex) : PADDING;
+                // For pattern strings, any literal text must be quoted (the fragment text
+                // already was). Note that this is why we don't transform single-quotes.
+                if (isPattern) {
+                    suffix = "'" + suffix.replace(" ", "' '") + "'";
+                }
+                // Final output is something like "November" --> "[Ñöṽéɱƀéŕ one two]"
+                // Where the additional padding adds at least 50% to the length of the text
+                // (or all of the available padding words for text too long for that).
+                return "[" + new String(cp, 0, cp.length) + " " + suffix + "]";
+            }
+        };
+    }
+
+    // ---- Bidi Pseudo-localizer (e.g. "November" --> "rebmevoN" using BiDi tags)----
+
+    // Right-to-left override character.
+    private static final String RLO = "\u202e";
+    // Arabic letter mark character.
+    private static final String ALM = "\u061C";
+    // Pop direction formatting character.
+    private static final String PDF = "\u202c";
+    // Prefix to add before each LTR word.
+    private static final String BIDI_PREFIX = ALM + RLO;
+    // Postfix to add after each LTR word.
+    private static final String BIDI_POSTFIX = PDF + ALM;
+
+    // Bidi localization doesn't care if the fragment is a pattern or not.
+ @SuppressWarnings("unused") + private static PseudoText bidi(boolean isPattern) { + return new PseudoText() { + private final StringBuilder out = new StringBuilder(); + + // This was largely copied from the original CLDRFilePseudolocalizer class and + // while it appears to work fine, I don't know enough to comment it clearly. + // TODO: Find someone who can add a decent comment here! + @Override + public void addFragment(String text, boolean isLocalizable) { + if (isLocalizable) { + boolean wrapping = false; + for (int index = 0; index < text.length();) { + int codePoint = text.codePointAt(index); + index += Character.charCount(codePoint); + byte directionality = Character.getDirectionality(codePoint); + boolean needsWrap = (directionality == DIRECTIONALITY_LEFT_TO_RIGHT); + if (needsWrap != wrapping) { + wrapping = needsWrap; + out.append(wrapping ? BIDI_PREFIX : BIDI_POSTFIX); + } + out.appendCodePoint(codePoint); + } + if (wrapping) { + out.append(BIDI_POSTFIX); + } + } else { + out.append(text); + } + } + + @Override + public String toString() { + return out.toString(); + } + }; + } + + private PseudoLocales() {} +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java index 876393c03c9..f9f3ad9fe1b 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java @@ -9,19 +9,27 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.util.stream.Collectors.joining; +import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath; import java.nio.file.Path; import java.util.Arrays; +import 
java.util.HashMap; +import java.util.Map; import java.util.Optional; +import java.util.function.Predicate; +import java.util.regex.Pattern; import org.apache.tools.ant.BuildException; import org.apache.tools.ant.Task; import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDraftStatus; +import org.unicode.cldr.api.CldrPath; +import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData; import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig; import org.unicode.icu.tool.cldrtoicu.LdmlConverter; import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType; import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; +import org.unicode.icu.tool.cldrtoicu.PseudoLocales; import org.unicode.icu.tool.cldrtoicu.SupplementalData; import com.google.common.base.Ascii; @@ -31,6 +39,7 @@ import com.google.common.base.Splitter; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; import com.google.common.collect.SetMultimap; // Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed. @@ -50,6 +59,11 @@ public final class ConvertIcuDataTask extends Task { // Per directory overrides (fully specified locale IDs). private final SetMultimap perDirectoryIds = HashMultimap.create(); private final IcuConverterConfig.Builder config = IcuConverterConfig.builder(); + // Don't try and resolve actual paths until inside the execute method. + private final Map altPathMap = new HashMap<>(); + // TODO(CLDR-13381): Move into CLDR API; e.g. 
withPseudoLocales() + private boolean includePseudoLocales = false; + private Predicate idFilter = id -> true; @SuppressWarnings("unused") public void setOutputDir(Path path) { @@ -83,6 +97,16 @@ public final class ConvertIcuDataTask extends Task { config.setSpecialsDir(path); } + @SuppressWarnings("unused") + public void setIncludePseudoLocales(boolean includePseudoLocales) { + this.includePseudoLocales = includePseudoLocales; + } + + @SuppressWarnings("unused") + public void setLocaleIdFilter(String idFilterRegex) { + this.idFilter = Pattern.compile(idFilterRegex).asPredicate(); + } + @SuppressWarnings("unused") public void setEmitReport(boolean emit) { config.setEmitReport(emit); @@ -130,7 +154,7 @@ public final class ConvertIcuDataTask extends Task { @SuppressWarnings("unused") public void setDir(String directory) { - this.dir = resolveOpt(IcuLocaleDir.class, directory); + this.dir = resolveDir(directory); } @SuppressWarnings("unused") @@ -150,6 +174,28 @@ public final class ConvertIcuDataTask extends Task { } } + + public static final class AltPath extends Task { + private String source = ""; + private String target = ""; + + @SuppressWarnings("unused") + public void setTarget(String target) { + this.target = target.replace('\'', '"'); + } + + @SuppressWarnings("unused") + public void setSource(String source) { + this.source = source.replace('\'', '"'); + } + + @Override + public void init() throws BuildException { + checkBuild(!source.isEmpty(), "Source path not be empty"); + checkBuild(!target.isEmpty(), "Target path not be empty"); + } + } + @SuppressWarnings("unused") public void addConfiguredLocaleIds(LocaleIds localeIds) { checkBuild(this.localeIdSpec == null, "Cannot add more that one element"); @@ -172,23 +218,48 @@ public final class ConvertIcuDataTask extends Task { } } + @SuppressWarnings("unused") + public void addConfiguredAltPath(AltPath altPath) { + // Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs). 
+ // Wait until the "execute()" method since in future we expect to use the configured CLDR + // directory explicitly there. + checkBuild(this.altPathMap.put(altPath.target, altPath.source) == null, + "Duplicate elements (same target): %s", altPath.target); + } + @SuppressWarnings("unused") public void execute() throws BuildException { - CldrDataSupplier src = - CldrDataSupplier.forCldrFilesIn(cldrPath).withDraftStatusAtLeast(minimumDraftStatus); + CldrDataSupplier src = CldrDataSupplier + .forCldrFilesIn(cldrPath) + .withDraftStatusAtLeast(minimumDraftStatus); + + // We must do this wrapping of the data supplier _before_ creating the supplemental data + // instance since adding pseudo locales affects the set of available locales. + // TODO: Move some/all of this into the base converter and control it via the config. + if (!altPathMap.isEmpty()) { + Map pathMap = new HashMap<>(); + altPathMap.forEach( + (t, s) -> pathMap.put(parseDistinguishingPath(t), parseDistinguishingPath(s))); + src = AlternateLocaleData.transform(src, pathMap); + } + if (includePseudoLocales) { + src = PseudoLocales.addPseudoLocalesTo(src); + } + SupplementalData supplementalData = SupplementalData.create(src); ImmutableSet defaultTargetIds = LocaleIdResolver.expandTargetIds(this.localeIdSpec, supplementalData); for (IcuLocaleDir dir : IcuLocaleDir.values()) { - config.addLocaleIds(dir, perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds)); + Iterable ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds); + config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test)); } config.setMinimumDraftStatus(minimumDraftStatus); LdmlConverter.convert(src, supplementalData, config.build()); } - private static void checkBuild(boolean condition, String message) { + private static void checkBuild(boolean condition, String message, Object... 
args) { if (!condition) { - throw new BuildException(message); + throw new BuildException(String.format(message, args)); } } @@ -199,8 +270,8 @@ public final class ConvertIcuDataTask extends Task { return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds)); } - private static > Optional resolveOpt(Class enumClass, String name) { - return !name.isEmpty() ? Optional.of(resolve(enumClass, name)) : Optional.empty(); + private static Optional resolveDir(String name) { + return !name.isEmpty() ? Optional.of(resolve(IcuLocaleDir.class, name)) : Optional.empty(); } private static > T resolve(Class enumClass, String name) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java index b896d2126a7..b66b7d1c9bf 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java @@ -55,12 +55,11 @@ abstract class AbstractPathValueMapper { } /** - * Returns a new {@code IcuData} instance produced by post-processing a set of results - * generated by calling sub-class method {@link #addResults()}. This is the only method which - * need be directly invoked by the sub-class implementation (other methods are optionally used - * from within the {@link #addResults()} callback). + * Post-processes results generated by calling the subclass method {@link #addResults()}. This + * is the only method which need be directly invoked by the sub-class implementation (other + * methods are optionally used from within the {@link #addResults()} callback). 
*/ - final IcuData generateIcuData(String icuName, boolean hasFallback) { + final void addIcuData(IcuData icuData) { // This subclass mostly exists to control the fact that results need to be added in one go // to the IcuData because of how referenced paths are handled. If results could be added in // multiple passes, you could have confusing situations in which values has path references @@ -70,9 +69,8 @@ abstract class AbstractPathValueMapper { checkState(resultsByRbPath.isEmpty(), "results must not be added outside the call to addResults(): %s", resultsByRbPath); addResults(); - IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback)); + addResultsToIcuData(finalizeResults(), icuData); resultsByRbPath.clear(); - return icuData; } /** @@ -81,7 +79,7 @@ abstract class AbstractPathValueMapper { * {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the * results to this mapper using {@link #addResult(RbPath, Result)}. * - *

This method is called once for each call to {@link #generateIcuData(String, boolean)} and + *

This method is called once for each call to {@link #addIcuData(IcuData)} and * is responsible for adding all necessary results for the returned {@link IcuData}. */ abstract void addResults(); @@ -145,7 +143,7 @@ abstract class AbstractPathValueMapper { * having an array index) then the referenced value is assumed to be an existing path whose * value is then substituted. */ - private static IcuData addResultsToIcuData( + private static void addResultsToIcuData( ImmutableListMultimap results, IcuData icuData) { // Ordering of paths should not matter here (IcuData will re-sort them) and ordering of @@ -178,7 +176,6 @@ abstract class AbstractPathValueMapper { } // This works because insertion order is maintained for values of each path. map.forEach((p, v) -> icuData.add(p, v.resolve(map))); - return icuData; } /* diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java index fc6fe5984ae..8451d73d92b 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java @@ -2,15 +2,14 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu.mapper; +import static com.google.common.base.Preconditions.checkNotNull; import static org.unicode.cldr.api.AttributeKey.keyOf; import static org.unicode.cldr.api.CldrData.PathOrder.DTD; -import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import java.util.Optional; import org.unicode.cldr.api.AttributeKey; import org.unicode.cldr.api.CldrData; -import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; import org.unicode.cldr.api.CldrValue; @@ -18,7 +17,6 @@ import 
org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; -import com.google.common.annotations.VisibleForTesting; import com.google.common.escape.UnicodeEscaper; /** @@ -52,21 +50,15 @@ public final class BreakIteratorMapper { * Processes data from the given supplier to generate break-iterator data for a set of locale * IDs. * - * @param localeId the locale ID to generate data for. - * @param src the CLDR data supplier to process. + * @param icuData the ICU data to be filled. + * @param cldrData the unresolved CLDR data to process. * @param icuSpecialData additional ICU data (in the "icu:" namespace) * @return IcuData containing break-iterator data for the given locale ID. */ public static IcuData process( - String localeId, CldrDataSupplier src, Optional icuSpecialData) { + IcuData icuData, CldrData cldrData, Optional icuSpecialData) { - CldrData cldrData = src.getDataForLocale(localeId, UNRESOLVED); - return process(localeId, cldrData, icuSpecialData); - } - - @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. - static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) { - BreakIteratorMapper mapper = new BreakIteratorMapper(localeId); + BreakIteratorMapper mapper = new BreakIteratorMapper(icuData); icuSpecialData.ifPresent(s -> s.accept(DTD, mapper::addSpecials)); cldrData.accept(DTD, mapper::addSuppression); return mapper.icuData; @@ -75,8 +67,8 @@ public final class BreakIteratorMapper { // The per-locale ICU data being collected by this visitor. 
private final IcuData icuData; - private BreakIteratorMapper(String localeId) { - this.icuData = new IcuData(localeId, true); + private BreakIteratorMapper(IcuData icuData) { + this.icuData = checkNotNull(icuData); } private void addSuppression(CldrValue v) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java index 385cadd3cbd..5bef09fc52b 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java @@ -3,9 +3,9 @@ package org.unicode.icu.tool.cldrtoicu.mapper; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; import static org.unicode.cldr.api.AttributeKey.keyOf; import static org.unicode.cldr.api.CldrData.PathOrder.DTD; -import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import java.util.Optional; @@ -21,7 +21,6 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.CharMatcher; import com.google.common.base.Splitter; @@ -61,20 +60,15 @@ public final class CollationMapper { /** * Processes data from the given supplier to generate collation data for a set of locale IDs. * - * @param localeId the locale ID to generate data for. - * @param src the CLDR data supplier to process. + * @param icuData the ICU data to be filled. + * @param cldrData the unresolved CLDR data to process. * @param icuSpecialData additional ICU data (in the "icu:" namespace) * @return IcuData containing RBNF data for the given locale ID. 
*/ public static IcuData process( - String localeId, CldrDataSupplier src, Optional icuSpecialData) { + IcuData icuData, CldrData cldrData, Optional icuSpecialData) { - return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData); - } - - @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. - static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) { - CollationVisitor visitor = new CollationVisitor(localeId); + CollationVisitor visitor = new CollationVisitor(icuData); icuSpecialData.ifPresent(s -> s.accept(DTD, visitor)); cldrData.accept(DTD, visitor); return visitor.icuData; @@ -83,13 +77,13 @@ public final class CollationMapper { final static class CollationVisitor implements PrefixVisitor { private final IcuData icuData; - CollationVisitor(String localeId) { - this.icuData = new IcuData(localeId, true); + CollationVisitor(IcuData icuData) { + this.icuData = checkNotNull(icuData); // Super special hack case because the XML data is a bit broken for the root collation // data (there's an empty element that's a non-leaf element and thus not // visited, but we should add an empty sequence to the output data. // TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131) - if (localeId.equals("root")) { + if (icuData.getName().equals("root")) { icuData.replace(RB_STANDARD_SEQUENCE, ""); // TODO: Collation versioning probably needs to be improved. 
icuData.replace(RB_STANDARD_VERSION, CldrDataSupplier.getCldrVersionString()); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java index ce196662e6f..d3f39f64884 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java @@ -5,8 +5,6 @@ package org.unicode.icu.tool.cldrtoicu.mapper; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static org.unicode.cldr.api.CldrData.PathOrder.DTD; -import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; -import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import java.util.HashSet; import java.util.List; @@ -14,16 +12,12 @@ import java.util.Optional; import java.util.Set; import org.unicode.cldr.api.CldrData; -import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; -import org.unicode.icu.tool.cldrtoicu.SupplementalData; - -import com.google.common.annotations.VisibleForTesting; /** * Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a @@ -47,50 +41,30 @@ public final class LocaleMapper extends AbstractPathValueMapper { * {@link org.unicode.cldr.api.CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data. * @return IcuData containing locale data for the given locale ID. 
*/ - public static IcuData process( - String localeId, - CldrDataSupplier src, - Optional icuSpecialData, - PathValueTransformer transformer, - SupplementalData supplementalData) { - - return process( - localeId, - src, - icuSpecialData, - transformer, - supplementalData.getDefaultCalendar(localeId)); - } - - @VisibleForTesting // Avoids needing to pass a complete SupplementalData instance in tests. - public static IcuData process( - String localeId, - CldrDataSupplier src, + public static void process( + IcuData icuData, + CldrData unresolved, + CldrData resolved, Optional icuSpecialData, PathValueTransformer transformer, Optional defaultCalendar) { - IcuData icuData = - new LocaleMapper(localeId, src, icuSpecialData, transformer) - .generateIcuData(localeId, true); + new LocaleMapper(unresolved, resolved, icuSpecialData, transformer).addIcuData(icuData); doDateTimeHack(icuData); defaultCalendar.ifPresent(c -> icuData.add(RB_CALENDAR, c)); - return icuData; } - private final String localeId; - private final CldrDataSupplier src; + private final CldrData unresolved; private final Optional icuSpecialData; private LocaleMapper( - String localeId, - CldrDataSupplier src, + CldrData unresolved, + CldrData resolved, Optional icuSpecialData, PathValueTransformer transformer) { - super(src.getDataForLocale(localeId, RESOLVED), transformer); - this.localeId = localeId; - this.src = checkNotNull(src); + super(resolved, transformer); + this.unresolved = checkNotNull(unresolved); this.icuSpecialData = checkNotNull(icuSpecialData); } @@ -102,7 +76,7 @@ public final class LocaleMapper extends AbstractPathValueMapper { private Set collectPaths() { Set validRbPaths = new HashSet<>(); - src.getDataForLocale(localeId, UNRESOLVED) + unresolved .accept(DTD, v -> transformValue(v).forEach(r -> collectResultPath(r, validRbPaths))); return validRbPaths; } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java 
b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java index 45cbf3863d2..d6d89ffc98f 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java @@ -2,9 +2,9 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu.mapper; +import static com.google.common.base.Preconditions.checkNotNull; import static org.unicode.cldr.api.AttributeKey.keyOf; import static org.unicode.cldr.api.CldrData.PathOrder.DTD; -import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; @@ -12,14 +12,12 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.unicode.cldr.api.AttributeKey; import org.unicode.cldr.api.CldrData; import org.unicode.cldr.api.CldrData.PrefixVisitor; -import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrPath; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; -import com.google.common.annotations.VisibleForTesting; import com.google.common.escape.UnicodeEscaper; /** @@ -46,23 +44,18 @@ public final class RbnfMapper { /** * Processes data from the given supplier to generate RBNF data for a set of locale IDs. * - * @param localeId the locale ID to generate data for. - * @param src the CLDR data supplier to process. + * @param icuData the ICU data to be filled. + * @param cldrData the unresolved CLDR data to process. * @param icuSpecialData additional ICU data (in the "icu:" namespace) * @return IcuData containing RBNF data for the given locale ID. 
*/ public static IcuData process( - String localeId, CldrDataSupplier src, Optional icuSpecialData) { + IcuData icuData, CldrData cldrData, Optional icuSpecialData) { - return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData); - } - - @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. - static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) { // Using DTD order is essential here because the RBNF paths contain ordered elements, // so we must ensure that they appear in sorted order (otherwise we'd have to do more // work at this end to re-sort the results). - RulesetVisitor visitor = new RulesetVisitor(localeId); + RulesetVisitor visitor = new RulesetVisitor(icuData); icuSpecialData.ifPresent(s -> s.accept(DTD, visitor)); cldrData.accept(DTD, visitor); return visitor.icuData; @@ -72,8 +65,8 @@ public final class RbnfMapper { private final IcuData icuData; - private RulesetVisitor(String localeId) { - this.icuData = new IcuData(localeId, true); + private RulesetVisitor(IcuData icuData) { + this.icuData = checkNotNull(icuData); } @Override public void visitPrefixStart(CldrPath prefix, Context context) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java index ae6f12c3ee8..b3c57050254 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java @@ -38,7 +38,9 @@ public final class SupplementalMapper extends AbstractPathValueMapper { public static IcuData process( CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) { - return new SupplementalMapper(src, transformer, paths).generateIcuData(icuName, false); + IcuData icuData = new 
IcuData(icuName, false); + new SupplementalMapper(src, transformer, paths).addIcuData(icuData); + return icuData; } private final PathMatcher paths; diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java new file mode 100644 index 00000000000..009cdce8b7e --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java @@ -0,0 +1,152 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.truth.Truth.assertThat; +import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; +import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; + +import java.util.HashSet; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier; + +import com.google.common.collect.ImmutableMap; + +@RunWith(JUnit4.class) +public class AlternateLocaleDataTest { + @Test + public void testLocaleData() { + // Target and source values. + CldrValue target = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name"); + CldrValue source = + ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name"); + // The target path with the source value we expect to be seen in the transformed data. 
+ CldrValue altValue = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Name"); + + // Something that's not transformed. + CldrValue other = + ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$"); + // Something that should only exist in the resolved data. + CldrValue inherited = + ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YYY"); + + FakeDataSupplier src = new FakeDataSupplier() + .addLocaleData("xx", target, source, other) + .addInheritedData("xx", inherited); + CldrDataSupplier transformed = + AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath())); + + CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED); + CldrData resolved = transformed.getDataForLocale("xx", RESOLVED); + + assertValuesUnordered(unresolved, altValue, source, other); + assertValuesUnordered(resolved, altValue, source, other, inherited); + } + + @Test + public void testMissingSource() { + // Target and source values. + CldrValue target = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name"); + CldrValue source = + ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name"); + + FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target); + CldrDataSupplier transformed = + AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath())); + + CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED); + CldrData resolved = transformed.getDataForLocale("xx", RESOLVED); + + // No change because there's nothing to get an alternate value from. + assertValuesUnordered(unresolved, target); + assertValuesUnordered(resolved, target); + } + + @Test + public void testMissingTarget() { + // Target and source values. 
+ CldrValue target = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name"); + CldrValue source = + ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name"); + + FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source); + CldrDataSupplier transformed = + AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath())); + + CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED); + CldrData resolved = transformed.getDataForLocale("xx", RESOLVED); + + // No change because there's nothing to replace. + assertValuesUnordered(unresolved, source); + assertValuesUnordered(resolved, source); + } + + @Test + public void testBadPaths() { + // Target and source values. + CldrPath target = CldrPath.parseDistinguishingPath( + "//ldml/numbers/currencies/currency[@type=\"USD\"]/displayName"); + CldrPath source = CldrPath.parseDistinguishingPath( + "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol"); + + FakeDataSupplier src = new FakeDataSupplier(); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source))); + assertThat(e).hasMessageThat().contains("alternate paths must have the same namespace"); + assertThat(e).hasMessageThat().contains(target.toString()); + assertThat(e).hasMessageThat().contains(source.toString()); + } + + @Test + public void testNonLdml() { + // Real supplemental data with "values" in the value attributes: + // target: territories=[AG AR AS AU ...] + // source: territories=[GB] + // where GB is also listed as having "mon" as the first day in its primary path. + // + // You can see why swapping paths based on 'alt' for supplemental data would be very wrong, + // because it would remove "XX" and "YY" by replacing the value attribute. Supplemental + // and BCP-47 data doesn't have a single value per path, so isn't suitable for swapping.
+ // + // The right way to do this would be to merge the 'territories' attribute and remove the + // alt territory from its original list, but that's very complex and depends on the specific + // meaning of each path in question, and will probably never be supported. + CldrPath target = CldrPath.parseDistinguishingPath( + "//supplementalData/weekData/firstDay[@day=\"sun\"]"); + CldrPath source = CldrPath.parseDistinguishingPath( + "//supplementalData/weekData/firstDay[@day=\"sun\"][@alt=\"variant\"]"); + + FakeDataSupplier src = new FakeDataSupplier(); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source))); + assertThat(e).hasMessageThat().contains("only locale data (LDML) is supported"); + // At least one of the paths should be in the error message, so look for common substring. + assertThat(e).hasMessageThat().contains("/weekData/firstDay[@day=\"sun\"]"); + } + + public static void assertValuesUnordered(CldrData data, CldrValue... values) { + Set captured = new HashSet<>(); + data.accept(ARBITRARY, captured::add); + assertThat(captured).containsExactlyElementsIn(values); + } + + private static CldrValue ldml(String path, String value) { + return CldrValue.parseValue("//ldml/" + path, value); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java new file mode 100644 index 00000000000..603157685a0 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java @@ -0,0 +1,101 @@ +// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.truth.Truth.assertThat; +import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY; +import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; + +import java.util.ArrayList; +import java.util.List; + +import javax.annotation.Nullable; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; + +import com.google.common.collect.ImmutableList; + +@RunWith(JUnit4.class) +public class FilteredDataTest { + @Test + public void testSimple() { + CldrValue keep = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"); + CldrValue remove = + ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$"); + CldrValue replace = + ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YYY"); + CldrValue replacement = + ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZZ"); + + CldrData src = CldrDataSupplier.forValues(ImmutableList.of(keep, remove, replace)); + CldrData filtered = new FilteredData(src) { + @Nullable @Override protected CldrValue filter(CldrValue value) { + if (value.equals(remove)) { + return null; + } else if (value.equals(replace)) { + return replacement; + } else { + return value; + } + } + }; + + List filteredValues = new ArrayList<>(); + filtered.accept(ARBITRARY, filteredValues::add); + assertThat(filteredValues).containsExactly(keep, replacement).inOrder(); + + assertThat(filtered.get(remove.getPath())).isNull(); + assertThat(filtered.get(keep.getPath())).isEqualTo(keep); + assertThat(filtered.get(replace.getPath())).isEqualTo(replacement); + } + + @Test + public void testBadReplacementPath() { + CldrValue replace = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", 
"VALUE"); + CldrValue replacement = + ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "VALUE"); + + CldrData src = CldrDataSupplier.forValues(ImmutableList.of(replace)); + CldrData filtered = new FilteredData(src) { + @Nullable @Override protected CldrValue filter(CldrValue value) { + return replacement; + } + }; + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, () -> filtered.accept(ARBITRARY, v -> {})); + assertThat(e).hasMessageThat().contains("not permitted to modify distinguishing paths"); + assertThat(e).hasMessageThat().contains(replace.toString()); + assertThat(e).hasMessageThat().contains(replacement.toString()); + } + + @Test + public void testBadReplacementAttributes() { + CldrValue replace = + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "XXX"); + CldrValue replacement = + ldml("numbers/currencies/currency[@type=\"GBP\"]/displayName", "XXX"); + + CldrData src = CldrDataSupplier.forValues(ImmutableList.of(replace)); + CldrData filtered = new FilteredData(src) { + @Nullable @Override protected CldrValue filter(CldrValue value) { + return replacement; + } + }; + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, () -> filtered.accept(ARBITRARY, v -> {})); + assertThat(e).hasMessageThat().contains("not permitted to modify distinguishing paths"); + assertThat(e).hasMessageThat().contains(replace.toString()); + assertThat(e).hasMessageThat().contains(replacement.toString()); + } + + private static CldrValue ldml(String path, String value) { + return CldrValue.parseValue("//ldml/" + path, value); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java index 85f6ef2ad2a..cc3f962128d 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java +++ 
b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java @@ -107,4 +107,4 @@ public class IcuDataTest { icuData.replace(fooBar, "another-value"); assertThat(icuData.get(fooBar)).containsExactly(value2); } -} \ No newline at end of file +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java new file mode 100644 index 00000000000..8c2322d30b2 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java @@ -0,0 +1,141 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.truth.Truth.assertThat; +import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; + +import java.util.HashSet; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier; + +@RunWith(JUnit4.class) +public class PseudoLocalesTest { + @Test + public void testExpansion() { + // Target and source values. 
+ CldrPath included = + ldmlPath("localeDisplayNames/languages/language[@type=\"xx\"]"); + CldrPath excluded = + ldmlPath("localeDisplayNames/localeDisplayPattern/localePattern[@alt=\"testing\"]"); + CldrPath pattern = + ldmlPath("dates/timeZoneNames/hourFormat[@alt=\"testing\"]"); + CldrPath narrow = + ldmlPath("dates/fields/field[@type=\"sun-narrow\"]/relative[@type=\"0\"]"); + CldrPath inherited = + ldmlPath("dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard"); + + FakeDataSupplier src = new FakeDataSupplier() + .addLocaleData("en", + value(included, "{Hello} {0} {World} 100x"), + value(excluded, "Skipped"), + value(pattern, "'plus' HH:mm; 'minus' HH:mm"), + value(narrow, "Skipped")) + .addInheritedData("en", + value(inherited, "UTC")); + + CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); + assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB"); + + // The pseudo locale should combine both explicit and inherited data from 'en'. + CldrData unresolved = pseudo.getDataForLocale("en_XA", UNRESOLVED); + + assertValuesUnordered(unresolved, + // Note how {n} placeholders are not affected, but digits elsewhere are. + value(included, "[{Ĥéļļö} {0} {Ŵöŕļð} ①⓪⓪ẋ one two three]"), + // Note the quoting of any padding added to a pattern string. + value(pattern, "['þļûš' HH:mm; 'ɱîñûš' HH:mm 'one' 'two' 'three' 'four']"), + // Value obtained from the resolved "en" data is here in unresolved data. + value(inherited, "[ÛŢÇ one]")); + } + + // This tests behaviour expected by Android (previously patched in earlier ICU versions). + // https://android-review.googlesource.com/c/platform/external/cldr/+/689949 + // In particular the use of "ALM" (U+061c) rather than "RLM" (U+200F) as the BiDi marker. + @Test + public void testBidi() { + // Target and source values (same as above but not including the skipped paths). 
+ CldrPath included = + ldmlPath("localeDisplayNames/languages/language[@type=\"xx\"]"); + CldrPath pattern = + ldmlPath("dates/timeZoneNames/hourFormat[@alt=\"testing\"]"); + CldrPath inherited = + ldmlPath("dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard"); + + FakeDataSupplier src = new FakeDataSupplier() + .addLocaleData("en", + value(included, "{Hello} {0} {World} 100x"), + value(pattern, "'plus' HH:mm; 'minus' HH:mm")) + .addInheritedData("en", + value(inherited, "UTC")); + + CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); + + // The pseudo locale should combine both explicit and inherited data from 'en'. + CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED); + + // These are a kind of golden data test because it's super hard to really reason about + // what should be coming out (note how direction markers are added for the 'x' in 100x). + assertValuesUnordered(unresolved, + value(included, + "{\u061C\u202EHello\u202C\u061C} {0}" + + " {\u061C\u202EWorld\u202C\u061C}" + + " 100\u061C\u202Ex\u202C\u061C"), + value(pattern, + "'\u061C\u202Eplus\u202C\u061C' HH:mm;" + + " '\u061C\u202Eminus\u202C\u061C' HH:mm"), + value(inherited, "\u061C\u202EUTC\u202C\u061C")); + } + + // This tests behaviour expected by Android (previously patched in earlier ICU versions). 
+ // https://android-review.googlesource.com/c/platform/external/cldr/+/689949 + @Test + public void testLatinNumbering() { + CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn"); + FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn); + + CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); + + CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED); + assertValuesUnordered(unresolved, latn); + } + + @Test + public void testExemplars() { + CldrPath exemplarsPath = ldmlPath("characters/exemplarCharacters[@type=\"auxiliary\"]"); + FakeDataSupplier src = + new FakeDataSupplier().addLocaleData("en", value(exemplarsPath, "[ignored]")); + + CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src); + + assertValuesUnordered(pseudo.getDataForLocale("ar_XB", UNRESOLVED), + value(exemplarsPath, "[a b c d e f g h i j k l m n o p q r s t u v w x y z]")); + assertValuesUnordered(pseudo.getDataForLocale("en_XA", UNRESOLVED), + value(exemplarsPath, + "[a å b ƀ c ç d ð e é f ƒ g ĝ h ĥ i î j ĵ k ķ l ļ m ɱ" + + " n ñ o ö p þ q ǫ r ŕ s š t ţ u û v ṽ w ŵ x ẋ y ý z ž]")); + } + + public static void assertValuesUnordered(CldrData data, CldrValue... 
values) { + Set captured = new HashSet<>(); + data.accept(ARBITRARY, captured::add); + assertThat(captured).containsExactlyElementsIn(values); + } + + private static CldrPath ldmlPath(String path) { + return CldrPath.parseDistinguishingPath("//ldml/" + path); + } + + private static CldrValue value(CldrPath path, String value) { + return CldrValue.parseValue(path.toString(), value); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java index d3ee81346a9..5c12b0311c7 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java @@ -17,38 +17,20 @@ import org.unicode.cldr.api.CldrData; import org.unicode.cldr.api.CldrPath; import org.unicode.cldr.api.CldrValue; import org.unicode.icu.tool.cldrtoicu.IcuData; -import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; -import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; import org.unicode.icu.tool.cldrtoicu.testing.FakeResult; import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer; -import com.google.common.collect.ImmutableList; - @RunWith(JUnit4.class) public class AbstractPathValueMapperTest { - @Test - public void testNameAndIcuFallback() { - IcuData foo = new FakeMapper().generateIcuData("foo", false); - IcuData bar = new FakeMapper().generateIcuData("bar", true); - - assertThat(foo).getPaths().isEmpty(); - assertThat(foo).hasName("foo"); - assertThat(foo).hasFallback(false); - - assertThat(bar).getPaths().isEmpty(); - assertThat(bar).hasName("bar"); - assertThat(bar).hasFallback(true); - } - @Test public void testUngroupedConcatenation() { FakeMapper 
mapper = new FakeMapper(); mapper.addUngroupedResult("foo/bar", "one", "two"); mapper.addUngroupedResult("foo/baz", "other", "path"); mapper.addUngroupedResult("foo/bar", "three", "four"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("foo/bar", singletonValues("one", "two", "three", "four")); @@ -61,7 +43,7 @@ public class AbstractPathValueMapperTest { mapper.addGroupedResult("foo/bar", "one", "two"); mapper.addGroupedResult("foo/baz", "other", "path"); mapper.addGroupedResult("foo/bar", "three", "four"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData) @@ -89,25 +71,25 @@ public class AbstractPathValueMapperTest { .addResult(explicit1) .addResult(explicit2) .addResult(explicit3) - .generateIcuData("foo", false); + .addIcuData("foo"); assertThat(noFallback).hasValuesFor("foo/bar", singletonValues("one", "two", "three")); // Missing explicit results trigger fallbacks. IcuData firstFallback = new FakeMapper(transformer) .addResult(explicit2) .addResult(explicit3) - .generateIcuData("foo", false); + .addIcuData("foo"); assertThat(firstFallback).hasValuesFor("foo/bar", singletonValues("", "two", "three")); // Fallbacks can appear in any part of the result sequence. IcuData lastFallbacks = new FakeMapper(transformer) .addResult(explicit1) - .generateIcuData("foo", false); + .addIcuData("foo"); assertThat(lastFallbacks) .hasValuesFor("foo/bar", singletonValues("one", "", "")); // Without a single result to "seed" the fallback group, nothing is emitted. 
- IcuData allFallbacks = new FakeMapper(transformer).generateIcuData("foo", false); + IcuData allFallbacks = new FakeMapper(transformer).addIcuData("foo"); assertThat(allFallbacks).getPaths().isEmpty(); } @@ -119,7 +101,7 @@ public class AbstractPathValueMapperTest { mapper.addUngroupedResult("foo/alias-1", "start", "/alias/target[1]", "end"); mapper.addUngroupedResult("foo/alias-2", "start", "/alias/target[2]", "end"); mapper.addUngroupedResult("alias/target", "first", "second", "third"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).getPaths().hasSize(5); assertThat(icuData) @@ -142,7 +124,7 @@ public class AbstractPathValueMapperTest { mapper.addGroupedResult("foo/bar", "/alias/target[1]"); mapper.addUngroupedResult("alias/target", "first", "second"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData) .hasValuesFor("foo/bar", @@ -157,7 +139,7 @@ public class AbstractPathValueMapperTest { mapper.addUngroupedResult("foo/bar:alias", "/alias/target"); mapper.addUngroupedResult("foo/bar", "/alias/target"); mapper.addUngroupedResult("alias/target", "alias-value"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).getPaths().hasSize(3); assertThat(icuData).hasValuesFor("foo/bar:alias", singletonValues("/alias/target")); @@ -172,7 +154,7 @@ public class AbstractPathValueMapperTest { mapper.addUngroupedResult("first/alias", "hello"); mapper.addUngroupedResult("foo/bar", "/first/alias", "/last/alias"); mapper.addUngroupedResult("last/alias", "world"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world")); } @@ -184,7 +166,7 @@ public class AbstractPathValueMapperTest { 
mapper.addUngroupedResult("alias/target", "hello"); mapper.addUngroupedResult("foo/bar", "/alias/target[0]", "/alias/target[1]"); mapper.addUngroupedResult("alias/target", "world"); - IcuData icuData = mapper.generateIcuData("foo", false); + IcuData icuData = mapper.addIcuData("foo"); assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world")); } @@ -195,7 +177,7 @@ public class AbstractPathValueMapperTest { mapper.addUngroupedResult("alias/target", "value"); mapper.addUngroupedResult("foo/bar", "/no-such-alias/target"); IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false)); + assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo")); assertThat(e).hasMessageThat().contains("no such alias value"); assertThat(e).hasMessageThat().contains("/no-such-alias/target"); } @@ -206,7 +188,7 @@ public class AbstractPathValueMapperTest { mapper.addUngroupedResult("alias/target", "value"); mapper.addUngroupedResult("foo/bar", "/alias/target[1]"); IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false)); + assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo")); assertThat(e).hasMessageThat().contains("out of bounds"); assertThat(e).hasMessageThat().contains("/alias/target[1]"); } @@ -218,7 +200,7 @@ public class AbstractPathValueMapperTest { mapper.addUngroupedResult("other/alias", "/other/alias"); mapper.addUngroupedResult("foo/bar", "/alias/target"); IllegalStateException e = - assertThrows(IllegalStateException.class, () -> mapper.generateIcuData("foo", false)); + assertThrows(IllegalStateException.class, () -> mapper.addIcuData("foo")); assertThat(e).hasMessageThat().contains("recursive alias resolution is not supported"); } @@ -227,7 +209,7 @@ public class AbstractPathValueMapperTest { FakeMapper mapper = new FakeMapper(); mapper.addUngroupedResult("foo/bar:alias", "first", "second"); 
IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false)); + assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo")); assertThat(e).hasMessageThat().contains("explicit aliases must be singleton values"); assertThat(e).hasMessageThat().contains("foo/bar:alias"); } @@ -248,25 +230,6 @@ public class AbstractPathValueMapperTest { } }; - // We could also just use Mockito for this (it's not yet a project dependency however). - private final PathValueTransformer transformer = - new PathValueTransformer() { - @Override public ImmutableList transform(CldrValue cldrValue) { - throw new UnsupportedOperationException("should not be called by test"); - } - - @Override - public ImmutableList transform(CldrValue cldrValue, DynamicVars varFn) { - throw new UnsupportedOperationException("should not be called by test"); - } - - @Override - public ImmutableList getFallbackResultsFor(RbPath key, DynamicVars varFn) { - // TODO: Test fallbacks. - return ImmutableList.of(); - } - }; - // This preserves insertion order in a well defined way (good for testing alias order). private final List fakeResults = new ArrayList<>(); @@ -278,6 +241,13 @@ public class AbstractPathValueMapperTest { super(EXPLODING_DATA, transformer); } + // Helper method to neaten up the tests a bit. + IcuData addIcuData(String localeId) { + IcuData icuData = new IcuData(localeId, true); + addIcuData(icuData); + return icuData; + } + FakeMapper addUngroupedResult(String path, String... 
values) { int index = fakeResults.size() + 1; return addResult(FakeResult.of(path, index, false, values)); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java index f221964011a..d5adde881cd 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java @@ -205,12 +205,12 @@ public class Bcp47MapperTest { // Only the type-map paths/values are split into the timezone data. assertThat(tzData).getPaths().hasSize(4); - assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped"); assertThat(tzData).hasValuesFor("/typeMap/tz/foo_bar", "unescaped"); - - // TODO: Check this is correct! Having foo/bar as the value rather than foo:bar seems wrong. - assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar"); assertThat(tzData).hasValuesFor("/typeAlias/tz/hello_world", "foo_bar"); + + // TODO: Raise bug - having alias target "foo/bar" not match the key "foo:bar" is a bug! + assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped"); + assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar"); } private static CldrData cldrData(CldrValue... 
values) { diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java index d01bf74712b..a4ca46980a0 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java @@ -50,7 +50,8 @@ public class BreakIteratorMapperTest { suppression(SENTENCE_BREAK, "Alt.", ++idx), suppression(SENTENCE_BREAK, "Approx.", ++idx)); - IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + BreakIteratorMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array", @@ -72,7 +73,8 @@ public class BreakIteratorMapperTest { suppression(LINE_BREAK, "Bar", ++idx), suppression(LINE_BREAK, "Baz", ++idx)); - IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + BreakIteratorMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array", @@ -91,7 +93,8 @@ public class BreakIteratorMapperTest { dictionary("foo", ""), dictionary("bar", "")); - IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials)); + IcuData icuData = new IcuData("xx", true); + BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials)); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", ""); @@ -104,7 +107,8 @@ public class BreakIteratorMapperTest { boundaries(GRAPHEME, "", null), boundaries(SENTENCE, "", "altName")); - IcuData icuData = 
BreakIteratorMapper.process("en", cldrData(), Optional.of(specials)); + IcuData icuData = new IcuData("xx", true); + BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials)); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData) diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java index 7af7422f2cf..e039d0e2fd4 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java @@ -22,14 +22,17 @@ import com.google.common.base.Joiner; public class CollationMapperTest { @Test public void testEmpty() { - IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty()); + IcuData icuData = new IcuData("xx", true); + CollationMapper.process(icuData, cldrData(), Optional.empty()); + assertThat(icuData).hasName("xx"); assertThat(icuData).hasFallback(true); assertThat(icuData).getPaths().isEmpty(); // Root gets a couple of special paths added to it due to the need to work around a CLDR // data bug. 
- IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty()); + IcuData rootData = new IcuData("root", true); + CollationMapper.process(rootData, cldrData(), Optional.empty()); assertThat(rootData).hasName("root"); assertThat(rootData).hasFallback(true); assertThat(rootData).getPaths().hasSize(2); @@ -42,7 +45,8 @@ public class CollationMapperTest { CldrData cldrData = cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value")); - IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + CollationMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/collations/default", "any value"); } @@ -61,7 +65,8 @@ public class CollationMapperTest { collationRule("foo", "alt2", "Second alt rule"), collationRule("foo", null, "First rule")); - IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + CollationMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString()); assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule"); @@ -78,7 +83,8 @@ public class CollationMapperTest { "# And more comments to be stripped", "And another value")); - IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + CollationMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Here is a value", "And another value"); @@ -109,7 +115,8 @@ public class CollationMapperTest { + "\uD83D\uDE19", " <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11")); - IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", 
true); + CollationMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString()); @@ -131,7 +138,8 @@ public class CollationMapperTest { CldrValue.parseValue("//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""), CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", "")); - IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.of(specials)); + IcuData icuData = new IcuData("xx", true); + CollationMapper.process(icuData, cldrData(), Optional.of(specials)); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule"); assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps"); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java index 3d3a8e2f63f..f218b4bc3c7 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java @@ -4,6 +4,8 @@ package org.unicode.icu.tool.cldrtoicu.mapper; import static com.google.common.truth.Truth.assertThat; import static java.util.Optional.empty; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; +import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import static org.unicode.cldr.api.CldrValue.parseValue; import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; @@ -14,8 +16,6 @@ import java.util.Optional; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import 
org.unicode.cldr.api.CldrData; -import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrValue; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; @@ -38,9 +38,7 @@ public class LocaleMapperTest { ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"), simpleResult("/durationUnits/foo", "Bar")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); - + IcuData icuData = process("xx"); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar"); } @@ -55,9 +53,7 @@ public class LocaleMapperTest { src.addLocaleData( "zz", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZ")); - IcuData icuData = - LocaleMapper.process("yy", src, empty(), transformer, empty()); - + IcuData icuData = process("yy"); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/durationUnits/foo", "YY"); } @@ -73,8 +69,7 @@ public class LocaleMapperTest { ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"), simpleResult("/Keys/sometype", "Value")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); // The 2nd mapping is not used because it does not appear in the unresolved CldrData. assertThat(icuData).getPaths().hasSize(1); @@ -95,8 +90,7 @@ public class LocaleMapperTest { ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"), simpleResult("/Currencies/USD", 2, "US Dollar")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); // Now the inherited mapping is used because the path appeared for the unresolved CldrData. 
assertThat(icuData).getPaths().hasSize(1); @@ -121,8 +115,7 @@ public class LocaleMapperTest { + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"), simpleResult("/calendar/foo/availableFormats/bar/one", "Bar")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); // Now the inherited mapping is used because the path appeared for the unresolved CldrData. assertThat(icuData).getPaths().hasSize(1); @@ -141,8 +134,7 @@ public class LocaleMapperTest { + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"), simpleResult("/calendar/foo/availableFormats/bar/one", "Bar")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); // Now the inherited mapping is used because the path appeared for the unresolved CldrData. assertThat(icuData).getPaths().hasSize(1); @@ -177,8 +169,7 @@ public class LocaleMapperTest { + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"), simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); assertThat(icuData).getPaths().hasSize(2); assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent"); @@ -246,8 +237,7 @@ public class LocaleMapperTest { + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"), simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent"); @@ -255,9 +245,7 @@ public class LocaleMapperTest { @Test public void testDefaultCalendar() { - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, 
Optional.of("pastafarian")); - + IcuData icuData = process("xx", Optional.of("pastafarian")); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/calendar/default", "pastafarian"); } @@ -302,8 +290,7 @@ public class LocaleMapperTest { format("dateTime", "short", "twelve"), simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve")); - IcuData icuData = - LocaleMapper.process("xx", src, empty(), transformer, empty()); + IcuData icuData = process("xx"); assertThat(icuData).getPaths().hasSize(1); assertThat(icuData).hasValuesFor("/calendar/foo/DateTimePatterns", @@ -351,10 +338,7 @@ public class LocaleMapperTest { format("dateTime", "short", "twelve"), simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve")); - IllegalStateException e = assertThrows( - IllegalStateException.class, - () -> LocaleMapper.process("xx", src, empty(), transformer, empty())); - + IllegalStateException e = assertThrows(IllegalStateException.class, () -> process("xx")); assertThat(e).hasMessageThat().contains("unexpected"); assertThat(e).hasMessageThat().contains("/calendar/foo/DateTimePatterns"); } @@ -368,6 +352,24 @@ public class LocaleMapperTest { type, length, pattern)); } + // ---- Helper methods ---- + + IcuData process(String localeId) { + return process(localeId, empty()); + } + + IcuData process(String localeId, Optional<String> defCalendar) { + IcuData icuData = new IcuData(localeId, true); + LocaleMapper.process( + icuData, + src.getDataForLocale(localeId, UNRESOLVED), + src.getDataForLocale(localeId, RESOLVED), + empty(), + transformer, + defCalendar); + return icuData; + } + private void addMapping(String locale, CldrValue value, Result... results) { src.addLocaleData(locale, value); transformer.addResults(value, results); @@ -386,16 +388,12 @@ public class LocaleMapperTest { return FakeResult.of(path, index, false, value); } - private static CldrData cldrData(CldrValue... 
values) { - return CldrDataSupplier.forValues(Arrays.asList(values)); - } - private static CldrValue ldml(String path) { return ldml(path, ""); } private static CldrValue ldml(String path, String value) { - return parseValue("//ldml/" + path, ""); + return parseValue("//ldml/" + path, value); } private static RbValue[] singletonValues(String... values) { diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java index 841a2e12823..890b6a27a7c 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java @@ -55,7 +55,8 @@ public class RbnfMapperTest { rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ++idx), rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ++idx)); - IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + RbnfMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules", // Double-% prefix for "private" access. @@ -82,7 +83,8 @@ public class RbnfMapperTest { rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ++idx), rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ++idx)); - IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + RbnfMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules", // Single-% prefix for "public" access. 
@@ -115,7 +117,8 @@ public class RbnfMapperTest { rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0", "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ++idx)); - IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials)); + IcuData icuData = new IcuData("xx", true); + RbnfMapper.process(icuData, cldrData, Optional.of(specials)); assertThat(icuData).hasValuesFor("/RBNFRules/OrdinalRules", RbValue.of("%digits-ordinal:"), @@ -146,7 +149,8 @@ public class RbnfMapperTest { rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ++idx), rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ++idx)); - IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); + IcuData icuData = new IcuData("xx", true); + RbnfMapper.process(icuData, cldrData, Optional.empty()); assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules", RbValue.of("%escaping:"),