diff --git a/tools/cldr/cldr-to-icu/build-icu-data.xml b/tools/cldr/cldr-to-icu/build-icu-data.xml
index 665d7150a9c..fa9ab719976 100644
--- a/tools/cldr/cldr-to-icu/build-icu-data.xml
+++ b/tools/cldr/cldr-to-icu/build-icu-data.xml
@@ -39,6 +39,15 @@
CldrDraftStatus for more details. -->
+
+
+
+
+
+
@@ -78,7 +87,9 @@
+ outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}"
+ localeIdFilter="${localeIdFilter}" includePseudoLocales="${includePseudoLocales}"
+ emitReport="${emitReport}">
+
+
+
+
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java
new file mode 100644
index 00000000000..55a3015672f
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleData.java
@@ -0,0 +1,126 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.unicode.cldr.api.CldrDataType.LDML;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrDataType;
+import org.unicode.cldr.api.CldrDraftStatus;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * A factory for data suppliers which can filter CLDR values by substituting values from one path
+ * to another. The replaced value must retain the original "target" path but will have the value
+ * and value attributes of the "source". A value will only be replaced if both the source and
+ * target paths have associated values. The replacement retains its original position in the value
+ * ordering.
+ *
+ * <p>This class DOES NOT transform supplemental or BCP-47 data, because the use of "alt" values
+ * is completely different for that data (it would require merging specific attributes together).
+ *
+ * <p>Note that this is not a general purpose transformation of CLDR data, since it is generally
+ * not possible to "move" values between arbitrary paths. Target and source paths must be in the
+ * same "namespace" (i.e. share the same element names) but attributes can differ.
+ *
+ * <p>Note also that the mapping is not recursive, so mapping {@code A -> B} and {@code B -> C}
+ * will NOT cause {@code A} to be mapped to {@code C}.
+ *
+ * <p>Typically this class is expected to be used for selecting alternate values of locale data
+ * based on the {@code "alt"} path attribute (e.g. selecting the short form of a region name).
+ */
+public final class AlternateLocaleData {
+ /**
+ * Returns a data supplier wrapping {@code src} which replaces locale data values according to
+ * the supplied {@link CldrPath} mapping. Keys in the path map are the "target" paths of values
+ * to be modified, and the values in the map are the "source" paths from which the replacement
+ * values are obtained. Every path must be an LDML path and each target/source pair must share
+ * the same path element names, otherwise an {@link IllegalArgumentException} is thrown.
+ */
+ public static CldrDataSupplier transform(CldrDataSupplier src, Map altPaths) {
+ return new CldrDataFilter(src, altPaths);
+ }
+
+ private static final class CldrDataFilter extends CldrDataSupplier {
+ private final CldrDataSupplier src;
+ // Mapping from target (destination) path to source path. It is keyed by target because two
+ // targets could take their value from the same source.
+ private final ImmutableMap altPaths;
+
+ CldrDataFilter(
+ CldrDataSupplier src, Map altPaths) {
+ this.src = checkNotNull(src);
+ this.altPaths = ImmutableMap.copyOf(altPaths);
+ altPaths.forEach((t, s) -> checkArgument(hasSameNamespace(checkLdml(t), checkLdml(s)),
+ "alternate paths must have the same namespace: target=%s, source=%s", t, s));
+ }
+
+ @Override
+ public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
+ return new CldrDataFilter(src.withDraftStatusAtLeast(draftStatus), altPaths);
+ }
+
+ @Override
+ public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
+ return new AltData(src.getDataForLocale(localeId, resolution));
+ }
+
+ @Override
+ public Set getAvailableLocaleIds() {
+ return src.getAvailableLocaleIds();
+ }
+
+ @Override
+ public CldrData getDataForType(CldrDataType type) {
+ return src.getDataForType(type); // Non-locale data is returned without substitution.
+ }
+
+ private final class AltData extends FilteredData {
+ AltData(CldrData srcData) {
+ super(srcData);
+ }
+
+ @Override
+ protected CldrValue filter(CldrValue value) {
+ CldrPath altPath = altPaths.get(value.getPath());
+ if (altPath != null) {
+ CldrValue altValue = getSourceData().get(altPath); // Unfiltered read: no chaining.
+ if (altValue != null) {
+ return altValue.replacePath(value.getPath());
+ }
+ }
+ return value;
+ }
+ }
+ }
+
+ private static boolean hasSameNamespace(CldrPath x, CldrPath y) {
+ if (x.getLength() != y.getLength()) {
+ return false;
+ }
+ do {
+ if (!x.getName().equals(y.getName())) {
+ return false;
+ }
+ x = x.getParent();
+ y = y.getParent();
+ } while (x != null); // Lengths matched above, so y presumably runs out together with x.
+ return true;
+ }
+
+ private static CldrPath checkLdml(CldrPath path) {
+ checkArgument(path.getDataType() == LDML, "only locale data (LDML) is supported: %s", path);
+ return path;
+ }
+
+ private AlternateLocaleData() {}
+}
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java
new file mode 100644
index 00000000000..62551b0a6e0
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/FilteredData.java
@@ -0,0 +1,67 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import javax.annotation.Nullable;
+
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+
+/**
+ * A class which allows data from some underlying {@link CldrData} source to be filtered or
+ * removed (but not added).
+ */
+// TODO: Once DTD ordering is the only allowed order, this can be extended to allow adding paths.
+abstract class FilteredData implements CldrData {
+ private final CldrData src;
+
+ public FilteredData(CldrData src) {
+ this.src = checkNotNull(src);
+ }
+
+ /** For sub-classes to access the underlying (unfiltered) source data. */
+ protected CldrData getSourceData() {
+ return src;
+ }
+
+ /**
+ * Returns the filtered form of a CLDR value; called for each value during visitation and lookup.
+ * The filtered value can only differ in its base value or value attributes, and must have
+ * the same {@link CldrPath} associated with it (this is checked for every returned value).
+ *
+ * @return the value to use in place of the original, or {@code null} to remove the value.
+ */
+ @Nullable
+ protected abstract CldrValue filter(CldrValue value);
+
+ @Override
+ public void accept(PathOrder order, ValueVisitor visitor) {
+ src.accept(order, v -> visitFiltered(v, visitor)); // Removed values are never visited.
+ }
+
+ @Override
+ public CldrValue get(CldrPath path) {
+ CldrValue value = src.get(path);
+ return value != null ? checkFiltered(value) : null; // filter() never sees absent values.
+ }
+
+ private void visitFiltered(CldrValue value, ValueVisitor visitor) {
+ CldrValue filteredValue = checkFiltered(value);
+ if (filteredValue != null) {
+ visitor.visit(filteredValue);
+ }
+ }
+
+ @Nullable
+ private CldrValue checkFiltered(CldrValue value) {
+ CldrValue filteredValue = filter(value);
+ checkArgument(filteredValue == null || filteredValue.getPath().equals(value.getPath()),
+ "filtering is not permitted to modify distinguishing paths: source=%s, filtered=%s",
+ value, filteredValue);
+ return filteredValue;
+ }
+}
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java
index e8206de9e09..0e9c3930aa7 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java
@@ -4,7 +4,9 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
-import static java.util.stream.Collectors.toList;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.cldr.api.CldrDataType.BCP47;
import static org.unicode.cldr.api.CldrDataType.LDML;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
@@ -32,8 +34,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
-import java.util.function.Consumer;
-import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -56,6 +56,7 @@ import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;
import com.google.common.base.CharMatcher;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.LinkedListMultimap;
@@ -137,69 +138,30 @@ public final class LdmlConverter {
* hide what are essentially implementation specific data splits.
*/
public enum OutputType {
- LOCALES(LDML, LdmlConverter::processLocales),
- BRKITR(LDML, LdmlConverter::processBrkitr),
- COLL(LDML, LdmlConverter::processCollation),
- RBNF(LDML, LdmlConverter::processRbnf),
+ LOCALES(LDML),
+ BRKITR(LDML),
+ COLL(LDML),
+ RBNF(LDML),
+ DAY_PERIODS(SUPPLEMENTAL),
+ GENDER_LIST(SUPPLEMENTAL),
+ LIKELY_SUBTAGS(SUPPLEMENTAL),
+ SUPPLEMENTAL_DATA(SUPPLEMENTAL),
+ CURRENCY_DATA(SUPPLEMENTAL),
+ METADATA(SUPPLEMENTAL),
+ META_ZONES(SUPPLEMENTAL),
+ NUMBERING_SYSTEMS(SUPPLEMENTAL),
+ PLURALS(SUPPLEMENTAL),
+ PLURAL_RANGES(SUPPLEMENTAL),
+ WINDOWS_ZONES(SUPPLEMENTAL),
+ TRANSFORMS(SUPPLEMENTAL),
+ KEY_TYPE_DATA(BCP47);
- DAY_PERIODS(
- SUPPLEMENTAL,
- LdmlConverter::processDayPeriods),
- GENDER_LIST(
- SUPPLEMENTAL,
- c -> c.processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false)),
- LIKELY_SUBTAGS(
- SUPPLEMENTAL,
- c -> c.processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false)),
- SUPPLEMENTAL_DATA(
- SUPPLEMENTAL,
- c -> c.processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true)),
- CURRENCY_DATA(
- SUPPLEMENTAL,
- c -> c.processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", true)),
- METADATA(
- SUPPLEMENTAL,
- c -> c.processSupplemental("metadata", METADATA_PATHS, "misc", false)),
- META_ZONES(
- SUPPLEMENTAL,
- c -> c.processSupplemental("metaZones", METAZONE_PATHS, "misc", false)),
- NUMBERING_SYSTEMS(
- SUPPLEMENTAL,
- c -> c.processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false)),
- PLURALS(
- SUPPLEMENTAL,
- LdmlConverter::processPlurals),
- PLURAL_RANGES(
- SUPPLEMENTAL,
- LdmlConverter::processPluralRanges),
- WINDOWS_ZONES(
- SUPPLEMENTAL,
- c -> c.processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false)),
- TRANSFORMS(
- SUPPLEMENTAL,
- LdmlConverter::processTransforms),
- KEY_TYPE_DATA(
- BCP47,
- LdmlConverter::processKeyTypeData),
-
- // Batching by type.
- DTD_LDML(LDML, c -> c.processAll(LDML)),
- DTD_SUPPLEMENTAL(SUPPLEMENTAL, c -> c.processAll(SUPPLEMENTAL)),
- DTD_BCP47(BCP47, c -> c.processAll(BCP47));
-
- public static final ImmutableSet ALL =
- ImmutableSet.of(DTD_BCP47, DTD_SUPPLEMENTAL, DTD_LDML);
+ public static final ImmutableSet ALL = ImmutableSet.copyOf(OutputType.values());
private final CldrDataType type;
- private final Consumer converterFn;
- OutputType(CldrDataType type, Consumer converterFn) {
+ OutputType(CldrDataType type) {
this.type = checkNotNull(type);
- this.converterFn = checkNotNull(converterFn);
- }
-
- void convert(LdmlConverter converter) {
- converterFn.accept(converter);
}
CldrDataType getCldrType() {
@@ -207,6 +169,17 @@ public final class LdmlConverter {
}
}
+ // Map to convert the rather arbitrarily defined "output types" to the directories into which
+ // the data is written. This is only for "LDML" types since other mappers don't need to split
+ // data into multiple directories.
+ private static final ImmutableListMultimap TYPE_TO_DIR =
+ ImmutableListMultimap.builder()
+ .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE)
+ .putAll(OutputType.BRKITR, BRKITR)
+ .putAll(OutputType.COLL, COLL)
+ .putAll(OutputType.RBNF, RBNF)
+ .build();
+
/** Converts CLDR data according to the given configuration. */
public static void convert(
CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
@@ -252,15 +225,8 @@ public final class LdmlConverter {
}
private void convertAll() {
- ListMultimap groupByType = LinkedListMultimap.create();
- for (OutputType t : config.getOutputTypes()) {
- groupByType.put(t.getCldrType(), t);
- }
- for (CldrDataType cldrType : groupByType.keySet()) {
- for (OutputType t : groupByType.get(cldrType)) {
- t.convert(this);
- }
- }
+ processLdml();
+ processSupplemental();
if (config.emitReport()) {
System.out.println("Supplemental Data Transformer=" + supplementalTransformer);
System.out.println("Locale Data Transformer=" + localeTransformer);
@@ -275,24 +241,6 @@ public final class LdmlConverter {
}
}
- private PathValueTransformer getLocaleTransformer() {
- return localeTransformer;
- }
-
- private PathValueTransformer getSupplementalTransformer() {
- return supplementalTransformer;
- }
-
- private void processAll(CldrDataType cldrType) {
- List targets = Arrays.stream(OutputType.values())
- .filter(t -> t.getCldrType().equals(cldrType))
- .filter(t -> !t.name().startsWith("DTD_"))
- .collect(toList());
- for (OutputType t : targets) {
- t.convert(this);
- }
- }
-
private Optional loadSpecialsData(String localeId) {
String expected = localeId + ".xml";
try (Stream files = Files.walk(config.getSpecialsDir())) {
@@ -310,31 +258,12 @@ public final class LdmlConverter {
}
}
- private void processLocales() {
- // TODO: Pre-load specials files to avoid repeatedly re-loading them.
- processAndSplitLocaleFiles(
- id -> LocaleMapper.process(
- id, src, loadSpecialsData(id), getLocaleTransformer(), supplementalData),
- CURR, LANG, LOCALES, REGION, UNIT, ZONE);
- }
-
- private void processBrkitr() {
- processAndSplitLocaleFiles(
- id -> BreakIteratorMapper.process(id, src, loadSpecialsData(id)), BRKITR);
- }
-
- private void processCollation() {
- processAndSplitLocaleFiles(
- id -> CollationMapper.process(id, src, loadSpecialsData(id)), COLL);
- }
-
- private void processRbnf() {
- processAndSplitLocaleFiles(
- id -> RbnfMapper.process(id, src, loadSpecialsData(id)), RBNF);
- }
-
- private void processAndSplitLocaleFiles(
- Function icuFn, IcuLocaleDir... splitDirs) {
+ private void processLdml() {
+ ImmutableList splitDirs =
+ config.getOutputTypes().stream()
+ .filter(t -> t.getCldrType() == LDML)
+ .flatMap(t -> TYPE_TO_DIR.get(t).stream())
+ .collect(toImmutableList());
SetMultimap writtenLocaleIds = HashMultimap.create();
Path baseDir = config.getOutputDir();
@@ -344,7 +273,20 @@ public final class LdmlConverter {
if (!availableIds.contains(id)) {
continue;
}
- IcuData icuData = icuFn.apply(id);
+
+ IcuData icuData = new IcuData(id, true);
+
+ Optional specials = loadSpecialsData(id);
+ CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
+
+ BreakIteratorMapper.process(icuData, unresolved, specials);
+ CollationMapper.process(icuData, unresolved, specials);
+ RbnfMapper.process(icuData, unresolved, specials);
+
+ CldrData resolved = src.getDataForLocale(id, RESOLVED);
+ Optional defaultCalendar = supplementalData.getDefaultCalendar(id);
+ LocaleMapper.process(
+ icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar);
ListMultimap splitPaths = LinkedListMultimap.create();
for (RbPath p : icuData.getPaths()) {
@@ -399,6 +341,15 @@ public final class LdmlConverter {
}
}
+ private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
+
+ // Resource bundle path elements can have variants (e.g. "Currencies%narrow") or type
+ // annotations (e.g. "languages:intvector"). We strip these when considering the element name.
+ private static String getBaseSegmentName(String segment) {
+ int idx = PATH_MODIFIER.indexIn(segment);
+ return idx == -1 ? segment : segment.substring(0, idx);
+ }
+
private Map getAliasMap(Set localeIds, IcuLocaleDir dir) {
// There are four reasons for treating a locale ID as an alias.
// 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS").
@@ -445,34 +396,69 @@ public final class LdmlConverter {
return aliasMap;
}
- private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
+ private void processSupplemental() {
+ for (OutputType type : config.getOutputTypes()) {
+ if (type.getCldrType() == LDML) {
+ continue;
+ }
+ switch (type) {
+ case DAY_PERIODS:
+ write(DayPeriodsMapper.process(src), "misc");
+ break;
- // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type
- // annotations (e.g. "languages:intvector"). We strip these when considering the element name.
- private static String getBaseSegmentName(String segment) {
- int idx = PATH_MODIFIER.indexIn(segment);
- return idx == -1 ? segment : segment.substring(0, idx);
- }
+ case GENDER_LIST:
+ processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false);
+ break;
- private void processDayPeriods() {
- write(DayPeriodsMapper.process(src), "misc");
- }
+ case LIKELY_SUBTAGS:
+ processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false);
+ break;
- private void processPlurals() {
- write(PluralsMapper.process(src), "misc");
- }
+ case SUPPLEMENTAL_DATA:
+ processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true);
+ break;
- private void processPluralRanges() {
- write(PluralRangesMapper.process(src), "misc");
- }
+ case CURRENCY_DATA:
+ processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", true);
+ break;
- private void processKeyTypeData() {
- Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
- }
+ case METADATA:
+ processSupplemental("metadata", METADATA_PATHS, "misc", false);
+ break;
- private void processTransforms() {
- Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
- write(TransformsMapper.process(src, transformDir, fileHeader), transformDir);
+ case META_ZONES:
+ processSupplemental("metaZones", METAZONE_PATHS, "misc", false);
+ break;
+
+ case NUMBERING_SYSTEMS:
+ processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false);
+ break;
+
+ case PLURALS:
+ write(PluralsMapper.process(src), "misc");
+ break;
+
+ case PLURAL_RANGES:
+ write(PluralRangesMapper.process(src), "misc");
+ break;
+
+ case WINDOWS_ZONES:
+ processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false);
+ break;
+
+ case TRANSFORMS:
+ Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
+ write(TransformsMapper.process(src, transformDir, fileHeader), transformDir);
+ break;
+
+ case KEY_TYPE_DATA:
+ Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
+ break;
+
+ default:
+ throw new AssertionError("Unsupported supplemental type: " + type);
+ }
+ }
}
private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
@@ -480,7 +466,7 @@ public final class LdmlConverter {
private void processSupplemental(
String label, PathMatcher paths, String dir, boolean addCldrVersion) {
IcuData icuData =
- SupplementalMapper.process(src, getSupplementalTransformer(), label, paths);
+ SupplementalMapper.process(src, supplementalTransformer, label, paths);
// A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
// supplemental data XML files.
if (addCldrVersion) {
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java
new file mode 100644
index 00000000000..850d17d9aad
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/PseudoLocales.java
@@ -0,0 +1,387 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
+import static java.util.function.Function.identity;
+import static java.util.regex.Pattern.CASE_INSENSITIVE;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
+
+import java.util.Arrays;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.function.IntUnaryOperator;
+import java.util.function.Predicate;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.IntStream;
+
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrDataSupplier.CldrResolution;
+import org.unicode.cldr.api.CldrDataType;
+import org.unicode.cldr.api.CldrDraftStatus;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+
+/**
+ * A factory for wrapping data suppliers to add synthetic locales for debugging. The currently
+ * supported synthetic locales are:
+ * <ul>
+ * <li>{@code en_XA}: A pseudo locale which generates expanded text with many non-Latin accents.
+ * <li>{@code ar_XB}: A pseudo locale which generates BiDi text for debugging.
+ * </ul>
+ *
+ * Both pseudo locales are based on {@code "en"} data, and generate values which are readable
+ * by English speaking developers. For example, the CLDR value "Hello World" will be turned into
+ * something like:
+ * <ul>
+ * <li>{@code en_XA}: [Ĥéļļö Ŵöŕļð one two]
+ * <li>{@code ar_XB}: dlroW elloH
+ * </ul>
+ *
+ * In the case of BiDi pseudo localization, bi-directional markers are also inserted into the
+ * text so that, if the system using the data is configured correctly, the results will look
+ * "normal" (i.e. Latin text will appear displayed left-to-right because of the BiDi markers).
+ */
+// TODO(CLDR-13381): Move this all into the CLDR API once the dust has settled.
+public final class PseudoLocales {
+ private enum PseudoType { // The supported pseudo locales, each identified by its locale ID.
+ BIDI("ar_XB", PseudoLocales::bidi, "abcdefghijklmnopqrstuvwxyz"),
+ EXPAND("en_XA", PseudoLocales::expanding,
+ "a\u00e5b\u0180c\u00e7d\u00f0e\u00e9f\u0192g\u011dh\u0125i\u00eej\u0135k\u0137l\u013cm"
+ + "\u0271n\u00f1o\u00f6p\u00feq\u01ebr\u0155s\u0161t\u0163u\u00fbv\u1e7dw\u0175"
+ + "x\u1e8by\u00fdz\u017e");
+
+ private static final ImmutableMap ID_MAP =
+ Arrays.stream(values()).collect(toImmutableMap(PseudoType::getLocaleId, identity()));
+
+ private static PseudoType fromId(String localeId) { // Fails for IDs not in ID_MAP.
+ return checkNotNull(ID_MAP.get(localeId), "unknown pseudo locale: %s", localeId);
+ }
+
+ private static ImmutableSet getLocaleIds() {
+ return ID_MAP.keySet();
+ }
+
+ private final String localeId;
+ private final Function textSupplier;
+ // A string whose code points form the exemplar set for the pseudo locale.
+ private final String exemplars;
+
+ PseudoType(String localeId, Function textSupplier, String exemplars) {
+ this.localeId = localeId;
+ this.textSupplier = textSupplier;
+ this.exemplars = exemplars;
+ }
+
+ String getLocaleId() {
+ return localeId;
+ }
+
+ PseudoText getText(boolean isPattern) { // Obtains a text builder from this type's supplier.
+ return textSupplier.apply(isPattern);
+ }
+
+ String getExemplars() {
+ return exemplars;
+ }
+ }
+
+ /**
+ * Returns a wrapped data supplier which injects {@link CldrData} for the pseudo locales
+ * {@code en_XA} and {@code ar_XB}, which can then be processed like any normal locale. Throws
+ * {@link IllegalArgumentException} if {@code src} already provides either of these locale IDs.
+ */
+ public static CldrDataSupplier addPseudoLocalesTo(CldrDataSupplier src) {
+ return new PseudoSupplier(src);
+ }
+
+ private static final class PseudoSupplier extends CldrDataSupplier {
+ private final CldrDataSupplier src;
+ private final Set srcIds;
+ private final CldrData enData;
+
+ PseudoSupplier(CldrDataSupplier src) {
+ this.src = checkNotNull(src);
+ this.srcIds = src.getAvailableLocaleIds();
+ // Use resolved data to ensure we get all the values (e.g. values in "en_001").
+ this.enData = src.getDataForLocale("en", RESOLVED);
+ // Just check that we aren't wrapping an already wrapped supplier.
+ PseudoType.getLocaleIds()
+ .forEach(id -> checkArgument(!srcIds.contains(id),
+ "pseudo locale %s already supported by given data supplier", id));
+ }
+
+ @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
+ return new PseudoSupplier(src.withDraftStatusAtLeast(draftStatus)); // Reloads "en" data.
+ }
+
+ @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
+ if (PseudoType.getLocaleIds().contains(localeId)) {
+ return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId));
+ } else {
+ return src.getDataForLocale(localeId, resolution); // Real locales are untouched.
+ }
+ }
+
+ @Override public Set getAvailableLocaleIds() {
+ return Sets.union(src.getAvailableLocaleIds(), PseudoType.getLocaleIds());
+ }
+
+ @Override public CldrData getDataForType(CldrDataType type) {
+ return src.getDataForType(type); // Non-locale data is passed straight through.
+ }
+ }
+
+ private interface PseudoText { // Accumulates text fragments; toString() gives the final text.
+ void addFragment(String text, boolean isLocalizable); // presumably only localizable text is altered
+ }
+
+ private static final class PseudoLocaleData extends FilteredData {
+ private static final PathMatcher AUX_EXEMPLARS =
+ PathMatcher.of("ldml/characters/exemplarCharacters[@type=\"auxiliary\"]");
+
+ private static final PathMatcher NUMBERING_SYSTEM =
+ PathMatcher.of("ldml/numbers/defaultNumberingSystem");
+
+ // These paths were mostly derived from looking at the previous implementation's behaviour
+ // and can be modified as needed. Notably there are no "units" here (but they were also
+ // excluded in the original code).
+ private static final PathMatcher PSEUDO_PATHS = PathMatcher.anyOf(
+ ldml("localeDisplayNames"),
+ ldml("delimiters"),
+ ldml("dates/calendars/calendar"),
+ ldml("dates/fields"),
+ ldml("dates/timeZoneNames"),
+ ldml("listPatterns"),
+ ldml("posix/messages"),
+ ldml("characterLabels"),
+ ldml("typographicNames"));
+
+ // Paths which contain non-localizable data. It is important that these paths catch all the
+ // non-localizable sub-paths of the list above. This list must be accurate.
+ private static final PathMatcher EXCLUDE_PATHS = PathMatcher.anyOf(
+ ldml("localeDisplayNames/localeDisplayPattern"),
+ ldml("dates/timeZoneNames/fallbackFormat"));
+
+ // The expectation is that all non-alias paths with values under these roots are "date/time
+ // pattern like" (such as "E h:mm:ss B") in which care must be taken to not pseudo localize
+ // the patterns in such a way as to break them. This list must be accurate.
+ private static final PathMatcher PATTERN_PATHS = PathMatcher.anyOf(
+ ldml("dates/calendars/calendar/timeFormats"),
+ ldml("dates/calendars/calendar/dateFormats"),
+ ldml("dates/calendars/calendar/dateTimeFormats"),
+ ldml("dates/timeZoneNames/hourFormat"));
+
+ private static PathMatcher ldml(String matcherSuffix) {
+ return PathMatcher.of("ldml/" + matcherSuffix);
+ }
+
+ // Look for any attribute in the path with "narrow" in its value. Since "narrow" values
+ // have strong expectations of width, we should not expand these (but might alter them
+ // otherwise).
+ private static final Predicate IS_NARROW =
+ Pattern.compile("\\[@[a-z]+=\"[^\"]*narrow[^\"]*\"]", CASE_INSENSITIVE).asPredicate();
+
+ private static final Pattern NUMERIC_PLACEHOLDER = Pattern.compile("\\{\\d+\\}");
+ private static final Pattern QUOTED_TEXT = Pattern.compile("'.*?'");
+
+ private final PseudoType type;
+ private final boolean isResolved;
+
+ private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) {
+ super(srcData);
+ this.isResolved = checkNotNull(resolution) == RESOLVED;
+ this.type = checkNotNull(type);
+ }
+
+ @Override
+ protected CldrValue filter(CldrValue value) {
+ CldrPath path = value.getPath();
+
+ // Special case(s) first...
+ // We add the exemplar character list according to the pseudo type.
+ if (AUX_EXEMPLARS.matches(path)) {
+ return getExemplarValue(path);
+ }
+ // Force "latn" for the "ar_XB" pseudo locale (since otherwise it inherits from "ar").
+ // The path we get here was from "en" so should already be "latn", but we just have
+ // to return it in order for it to take effect.
+ if (type == PseudoType.BIDI && NUMBERING_SYSTEM.matches(path)) {
+ checkArgument(value.getValue().equals("latn"));
+ return value;
+ }
+
+ CldrValue defaultReturnValue = isResolved ? value : null; // null removes the value.
+ // This makes it look like we have explicit values only for the included paths.
+ if (!PSEUDO_PATHS.matchesPrefixOf(path) || EXCLUDE_PATHS.matchesPrefixOf(path)) {
+ return defaultReturnValue;
+ }
+ String fullPath = value.getFullPath();
+ // For now don't do anything with "narrow" data (this matches the previous behaviour).
+ // We can always add something here later if necessary.
+ if (IS_NARROW.test(fullPath)) {
+ return defaultReturnValue;
+ }
+ String text = createMessage(value.getValue(), PATTERN_PATHS.matchesPrefixOf(path));
+ return CldrValue.parseValue(fullPath, text);
+ }
+
+ // It's tempting to think that the existing exemplar list in "en" could be parsed to
+ // generate the list automatically (rather than having a hard coded list in the type) but
+ // https://unicode.org/reports/tr35/tr35-general.html#ExemplarSyntax
+ // makes it quite clear that this is infeasible, since there are many equivalent
+ // representations of the exemplar characters that could appear in the value
+ // (e.g. "[a b ... z]", "[a-z]", "[{a} {b} ... {z}]")
+ private CldrValue getExemplarValue(CldrPath path) {
+ StringBuilder exemplarList = new StringBuilder("[");
+ type.getExemplars().codePoints()
+ .forEach(cp -> exemplarList.appendCodePoint(cp).append(' '));
+ exemplarList.setCharAt(exemplarList.length() - 1, ']'); // Replace the trailing space.
+ return CldrValue.parseValue(path.toString(), exemplarList.toString());
+ }
+
+ private String createMessage(String text, boolean isPattern) {
+ // Pattern text is split by the quoted sections (which are localizable) whereas
+ // non-pattern text is split by placeholders (e.g. {0}) which are not localizable.
+ // This is why "isPattern" is used to signal "isLocalizable" in addFragment().
+ Matcher match = (isPattern ? QUOTED_TEXT : NUMERIC_PLACEHOLDER).matcher(text);
+ // Alternate between unmatched and matched sections in the text, always localizing one
+ // but not the other (depending on the type). Append the trailing section at the end.
+ PseudoText out = type.getText(isPattern);
+ int start = 0;
+ for (; match.find(); start = match.end()) {
+ out.addFragment(text.substring(start, match.start()), !isPattern);
+ out.addFragment(match.group(), isPattern);
+ }
+ out.addFragment(text.substring(start), !isPattern);
+ return out.toString();
+ }
+ }
+
+ // ---- Expanding Pseudo-localizer (e.g. "November" --> "[Ñöṽéɱƀéŕ one two]") ----
+
+ // A mapping built from a string of alternating key/value code points; e.g. '1' -> '①'.
+ // Note that a subset of this is also used to form the "exemplar" set (see PseudoType).
+ private static final IntUnaryOperator CONVERT_CODEPOINT = toCodePointFunction(
+ " \u2003!\u00a1\"\u2033#\u266f$\u20ac%\u2030&\u214b*\u204e+\u207a,\u060c-\u2010.\u00b7"
+ + "/\u20440\u24ea1\u24602\u24613\u24624\u24635\u24646\u24657\u24668\u24679\u2468"
+ + ":\u2236;\u204f<\u2264=\u2242>\u2265?\u00bf@\u055eA\u00c5B\u0181C\u00c7D\u00d0"
+ + "E\u00c9F\u0191G\u011cH\u0124I\u00ceJ\u0134K\u0136L\u013bM\u1e40N\u00d1O\u00d6"
+ + "P\u00deQ\u01eaR\u0154S\u0160T\u0162U\u00dbV\u1e7cW\u0174X\u1e8aY\u00ddZ\u017d"
+ + "[\u2045\\\u2216]\u2046^\u02c4_\u203f`\u2035a\u00e5b\u0180c\u00e7d\u00f0e\u00e9"
+ + "f\u0192g\u011dh\u0125i\u00eej\u0135k\u0137l\u013cm\u0271n\u00f1o\u00f6p\u00fe"
+ + "q\u01ebr\u0155s\u0161t\u0163u\u00fbv\u1e7dw\u0175x\u1e8by\u00fdz\u017e|\u00a6"
+ + "~\u02de");
+
+    // Converts a string of alternating source/target code points into a mapping function.
+    // Code points which do not appear as a source are returned unchanged by the function.
+    // Fails with IllegalArgumentException if the string has an odd number of code points.
+    private static IntUnaryOperator toCodePointFunction(String s) {
+        // Not pretty, but there's no nice way to "pair up" successive stream elements without
+        // extra library dependencies, so we collect them and then iterate via index.
+        int[] codePoints = s.codePoints().toArray();
+        checkArgument((codePoints.length & 1) == 0,
+            "must have an even number of code points (was %s)", codePoints.length);
+        // The explicit type arguments matter: with a raw map, getOrDefault() returns Object
+        // and the lambda below could not be used as an IntUnaryOperator.
+        ImmutableMap<Integer, Integer> map =
+            IntStream.range(0, codePoints.length / 2)
+                .boxed()
+                .collect(toImmutableMap(n -> codePoints[2 * n], n -> codePoints[(2 * n) + 1]));
+        return cp -> map.getOrDefault(cp, cp);
+    }
+
+ // A list of words to be added to text when it is expanded. A whole number of words are
+ // always added (and the fact they are numeric words is irrelevant, could be Lorem Ipsum).
+ // So far nothing goes above "ten" in en_XA, but this can always be trivially extended.
+ private static final String PADDING = "one two three four five six seven eight nine ten";
+
+    // Returns a new PseudoText which converts localizable fragments via CONVERT_CODEPOINT and,
+    // when rendered, wraps the text in "[...]" followed by a whole number of padding words.
+    // The "isPattern" flag controls whether the padding words are quoted as literal text.
+    private static PseudoText expanding(boolean isPattern) {
+        return new PseudoText() {
+            // Accumulated (already converted) text. A StringBuilder is used rather than a
+            // one-shot IntStream.Builder so that toString() can safely be called repeatedly.
+            private final StringBuilder out = new StringBuilder();
+
+            @Override
+            public void addFragment(String text, boolean isLocalizable) {
+                text.codePoints()
+                    .map(isLocalizable ? CONVERT_CODEPOINT : cp -> cp)
+                    .forEach(out::appendCodePoint);
+            }
+
+            @Override
+            public String toString() {
+                int length = out.codePointCount(0, out.length());
+                // Copy the original code and round up the 50% calculation (it's not important).
+                // The start index is clamped to the padding length, and "no whitespace found"
+                // (-1) means "use all the padding", so very long text no longer throws.
+                int minPadding = Math.min((length + 1) / 2, PADDING.length());
+                int endIndex = CharMatcher.whitespace().indexIn(PADDING, minPadding);
+                String suffix = (endIndex >= 0) ? PADDING.substring(0, endIndex) : PADDING;
+                // For pattern strings, any literal text must be quoted (the fragment text
+                // already was). Note that this is why we don't transform single-quotes.
+                if (isPattern) {
+                    suffix = "'" + suffix.replace(" ", "' '") + "'";
+                }
+                // Final output is something like "November" --> "[Ñöṽéɱƀéŕ one two]"
+                // The padding adds at least 50% to the length of the text, unless the text is
+                // so long that all of the available padding words have been used.
+                return "[" + out + " " + suffix + "]";
+            }
+        };
+    }
+
+ // ---- Bidi Pseudo-localizer (e.g. "November" --> "rebmevoN" using BiDi tags)----
+
+ // Right-to-left override character.
+ private static final String RLO = "\u202e";
+ // Arabic letter mark character.
+ private static final String ALM = "\u061C";
+ // Pop direction formatting character.
+ private static final String PDF = "\u202c";
+ // Prefix to add before each LTR word.
+ private static final String BIDI_PREFIX = ALM + RLO;
+ // Postfix to add after each LTR word.
+ private static final String BIDI_POSTFIX = PDF + ALM;
+
+    // Bidi localization doesn't care if the fragment is a pattern or not.
+    @SuppressWarnings("unused")
+    private static PseudoText bidi(boolean isPattern) {
+        return new PseudoText() {
+            private final StringBuilder out = new StringBuilder();
+
+            // This was largely copied from the original CLDRFilePseudolocalizer class.
+            //
+            // Non-localizable fragments are copied through unchanged. In localizable fragments,
+            // every maximal run of code points with LEFT_TO_RIGHT directionality is preceded by
+            // BIDI_PREFIX and followed by BIDI_POSTFIX; all other code points are copied as-is.
+            // TODO: Find someone who can explain why these particular control characters are used.
+            @Override
+            public void addFragment(String text, boolean isLocalizable) {
+                if (!isLocalizable) {
+                    out.append(text);
+                    return;
+                }
+                boolean inLtrRun = false;
+                for (int codePoint : text.codePoints().toArray()) {
+                    boolean isLtr =
+                        Character.getDirectionality(codePoint) == DIRECTIONALITY_LEFT_TO_RIGHT;
+                    if (isLtr != inLtrRun) {
+                        // Entering a run opens the wrapping and leaving a run closes it.
+                        out.append(isLtr ? BIDI_PREFIX : BIDI_POSTFIX);
+                        inLtrRun = isLtr;
+                    }
+                    out.appendCodePoint(codePoint);
+                }
+                // Close any run which is still open at the end of the fragment.
+                if (inLtrRun) {
+                    out.append(BIDI_POSTFIX);
+                }
+            }
+
+            @Override
+            public String toString() {
+                return out.toString();
+            }
+        };
+    }
+
+ private PseudoLocales() {}
+}
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java
index 876393c03c9..f9f3ad9fe1b 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java
@@ -9,19 +9,27 @@ import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.stream.Collectors.joining;
+import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import java.nio.file.Path;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
import java.util.Optional;
+import java.util.function.Predicate;
+import java.util.regex.Pattern;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDraftStatus;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData;
import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
+import org.unicode.icu.tool.cldrtoicu.PseudoLocales;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.base.Ascii;
@@ -31,6 +39,7 @@ import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
@@ -50,6 +59,11 @@ public final class ConvertIcuDataTask extends Task {
// Per directory overrides (fully specified locale IDs).
private final SetMultimap perDirectoryIds = HashMultimap.create();
private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
+    // Don't try and resolve actual paths until inside the execute method.
+    // Maps target path strings to source path strings, as collected from AltPath elements.
+    private final Map<String, String> altPathMap = new HashMap<>();
+    // TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
+    private boolean includePseudoLocales = false;
+    // Filter applied to locale IDs in execute(); accepts every ID unless explicitly set.
+    private Predicate<String> idFilter = id -> true;
@SuppressWarnings("unused")
public void setOutputDir(Path path) {
@@ -83,6 +97,16 @@ public final class ConvertIcuDataTask extends Task {
config.setSpecialsDir(path);
}
+ // Sets whether execute() wraps the CLDR data supplier via PseudoLocales.addPseudoLocalesTo().
+ @SuppressWarnings("unused")
+ public void setIncludePseudoLocales(boolean includePseudoLocales) {
+ this.includePseudoLocales = includePseudoLocales;
+ }
+
+    // Sets the regular expression used to filter the locale IDs which are converted. Note that
+    // Pattern.asPredicate() tests whether the pattern is found anywhere in the ID, so the
+    // expression must be anchored (e.g. "^en_.*$") if a match of the whole ID is required.
+    @SuppressWarnings("unused")
+    public void setLocaleIdFilter(String idFilterRegex) {
+        Pattern compiledFilter = Pattern.compile(idFilterRegex);
+        this.idFilter = compiledFilter.asPredicate();
+    }
+
@SuppressWarnings("unused")
public void setEmitReport(boolean emit) {
config.setEmitReport(emit);
@@ -130,7 +154,7 @@ public final class ConvertIcuDataTask extends Task {
@SuppressWarnings("unused")
public void setDir(String directory) {
- this.dir = resolveOpt(IcuLocaleDir.class, directory);
+ this.dir = resolveDir(directory);
}
@SuppressWarnings("unused")
@@ -150,6 +174,28 @@ public final class ConvertIcuDataTask extends Task {
}
}
+
+    // A nested element naming a "target" path and the "source" path associated with it. The
+    // pairs are collected by addConfiguredAltPath() and handed to AlternateLocaleData in
+    // execute().
+    public static final class AltPath extends Task {
+        private String source = "";
+        private String target = "";
+
+        // Single quotes are converted to double quotes; presumably so that path attributes
+        // can be written in the build file without escaping (TODO confirm).
+        @SuppressWarnings("unused")
+        public void setTarget(String target) {
+            this.target = target.replace('\'', '"');
+        }
+
+        // See setTarget() for why single quotes are replaced.
+        @SuppressWarnings("unused")
+        public void setSource(String source) {
+            this.source = source.replace('\'', '"');
+        }
+
+        // Fails the build if either path is still empty when this is called.
+        @Override
+        public void init() throws BuildException {
+            checkBuild(!source.isEmpty(), "Source path must not be empty");
+            checkBuild(!target.isEmpty(), "Target path must not be empty");
+        }
+    }
+
@SuppressWarnings("unused")
public void addConfiguredLocaleIds(LocaleIds localeIds) {
checkBuild(this.localeIdSpec == null, "Cannot add more that one element");
@@ -172,23 +218,48 @@ public final class ConvertIcuDataTask extends Task {
}
}
+    // Records the target/source pair of a nested AltPath element, failing the build if the
+    // same target was already given.
+    @SuppressWarnings("unused")
+    public void addConfiguredAltPath(AltPath altPath) {
+        // The paths are deliberately kept as strings here, since converting to CldrPath
+        // triggers a bunch of CLDR data loading for the DTDs. Conversion waits until the
+        // "execute()" method, since in future we expect to use the configured CLDR directory
+        // explicitly there.
+        boolean isNewTarget = this.altPathMap.put(altPath.target, altPath.source) == null;
+        checkBuild(isNewTarget, "Duplicate elements (same target): %s", altPath.target);
+    }
+
@SuppressWarnings("unused")
public void execute() throws BuildException {
- CldrDataSupplier src =
- CldrDataSupplier.forCldrFilesIn(cldrPath).withDraftStatusAtLeast(minimumDraftStatus);
+ CldrDataSupplier src = CldrDataSupplier
+ .forCldrFilesIn(cldrPath)
+ .withDraftStatusAtLeast(minimumDraftStatus);
+
+ // We must do this wrapping of the data supplier _before_ creating the supplemental data
+ // instance since adding pseudo locales affects the set of available locales.
+ // TODO: Move some/all of this into the base converter and control it via the config.
+ if (!altPathMap.isEmpty()) {
+ Map pathMap = new HashMap<>();
+ altPathMap.forEach(
+ (t, s) -> pathMap.put(parseDistinguishingPath(t), parseDistinguishingPath(s)));
+ src = AlternateLocaleData.transform(src, pathMap);
+ }
+ if (includePseudoLocales) {
+ src = PseudoLocales.addPseudoLocalesTo(src);
+ }
+
SupplementalData supplementalData = SupplementalData.create(src);
ImmutableSet defaultTargetIds =
LocaleIdResolver.expandTargetIds(this.localeIdSpec, supplementalData);
for (IcuLocaleDir dir : IcuLocaleDir.values()) {
- config.addLocaleIds(dir, perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds));
+ Iterable ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds);
+ config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test));
}
config.setMinimumDraftStatus(minimumDraftStatus);
LdmlConverter.convert(src, supplementalData, config.build());
}
- private static void checkBuild(boolean condition, String message) {
+ private static void checkBuild(boolean condition, String message, Object... args) {
if (!condition) {
- throw new BuildException(message);
+ throw new BuildException(String.format(message, args));
}
}
@@ -199,8 +270,8 @@ public final class ConvertIcuDataTask extends Task {
return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
}
- private static > Optional resolveOpt(Class enumClass, String name) {
- return !name.isEmpty() ? Optional.of(resolve(enumClass, name)) : Optional.empty();
+ // Resolves a directory name to its IcuLocaleDir value, or empty if the name is empty.
+ private static Optional<IcuLocaleDir> resolveDir(String name) {
+ return !name.isEmpty() ? Optional.of(resolve(IcuLocaleDir.class, name)) : Optional.empty();
 }
private static > T resolve(Class enumClass, String name) {
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java
index b896d2126a7..b66b7d1c9bf 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java
@@ -55,12 +55,11 @@ abstract class AbstractPathValueMapper {
}
/**
- * Returns a new {@code IcuData} instance produced by post-processing a set of results
- * generated by calling sub-class method {@link #addResults()}. This is the only method which
- * need be directly invoked by the sub-class implementation (other methods are optionally used
- * from within the {@link #addResults()} callback).
+ * Post-processes results generated by calling the subclass method {@link #addResults()}. This
+ * is the only method which need be directly invoked by the sub-class implementation (other
+ * methods are optionally used from within the {@link #addResults()} callback).
*/
- final IcuData generateIcuData(String icuName, boolean hasFallback) {
+ final void addIcuData(IcuData icuData) {
// This subclass mostly exists to control the fact that results need to be added in one go
// to the IcuData because of how referenced paths are handled. If results could be added in
// multiple passes, you could have confusing situations in which values has path references
@@ -70,9 +69,8 @@ abstract class AbstractPathValueMapper {
checkState(resultsByRbPath.isEmpty(),
"results must not be added outside the call to addResults(): %s", resultsByRbPath);
addResults();
- IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback));
+ addResultsToIcuData(finalizeResults(), icuData);
resultsByRbPath.clear();
- return icuData;
}
/**
@@ -81,7 +79,7 @@ abstract class AbstractPathValueMapper {
* {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the
* results to this mapper using {@link #addResult(RbPath, Result)}.
*
- * This method is called once for each call to {@link #generateIcuData(String, boolean)} and
+ * <p>This method is called once for each call to {@link #addIcuData(IcuData)} and
* is responsible for adding all necessary results for the returned {@link IcuData}.
*/
abstract void addResults();
@@ -145,7 +143,7 @@ abstract class AbstractPathValueMapper {
* having an array index) then the referenced value is assumed to be an existing path whose
* value is then substituted.
*/
- private static IcuData addResultsToIcuData(
+ private static void addResultsToIcuData(
ImmutableListMultimap results, IcuData icuData) {
// Ordering of paths should not matter here (IcuData will re-sort them) and ordering of
@@ -178,7 +176,6 @@ abstract class AbstractPathValueMapper {
}
// This works because insertion order is maintained for values of each path.
map.forEach((p, v) -> icuData.add(p, v.resolve(map)));
- return icuData;
}
/*
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java
index fc6fe5984ae..8451d73d92b 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java
@@ -2,15 +2,14 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
+import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
-import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Optional;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
-import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
@@ -18,7 +17,6 @@ import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.escape.UnicodeEscaper;
/**
@@ -52,21 +50,15 @@ public final class BreakIteratorMapper {
* Processes data from the given supplier to generate break-iterator data for a set of locale
* IDs.
*
- * @param localeId the locale ID to generate data for.
- * @param src the CLDR data supplier to process.
+ * @param icuData the ICU data to be filled.
+ * @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @return IcuData containing break-iterator data for the given locale ID.
*/
public static IcuData process(
- String localeId, CldrDataSupplier src, Optional icuSpecialData) {
+ IcuData icuData, CldrData cldrData, Optional icuSpecialData) {
- CldrData cldrData = src.getDataForLocale(localeId, UNRESOLVED);
- return process(localeId, cldrData, icuSpecialData);
- }
-
- @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
- static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) {
- BreakIteratorMapper mapper = new BreakIteratorMapper(localeId);
+ BreakIteratorMapper mapper = new BreakIteratorMapper(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, mapper::addSpecials));
cldrData.accept(DTD, mapper::addSuppression);
return mapper.icuData;
@@ -75,8 +67,8 @@ public final class BreakIteratorMapper {
// The per-locale ICU data being collected by this visitor.
private final IcuData icuData;
- private BreakIteratorMapper(String localeId) {
- this.icuData = new IcuData(localeId, true);
+ private BreakIteratorMapper(IcuData icuData) {
+ this.icuData = checkNotNull(icuData);
}
private void addSuppression(CldrValue v) {
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java
index 385cadd3cbd..5bef09fc52b 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java
@@ -3,9 +3,9 @@
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
-import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Optional;
@@ -21,7 +21,6 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
@@ -61,20 +60,15 @@ public final class CollationMapper {
/**
* Processes data from the given supplier to generate collation data for a set of locale IDs.
*
- * @param localeId the locale ID to generate data for.
- * @param src the CLDR data supplier to process.
+ * @param icuData the ICU data to be filled.
+ * @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @return IcuData containing RBNF data for the given locale ID.
*/
public static IcuData process(
- String localeId, CldrDataSupplier src, Optional icuSpecialData) {
+ IcuData icuData, CldrData cldrData, Optional icuSpecialData) {
- return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
- }
-
- @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
- static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) {
- CollationVisitor visitor = new CollationVisitor(localeId);
+ CollationVisitor visitor = new CollationVisitor(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
@@ -83,13 +77,13 @@ public final class CollationMapper {
final static class CollationVisitor implements PrefixVisitor {
private final IcuData icuData;
- CollationVisitor(String localeId) {
- this.icuData = new IcuData(localeId, true);
+ CollationVisitor(IcuData icuData) {
+ this.icuData = checkNotNull(icuData);
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty element that's a non-leaf element and thus not
// visited, but we should add an empty sequence to the output data.
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
- if (localeId.equals("root")) {
+ if (icuData.getName().equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
icuData.replace(RB_STANDARD_VERSION, CldrDataSupplier.getCldrVersionString());
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
index ce196662e6f..d3f39f64884 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java
@@ -5,8 +5,6 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
-import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
-import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.HashSet;
import java.util.List;
@@ -14,16 +12,12 @@ import java.util.Optional;
import java.util.Set;
import org.unicode.cldr.api.CldrData;
-import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
-import org.unicode.icu.tool.cldrtoicu.SupplementalData;
-
-import com.google.common.annotations.VisibleForTesting;
/**
* Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
@@ -47,50 +41,30 @@ public final class LocaleMapper extends AbstractPathValueMapper {
* {@link org.unicode.cldr.api.CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data.
* @return IcuData containing locale data for the given locale ID.
*/
- public static IcuData process(
- String localeId,
- CldrDataSupplier src,
- Optional icuSpecialData,
- PathValueTransformer transformer,
- SupplementalData supplementalData) {
-
- return process(
- localeId,
- src,
- icuSpecialData,
- transformer,
- supplementalData.getDefaultCalendar(localeId));
- }
-
- @VisibleForTesting // Avoids needing to pass a complete SupplementalData instance in tests.
- public static IcuData process(
- String localeId,
- CldrDataSupplier src,
+ public static void process(
+ IcuData icuData,
+ CldrData unresolved,
+ CldrData resolved,
Optional icuSpecialData,
PathValueTransformer transformer,
Optional defaultCalendar) {
- IcuData icuData =
- new LocaleMapper(localeId, src, icuSpecialData, transformer)
- .generateIcuData(localeId, true);
+ new LocaleMapper(unresolved, resolved, icuSpecialData, transformer).addIcuData(icuData);
doDateTimeHack(icuData);
defaultCalendar.ifPresent(c -> icuData.add(RB_CALENDAR, c));
- return icuData;
}
- private final String localeId;
- private final CldrDataSupplier src;
+ private final CldrData unresolved;
private final Optional icuSpecialData;
private LocaleMapper(
- String localeId,
- CldrDataSupplier src,
+ CldrData unresolved,
+ CldrData resolved,
Optional icuSpecialData,
PathValueTransformer transformer) {
- super(src.getDataForLocale(localeId, RESOLVED), transformer);
- this.localeId = localeId;
- this.src = checkNotNull(src);
+ super(resolved, transformer);
+ this.unresolved = checkNotNull(unresolved);
this.icuSpecialData = checkNotNull(icuSpecialData);
}
@@ -102,7 +76,7 @@ public final class LocaleMapper extends AbstractPathValueMapper {
private Set collectPaths() {
Set validRbPaths = new HashSet<>();
- src.getDataForLocale(localeId, UNRESOLVED)
+ unresolved
.accept(DTD, v -> transformValue(v).forEach(r -> collectResultPath(r, validRbPaths)));
return validRbPaths;
}
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java
index 45cbf3863d2..d6d89ffc98f 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapper.java
@@ -2,9 +2,9 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
+import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
-import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -12,14 +12,12 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
-import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.escape.UnicodeEscaper;
/**
@@ -46,23 +44,18 @@ public final class RbnfMapper {
/**
* Processes data from the given supplier to generate RBNF data for a set of locale IDs.
*
- * @param localeId the locale ID to generate data for.
- * @param src the CLDR data supplier to process.
+ * @param icuData the ICU data to be filled.
+ * @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @return IcuData containing RBNF data for the given locale ID.
*/
public static IcuData process(
- String localeId, CldrDataSupplier src, Optional icuSpecialData) {
+ IcuData icuData, CldrData cldrData, Optional icuSpecialData) {
- return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
- }
-
- @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
- static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) {
// Using DTD order is essential here because the RBNF paths contain ordered elements,
// so we must ensure that they appear in sorted order (otherwise we'd have to do more
// work at this end to re-sort the results).
- RulesetVisitor visitor = new RulesetVisitor(localeId);
+ RulesetVisitor visitor = new RulesetVisitor(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
@@ -72,8 +65,8 @@ public final class RbnfMapper {
private final IcuData icuData;
- private RulesetVisitor(String localeId) {
- this.icuData = new IcuData(localeId, true);
+ private RulesetVisitor(IcuData icuData) {
+ this.icuData = checkNotNull(icuData);
}
@Override public void visitPrefixStart(CldrPath prefix, Context context) {
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
index ae6f12c3ee8..b3c57050254 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java
@@ -38,7 +38,9 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
public static IcuData process(
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
- return new SupplementalMapper(src, transformer, paths).generateIcuData(icuName, false);
+ IcuData icuData = new IcuData(icuName, false);
+ new SupplementalMapper(src, transformer, paths).addIcuData(icuData);
+ return icuData;
}
private final PathMatcher paths;
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java
new file mode 100644
index 00000000000..009cdce8b7e
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/AlternateLocaleDataTest.java
@@ -0,0 +1,152 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
+import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
+
+import com.google.common.collect.ImmutableMap;
+
+@RunWith(JUnit4.class)
+public class AlternateLocaleDataTest {
+ @Test
+ public void testLocaleData() {
+ // Target and source values.
+ CldrValue target =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name");
+ CldrValue source =
+ ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
+ // The target path with the source value we expect to be seen in the transformed data.
+ CldrValue altValue =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Name");
+
+ // Something that's not transformed.
+ CldrValue other =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$");
+ // Something that should only exist in the resolved data.
+ CldrValue inherited =
+ ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YYY");
+
+ FakeDataSupplier src = new FakeDataSupplier()
+ .addLocaleData("xx", target, source, other)
+ .addInheritedData("xx", inherited);
+ CldrDataSupplier transformed =
+ AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
+
+ CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
+ CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
+
+ assertValuesUnordered(unresolved, altValue, source, other);
+ assertValuesUnordered(resolved, altValue, source, other, inherited);
+ }
+
+ @Test
+ public void testMissingSource() {
+ // Target and source values.
+ CldrValue target =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name");
+ CldrValue source =
+ ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
+
+ FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target);
+ CldrDataSupplier transformed =
+ AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
+
+ CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
+ CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
+
+ // No change because there's nothing to get an alternate value from.
+ assertValuesUnordered(unresolved, target);
+ assertValuesUnordered(resolved, target);
+ }
+
+ @Test
+ public void testMissingTarget() {
+ // Target and source values.
+ CldrValue target =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name");
+ CldrValue source =
+ ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
+
+ FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source);
+ CldrDataSupplier transformed =
+ AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
+
+ CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
+ CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
+
+ // No change because there's nothing to replace.
+ assertValuesUnordered(unresolved, source);
+ assertValuesUnordered(resolved, source);
+ }
+
+ @Test
+ public void testBadPaths() {
+ // Target and source paths (they name different elements, not just different 'alt' values).
+ CldrPath target = CldrPath.parseDistinguishingPath(
+ "//ldml/numbers/currencies/currency[@type=\"USD\"]/displayName");
+ CldrPath source = CldrPath.parseDistinguishingPath(
+ "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol");
+
+ FakeDataSupplier src = new FakeDataSupplier();
+ IllegalArgumentException e = assertThrows(
+ IllegalArgumentException.class,
+ () -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
+ assertThat(e).hasMessageThat().contains("alternate paths must have the same namespace");
+ assertThat(e).hasMessageThat().contains(target.toString());
+ assertThat(e).hasMessageThat().contains(source.toString());
+ }
+
+ @Test
+ public void testNonLdml() {
+ // Real supplemental data with "values" in the value attributes:
+ // target: territories=[AG AR AS AU ...]
+ // source: territories=[GB]
+ // where GB is also listed as having "mon" as the first day in its primary path.
+ //
+ // You can see why swapping paths based on 'alt' for supplemental data would be very wrong,
+ // because it would remove "XX" and "YY" by replacing the value attribute. Supplemental
+ // and BCP-47 data doesn't have a single value per path, so isn't suitable for swapping.
+ //
+ // The right way to do this would be to merge the 'territories' attribute and remove the
+ // alt territory from its original list, but that's very complex and depends on the specific
+ // meaning of each path in question, and will probably never be supported.
+ CldrPath target = CldrPath.parseDistinguishingPath(
+ "//supplementalData/weekData/firstDay[@day=\"sun\"]");
+ CldrPath source = CldrPath.parseDistinguishingPath(
+ "//supplementalData/weekData/firstDay[@day=\"sun\"][@alt=\"variant\"]");
+
+ FakeDataSupplier src = new FakeDataSupplier();
+ IllegalArgumentException e = assertThrows(
+ IllegalArgumentException.class,
+ () -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
+ assertThat(e).hasMessageThat().contains("only locale data (LDML) is supported");
+ // At least one of the paths should be in the error message, so look for common substring.
+ assertThat(e).hasMessageThat().contains("/weekData/firstDay[@day=\"sun\"]");
+ }
+
+ public static void assertValuesUnordered(CldrData data, CldrValue... values) {
+     Set<CldrValue> captured = new HashSet<>();  // typed (not raw) set of every visited value
+     data.accept(ARBITRARY, captured::add);  // visitation order is irrelevant for a set
+     assertThat(captured).containsExactlyElementsIn(values);  // no extra and no missing values
+ }
+
+ private static CldrValue ldml(String path, String value) {
+ return CldrValue.parseValue("//ldml/" + path, value);
+ }
+}
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java
new file mode 100644
index 00000000000..603157685a0
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/FilteredDataTest.java
@@ -0,0 +1,101 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
+import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nullable;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrValue;
+
+import com.google.common.collect.ImmutableList;
+
+@RunWith(JUnit4.class)
+public class FilteredDataTest {
+ @Test
+ public void testSimple() {
+     CldrValue keep =
+         ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar");
+     CldrValue remove =
+         ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$");
+     CldrValue replace =
+         ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YYY");
+     CldrValue replacement =
+         ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZZ");
+     // The filter drops 'remove', swaps 'replace' for 'replacement' and keeps anything else.
+     CldrData src = CldrDataSupplier.forValues(ImmutableList.of(keep, remove, replace));
+     CldrData filtered = new FilteredData(src) {
+         @Nullable @Override protected CldrValue filter(CldrValue value) {
+             if (value.equals(remove)) {
+                 return null;
+             } else if (value.equals(replace)) {
+                 return replacement;
+             } else {
+                 return value;
+             }
+         }
+     };
+     // Visiting the filtered data must only yield the kept and replaced values, in order.
+     List<CldrValue> filteredValues = new ArrayList<>();
+     filtered.accept(ARBITRARY, filteredValues::add);
+     assertThat(filteredValues).containsExactly(keep, replacement).inOrder();
+     // Direct lookup by path must agree with what visitation reported.
+     assertThat(filtered.get(remove.getPath())).isNull();
+     assertThat(filtered.get(keep.getPath())).isEqualTo(keep);
+     assertThat(filtered.get(replace.getPath())).isEqualTo(replacement);
+ }
+
+ @Test
+ public void testBadReplacementPath() {
+ CldrValue replace =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "VALUE");
+ CldrValue replacement =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "VALUE");
+
+ CldrData src = CldrDataSupplier.forValues(ImmutableList.of(replace));
+ CldrData filtered = new FilteredData(src) {
+ @Nullable @Override protected CldrValue filter(CldrValue value) {
+ return replacement;
+ }
+ };
+ IllegalArgumentException e = assertThrows(
+ IllegalArgumentException.class, () -> filtered.accept(ARBITRARY, v -> {}));
+ assertThat(e).hasMessageThat().contains("not permitted to modify distinguishing paths");
+ assertThat(e).hasMessageThat().contains(replace.toString());
+ assertThat(e).hasMessageThat().contains(replacement.toString());
+ }
+
+ @Test
+ public void testBadReplacementAttributes() {
+ CldrValue replace =
+ ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "XXX");
+ CldrValue replacement =
+ ldml("numbers/currencies/currency[@type=\"GBP\"]/displayName", "XXX");
+
+ CldrData src = CldrDataSupplier.forValues(ImmutableList.of(replace));
+ CldrData filtered = new FilteredData(src) {
+ @Nullable @Override protected CldrValue filter(CldrValue value) {
+ return replacement;
+ }
+ };
+ IllegalArgumentException e = assertThrows(
+ IllegalArgumentException.class, () -> filtered.accept(ARBITRARY, v -> {}));
+ assertThat(e).hasMessageThat().contains("not permitted to modify distinguishing paths");
+ assertThat(e).hasMessageThat().contains(replace.toString());
+ assertThat(e).hasMessageThat().contains(replacement.toString());
+ }
+
+ private static CldrValue ldml(String path, String value) {
+ return CldrValue.parseValue("//ldml/" + path, value);
+ }
+}
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java
index 85f6ef2ad2a..cc3f962128d 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java
@@ -107,4 +107,4 @@ public class IcuDataTest {
icuData.replace(fooBar, "another-value");
assertThat(icuData.get(fooBar)).containsExactly(value2);
}
-}
\ No newline at end of file
+}
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java
new file mode 100644
index 00000000000..8c2322d30b2
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PseudoLocalesTest.java
@@ -0,0 +1,141 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.unicode.cldr.api.CldrData;
+import org.unicode.cldr.api.CldrDataSupplier;
+import org.unicode.cldr.api.CldrPath;
+import org.unicode.cldr.api.CldrValue;
+import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
+
+@RunWith(JUnit4.class)
+public class PseudoLocalesTest {
+ @Test
+ public void testExpansion() {
+ // Paths for values which are either transformed or skipped in the pseudo locale data.
+ CldrPath included =
+ ldmlPath("localeDisplayNames/languages/language[@type=\"xx\"]");
+ CldrPath excluded =
+ ldmlPath("localeDisplayNames/localeDisplayPattern/localePattern[@alt=\"testing\"]");
+ CldrPath pattern =
+ ldmlPath("dates/timeZoneNames/hourFormat[@alt=\"testing\"]");
+ CldrPath narrow =
+ ldmlPath("dates/fields/field[@type=\"sun-narrow\"]/relative[@type=\"0\"]");
+ CldrPath inherited =
+ ldmlPath("dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard");
+
+ FakeDataSupplier src = new FakeDataSupplier()
+ .addLocaleData("en",
+ value(included, "{Hello} {0} {World} 100x"),
+ value(excluded, "Skipped"),
+ value(pattern, "'plus' HH:mm; 'minus' HH:mm"),
+ value(narrow, "Skipped"))
+ .addInheritedData("en",
+ value(inherited, "UTC"));
+
+ CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
+ assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB");
+
+ // The pseudo locale should combine both explicit and inherited data from 'en'.
+ CldrData unresolved = pseudo.getDataForLocale("en_XA", UNRESOLVED);
+
+ assertValuesUnordered(unresolved,
+ // Note how {n} placeholders are not affected, but digits elsewhere are.
+ value(included, "[{Ĥéļļö} {0} {Ŵöŕļð} ①⓪⓪ẋ one two three]"),
+ // Note the quoting of any padding added to a pattern string.
+ value(pattern, "['þļûš' HH:mm; 'ɱîñûš' HH:mm 'one' 'two' 'three' 'four']"),
+ // Value obtained from the resolved "en" data is here in unresolved data.
+ value(inherited, "[ÛŢÇ one]"));
+ }
+
+ // This tests behaviour expected by Android (previously patched in earlier ICU versions).
+ // https://android-review.googlesource.com/c/platform/external/cldr/+/689949
+ // In particular the use of "ALM" (U+061c) rather than "RLM" (U+200F) as the BiDi marker.
+ @Test
+ public void testBidi() {
+ // Target and source values (same as above but not including the skipped paths).
+ CldrPath included =
+ ldmlPath("localeDisplayNames/languages/language[@type=\"xx\"]");
+ CldrPath pattern =
+ ldmlPath("dates/timeZoneNames/hourFormat[@alt=\"testing\"]");
+ CldrPath inherited =
+ ldmlPath("dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard");
+
+ FakeDataSupplier src = new FakeDataSupplier()
+ .addLocaleData("en",
+ value(included, "{Hello} {0} {World} 100x"),
+ value(pattern, "'plus' HH:mm; 'minus' HH:mm"))
+ .addInheritedData("en",
+ value(inherited, "UTC"));
+
+ CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
+
+ // The pseudo locale should combine both explicit and inherited data from 'en'.
+ CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
+
+ // These are a kind of golden data test because it's super hard to really reason about
+ // what should be coming out (note how direction markers are added for the 'x' in 100x).
+ assertValuesUnordered(unresolved,
+ value(included,
+ "{\u061C\u202EHello\u202C\u061C} {0}"
+ + " {\u061C\u202EWorld\u202C\u061C}"
+ + " 100\u061C\u202Ex\u202C\u061C"),
+ value(pattern,
+ "'\u061C\u202Eplus\u202C\u061C' HH:mm;"
+ + " '\u061C\u202Eminus\u202C\u061C' HH:mm"),
+ value(inherited, "\u061C\u202EUTC\u202C\u061C"));
+ }
+
+ // This tests behaviour expected by Android (previously patched in earlier ICU versions).
+ // https://android-review.googlesource.com/c/platform/external/cldr/+/689949
+ @Test
+ public void testLatinNumbering() {
+ CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn");
+ FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn);
+
+ CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
+
+ CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
+ assertValuesUnordered(unresolved, latn);
+ }
+
+ @Test
+ public void testExemplars() {
+ CldrPath exemplarsPath = ldmlPath("characters/exemplarCharacters[@type=\"auxiliary\"]");
+ FakeDataSupplier src =
+ new FakeDataSupplier().addLocaleData("en", value(exemplarsPath, "[ignored]"));
+
+ CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
+
+ assertValuesUnordered(pseudo.getDataForLocale("ar_XB", UNRESOLVED),
+ value(exemplarsPath, "[a b c d e f g h i j k l m n o p q r s t u v w x y z]"));
+ assertValuesUnordered(pseudo.getDataForLocale("en_XA", UNRESOLVED),
+ value(exemplarsPath,
+ "[a å b ƀ c ç d ð e é f ƒ g ĝ h ĥ i î j ĵ k ķ l ļ m ɱ"
+ + " n ñ o ö p þ q ǫ r ŕ s š t ţ u û v ṽ w ŵ x ẋ y ý z ž]"));
+ }
+
+ public static void assertValuesUnordered(CldrData data, CldrValue... values) {
+     Set<CldrValue> captured = new HashSet<>();  // typed (not raw) set of every visited value
+     data.accept(ARBITRARY, captured::add);  // visitation order is irrelevant for a set
+     assertThat(captured).containsExactlyElementsIn(values);  // no extra and no missing values
+ }
+
+ private static CldrPath ldmlPath(String path) {
+ return CldrPath.parseDistinguishingPath("//ldml/" + path);
+ }
+
+ private static CldrValue value(CldrPath path, String value) {
+ return CldrValue.parseValue(path.toString(), value);
+ }
+}
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java
index d3ee81346a9..5c12b0311c7 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java
@@ -17,38 +17,20 @@ import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
-import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
-import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
-import com.google.common.collect.ImmutableList;
-
@RunWith(JUnit4.class)
public class AbstractPathValueMapperTest {
- @Test
- public void testNameAndIcuFallback() {
- IcuData foo = new FakeMapper().generateIcuData("foo", false);
- IcuData bar = new FakeMapper().generateIcuData("bar", true);
-
- assertThat(foo).getPaths().isEmpty();
- assertThat(foo).hasName("foo");
- assertThat(foo).hasFallback(false);
-
- assertThat(bar).getPaths().isEmpty();
- assertThat(bar).hasName("bar");
- assertThat(bar).hasFallback(true);
- }
-
@Test
public void testUngroupedConcatenation() {
FakeMapper mapper = new FakeMapper();
mapper.addUngroupedResult("foo/bar", "one", "two");
mapper.addUngroupedResult("foo/baz", "other", "path");
mapper.addUngroupedResult("foo/bar", "three", "four");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("foo/bar", singletonValues("one", "two", "three", "four"));
@@ -61,7 +43,7 @@ public class AbstractPathValueMapperTest {
mapper.addGroupedResult("foo/bar", "one", "two");
mapper.addGroupedResult("foo/baz", "other", "path");
mapper.addGroupedResult("foo/bar", "three", "four");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData)
@@ -89,25 +71,25 @@ public class AbstractPathValueMapperTest {
.addResult(explicit1)
.addResult(explicit2)
.addResult(explicit3)
- .generateIcuData("foo", false);
+ .addIcuData("foo");
assertThat(noFallback).hasValuesFor("foo/bar", singletonValues("one", "two", "three"));
// Missing explicit results trigger fallbacks.
IcuData firstFallback = new FakeMapper(transformer)
.addResult(explicit2)
.addResult(explicit3)
- .generateIcuData("foo", false);
+ .addIcuData("foo");
assertThat(firstFallback).hasValuesFor("foo/bar", singletonValues("", "two", "three"));
// Fallbacks can appear in any part of the result sequence.
IcuData lastFallbacks = new FakeMapper(transformer)
.addResult(explicit1)
- .generateIcuData("foo", false);
+ .addIcuData("foo");
assertThat(lastFallbacks)
.hasValuesFor("foo/bar", singletonValues("one", "", ""));
// Without a single result to "seed" the fallback group, nothing is emitted.
- IcuData allFallbacks = new FakeMapper(transformer).generateIcuData("foo", false);
+ IcuData allFallbacks = new FakeMapper(transformer).addIcuData("foo");
assertThat(allFallbacks).getPaths().isEmpty();
}
@@ -119,7 +101,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("foo/alias-1", "start", "/alias/target[1]", "end");
mapper.addUngroupedResult("foo/alias-2", "start", "/alias/target[2]", "end");
mapper.addUngroupedResult("alias/target", "first", "second", "third");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(5);
assertThat(icuData)
@@ -142,7 +124,7 @@ public class AbstractPathValueMapperTest {
mapper.addGroupedResult("foo/bar", "/alias/target[1]");
mapper.addUngroupedResult("alias/target", "first", "second");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData)
.hasValuesFor("foo/bar",
@@ -157,7 +139,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("foo/bar:alias", "/alias/target");
mapper.addUngroupedResult("foo/bar", "/alias/target");
mapper.addUngroupedResult("alias/target", "alias-value");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(3);
assertThat(icuData).hasValuesFor("foo/bar:alias", singletonValues("/alias/target"));
@@ -172,7 +154,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("first/alias", "hello");
mapper.addUngroupedResult("foo/bar", "/first/alias", "/last/alias");
mapper.addUngroupedResult("last/alias", "world");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world"));
}
@@ -184,7 +166,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("alias/target", "hello");
mapper.addUngroupedResult("foo/bar", "/alias/target[0]", "/alias/target[1]");
mapper.addUngroupedResult("alias/target", "world");
- IcuData icuData = mapper.generateIcuData("foo", false);
+ IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world"));
}
@@ -195,7 +177,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("alias/target", "value");
mapper.addUngroupedResult("foo/bar", "/no-such-alias/target");
IllegalArgumentException e =
- assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
+ assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("no such alias value");
assertThat(e).hasMessageThat().contains("/no-such-alias/target");
}
@@ -206,7 +188,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("alias/target", "value");
mapper.addUngroupedResult("foo/bar", "/alias/target[1]");
IllegalArgumentException e =
- assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
+ assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("out of bounds");
assertThat(e).hasMessageThat().contains("/alias/target[1]");
}
@@ -218,7 +200,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("other/alias", "/other/alias");
mapper.addUngroupedResult("foo/bar", "/alias/target");
IllegalStateException e =
- assertThrows(IllegalStateException.class, () -> mapper.generateIcuData("foo", false));
+ assertThrows(IllegalStateException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("recursive alias resolution is not supported");
}
@@ -227,7 +209,7 @@ public class AbstractPathValueMapperTest {
FakeMapper mapper = new FakeMapper();
mapper.addUngroupedResult("foo/bar:alias", "first", "second");
IllegalArgumentException e =
- assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
+ assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("explicit aliases must be singleton values");
assertThat(e).hasMessageThat().contains("foo/bar:alias");
}
@@ -248,25 +230,6 @@ public class AbstractPathValueMapperTest {
}
};
- // We could also just use Mockito for this (it's not yet a project dependency however).
- private final PathValueTransformer transformer =
- new PathValueTransformer() {
- @Override public ImmutableList transform(CldrValue cldrValue) {
- throw new UnsupportedOperationException("should not be called by test");
- }
-
- @Override
- public ImmutableList transform(CldrValue cldrValue, DynamicVars varFn) {
- throw new UnsupportedOperationException("should not be called by test");
- }
-
- @Override
- public ImmutableList getFallbackResultsFor(RbPath key, DynamicVars varFn) {
- // TODO: Test fallbacks.
- return ImmutableList.of();
- }
- };
-
// This preserves insertion order in a well defined way (good for testing alias order).
private final List fakeResults = new ArrayList<>();
@@ -278,6 +241,13 @@ public class AbstractPathValueMapperTest {
super(EXPLODING_DATA, transformer);
}
+ // Test helper: makes an IcuData (fallback flag true) for the ID and passes it to addIcuData().
+ IcuData addIcuData(String localeId) {
+ IcuData icuData = new IcuData(localeId, true);
+ addIcuData(icuData);
+ return icuData;
+ }
+
FakeMapper addUngroupedResult(String path, String... values) {
int index = fakeResults.size() + 1;
return addResult(FakeResult.of(path, index, false, values));
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java
index f221964011a..d5adde881cd 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java
@@ -205,12 +205,12 @@ public class Bcp47MapperTest {
// Only the type-map paths/values are split into the timezone data.
assertThat(tzData).getPaths().hasSize(4);
- assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped");
assertThat(tzData).hasValuesFor("/typeMap/tz/foo_bar", "unescaped");
-
- // TODO: Check this is correct! Having foo/bar as the value rather than foo:bar seems wrong.
- assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar");
assertThat(tzData).hasValuesFor("/typeAlias/tz/hello_world", "foo_bar");
+
+ // TODO: Raise bug - having alias target "foo/bar" not match the key "foo:bar" is a bug!
+ assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped");
+ assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar");
}
private static CldrData cldrData(CldrValue... values) {
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java
index d01bf74712b..a4ca46980a0 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java
@@ -50,7 +50,8 @@ public class BreakIteratorMapperTest {
suppression(SENTENCE_BREAK, "Alt.", ++idx),
suppression(SENTENCE_BREAK, "Approx.", ++idx));
- IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ BreakIteratorMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
@@ -72,7 +73,8 @@ public class BreakIteratorMapperTest {
suppression(LINE_BREAK, "Bar", ++idx),
suppression(LINE_BREAK, "Baz", ++idx));
- IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ BreakIteratorMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
@@ -91,7 +93,8 @@ public class BreakIteratorMapperTest {
dictionary("foo", ""),
dictionary("bar", ""));
- IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials));
+ IcuData icuData = new IcuData("xx", true);
+ BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", "");
@@ -104,7 +107,8 @@ public class BreakIteratorMapperTest {
boundaries(GRAPHEME, "", null),
boundaries(SENTENCE, "", "altName"));
- IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials));
+ IcuData icuData = new IcuData("xx", true);
+ BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData)
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java
index 7af7422f2cf..e039d0e2fd4 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java
@@ -22,14 +22,17 @@ import com.google.common.base.Joiner;
public class CollationMapperTest {
@Test
public void testEmpty() {
- IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ CollationMapper.process(icuData, cldrData(), Optional.empty());
+
assertThat(icuData).hasName("xx");
assertThat(icuData).hasFallback(true);
assertThat(icuData).getPaths().isEmpty();
// Root gets a couple of special paths added to it due to the need to work around a CLDR
// data bug.
- IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty());
+ IcuData rootData = new IcuData("root", true);
+ CollationMapper.process(rootData, cldrData(), Optional.empty());
assertThat(rootData).hasName("root");
assertThat(rootData).hasFallback(true);
assertThat(rootData).getPaths().hasSize(2);
@@ -42,7 +45,8 @@ public class CollationMapperTest {
CldrData cldrData =
cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));
- IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/collations/default", "any value");
}
@@ -61,7 +65,8 @@ public class CollationMapperTest {
collationRule("foo", "alt2", "Second alt rule"),
collationRule("foo", null, "First rule"));
- IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString());
assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
@@ -78,7 +83,8 @@ public class CollationMapperTest {
"# And more comments to be stripped",
"And another value"));
- IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
"Here is a value",
"And another value");
@@ -109,7 +115,8 @@ public class CollationMapperTest {
+ "\uD83D\uDE19",
" <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));
- IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
@@ -131,7 +138,8 @@ public class CollationMapperTest {
CldrValue.parseValue("//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""),
CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));
- IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.of(specials));
+ IcuData icuData = new IcuData("xx", true);
+ CollationMapper.process(icuData, cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
index 3d3a8e2f63f..f218b4bc3c7 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java
@@ -4,6 +4,8 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.truth.Truth.assertThat;
import static java.util.Optional.empty;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
+import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.cldr.api.CldrValue.parseValue;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
@@ -14,8 +16,6 @@ import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-import org.unicode.cldr.api.CldrData;
-import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
@@ -38,9 +38,7 @@ public class LocaleMapperTest {
ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
simpleResult("/durationUnits/foo", "Bar"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
-
+ IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar");
}
@@ -55,9 +53,7 @@ public class LocaleMapperTest {
src.addLocaleData(
"zz", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZ"));
- IcuData icuData =
- LocaleMapper.process("yy", src, empty(), transformer, empty());
-
+ IcuData icuData = process("yy");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/durationUnits/foo", "YY");
}
@@ -73,8 +69,7 @@ public class LocaleMapperTest {
ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"),
simpleResult("/Keys/sometype", "Value"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
// The 2nd mapping is not used because it does not appear in the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@@ -95,8 +90,7 @@ public class LocaleMapperTest {
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"),
simpleResult("/Currencies/USD", 2, "US Dollar"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
// Now the inherited mapping is used because the path appeared for the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@@ -121,8 +115,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
// Now the inherited mapping is used because the path appeared for the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@@ -141,8 +134,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
// Now the inherited mapping is used because the path appeared for the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@@ -177,8 +169,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
@@ -246,8 +237,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
@@ -255,9 +245,7 @@ public class LocaleMapperTest {
+ // A default calendar handed to the mapper must come out as the single "/calendar/default" value.
@Test
public void testDefaultCalendar() {
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, Optional.of("pastafarian"));
-
+ IcuData icuData = process("xx", Optional.of("pastafarian"));
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/calendar/default", "pastafarian");
}
@@ -302,8 +290,7 @@ public class LocaleMapperTest {
format("dateTime", "short", "twelve"),
simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve"));
- IcuData icuData =
- LocaleMapper.process("xx", src, empty(), transformer, empty());
+ IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/calendar/foo/DateTimePatterns",
@@ -351,10 +338,7 @@ public class LocaleMapperTest {
format("dateTime", "short", "twelve"),
simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve"));
- IllegalStateException e = assertThrows(
- IllegalStateException.class,
- () -> LocaleMapper.process("xx", src, empty(), transformer, empty()));
-
+ IllegalStateException e = assertThrows(IllegalStateException.class, () -> process("xx"));
assertThat(e).hasMessageThat().contains("unexpected");
assertThat(e).hasMessageThat().contains("/calendar/foo/DateTimePatterns");
}
@@ -368,6 +352,24 @@ public class LocaleMapperTest {
type, length, pattern));
}
+ // ---- Helper methods ----
+
+ /**
+  * Maps the given locale without specifying a default calendar.
+  *
+  * @param localeId the locale ID to process.
+  * @return the IcuData produced by {@link #process(String, Optional)} with an empty calendar.
+  */
+ IcuData process(String localeId) {
+     Optional<String> noDefaultCalendar = Optional.empty();
+     return process(localeId, noDefaultCalendar);
+ }
+
+ /**
+  * Creates a fresh {@link IcuData} for the locale and populates it via {@code LocaleMapper.process},
+  * using this test's {@code src} data and {@code transformer}.
+  *
+  * <p>Both the unresolved and the resolved CLDR data for {@code localeId} are fetched from
+  * {@code src} and handed to the mapper.
+  *
+  * @param localeId the locale ID used to name the IcuData and to look up the CLDR data.
+  * @param defCalendar the default calendar forwarded to the mapper, or empty for none.
+  * @return the IcuData instance after the mapper has written to it.
+  */
+ IcuData process(String localeId, Optional<String> defCalendar) {
+     // Second constructor argument is the fallback flag (testEmpty asserts hasFallback(true)).
+     IcuData icuData = new IcuData(localeId, true);
+     LocaleMapper.process(
+         icuData,
+         src.getDataForLocale(localeId, UNRESOLVED),
+         src.getDataForLocale(localeId, RESOLVED),
+         // NOTE(review): presumably the optional ICU "specials" data; confirm against LocaleMapper.
+         empty(),
+         transformer,
+         defCalendar);
+     return icuData;
+ }
+
private void addMapping(String locale, CldrValue value, Result... results) {
src.addLocaleData(locale, value);
transformer.addResults(value, results);
@@ -386,16 +388,12 @@ public class LocaleMapperTest {
return FakeResult.of(path, index, false, value);
}
- private static CldrData cldrData(CldrValue... values) {
- return CldrDataSupplier.forValues(Arrays.asList(values));
- }
-
+ // Shorthand for ldml(path, ""): an LDML value whose string value is empty.
private static CldrValue ldml(String path) {
return ldml(path, "");
}
+ // Parses a CldrValue for the given path suffix (prefixed with "//ldml/") carrying the given value.
private static CldrValue ldml(String path, String value) {
- return parseValue("//ldml/" + path, "");
+ return parseValue("//ldml/" + path, value);
}
private static RbValue[] singletonValues(String... values) {
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java
index 841a2e12823..890b6a27a7c 100644
--- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java
@@ -55,7 +55,8 @@ public class RbnfMapperTest {
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ++idx),
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ++idx));
- IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ RbnfMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules",
// Double-% prefix for "private" access.
@@ -82,7 +83,8 @@ public class RbnfMapperTest {
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ++idx),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ++idx));
- IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ RbnfMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules",
// Single-% prefix for "public" access.
@@ -115,7 +117,8 @@ public class RbnfMapperTest {
rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0",
"=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ++idx));
- IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials));
+ IcuData icuData = new IcuData("xx", true);
+ RbnfMapper.process(icuData, cldrData, Optional.of(specials));
assertThat(icuData).hasValuesFor("/RBNFRules/OrdinalRules",
RbValue.of("%digits-ordinal:"),
@@ -146,7 +149,8 @@ public class RbnfMapperTest {
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ++idx),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ++idx));
- IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
+ IcuData icuData = new IcuData("xx", true);
+ RbnfMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules",
RbValue.of("%escaping:"),