diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java index 563e1efb41d..4e37788c1c8 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java @@ -23,7 +23,6 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; @@ -147,14 +146,6 @@ public final class LdmlConverter { // TODO: Confirm that this has no meaningful effect and unify "empty" file contents. private static RbPath RB_EMPTY_ALIAS = RbPath.of("___"); - /** Provisional entry point until better config support exists. */ - public static void main(String... args) { - convert(IcuConverterConfig.builder() - .setOutputDir(Paths.get(args[0])) - .setEmitReport(true) - .build()); - } - /** * Output types defining specific subsets of the ICU data which can be converted separately. * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java index b6843134b57..071b04fb376 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java @@ -190,12 +190,16 @@ public final class RbPath implements Comparable { return new RbPath(segments.stream().map(fn).collect(toImmutableList())); } - // TODO: Remove this in favour of having properly typed paths. + // TODO: Remove this and isAlias() in favour of having properly typed paths. 
boolean isIntPath() { String lastElement = segments.get(segments.size() - 1); return lastElement.endsWith(":int") || lastElement.endsWith(":intvector"); } + public boolean isAlias() { + return getSegment(length() - 1).endsWith(":alias"); + } + @Override public int compareTo(RbPath other) { return ORDERING.compare(this, other); } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java index 954ebe0c287..b05d67359c2 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/SupplementalData.java @@ -93,7 +93,7 @@ public final class SupplementalData { * @param supplementalData the raw CLDR supplemental data instance. * @return the supplemental data API. */ - static SupplementalData create(CldrData supplementalData) { + public static SupplementalData create(CldrData supplementalData) { Table aliasTable = HashBasedTable.create(); Map parentLocaleMap = new HashMap<>(); Map defaultCalendarMap = new HashMap<>(); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java index 66781a5c5f6..b896d2126a7 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java @@ -3,98 +3,237 @@ package org.unicode.icu.tool.cldrtoicu.mapper; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.Ordering.natural; import java.util.List; 
+import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Stream; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.Iterables; +import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.ListMultimap; +import com.google.common.collect.SetMultimap; /** * An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures * that transformation results are correctly processed when being added to IcuData instances. */ -public abstract class AbstractPathValueMapper { +abstract class AbstractPathValueMapper { + // Matches "/foo/bar" or "/foo/bar[N]" as a resource bundle path, capturing the path and + // optional index separately. Note that this is very sloppy matching and the path string will + // also be parsed via RbPath.parse(). private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$"); - private final IcuData icuData; + private final CldrData cldrData; + private final PathValueTransformer transformer; - AbstractPathValueMapper(String name, boolean hasFallback) { - this.icuData = new IcuData(name, hasFallback); + // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for + // each key. 
The reason is that result comparison is not "consistent with equals", and + // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() + // method), and it does this even if using the add() method of the sorted set (this is in + // fact in violation of the stated behaviour of Set#add). + private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); + + AbstractPathValueMapper(CldrData cldrData, PathValueTransformer transformer) { + this.cldrData = checkNotNull(cldrData); + this.transformer = checkNotNull(transformer); } - /** Implemented by sub-classes to return all results to be added to the IcuData instance. */ - abstract ListMultimap getResults(); - /** - * Adds results to the IcuData instance according to expected {@code PathValueTransformer} - * semantics. This method must only be called once per mapper. + * Returns a new {@code IcuData} instance produced by post-processing a set of results + * generated by calling sub-class method {@link #addResults()}. This is the only method which + * need be directly invoked by the sub-class implementation (other methods are optionally used + * from within the {@link #addResults()} callback). */ - final IcuData transform() { - checkState(icuData.getPaths().isEmpty(), - "transform() method cannot be called multiple times: %s", icuData); - + final IcuData generateIcuData(String icuName, boolean hasFallback) { // This subclass mostly exists to control the fact that results need to be added in one go // to the IcuData because of how referenced paths are handled. If results could be added in // multiple passes, you could have confusing situations in which values has path references // in them but the referenced paths have not been transformed yet. Forcing the subclass to // implement a single method to generate all results at once ensures that we control the // lifecycle of the data and how results are processed as they are added to the IcuData. 
- addResults(getResults()); + checkState(resultsByRbPath.isEmpty(), + "results must not be added outside the call to addResults(): %s", resultsByRbPath); + addResults(); + IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback)); + resultsByRbPath.clear(); return icuData; } /** - * Adds transformation results on the specified multi-map to this data instance. Results are - * handled differently according to whether they are grouped, or represent an alias value. If - * the value of an ungrouped result is itself a resource bundle path (including possibly having - * an array index) then the referenced value is assumed to be an existing path whose value is - * then substituted. + * Implemented by sub-classes to return all results to be added to the IcuData instance. The + * primary job of this callback is to generate transformed results (typically by calling + * {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the + * results to this mapper using {@link #addResult(RbPath, Result)}. + * + *

This method is called once for each call to {@link #generateIcuData(String, boolean)} and + * is responsible for adding all necessary results for the returned {@link IcuData}. */ - // TODO: Fix this to NOT implicitly rely of ordering of referenced values. - private void addResults(ListMultimap resultsByRbPath) { - for (RbPath rbPath : resultsByRbPath.keySet()) { - for (Result r : resultsByRbPath.get(rbPath)) { - if (r.isGrouped()) { - // Grouped results have all the values in a single value entry. - icuData.add(rbPath, RbValue.of(r.getValues())); - } else { - if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) { - r.getValues().forEach(v -> icuData.add(rbPath, RbValue.of(v))); - } else { - // Ungrouped results are one value per entry, but might be expanded into - // grouped results if they are a path referencing a grouped entry. - r.getValues().forEach(v -> icuData.add(rbPath, replacePathValues(v))); - } - } - } - } + abstract void addResults(); + + /** + * Returns the CLDR data used for this transformation. Note that a subclass mapper might have + * other data for different purposes, but this data instance is the one from which variables + * are resolved. A sub-class mapper might access this for additional processing. + */ + final CldrData getCldrData() { + return cldrData; } /** - * Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed - * to be a reference to an existing value in a resource bundle. Note that the referenced bundle - * might be grouped (i.e. an array with more than one element). + * Transforms a single value into a sequence of results using this mapper's {@link + * PathValueTransformer}, which can be added to the mapper (possibly after optional + * post-processing). 
*/ - private RbValue replacePathValues(String value) { - Matcher m = ARRAY_INDEX.matcher(value); - if (!m.matches()) { - return RbValue.of(value); + final Stream transformValue(CldrValue value) { + return transformer.transform(value, this::getVarsFn).stream(); + } + + /** + * Adds a transformed result to the mapper. This should be called by the sub-class mapper in + * its implementation of the {@link #addResults()} method. + * + *

Note that the given path will often (but not always) be just the path of the result. + */ + final void addResult(RbPath path, Result result) { + resultsByRbPath.put(path, result); + } + + // Callback function used by the transform() method to resolve variables from CLDR data. + private String getVarsFn(CldrPath p) { + CldrValue cldrValue = cldrData.get(p); + return cldrValue != null ? cldrValue.getValue() : null; + } + + // Fills in any fallback results and orders the results by the resource bundle path. + private ImmutableListMultimap finalizeResults() { + ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); + out.orderValuesBy(natural()); + for (RbPath rbPath : resultsByRbPath.keySet()) { + Set existingResults = resultsByRbPath.get(rbPath); + out.putAll(rbPath, existingResults); + for (Result fallback : transformer.getFallbackResultsFor(rbPath, this::getVarsFn)) { + if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { + out.put(rbPath, fallback); + } + } } - // The only constraint is that the "path" value starts with a leading '/', but parsing into - // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the - // captured value contains '/' characters to represent path delimiters. - RbPath replacePath = RbPath.parse(m.group(1)); - List replaceValues = icuData.get(replacePath); - checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath); - // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]"). - int replaceIndex = m.groupCount() > 1 ? Integer.parseInt(m.group(2)) : 0; - return replaceValues.get(replaceIndex); + return out.build(); + } + + /** + * Adds transformation results on the specified multi-map to this data instance. Results are + * processed in list order and handled differently according to whether they are grouped, or + * represent an alias value. 
+ * + * If the value of an ungrouped result is itself a resource bundle path (including possibly + * having an array index) then the referenced value is assumed to be an existing path whose + * value is then substituted. + */ + private static IcuData addResultsToIcuData( + ImmutableListMultimap results, IcuData icuData) { + + // Ordering of paths should not matter here (IcuData will re-sort them) and ordering of + // values for a given key is preserved by list multimaps. + ListMultimap map = ArrayListMultimap.create(); + + // IMPORTANT: This code MUST use the keys of the results map (rather than extracting the + // paths from the results). This is because paths can be post-processed after the result + // is obtained, which can affect output ordering as well as the path mappings. + for (RbPath rbPath : results.keySet()) { + for (Result r : results.get(rbPath)) { + if (r.isGrouped()) { + // Grouped results have all values in a single entry and cannot be aliases. + map.put(rbPath, ValueOrAlias.value(RbValue.of(r.getValues()))); + } else if (rbPath.isAlias()) { + // Aliases (which should be single values) are not expanded to their referenced + // values (whereas non-aliases might be). This is really just a hack to work + // around the fact that RbPath/RbValue is not properly typed and we have to use + // heuristics to determine whether to replace a resource bundle path with its + // referenced value. + checkArgument(r.getValues().size() == 1, + "explicit aliases must be singleton values: %s", r); + map.put(rbPath, ValueOrAlias.value(Iterables.getOnlyElement(r.getValues()))); + } else { + // Ungrouped results are one value per entry, but might later be expanded into + // grouped results if they are a path referencing a grouped entry. + r.getValues().forEach(v -> map.put(rbPath, ValueOrAlias.parse(v))); + } + } + } + // This works because insertion order is maintained for values of each path. 
+ map.forEach((p, v) -> icuData.add(p, v.resolve(map))); + return icuData; + } + + /* + * An unfortunately messy little interface to handle the way that aliases are defined in the + * path value mappers. A mapper Result is permitted to contain values which are actually + * aliases to other resource bundle elements. This is typically used in fallback values, where + * the fallback is a functional value. For example: + * fallback=/weekData/001:intvector[0] + * + * This is messy because when we process the Results from the mapper to put them into the + * IcuData instance, we cannot be sure we can resolve these "aliases" at the time that they + * are encountered (the target value might not be present yet). So we need to wait until + * all the values are in place and then do a 2nd pass to resolve things. + * + * So far path replacement is strictly limited to fallback results, so perhaps it could be + * handled more directly in the Result class, though it is possible for a single result to + * contain multiple path references: + * fallback=/weekData/001:intvector[2] /weekData/001:intvector[3] + */ + private interface ValueOrAlias { + // A simple value doesn't need resolving, and doesn't care if the given map is null (*). + static ValueOrAlias value(RbValue v) { + return src -> v; + } + + // Helper for (common) singleton values. + static ValueOrAlias value(String v) { + return value(RbValue.of(v)); + } + + static ValueOrAlias parse(String valueOrAlias) { + Matcher m = ARRAY_INDEX.matcher(valueOrAlias); + if (!m.matches()) { + return value(valueOrAlias); + } + // The only constraint is that the "path" value starts with a leading '/', but parsing into + // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the + // captured value contains '/' characters to represent path delimiters. + RbPath path = RbPath.parse(m.group(1)); + // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]"). 
+ int index = m.group(2) != null ? Integer.parseUnsignedInt(m.group(2)) : 0; + return src -> { + checkState(src != null, "recursive alias resolution is not supported"); + List values = src.get(path); + checkArgument(!values.isEmpty(), "no such alias value: /%s", path); + checkArgument(index < values.size(), + "index for alias /%s[%s] is out of bounds", path, index); + // By passing 'null' to the recursive call to resolve, we prevent the resolution + // from being recursive (*). This could be changed to pass 'src' and achieve + // arbitrary recursive resolving if needed, but that's currently unnecessary (and + // should probably be guarded against unbounded recursion if it is ever enabled). + return values.get(index).resolve(null); + }; + } + + RbValue resolve(ListMultimap src); } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java index 3cb20a4cf05..ce196662e6f 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java @@ -2,10 +2,8 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu.mapper; -import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; -import static com.google.common.collect.Ordering.natural; import static org.unicode.cldr.api.CldrData.PathOrder.DTD; import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; @@ -16,22 +14,16 @@ import java.util.Optional; import java.util.Set; import org.unicode.cldr.api.CldrData; -import org.unicode.cldr.api.CldrData.ValueVisitor; import 
org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; -import org.unicode.cldr.api.CldrValue; import org.unicode.icu.tool.cldrtoicu.IcuData; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; -import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; import org.unicode.icu.tool.cldrtoicu.SupplementalData; -import com.google.common.collect.ImmutableListMultimap; -import com.google.common.collect.LinkedHashMultimap; -import com.google.common.collect.ListMultimap; -import com.google.common.collect.SetMultimap; +import com.google.common.annotations.VisibleForTesting; /** * Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a @@ -45,8 +37,7 @@ public final class LocaleMapper extends AbstractPathValueMapper { private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default"); /** - * Processes data from the given supplier to generate general locale data for the given locale - * ID. + * Processes data from the given supplier to generate general locale data for the given locale. * * @param localeId the locale ID to generate data for. * @param src the CLDR data supplier to process. @@ -63,13 +54,81 @@ public final class LocaleMapper extends AbstractPathValueMapper { PathValueTransformer transformer, SupplementalData supplementalData) { - IcuData icuData = new LocaleMapper(localeId, src, icuSpecialData, transformer).transform(); + return process( + localeId, + src, + icuSpecialData, + transformer, + supplementalData.getDefaultCalendar(localeId)); + } + + @VisibleForTesting // Avoids needing to pass a complete SupplementalData instance in tests. 
+ public static IcuData process( + String localeId, + CldrDataSupplier src, + Optional icuSpecialData, + PathValueTransformer transformer, + Optional defaultCalendar) { + + IcuData icuData = + new LocaleMapper(localeId, src, icuSpecialData, transformer) + .generateIcuData(localeId, true); doDateTimeHack(icuData); - supplementalData.getDefaultCalendar(icuData.getName()) - .ifPresent(c -> icuData.add(RB_CALENDAR, c)); + defaultCalendar.ifPresent(c -> icuData.add(RB_CALENDAR, c)); return icuData; } + private final String localeId; + private final CldrDataSupplier src; + private final Optional icuSpecialData; + + private LocaleMapper( + String localeId, + CldrDataSupplier src, + Optional icuSpecialData, + PathValueTransformer transformer) { + + super(src.getDataForLocale(localeId, RESOLVED), transformer); + this.localeId = localeId; + this.src = checkNotNull(src); + this.icuSpecialData = checkNotNull(icuSpecialData); + } + + @Override + void addResults() { + collectResults(collectPaths()); + icuSpecialData.ifPresent(this::collectSpecials); + } + + private Set collectPaths() { + Set validRbPaths = new HashSet<>(); + src.getDataForLocale(localeId, UNRESOLVED) + .accept(DTD, v -> transformValue(v).forEach(r -> collectResultPath(r, validRbPaths))); + return validRbPaths; + } + + private static void collectResultPath(Result result, Set validRbPaths) { + RbPath rbPath = result.getKey(); + validRbPaths.add(rbPath); + if (rbPath.isAnonymous()) { + RbPath parent = rbPath.getParent(); + checkState(!parent.isAnonymous(), "anonymous paths must not be nested: %s", rbPath); + validRbPaths.add(parent); + } + } + + private void collectResults(Set validRbPaths) { + getCldrData().accept(DTD, + v -> transformValue(v) + .filter(r -> validRbPaths.contains(r.getKey())) + .forEach(result -> addResult(result.getKey(), result))); + } + + private void collectSpecials(CldrData specials) { + specials.accept(DTD, + v -> transformValue(v).forEach(result -> addResult(result.getKey(), result))); + } 
+ // This is an awful hack for post-processing the date-time format patterns to inject a 13th // pattern at index 8, which is just a duplicate of the "medium" date-time pattern. The reasons // for this are lost in the midst of time, but essentially there's ICU library code that just @@ -87,94 +146,10 @@ public final class LocaleMapper extends AbstractPathValueMapper { && rbPath.getSegment(2).equals("DateTimePatterns")) { // This cannot be null and should not be empty, since the path is in this data. List valuesToHack = icuData.get(rbPath); - checkArgument(valuesToHack.size() == 12, - "unexpected number of date/time patterns for '%s': %s", rbPath, valuesToHack); + checkState(valuesToHack.size() == 12, + "unexpected number of date/time patterns for '/%s': %s", rbPath, valuesToHack); valuesToHack.add(8, valuesToHack.get(10)); } } } - - private final String localeId; - private final CldrDataSupplier src; - private final Optional icuSpecialData; - private final PathValueTransformer transformer; - - private final Set validRbPaths = new HashSet<>(); - - // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for - // each key. The reason is that result comparison is not "consistent with equals", and - // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() - // method), and it does this even if using the add() method of the sorted set (this is in - // fact in violation of the stated behaviour of Set#add). 
- private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); - - private LocaleMapper( - String localeId, - CldrDataSupplier src, - Optional icuSpecialData, - PathValueTransformer transformer) { - - super(localeId, true); - this.localeId = localeId; - this.src = checkNotNull(src); - this.icuSpecialData = checkNotNull(icuSpecialData); - this.transformer = checkNotNull(transformer); - } - - @Override - ListMultimap getResults() { - CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED); - CldrData resolved = src.getDataForLocale(localeId, RESOLVED); - DynamicVars varFn = p -> { - CldrValue cldrValue = resolved.get(p); - return cldrValue != null ? cldrValue.getValue() : null; - }; - - collectPaths(unresolved, varFn); - collectResults(resolved, varFn); - icuSpecialData.ifPresent(s -> collectSpecials(s, varFn)); - - ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); - out.orderValuesBy(natural()); - for (RbPath rbPath : resultsByRbPath.keySet()) { - Set existingResults = resultsByRbPath.get(rbPath); - out.putAll(rbPath, existingResults); - for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) { - if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { - out.put(rbPath, fallback); - } - } - } - return out.build(); - } - - private void collectPaths(CldrData unresolved, DynamicVars varFn) { - ValueVisitor collectPaths = - v -> transformer.transform(v, varFn).forEach(this::collectResultPath); - unresolved.accept(DTD, collectPaths); - } - - private void collectResultPath(Result result) { - RbPath rbPath = result.getKey(); - validRbPaths.add(rbPath); - if (rbPath.isAnonymous()) { - RbPath parent = rbPath.getParent(); - checkState(!parent.isAnonymous(), - "anonymous paths should not be nested: %s", rbPath); - validRbPaths.add(parent); - } - } - - private void collectResults(CldrData resolved, DynamicVars varFn) { - ValueVisitor collectResults = - v -> transformer.transform(v, varFn).stream() - .filter(r -> 
validRbPaths.contains(r.getKey())) - .forEach(r -> resultsByRbPath.put(r.getKey(), r)); - resolved.accept(DTD, collectResults); - } - - private void collectSpecials(CldrData cldrData, DynamicVars varFn) { - cldrData.accept(DTD, v -> - transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r))); - } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java index 0f6885b9637..c8b81007702 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java @@ -3,12 +3,8 @@ package org.unicode.icu.tool.cldrtoicu.mapper; import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.collect.Ordering.natural; import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING; -import java.util.Set; - -import org.unicode.cldr.api.CldrData; import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; import org.unicode.cldr.api.CldrValue; @@ -18,10 +14,6 @@ import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; import org.unicode.icu.tool.cldrtoicu.RbPath; -import com.google.common.collect.ImmutableListMultimap; -import com.google.common.collect.LinkedHashMultimap; -import com.google.common.collect.SetMultimap; - /** * Generate supplemental {@link IcuData} by transforming {@link CldrDataType#SUPPLEMENTAL * SUPPLEMENTAL} data using a {@link PathValueTransformer}. 
@@ -46,70 +38,48 @@ public final class SupplementalMapper extends AbstractPathValueMapper { public static IcuData process( CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) { - return new SupplementalMapper(src, transformer, icuName, paths).transform(); + return new SupplementalMapper(src, transformer, paths).generateIcuData(icuName, false); } - private final CldrDataSupplier src; private final PathMatcher paths; - private final PathValueTransformer transformer; - - // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for - // each key. The reason is that result comparison is not "consistent with equals", and - // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() - // method), and it does this even if using the add() method of the sorted set (this is in - // fact in violation of the stated behaviour of Set#add). - private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); private int fifoCounter = 0; private SupplementalMapper( - CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) { + CldrDataSupplier src, PathValueTransformer transformer, PathMatcher pathFilter) { - super(icuName, false); - this.src = checkNotNull(src); - this.paths = checkNotNull(paths); - this.transformer = checkNotNull(transformer); + super(src.getDataForType(CldrDataType.SUPPLEMENTAL), transformer); + this.paths = checkNotNull(pathFilter); } @Override - ImmutableListMultimap getResults() { + void addResults() { // DTD and NESTED_GROUPING order differ because of how the magic label works (it // basically enforces "encounter order" onto things in unlabeled sequences, which matches // the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order // to remove any lingering implicit dependencies on the CLDR data behaviour. 
- CldrData supplementalData = src.getDataForType(CldrDataType.SUPPLEMENTAL); - PathValueTransformer.DynamicVars varFn = p -> { - CldrValue cldrValue = supplementalData.get(p); - return cldrValue != null ? cldrValue.getValue() : null; - }; - - supplementalData.accept(NESTED_GROUPING, this::visit); - - ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); - out.orderValuesBy(natural()); - for (RbPath rbPath : resultsByRbPath.keySet()) { - Set existingResults = resultsByRbPath.get(rbPath); - out.putAll(rbPath, existingResults); - for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) { - if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { - out.put(rbPath, fallback); - } - } - } - return out.build(); + getCldrData().accept(NESTED_GROUPING, this::visit); } private void visit(CldrValue value) { if (paths.matchesPrefixOf(value.getPath())) { - for (Result r : transformer.transform(value)) { - RbPath rbPath = r.getKey(); - if (rbPath.contains(RB_FIFO)) { - // The fifo counter needs to be formatted with leading zeros for sorting. - rbPath = rbPath.mapSegments( - s -> s.equals("") ? String.format("<%04d>", fifoCounter) : s); - } - resultsByRbPath.put(rbPath, r); - } + transformValue(value).forEach(this::collectResult); fifoCounter++; } } + + // hidden labels could be supported in the abstract mapper, but would need a "bulk" add + // method for results (since the counter is updated once per batch, which corresponds to once + // per rule). Having the same FIFO counter value for the same group of values is essential + // since it serves to group them. + // + // TODO: Improve this and push this up into the abstract class (so it works with LocaleMapper). + private void collectResult(Result r) { + RbPath rbPath = r.getKey(); + if (rbPath.contains(RB_FIFO)) { + // The fifo counter needs to be formatted with leading zeros for sorting. + rbPath = rbPath.mapSegments( + s -> s.equals("") ? 
String.format("<%04d>", fifoCounter) : s); + } + addResult(rbPath, r); + } } diff --git a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt index db421033e63..03ae48374fa 100644 --- a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt +++ b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt @@ -198,7 +198,7 @@ # Leap year names go after other month names. # "yeartype" is an #IMPLIED attribute in the DTD and it should implicitly default to "standard". -# In practice "standard" is never explicitly given, but it could be (so must match it here). +# In practice "standard" is never explicitly given, but it could be (so it must be matched here). //ldml/dates/calendars/calendar[@type="(%A)"]/(day|month)s/%W[@type="(%A)"]/%W[@type="(%A)"]/%W[@type="(%A)"](?:[@yeartype="standard"])? ; /calendar/$1/$2Names/$3/$4 //ldml/dates/calendars/calendar[@type="(%A)"]/(day|month)s/%W[@type="(%A)"]/%W[@type="(%A)"]/%W[@type="(%A)"][@yeartype="leap"] ; /calendar/$1/$2Names/$3/$4 @@ -227,7 +227,6 @@ # Locale Display Names //ldml/localeDisplayNames/codePatterns/codePattern[@type="(%A)"] ; /codePatterns/$1 -//ldml/localeDisplayNames/annotationPatterns/annotationPattern[@type="(%A)"] ; /codePatterns/$1 //ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1 @@ -264,23 +263,23 @@ # Ordering of rules is critical here since they write into the same resource bundle path and the # last 3 values are grouped together as a single value (via the special hidden label). # -# Note that the label is needed here (not the "group" instruction) because the grouped +# Note that the label is needed here (not the "group" instruction) because the grouped # values must be seen as having a resource bundle path that is a child of the "/Currencies/$1" # path. 
This is so that the grouped values only appear when one of them is present rather than -# whenever any of the other values in the main resource bundle path exist. +# whenever any of the other values in the main resource bundle path exists. # # Due to the optional nature of the final sub-array in the bundle, it would be very hard to ever -# add more elements after it. +# add any more elements after it. //ldml/numbers/currencies/currency[@type="(%W)"]/symbol ; /Currencies/$1 ; fallback=$1 //ldml/numbers/currencies/currency[@type="(%W)"]/displayName ; /Currencies/$1 ; fallback=$1 //ldml/numbers/currencies/currency[@type="(%W)"]/pattern[@type="standard"] - ; /Currencies/$1/ ; fallback=//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencyFormatLength/currencyFormat[@type="standard"]/pattern[@type="standard"] + ; /Currencies/$1/ ; fallback=//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencyFormatLength/currencyFormat[@type="standard"]/pattern[@type="standard"] //ldml/numbers/currencies/currency[@type="(%W)"]/decimal - ; /Currencies/$1/ ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/decimal + ; /Currencies/$1/ ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/decimal //ldml/numbers/currencies/currency[@type="(%W)"]/group - ; /Currencies/$1/ ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/group + ; /Currencies/$1/ ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/group # ---- //ldml/numbers/currencyFormats[@numberSystem="%D"]/currencySpacing/(%W)/(%W) ; /currencySpacing/$1/$2 diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java new file mode 100644 index 00000000000..d3ee81346a9 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapperTest.java @@ -0,0 +1,304 @@ +// © 2019 and later: Unicode, Inc. 
and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static com.google.common.truth.Truth.assertThat; +import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; +import org.unicode.icu.tool.cldrtoicu.RbPath; +import org.unicode.icu.tool.cldrtoicu.RbValue; +import org.unicode.icu.tool.cldrtoicu.testing.FakeResult; +import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer; + +import com.google.common.collect.ImmutableList; + +@RunWith(JUnit4.class) +public class AbstractPathValueMapperTest { + @Test + public void testNameAndIcuFallback() { + IcuData foo = new FakeMapper().generateIcuData("foo", false); + IcuData bar = new FakeMapper().generateIcuData("bar", true); + + assertThat(foo).getPaths().isEmpty(); + assertThat(foo).hasName("foo"); + assertThat(foo).hasFallback(false); + + assertThat(bar).getPaths().isEmpty(); + assertThat(bar).hasName("bar"); + assertThat(bar).hasFallback(true); + } + + @Test + public void testUngroupedConcatenation() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("foo/bar", "one", "two"); + mapper.addUngroupedResult("foo/baz", "other", "path"); + mapper.addUngroupedResult("foo/bar", "three", "four"); + IcuData icuData = mapper.generateIcuData("foo", false); + + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData).hasValuesFor("foo/bar", 
singletonValues("one", "two", "three", "four")); + assertThat(icuData).hasValuesFor("foo/baz", singletonValues("other", "path")); + } + + @Test + public void testGrouping() { + FakeMapper mapper = new FakeMapper(); + mapper.addGroupedResult("foo/bar", "one", "two"); + mapper.addGroupedResult("foo/baz", "other", "path"); + mapper.addGroupedResult("foo/bar", "three", "four"); + IcuData icuData = mapper.generateIcuData("foo", false); + + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData) + .hasValuesFor("foo/bar", RbValue.of("one", "two"), RbValue.of("three", "four")); + assertThat(icuData) + .hasValuesFor("foo/baz", RbValue.of("other", "path")); + } + + @Test + public void testFallbackResults() { + // The indices are important in matching up the results and their respective fallbacks. + Result explicit1 = FakeResult.of("foo/bar", 1, false, "one"); + Result explicit2 = FakeResult.of("foo/bar", 2, false, "two"); + Result explicit3 = FakeResult.of("foo/bar", 3, false, "three"); + + Result fallback1 = FakeResult.fallback("foo/bar", 1, ""); + Result fallback2 = FakeResult.fallback("foo/bar", 2, ""); + Result fallback3 = FakeResult.fallback("foo/bar", 3, ""); + + FakeTransformer transformer = new FakeTransformer(); + transformer.addFallbacks("foo/bar", fallback1, fallback2, fallback3); + + // When all results are explicitly present, no fallbacks are used. + IcuData noFallback = new FakeMapper(transformer) + .addResult(explicit1) + .addResult(explicit2) + .addResult(explicit3) + .generateIcuData("foo", false); + assertThat(noFallback).hasValuesFor("foo/bar", singletonValues("one", "two", "three")); + + // Missing explicit results trigger fallbacks. + IcuData firstFallback = new FakeMapper(transformer) + .addResult(explicit2) + .addResult(explicit3) + .generateIcuData("foo", false); + assertThat(firstFallback).hasValuesFor("foo/bar", singletonValues("", "two", "three")); + + // Fallbacks can appear in any part of the result sequence. 
+ IcuData lastFallbacks = new FakeMapper(transformer) + .addResult(explicit1) + .generateIcuData("foo", false); + assertThat(lastFallbacks) + .hasValuesFor("foo/bar", singletonValues("one", "", "")); + + // Without a single result to "seed" the fallback group, nothing is emitted. + IcuData allFallbacks = new FakeMapper(transformer).generateIcuData("foo", false); + assertThat(allFallbacks).getPaths().isEmpty(); + } + + @Test + public void testAliases_ungrouped() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("foo/default", "start", "/alias/target", "end"); + mapper.addUngroupedResult("foo/alias-0", "start", "/alias/target[0]", "end"); + mapper.addUngroupedResult("foo/alias-1", "start", "/alias/target[1]", "end"); + mapper.addUngroupedResult("foo/alias-2", "start", "/alias/target[2]", "end"); + mapper.addUngroupedResult("alias/target", "first", "second", "third"); + IcuData icuData = mapper.generateIcuData("foo", false); + + assertThat(icuData).getPaths().hasSize(5); + assertThat(icuData) + .hasValuesFor("foo/default", singletonValues("start", "first", "end")); + assertThat(icuData) + .hasValuesFor("foo/alias-0", singletonValues("start", "first", "end")); + assertThat(icuData) + .hasValuesFor("foo/alias-1", singletonValues("start", "second", "end")); + assertThat(icuData) + .hasValuesFor("foo/alias-2", singletonValues("start", "third", "end")); + assertThat(icuData) + .hasValuesFor("alias/target", singletonValues("first", "second", "third")); + } + + // Grouping ignores aliases. 
+ @Test + public void testAliases_grouped() { + FakeMapper mapper = new FakeMapper(); + mapper.addGroupedResult("foo/bar", "grouped", "/alias/target"); + mapper.addGroupedResult("foo/bar", "/alias/target[1]"); + mapper.addUngroupedResult("alias/target", "first", "second"); + + IcuData icuData = mapper.generateIcuData("foo", false); + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData) + .hasValuesFor("foo/bar", + RbValue.of("grouped", "/alias/target"), + RbValue.of("/alias/target[1]")); + assertThat(icuData).hasValuesFor("alias/target", singletonValues("first", "second")); + } + + @Test + public void testAliases_explicit() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("foo/bar:alias", "/alias/target"); + mapper.addUngroupedResult("foo/bar", "/alias/target"); + mapper.addUngroupedResult("alias/target", "alias-value"); + IcuData icuData = mapper.generateIcuData("foo", false); + + assertThat(icuData).getPaths().hasSize(3); + assertThat(icuData).hasValuesFor("foo/bar:alias", singletonValues("/alias/target")); + assertThat(icuData).hasValuesFor("foo/bar", singletonValues("alias-value")); + assertThat(icuData).hasValuesFor("alias/target", singletonValues("alias-value")); + } + + @Test + public void testAliases_ordering() { + // It doesn't matter where an alias is in the order of results. + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("first/alias", "hello"); + mapper.addUngroupedResult("foo/bar", "/first/alias", "/last/alias"); + mapper.addUngroupedResult("last/alias", "world"); + IcuData icuData = mapper.generateIcuData("foo", false); + + assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world")); + } + + @Test + public void testAliases_concatenation() { + // It doesn't matter where an alias is in the order of results. 
+ FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("alias/target", "hello"); + mapper.addUngroupedResult("foo/bar", "/alias/target[0]", "/alias/target[1]"); + mapper.addUngroupedResult("alias/target", "world"); + IcuData icuData = mapper.generateIcuData("foo", false); + + assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world")); + } + + @Test + public void testAliases_missing() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("alias/target", "value"); + mapper.addUngroupedResult("foo/bar", "/no-such-alias/target"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false)); + assertThat(e).hasMessageThat().contains("no such alias value"); + assertThat(e).hasMessageThat().contains("/no-such-alias/target"); + } + + @Test + public void testAliases_badIndex() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("alias/target", "value"); + mapper.addUngroupedResult("foo/bar", "/alias/target[1]"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false)); + assertThat(e).hasMessageThat().contains("out of bounds"); + assertThat(e).hasMessageThat().contains("/alias/target[1]"); + } + + @Test + public void testAliases_noRecursion() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("alias/target", "/other/alias"); + mapper.addUngroupedResult("other/alias", "/other/alias"); + mapper.addUngroupedResult("foo/bar", "/alias/target"); + IllegalStateException e = + assertThrows(IllegalStateException.class, () -> mapper.generateIcuData("foo", false)); + assertThat(e).hasMessageThat().contains("recursive alias resolution is not supported"); + } + + @Test + public void testAliases_explicitAliasesAreSingletonOnly() { + FakeMapper mapper = new FakeMapper(); + mapper.addUngroupedResult("foo/bar:alias", "first", "second"); + IllegalArgumentException e = + 
assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false)); + assertThat(e).hasMessageThat().contains("explicit aliases must be singleton values"); + assertThat(e).hasMessageThat().contains("foo/bar:alias"); + } + + private static final class FakeMapper extends AbstractPathValueMapper { + private final static CldrData EXPLODING_DATA = + new CldrData() { + @Override public void accept(PathOrder pathOrder, ValueVisitor valueVisitor) { + throw new UnsupportedOperationException("should not be called by test"); + } + + @Override public void accept(PathOrder pathOrder, PrefixVisitor prefixVisitor) { + throw new UnsupportedOperationException("should not be called by test"); + } + + @Override public CldrValue get(CldrPath cldrPath) { + throw new UnsupportedOperationException("should not be called by test"); + } + }; + + // We could also just use Mockito for this (it's not yet a project dependency however). + private final PathValueTransformer transformer = + new PathValueTransformer() { + @Override public ImmutableList transform(CldrValue cldrValue) { + throw new UnsupportedOperationException("should not be called by test"); + } + + @Override + public ImmutableList transform(CldrValue cldrValue, DynamicVars varFn) { + throw new UnsupportedOperationException("should not be called by test"); + } + + @Override + public ImmutableList getFallbackResultsFor(RbPath key, DynamicVars varFn) { + // TODO: Test fallbacks. + return ImmutableList.of(); + } + }; + + // This preserves insertion order in a well defined way (good for testing alias order). + private final List fakeResults = new ArrayList<>(); + + FakeMapper() { + this(new FakeTransformer()); + } + + FakeMapper(FakeTransformer transformer) { + super(EXPLODING_DATA, transformer); + } + + FakeMapper addUngroupedResult(String path, String... 
values) { + int index = fakeResults.size() + 1; + return addResult(FakeResult.of(path, index, false, values)); + } + + FakeMapper addGroupedResult(String path, String... values) { + int index = fakeResults.size() + 1; + return addResult(FakeResult.of(path, index, true, values)); + } + + FakeMapper addResult(Result r) { + fakeResults.add(r); + return this; + } + + @Override void addResults() { + fakeResults.forEach(result -> addResult(result.getKey(), result)); + } + } + + private static RbValue[] singletonValues(String... values) { + return Arrays.stream(values).map(RbValue::of).toArray(RbValue[]::new); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java new file mode 100644 index 00000000000..3d3a8e2f63f --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapperTest.java @@ -0,0 +1,404 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static com.google.common.truth.Truth.assertThat; +import static java.util.Optional.empty; +import static org.unicode.cldr.api.CldrValue.parseValue; +import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; +import java.util.Optional; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; +import org.unicode.icu.tool.cldrtoicu.RbValue; +import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier; +import org.unicode.icu.tool.cldrtoicu.testing.FakeResult; +import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer; + +// Almost all the unit-testing for LocaleMapper is done via AbstractPathValueMapper or +// RegexTransformer (and friends). Very little is left that's special to locale data. 
+@RunWith(JUnit4.class) +public class LocaleMapperTest { + private final FakeTransformer transformer = new FakeTransformer(); + private final FakeDataSupplier src = new FakeDataSupplier(); + + @Test + public void testSimple() { + //ldml/units/durationUnit[@type="(%A)"]/durationUnitPattern ; /durationUnits/$1 + addMapping("xx", + ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"), + simpleResult("/durationUnits/foo", "Bar")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar"); + } + + @Test + public void testCorrectLocaleIsUsed() { + src.addLocaleData( + "xx", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "XX")); + addMapping( + "yy", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YY"), + simpleResult("/durationUnits/foo", "YY")); + src.addLocaleData( + "zz", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZ")); + + IcuData icuData = + LocaleMapper.process("yy", src, empty(), transformer, empty()); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/durationUnits/foo", "YY"); + } + + @Test + public void testInheritedValuesNotIncludedByDefault() { + //ldml/units/durationUnit[@type="(%A)"]/durationUnitPattern ; /durationUnits/$1 + addMapping("xx", + ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"), + simpleResult("/durationUnits/foo", "Bar")); + //ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1 + addInheritedMapping("xx", + ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"), + simpleResult("/Keys/sometype", "Value")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + // The 2nd mapping is not used because it does not appear in the unresolved CldrData. 
+ assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar"); + } + + @Test + public void testInheritedValuesIncludedWhenSameResourceBundle() { + //ldml/numbers/currencies/currency[@type="(%W)"]/symbol ; /Currencies/$1 ; fallback=$1 + //ldml/numbers/currencies/currency[@type="(%W)"]/displayName ; /Currencies/$1 ; fallback=$1 + addMapping("xx", + ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$"), + simpleResult("/Currencies/USD", 1, "US$")); + // This is included because the resource bundle path is the same as above. Note that we + // have to use the index to distinguish results here (this corresponds to the line number + // of the rule when the real regex based config is used, and determines result ordering). + addInheritedMapping("xx", + ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"), + simpleResult("/Currencies/USD", 2, "US Dollar")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + // Now the inherited mapping is used because the path appeared for the unresolved CldrData. + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/Currencies/USD", singletonValues("US$", "US Dollar")); + } + + @Test + public void testChildPathsNotIncludedByDefault() { + // Tests that in the case that one path is the child of another path (rare) the existence + // of the parent path will not trigger the child path to be included. 
+ // + //ldml/.../dateTimeFormats/availableFormats/dateFormatItem[@id="(%A)"] + // ; /calendar/$1/availableFormats/$2 + //ldml/.../dateTimeFormats/availableFormats/dateFormatItem[@id="(%A)"][@count="(%A)"] + // ; /calendar/$1/availableFormats/$2/$3 + addMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"), + simpleResult("/calendar/foo/availableFormats/bar", "Foo")); + addInheritedMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"), + simpleResult("/calendar/foo/availableFormats/bar/one", "Bar")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + // The inherited child mapping is not used; its path does not appear in the unresolved CldrData. + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Foo"); + } + + @Test + public void testParentPathsNotIncludedByDefault() { + // Same as above but swapping inherited vs explicit mappings. + addInheritedMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Foo"), + simpleResult("/calendar/foo/availableFormats/bar", "Foo")); + addMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"), + simpleResult("/calendar/foo/availableFormats/bar/one", "Bar")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + // The inherited parent mapping is not used; its path does not appear in the unresolved CldrData. 
+ assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar/one", "Bar"); + } + + // This is done so that when these paths are written into the ICU data file (and the hidden + // labels are removed) you get the "two layer" array: + // + // { + // "Parent", + // { "Child-1", "Child-2" } + // } + // + // This needs to happen even when only one of the child elements is given explicitly. + @Test + public void testHiddenLabelsIncludeParentPaths() { + // Testing that the existence of a child element using a hidden label *does* trigger the + // parent element to be included. + addInheritedMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"), + simpleResult("/calendar/foo/availableFormats/bar", "Parent")); + addInheritedMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"), + simpleResult("/calendar/foo/availableFormats/bar/", 1, "Child-1")); + + // This is the only explicit mapping and it triggers the sibling _and_ the parent. 
+ addMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"), + simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent"); + assertThat(icuData) + .hasValuesFor("/calendar/foo/availableFormats/bar/", + singletonValues("Child-1", "Child-2")); + } + + // This is strange behaviour given the test above, since it means that it's impossible to + // use hidden labels to create a situation where the output ICU data looks like: + // + // { + // "Parent", + // { "Child-1", "Child-2" }, + // "Other Parent" + // } + // + // if the child elements can be inherited; since if they are not present, you just get: + // + // { + // "Parent", + // "Other Parent" + // } + // + // Which moves the index of the following elements up by one and makes it impossible to + // define a stable length or index mapping for the array. + // + // However this is relied upon in the /Currencies/XXX case where a child array exists, but + // is optional if none of its values are explicitly present. For example in en_150.txt: + // + // Currencies{ + // EUR{ + // "€", + // "Euro", + // { + // "¤#,##0.00", + // ".", + // ",", + // } + // } + // } + // + // In most cases the formatting/grouping information is omitted if it can all be inherited. + // + // This only really works because the child array is the last element in the parent array, so + // not having it present doesn't affect any later elements. + // + // The "group" instruction in the transformation configuration files is a different way to + // allow grouping of sub-arrays which does not have this behaviour. 
+ @Test + public void testHiddenLabelsAreNotIncludedAutomatically() { + // As above, but now only the parent path is included explicitly. + addMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"]", "Parent"), + simpleResult("/calendar/foo/availableFormats/bar", "Parent")); + addInheritedMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Child-1"), + simpleResult("/calendar/foo/availableFormats/bar/", 1, "Child-1")); + + // This sibling is also inherited, so nothing triggers the hidden-label child paths. + addInheritedMapping("xx", + ldml("dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats" + + "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"), + simpleResult("/calendar/foo/availableFormats/bar/", 2, "Child-2")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent"); + } + + @Test + public void testDefaultCalendar() { + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, Optional.of("pastafarian")); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/calendar/default", "pastafarian"); + } + + @Test + public void testDateTimeHack() { + //calendar/$1/DateTimePatterns + addMapping("xx", + format("time", "full", "one"), + simpleResult("/calendar/foo/DateTimePatterns", 1, "one")); + addMapping("xx", + format("time", "long", "two"), + simpleResult("/calendar/foo/DateTimePatterns", 2, "two")); + addMapping("xx", + format("time", "medium", "three"), + simpleResult("/calendar/foo/DateTimePatterns", 3, "three")); + addMapping("xx", + format("time", "short", "four"), + simpleResult("/calendar/foo/DateTimePatterns", 4, "four")); + addMapping("xx", + 
format("date", "full", "five"), + simpleResult("/calendar/foo/DateTimePatterns", 5, "five")); + addMapping("xx", + format("date", "long", "six"), + simpleResult("/calendar/foo/DateTimePatterns", 6, "six")); + addMapping("xx", + format("date", "medium", "seven"), + simpleResult("/calendar/foo/DateTimePatterns", 7, "seven")); + addMapping("xx", + format("date", "short", "eight"), + simpleResult("/calendar/foo/DateTimePatterns", 8, "eight")); + addMapping("xx", + format("dateTime", "full", "nine"), + simpleResult("/calendar/foo/DateTimePatterns", 9, "nine")); + addMapping("xx", + format("dateTime", "long", "ten"), + simpleResult("/calendar/foo/DateTimePatterns", 10, "ten")); + addMapping("xx", + format("dateTime", "medium", "eleven"), + simpleResult("/calendar/foo/DateTimePatterns", 11, "eleven")); + addMapping("xx", + format("dateTime", "short", "twelve"), + simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve")); + + IcuData icuData = + LocaleMapper.process("xx", src, empty(), transformer, empty()); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/calendar/foo/DateTimePatterns", + singletonValues( + "one", "two", "three", "four", + "five", "six", "seven", "eight", + "eleven", // <-- legacy reasons, don't ask! + "nine", "ten", "eleven", "twelve")); + } + + @Test + public void testDateTimeHack_wrongNumberofElements() { + // One missing pattern from the start. 
+ addMapping("xx", + format("time", "long", "two"), + simpleResult("/calendar/foo/DateTimePatterns", 2, "two")); + addMapping("xx", + format("time", "medium", "three"), + simpleResult("/calendar/foo/DateTimePatterns", 3, "three")); + addMapping("xx", + format("time", "short", "four"), + simpleResult("/calendar/foo/DateTimePatterns", 4, "four")); + addMapping("xx", + format("date", "full", "five"), + simpleResult("/calendar/foo/DateTimePatterns", 5, "five")); + addMapping("xx", + format("date", "long", "six"), + simpleResult("/calendar/foo/DateTimePatterns", 6, "six")); + addMapping("xx", + format("date", "medium", "seven"), + simpleResult("/calendar/foo/DateTimePatterns", 7, "seven")); + addMapping("xx", + format("date", "short", "eight"), + simpleResult("/calendar/foo/DateTimePatterns", 8, "eight")); + addMapping("xx", + format("dateTime", "full", "nine"), + simpleResult("/calendar/foo/DateTimePatterns", 9, "nine")); + addMapping("xx", + format("dateTime", "long", "ten"), + simpleResult("/calendar/foo/DateTimePatterns", 10, "ten")); + addMapping("xx", + format("dateTime", "medium", "eleven"), + simpleResult("/calendar/foo/DateTimePatterns", 11, "eleven")); + addMapping("xx", + format("dateTime", "short", "twelve"), + simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve")); + + IllegalStateException e = assertThrows( + IllegalStateException.class, + () -> LocaleMapper.process("xx", src, empty(), transformer, empty())); + + assertThat(e).hasMessageThat().contains("unexpected"); + assertThat(e).hasMessageThat().contains("/calendar/foo/DateTimePatterns"); + } + + private static CldrValue format(String type,String length, String pattern) { + return ldml(String.format( + "dates/calendars/calendar[@type=\"foo\"]" + + "/%1$sFormats" + + "/%1$sFormatLength[@type=\"%2$s\"]" + + "/%1$sFormat[@type=\"standard\"]/pattern[@type=\"%3$s\"]", + type, length, pattern)); + } + + private void addMapping(String locale, CldrValue value, Result... 
results) { + src.addLocaleData(locale, value); + transformer.addResults(value, results); + } + + private void addInheritedMapping(String locale, CldrValue value, Result... results) { + src.addInheritedData(locale, value); + transformer.addResults(value, results); + } + + private static Result simpleResult(String path, String value) { + return FakeResult.of(path, 1, false, value); + } + + private static Result simpleResult(String path, int index, String value) { + return FakeResult.of(path, index, false, value); + } + + private static CldrData cldrData(CldrValue... values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue ldml(String path) { + return ldml(path, ""); + } + + private static CldrValue ldml(String path, String value) { + return parseValue("//ldml/" + path, value); + } + + private static RbValue[] singletonValues(String... values) { + return Arrays.stream(values).map(RbValue::of).toArray(RbValue[]::new); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java index a62e3fe86a2..4d52a716dcb 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java @@ -180,7 +180,8 @@ public class PluralsMapperTest { StringBuilder cldrPath = new StringBuilder("//supplementalData"); appendAttribute(cldrPath.append("/plurals"), "type", type); appendAttribute(cldrPath.append("/pluralRules"), "locales", Joiner.on(' ').join(locales)); - appendAttribute(cldrPath.append("/pluralRule"), "count", count); + // We aren't testing sort index (#N) here, but still need to set it to something.
+ appendAttribute(cldrPath.append("/pluralRule#0"), "count", count); return CldrValue.parseValue(cldrPath.toString(), value); } diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java index 38ba97185f0..841a2e12823 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java @@ -49,11 +49,11 @@ public class RbnfMapperTest { @Test public void testSingleRuleset() { - int idx = 1; + int idx = 0; CldrData cldrData = cldrData( - rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", idx++), - rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", idx++), - rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", idx++)); + rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", ++idx), + rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ++idx), + rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ++idx)); IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); @@ -71,16 +71,16 @@ public class RbnfMapperTest { // Note that input order of these paths shouldn't matter since they are ordered (and thus // grouped) by DTD order (relative order matters for values in the same set, but values // do not have to grouped together). 
- int idx = 1; + int idx = 0; CldrData cldrData = cldrData( - rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", idx++), - rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", idx++), - rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", idx++), - rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", idx++), - rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", idx++), - rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", idx++), - rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", idx++), - rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", idx++)); + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", ++idx), + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", ++idx), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", ++idx), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", ++idx), + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", ++idx), + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", ++idx), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ++idx), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ++idx)); IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); @@ -101,19 +101,19 @@ public class RbnfMapperTest { @Test public void testSpecials() { - int idx = 1; + int idx = 0; CldrData specials = cldrData( - rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", idx++), - rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", idx++), - rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", idx++), - rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", idx++), - rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", idx++)); + rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", ++idx), + rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 
1 hour; =0= hours;", ++idx), + rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", ++idx), + rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", ++idx), + rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", ++idx)); - idx = 1; + idx = 0; CldrData cldrData = cldrData( - rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", idx++), + rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", ++idx), rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0", - "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", idx++)); + "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ++idx)); IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials)); @@ -139,12 +139,12 @@ public class RbnfMapperTest { // the same, it's not entirely obviously why some of the special cases really exist. @Test public void testEscaping() { - int idx = 1; + int idx = 0; CldrData cldrData = cldrData( - rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", idx++), - rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", idx++), - rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", idx++), - rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", idx++)); + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", ++idx), + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", ++idx), + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ++idx), + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ++idx)); IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); @@ -173,7 +173,8 @@ public class RbnfMapperTest { StringBuilder cldrPath = new StringBuilder("//ldml/rbnf"); appendAttribute(cldrPath.append("/rulesetGrouping"), "type", group); - cldrPath.append("/ruleset"); + // We aren't testing sort index (#N) here, but still need to set it to something. 
+ cldrPath.append("/ruleset#0"); appendAttribute(cldrPath, "type", setType); appendAttribute(cldrPath, "access", access); cldrPath.append("/rbnfrule#").append(ruleIndex); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java new file mode 100644 index 00000000000..8085616fd90 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapperTest.java @@ -0,0 +1,95 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.cldr.api.CldrValue.parseValue; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.PathMatcher; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; +import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier; +import org.unicode.icu.tool.cldrtoicu.testing.FakeResult; +import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer; + +// Almost all the unit-testing for SupplementalMapper is done via AbstractPathValueMapper or +// RegexTransformer (and friends). Very little is left that's special to supplemental data. 
+@RunWith(JUnit4.class) +public class SupplementalMapperTest { + private final FakeTransformer transformer = new FakeTransformer(); + private final FakeDataSupplier src = new FakeDataSupplier(); + + @Test + public void testSimple() { + addExpectedMapping( + supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]"), + simpleResult("/Foo", "Bar")); + + PathMatcher allPaths = PathMatcher.of("supplementalData"); + IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/Foo", "Bar"); + } + + @Test + public void testFifoLabel() { + // Example: + // //supplementalData/currencyData/region[@iso3166="(%W)"]/currency[@iso4217="(%W)"] + // ; /CurrencyMap/$1//id ; values=$2 + // + // Note that the order mappings are added does not affect the output, since even though the + // "FIFO" mechanism works on encounter-order, the CldrData is sorted before being + // transformed (and in this case, is resolved on the currency code USD < USN < USS). 
+ addExpectedMapping( + supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USN\"]"), + simpleResult("/CurrencyMap/US//id", "USN")); + addExpectedMapping( + supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USS\"]"), + simpleResult("/CurrencyMap/US//id", "USS")); + addExpectedMapping( + supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USD\"]"), + simpleResult("/CurrencyMap/US//id", "USD")); + + PathMatcher allPaths = PathMatcher.of("supplementalData"); + IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths); + + assertThat(icuData).getPaths().hasSize(3); + assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0000>/id", "USD"); + assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0001>/id", "USN"); + assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0002>/id", "USS"); + } + + @Test + public void testPathFilter() { + addExpectedMapping( + supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]"), + simpleResult("/Foo", "Bar")); + addExpectedMapping( + supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USN\"]"), + simpleResult("/CurrencyMap/US//id", "USN")); + + PathMatcher filter = PathMatcher.of("supplementalData/likelySubtags"); + IcuData icuData = SupplementalMapper.process(src, transformer, "name", filter); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/Foo", "Bar"); + } + + private void addExpectedMapping(CldrValue value, Result... results) { + src.addSupplementalData(value); + transformer.addResults(value, results); + } + + private static Result simpleResult(String path, String value) { + return FakeResult.of(path, 1, false, value); + } + + private static CldrValue supplementalData(String path) { + return parseValue("//supplementalData/" + path, ""); + } +} \ No newline at end of file