ICU-20693 Remaining tests for new ICU tooling and some refactoring

This commit is contained in:
David Beaumont 2019-09-19 15:30:04 +02:00 committed by David Beaumont
parent e5529933b2
commit 142c90afcc
12 changed files with 1137 additions and 254 deletions

View file

@ -23,7 +23,6 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
@ -147,14 +146,6 @@ public final class LdmlConverter {
// TODO: Confirm that this has no meaningful effect and unify "empty" file contents.
private static RbPath RB_EMPTY_ALIAS = RbPath.of("___");
/** Provisional entry point until better config support exists. */
public static void main(String... args) {
convert(IcuConverterConfig.builder()
.setOutputDir(Paths.get(args[0]))
.setEmitReport(true)
.build());
}
/**
* Output types defining specific subsets of the ICU data which can be converted separately.
* This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to

View file

@ -190,12 +190,16 @@ public final class RbPath implements Comparable<RbPath> {
return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
}
// TODO: Remove this in favour of having properly typed paths.
// TODO: Remove this and isAlias() in favour of having properly typed paths.
boolean isIntPath() {
    // The type suffix, when present, is carried by the final path segment.
    String tail = segments.get(segments.size() - 1);
    if (tail.endsWith(":int")) {
        return true;
    }
    return tail.endsWith(":intvector");
}
public boolean isAlias() {
    // A path is treated as an alias purely on the basis of its last segment's suffix.
    String tail = getSegment(length() - 1);
    return tail.endsWith(":alias");
}
// Natural ordering of paths is delegated entirely to the class-level ORDERING comparator.
@Override public int compareTo(RbPath other) {
return ORDERING.compare(this, other);
}

View file

@ -93,7 +93,7 @@ public final class SupplementalData {
* @param supplementalData the raw CLDR supplemental data instance.
* @return the supplemental data API.
*/
static SupplementalData create(CldrData supplementalData) {
public static SupplementalData create(CldrData supplementalData) {
Table<Alias, String, String> aliasTable = HashBasedTable.create();
Map<String, String> parentLocaleMap = new HashMap<>();
Map<String, String> defaultCalendarMap = new HashMap<>();

View file

@ -3,98 +3,237 @@
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Ordering.natural;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.SetMultimap;
/**
* An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures
* that transformation results are correctly processed when being added to IcuData instances.
*/
public abstract class AbstractPathValueMapper {
abstract class AbstractPathValueMapper {
// Matches "/foo/bar" or "/foo/bar[N]" as a resource bundle path, capturing the path and
// optional index separately. Note that this is very sloppy matching and the path string will
// also be parsed via RbPath.parse().
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
private final IcuData icuData;
private final CldrData cldrData;
private final PathValueTransformer transformer;
AbstractPathValueMapper(String name, boolean hasFallback) {
this.icuData = new IcuData(name, hasFallback);
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
AbstractPathValueMapper(CldrData cldrData, PathValueTransformer transformer) {
this.cldrData = checkNotNull(cldrData);
this.transformer = checkNotNull(transformer);
}
/** Implemented by sub-classes to return all results to be added to the IcuData instance. */
abstract ListMultimap<RbPath, Result> getResults();
/**
* Adds results to the IcuData instance according to expected {@code PathValueTransformer}
* semantics. This method must only be called once per mapper.
* Returns a new {@code IcuData} instance produced by post-processing a set of results
* generated by calling sub-class method {@link #addResults()}. This is the only method which
* need be directly invoked by the sub-class implementation (other methods are optionally used
* from within the {@link #addResults()} callback).
*/
final IcuData transform() {
checkState(icuData.getPaths().isEmpty(),
"transform() method cannot be called multiple times: %s", icuData);
final IcuData generateIcuData(String icuName, boolean hasFallback) {
// This subclass mostly exists to control the fact that results need to be added in one go
// to the IcuData because of how referenced paths are handled. If results could be added in
// multiple passes, you could have confusing situations in which values have path references
// in them but the referenced paths have not been transformed yet. Forcing the subclass to
// implement a single method to generate all results at once ensures that we control the
// lifecycle of the data and how results are processed as they are added to the IcuData.
addResults(getResults());
checkState(resultsByRbPath.isEmpty(),
"results must not be added outside the call to addResults(): %s", resultsByRbPath);
addResults();
IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback));
resultsByRbPath.clear();
return icuData;
}
/**
* Adds transformation results on the specified multi-map to this data instance. Results are
* handled differently according to whether they are grouped, or represent an alias value. If
* the value of an ungrouped result is itself a resource bundle path (including possibly having
* an array index) then the referenced value is assumed to be an existing path whose value is
* then substituted.
* Implemented by sub-classes to return all results to be added to the IcuData instance. The
* primary job of this callback is to generate transformed results (typically by calling
* {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the
* results to this mapper using {@link #addResult(RbPath, Result)}.
*
* <p>This method is called once for each call to {@link #generateIcuData(String, boolean)} and
* is responsible for adding all necessary results for the returned {@link IcuData}.
*/
// TODO: Fix this to NOT implicitly rely of ordering of referenced values.
private void addResults(ListMultimap<RbPath, Result> resultsByRbPath) {
for (RbPath rbPath : resultsByRbPath.keySet()) {
for (Result r : resultsByRbPath.get(rbPath)) {
if (r.isGrouped()) {
// Grouped results have all the values in a single value entry.
icuData.add(rbPath, RbValue.of(r.getValues()));
} else {
if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) {
r.getValues().forEach(v -> icuData.add(rbPath, RbValue.of(v)));
} else {
// Ungrouped results are one value per entry, but might be expanded into
// grouped results if they are a path referencing a grouped entry.
r.getValues().forEach(v -> icuData.add(rbPath, replacePathValues(v)));
}
}
}
}
abstract void addResults();
/**
* Returns the CLDR data used for this transformation. Note that a subclass mapper might have
* other data for different purposes, but this data instance is the one from which variables
* are resolved. A sub-class mapper might access this for additional processing.
*/
final CldrData getCldrData() {
return cldrData;
}
/**
* Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed
* to be a reference to an existing value in a resource bundle. Note that the referenced bundle
* might be grouped (i.e. an array with more than one element).
* Transforms a single value into a sequence of results using this mapper's {@link
* PathValueTransformer}, which can be added to the mapper (possibly after optional
* post-processing).
*/
private RbValue replacePathValues(String value) {
Matcher m = ARRAY_INDEX.matcher(value);
if (!m.matches()) {
return RbValue.of(value);
final Stream<Result> transformValue(CldrValue value) {
// Dynamic variables are resolved through getVarsFn(), i.e. against this mapper's CldrData.
return transformer.transform(value, this::getVarsFn).stream();
}
/**
* Adds a transformed result to the mapper. This should be called by the sub-class mapper in
* its implementation of the {@link #addResults()} method.
*
* <p>Note that the given path will often (but not always) be just the path of the result.
*/
final void addResult(RbPath path, Result result) {
// Results are held in a SetMultimap, so adding an equal result twice for the same path
// keeps only one copy.
resultsByRbPath.put(path, result);
}
// Variable-resolution callback handed to the transformer: looks the path up in this mapper's
// CLDR data and yields its string value, or null when the path has no value.
private String getVarsFn(CldrPath p) {
    CldrValue found = cldrData.get(p);
    if (found == null) {
        return null;
    }
    return found.getValue();
}
// Fills in any fallback results and orders the results by the resource bundle path.
private ImmutableListMultimap<RbPath, Result> finalizeResults() {
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, this::getVarsFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
}
}
// The only constraint is that the "path" value starts with a leading '/', but parsing into
// the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
// captured value contains '/' characters to represent path delimiters.
RbPath replacePath = RbPath.parse(m.group(1));
List<RbValue> replaceValues = icuData.get(replacePath);
checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath);
// If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
int replaceIndex = m.groupCount() > 1 ? Integer.parseInt(m.group(2)) : 0;
return replaceValues.get(replaceIndex);
return out.build();
}
/**
 * Copies the given transformation results into the supplied {@code IcuData}, visiting the
 * results of each path in list order. Three cases are distinguished:
 * <ul>
 * <li>grouped results contribute all their values as one entry;
 * <li>results on alias paths must hold exactly one value, which is stored verbatim;
 * <li>any other result contributes one entry per value, where a value that looks like a
 *     resource bundle path (optionally with an array index) is replaced by the value it
 *     refers to.
 * </ul>
 *
 * @return the {@code icuData} instance that was passed in.
 */
private static IcuData addResultsToIcuData(
        ImmutableListMultimap<RbPath, Result> results, IcuData icuData) {
    // Path order is irrelevant (IcuData sorts paths itself), but a list multimap keeps the
    // per-path value order, which the final resolution pass below depends on.
    ListMultimap<RbPath, ValueOrAlias> pending = ArrayListMultimap.create();

    // IMPORTANT: iterate the KEYS of the results map rather than asking each result for its
    // own path. Paths may have been post-processed after the result was produced, and that
    // affects both the mapping and the output order.
    for (RbPath rbPath : results.keySet()) {
        for (Result r : results.get(rbPath)) {
            if (r.isGrouped()) {
                // All values of a grouped result form a single entry; never an alias.
                pending.put(rbPath, ValueOrAlias.value(RbValue.of(r.getValues())));
                continue;
            }
            if (!rbPath.isAlias()) {
                // One entry per value; each may later expand to a referenced (possibly
                // grouped) value if it turns out to be a path reference.
                for (String v : r.getValues()) {
                    pending.put(rbPath, ValueOrAlias.parse(v));
                }
                continue;
            }
            // Explicit alias: the value is kept as-is and never replaced by what it points
            // at. This is a heuristic forced on us by RbPath/RbValue not being typed.
            checkArgument(r.getValues().size() == 1,
                "explicit aliases must be singleton values: %s", r);
            String aliasTarget = Iterables.getOnlyElement(r.getValues());
            pending.put(rbPath, ValueOrAlias.value(aliasTarget));
        }
    }

    // Second pass: every value is now present, so references can be resolved. Insertion
    // order of the values for each path is preserved by the list multimap.
    pending.forEach((path, value) -> icuData.add(path, value.resolve(pending)));
    return icuData;
}
/*
* An unfortunately messy little interface to handle the way that aliases are defined in the
* path value mappers. A mapper Result is permitted to contain values which are actually
* aliases to other resource bundle elements. This is typically used in fallback values, where
* the fallback is a functional value. For example:
* fallback=/weekData/001:intvector[0]
*
* This is messy because when we process the Results from the mapper to put them into the
* IcuData instance, we cannot be sure we can resolve these "aliases" at the time that they
* are encountered (the target value might not be present yet). So we need to wait until
* all the values are in place and then do a 2nd pass to resolve things.
*
* So far path replacement is strictly limited to fallback results, so perhaps it could be
* handled more directly in the Result class, though it is possible for a single result to
* contain multiple path references:
* fallback=/weekData/001:intvector[2] /weekData/001:intvector[3]
*/
private interface ValueOrAlias {
// A simple value doesn't need resolving, and doesn't care if the given map is null (*).
static ValueOrAlias value(RbValue v) {
return src -> v;
}
// Helper for (common) singleton values.
static ValueOrAlias value(String v) {
return value(RbValue.of(v));
}
// Interprets the given string either as a plain singleton value or, when it matches
// ARRAY_INDEX (i.e. "/foo/bar" or "/foo/bar[N]"), as a reference to the N-th value of another
// path. The reference is only looked up when resolve() is eventually called.
static ValueOrAlias parse(String valueOrAlias) {
Matcher m = ARRAY_INDEX.matcher(valueOrAlias);
if (!m.matches()) {
return value(valueOrAlias);
}
// The only constraint is that the "path" value starts with a leading '/', but parsing into
// the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
// captured value contains '/' characters to represent path delimiters.
RbPath path = RbPath.parse(m.group(1));
// If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
int index = m.group(2) != null ? Integer.parseUnsignedInt(m.group(2)) : 0;
return src -> {
// A null map is the marker passed by the nested resolve() call below, so reaching this
// point with null means the referenced value was itself a reference.
checkState(src != null, "recursive alias resolution is not supported");
// ListMultimap.get() returns an empty list (never null) for an absent key, hence the
// isEmpty() test rather than a null check.
List<ValueOrAlias> values = src.get(path);
checkArgument(!values.isEmpty(), "no such alias value: /%s", path);
checkArgument(index < values.size(),
"index for alias /%s[%s] is out of bounds", path, index);
// By passing 'null' to the recursive call to resolve, we prevent the resolution
// from being recursive (*). This could be changed to pass 'src' and achieve
// arbitrary recursive resolving if needed, but that's currently unnecessary (and
// should probably be guarded against unbounded recursion if it is ever enabled).
return values.get(index).resolve(null);
};
}
// Produces the final value. 'src' holds all values collected so far and is consulted only
// by references created via parse(); plain values ignore it.
RbValue resolve(ListMultimap<RbPath, ValueOrAlias> src);
}
}

View file

@ -2,10 +2,8 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Ordering.natural;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
@ -16,22 +14,16 @@ import java.util.Optional;
import java.util.Set;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.SetMultimap;
import com.google.common.annotations.VisibleForTesting;
/**
* Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
@ -45,8 +37,7 @@ public final class LocaleMapper extends AbstractPathValueMapper {
private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");
/**
* Processes data from the given supplier to generate general locale data for the given locale
* ID.
* Processes data from the given supplier to generate general locale data for the given locale.
*
* @param localeId the locale ID to generate data for.
* @param src the CLDR data supplier to process.
@ -63,13 +54,81 @@ public final class LocaleMapper extends AbstractPathValueMapper {
PathValueTransformer transformer,
SupplementalData supplementalData) {
IcuData icuData = new LocaleMapper(localeId, src, icuSpecialData, transformer).transform();
return process(
localeId,
src,
icuSpecialData,
transformer,
supplementalData.getDefaultCalendar(localeId));
}
@VisibleForTesting // Avoids needing to pass a complete SupplementalData instance in tests.
public static IcuData process(
String localeId,
CldrDataSupplier src,
Optional<CldrData> icuSpecialData,
PathValueTransformer transformer,
Optional<String> defaultCalendar) {
IcuData icuData =
new LocaleMapper(localeId, src, icuSpecialData, transformer)
.generateIcuData(localeId, true);
doDateTimeHack(icuData);
supplementalData.getDefaultCalendar(icuData.getName())
.ifPresent(c -> icuData.add(RB_CALENDAR, c));
defaultCalendar.ifPresent(c -> icuData.add(RB_CALENDAR, c));
return icuData;
}
private final String localeId;
private final CldrDataSupplier src;
private final Optional<CldrData> icuSpecialData;
/**
 * Creates a mapper for one locale. The resolved CLDR data for the locale is handed to the
 * parent class, which uses it for value transformation and variable lookup; the supplier
 * itself is retained so that the unresolved data can be obtained later.
 *
 * @param localeId the locale ID whose data is to be mapped.
 * @param src the CLDR data supplier (must not be null).
 * @param icuSpecialData optional additional ICU-specific data for the locale (must not be null).
 * @param transformer the transformer applied to each CLDR value.
 */
private LocaleMapper(
        String localeId,
        CldrDataSupplier src,
        Optional<CldrData> icuSpecialData,
        PathValueTransformer transformer) {
    // Null-check 'src' at the point it is first dereferenced. A check placed after super(...)
    // can never fire, because the call below would already have failed.
    super(checkNotNull(src).getDataForLocale(localeId, RESOLVED), transformer);
    this.localeId = localeId;
    this.src = src;
    this.icuSpecialData = checkNotNull(icuSpecialData);
}
@Override
void addResults() {
    // First work out which resource bundle paths are permitted, then add the results that
    // fall on those paths, and finally add any ICU-specific special data (unfiltered).
    Set<RbPath> validRbPaths = collectPaths();
    collectResults(validRbPaths);
    if (icuSpecialData.isPresent()) {
        collectSpecials(icuSpecialData.get());
    }
}
// Transforms every value of the UNRESOLVED locale data and gathers the resource bundle paths
// of the results (see collectResultPath() for exactly which paths are recorded).
private Set<RbPath> collectPaths() {
    Set<RbPath> validRbPaths = new HashSet<>();
    CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
    unresolved.accept(
        DTD, v -> transformValue(v).forEach(r -> collectResultPath(r, validRbPaths)));
    return validRbPaths;
}
// Records the path of the given result and, for an anonymous path, its parent as well.
// Fails if the parent of an anonymous path is itself anonymous.
private static void collectResultPath(Result result, Set<RbPath> validRbPaths) {
    RbPath key = result.getKey();
    validRbPaths.add(key);
    if (!key.isAnonymous()) {
        return;
    }
    RbPath parent = key.getParent();
    checkState(!parent.isAnonymous(), "anonymous paths must not be nested: %s", key);
    validRbPaths.add(parent);
}
// Transforms every value of this mapper's CLDR data, adding only those results whose path is
// in the given set of valid paths.
private void collectResults(Set<RbPath> validRbPaths) {
    getCldrData().accept(DTD, v ->
        transformValue(v).forEach(r -> {
            if (validRbPaths.contains(r.getKey())) {
                addResult(r.getKey(), r);
            }
        }));
}
// Transforms every value of the given special data and adds each result under its own path,
// with no filtering against the set of valid paths.
private void collectSpecials(CldrData specials) {
    specials.accept(DTD, v -> {
        transformValue(v).forEach(r -> addResult(r.getKey(), r));
    });
}
// This is an awful hack for post-processing the date-time format patterns to inject a 13th
// pattern at index 8, which is just a duplicate of the "medium" date-time pattern. The reasons
// for this are lost in the mists of time, but essentially there's ICU library code that just
@ -87,94 +146,10 @@ public final class LocaleMapper extends AbstractPathValueMapper {
&& rbPath.getSegment(2).equals("DateTimePatterns")) {
// This cannot be null and should not be empty, since the path is in this data.
List<RbValue> valuesToHack = icuData.get(rbPath);
checkArgument(valuesToHack.size() == 12,
"unexpected number of date/time patterns for '%s': %s", rbPath, valuesToHack);
checkState(valuesToHack.size() == 12,
"unexpected number of date/time patterns for '/%s': %s", rbPath, valuesToHack);
valuesToHack.add(8, valuesToHack.get(10));
}
}
}
private final String localeId;
private final CldrDataSupplier src;
private final Optional<CldrData> icuSpecialData;
private final PathValueTransformer transformer;
private final Set<RbPath> validRbPaths = new HashSet<>();
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
private LocaleMapper(
String localeId,
CldrDataSupplier src,
Optional<CldrData> icuSpecialData,
PathValueTransformer transformer) {
super(localeId, true);
this.localeId = localeId;
this.src = checkNotNull(src);
this.icuSpecialData = checkNotNull(icuSpecialData);
this.transformer = checkNotNull(transformer);
}
@Override
ListMultimap<RbPath, Result> getResults() {
CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
CldrData resolved = src.getDataForLocale(localeId, RESOLVED);
DynamicVars varFn = p -> {
CldrValue cldrValue = resolved.get(p);
return cldrValue != null ? cldrValue.getValue() : null;
};
collectPaths(unresolved, varFn);
collectResults(resolved, varFn);
icuSpecialData.ifPresent(s -> collectSpecials(s, varFn));
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
}
}
return out.build();
}
private void collectPaths(CldrData unresolved, DynamicVars varFn) {
ValueVisitor collectPaths =
v -> transformer.transform(v, varFn).forEach(this::collectResultPath);
unresolved.accept(DTD, collectPaths);
}
private void collectResultPath(Result result) {
RbPath rbPath = result.getKey();
validRbPaths.add(rbPath);
if (rbPath.isAnonymous()) {
RbPath parent = rbPath.getParent();
checkState(!parent.isAnonymous(),
"anonymous paths should not be nested: %s", rbPath);
validRbPaths.add(parent);
}
}
private void collectResults(CldrData resolved, DynamicVars varFn) {
ValueVisitor collectResults =
v -> transformer.transform(v, varFn).stream()
.filter(r -> validRbPaths.contains(r.getKey()))
.forEach(r -> resultsByRbPath.put(r.getKey(), r));
resolved.accept(DTD, collectResults);
}
private void collectSpecials(CldrData cldrData, DynamicVars varFn) {
cldrData.accept(DTD, v ->
transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r)));
}
}

View file

@ -3,12 +3,8 @@
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Ordering.natural;
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
import java.util.Set;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrValue;
@ -18,10 +14,6 @@ import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.SetMultimap;
/**
* Generate supplemental {@link IcuData} by transforming {@link CldrDataType#SUPPLEMENTAL
* SUPPLEMENTAL} data using a {@link PathValueTransformer}.
@ -46,70 +38,48 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
public static IcuData process(
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
return new SupplementalMapper(src, transformer, icuName, paths).transform();
return new SupplementalMapper(src, transformer, paths).generateIcuData(icuName, false);
}
private final CldrDataSupplier src;
private final PathMatcher paths;
private final PathValueTransformer transformer;
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
private int fifoCounter = 0;
private SupplementalMapper(
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
CldrDataSupplier src, PathValueTransformer transformer, PathMatcher pathFilter) {
super(icuName, false);
this.src = checkNotNull(src);
this.paths = checkNotNull(paths);
this.transformer = checkNotNull(transformer);
super(src.getDataForType(CldrDataType.SUPPLEMENTAL), transformer);
this.paths = checkNotNull(pathFilter);
}
@Override
ImmutableListMultimap<RbPath, Result> getResults() {
void addResults() {
// DTD and NESTED_GROUPING order differ because of how the magic <FIFO> label works (it
// basically enforces "encounter order" onto things in unlabeled sequences, which matches
// the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order
// to remove any lingering implicit dependencies on the CLDR data behaviour.
CldrData supplementalData = src.getDataForType(CldrDataType.SUPPLEMENTAL);
PathValueTransformer.DynamicVars varFn = p -> {
CldrValue cldrValue = supplementalData.get(p);
return cldrValue != null ? cldrValue.getValue() : null;
};
supplementalData.accept(NESTED_GROUPING, this::visit);
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
}
}
return out.build();
getCldrData().accept(NESTED_GROUPING, this::visit);
}
private void visit(CldrValue value) {
if (paths.matchesPrefixOf(value.getPath())) {
for (Result r : transformer.transform(value)) {
RbPath rbPath = r.getKey();
if (rbPath.contains(RB_FIFO)) {
// The fifo counter needs to be formatted with leading zeros for sorting.
rbPath = rbPath.mapSegments(
s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
}
resultsByRbPath.put(rbPath, r);
}
transformValue(value).forEach(this::collectResult);
fifoCounter++;
}
}
// <FIFO> hidden labels could be supported in the abstract mapper, but would need a "bulk" add
// method for results (since the counter is updated once per batch, which corresponds to once
// per rule). Having the same FIFO counter value for the same group of values is essential
// since it serves to group them.
//
// TODO: Improve this and push this up into the abstract class (so it works with LocaleMapper).
private void collectResult(Result r) {
    RbPath key = r.getKey();
    // Any "<FIFO>" segment is swapped for the current counter value; the counter is zero-padded
    // so that the generated labels sort in encounter order.
    RbPath target = key.contains(RB_FIFO)
        ? key.mapSegments(s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s)
        : key;
    addResult(target, r);
}
}

View file

@ -198,7 +198,7 @@
# Leap year names go after other month names.
# "yeartype" is an #IMPLIED attribute in the DTD and it should implicitly default to "standard".
# In practice "standard" is never explicitly given, but it could be (so must match it here).
# In practice "standard" is never explicitly given, but it could be (so it must be matched here).
//ldml/dates/calendars/calendar[@type="(%A)"]/(day|month)s/%W[@type="(%A)"]/%W[@type="(%A)"]/%W[@type="(%A)"](?:[@yeartype="standard"])? ; /calendar/$1/$2Names/$3/$4
//ldml/dates/calendars/calendar[@type="(%A)"]/(day|month)s/%W[@type="(%A)"]/%W[@type="(%A)"]/%W[@type="(%A)"][@yeartype="leap"] ; /calendar/$1/$2Names/$3/$4
@ -227,7 +227,6 @@
# Locale Display Names
//ldml/localeDisplayNames/codePatterns/codePattern[@type="(%A)"] ; /codePatterns/$1
//ldml/localeDisplayNames/annotationPatterns/annotationPattern[@type="(%A)"] ; /codePatterns/$1
//ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
@ -264,23 +263,23 @@
# Ordering of rules is critical here since they write into the same resource bundle path and the
# last 3 values are grouped together as a single value (via the special <FIFO> hidden label).
#
# Note that the <FIFO> label is needed here (not the "group" instruction) because the grouped
# Note that the <FORMAT> label is needed here (not the "group" instruction) because the grouped
# values must be seen as having a resource bundle path that is a child of the "/Currencies/$1"
# path. This is so that the grouped values only appear when one of them is present rather than
# whenever any of the other values in the main resource bundle path exist.
# whenever any of the other values in the main resource bundle path exists.
#
# Due to the optional nature of the final sub-array in the bundle, it would be very hard to ever
# add more elements after it.
# add any more elements after it.
//ldml/numbers/currencies/currency[@type="(%W)"]/symbol
; /Currencies/$1 ; fallback=$1
//ldml/numbers/currencies/currency[@type="(%W)"]/displayName
; /Currencies/$1 ; fallback=$1
//ldml/numbers/currencies/currency[@type="(%W)"]/pattern[@type="standard"]
; /Currencies/$1/<FIFO> ; fallback=//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencyFormatLength/currencyFormat[@type="standard"]/pattern[@type="standard"]
; /Currencies/$1/<FORMAT> ; fallback=//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencyFormatLength/currencyFormat[@type="standard"]/pattern[@type="standard"]
//ldml/numbers/currencies/currency[@type="(%W)"]/decimal
; /Currencies/$1/<FIFO> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/decimal
; /Currencies/$1/<FORMAT> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/decimal
//ldml/numbers/currencies/currency[@type="(%W)"]/group
; /Currencies/$1/<FIFO> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/group
; /Currencies/$1/<FORMAT> ; fallback=//ldml/numbers/symbols[@numberSystem="%D"]/group
# ----
//ldml/numbers/currencyFormats[@numberSystem="%D"]/currencySpacing/(%W)/(%W) ; /currencySpacing/$1/$2

View file

@ -0,0 +1,304 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.truth.Truth.assertThat;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
import com.google.common.collect.ImmutableList;
@RunWith(JUnit4.class)
public class AbstractPathValueMapperTest {
// The bundle name and fallback flag given to generateIcuData() must show up in the result, and
// a mapper with no results must produce no paths.
@Test
public void testNameAndIcuFallback() {
    IcuData withoutFallback = new FakeMapper().generateIcuData("foo", false);
    assertThat(withoutFallback).getPaths().isEmpty();
    assertThat(withoutFallback).hasName("foo");
    assertThat(withoutFallback).hasFallback(false);

    IcuData withFallback = new FakeMapper().generateIcuData("bar", true);
    assertThat(withFallback).getPaths().isEmpty();
    assertThat(withFallback).hasName("bar");
    assertThat(withFallback).hasFallback(true);
}
// Ungrouped results for the same path are concatenated (in result order) into one flat
// sequence of singleton values.
@Test
public void testUngroupedConcatenation() {
    IcuData generated = new FakeMapper()
        .addUngroupedResult("foo/bar", "one", "two")
        .addUngroupedResult("foo/baz", "other", "path")
        .addUngroupedResult("foo/bar", "three", "four")
        .generateIcuData("foo", false);

    assertThat(generated).getPaths().hasSize(2);
    assertThat(generated)
        .hasValuesFor("foo/bar", singletonValues("one", "two", "three", "four"));
    assertThat(generated).hasValuesFor("foo/baz", singletonValues("other", "path"));
}
// Grouped results for the same path stay as separate multi-element values rather than being
// flattened.
@Test
public void testGrouping() {
    IcuData generated = new FakeMapper()
        .addGroupedResult("foo/bar", "one", "two")
        .addGroupedResult("foo/baz", "other", "path")
        .addGroupedResult("foo/bar", "three", "four")
        .generateIcuData("foo", false);

    assertThat(generated).getPaths().hasSize(2);
    assertThat(generated)
        .hasValuesFor("foo/bar", RbValue.of("one", "two"), RbValue.of("three", "four"));
    assertThat(generated).hasValuesFor("foo/baz", RbValue.of("other", "path"));
}
// Fallback results stand in for missing explicit results, but only if at least one explicit
// result exists for the path.
@Test
public void testFallbackResults() {
    // The indices are what pair each explicit result with its fallback, so they must line up.
    FakeTransformer transformer = new FakeTransformer();
    transformer.addFallbacks(
        "foo/bar",
        FakeResult.fallback("foo/bar", 1, "<ONE>"),
        FakeResult.fallback("foo/bar", 2, "<TWO>"),
        FakeResult.fallback("foo/bar", 3, "<THREE>"));
    Result one = FakeResult.of("foo/bar", 1, false, "one");
    Result two = FakeResult.of("foo/bar", 2, false, "two");
    Result three = FakeResult.of("foo/bar", 3, false, "three");

    // With every explicit result present, the fallbacks are never consulted.
    IcuData allExplicit = new FakeMapper(transformer)
        .addResult(one)
        .addResult(two)
        .addResult(three)
        .generateIcuData("foo", false);
    assertThat(allExplicit).hasValuesFor("foo/bar", singletonValues("one", "two", "three"));

    // A missing leading result is replaced by its fallback.
    IcuData missingFirst = new FakeMapper(transformer)
        .addResult(two)
        .addResult(three)
        .generateIcuData("foo", false);
    assertThat(missingFirst).hasValuesFor("foo/bar", singletonValues("<ONE>", "two", "three"));

    // Missing trailing results are replaced by their fallbacks too.
    IcuData missingLastTwo = new FakeMapper(transformer)
        .addResult(one)
        .generateIcuData("foo", false);
    assertThat(missingLastTwo)
        .hasValuesFor("foo/bar", singletonValues("one", "<TWO>", "<THREE>"));

    // With no explicit result to "seed" the path, the fallbacks alone emit nothing.
    IcuData noExplicit = new FakeMapper(transformer).generateIcuData("foo", false);
    assertThat(noExplicit).getPaths().isEmpty();
}
// In ungrouped results, a value of the form "/path" or "/path[N]" is replaced by the value at
// that index of the target path (index 0 when no index is given).
@Test
public void testAliases_ungrouped() {
    IcuData generated = new FakeMapper()
        .addUngroupedResult("foo/default", "start", "/alias/target", "end")
        .addUngroupedResult("foo/alias-0", "start", "/alias/target[0]", "end")
        .addUngroupedResult("foo/alias-1", "start", "/alias/target[1]", "end")
        .addUngroupedResult("foo/alias-2", "start", "/alias/target[2]", "end")
        .addUngroupedResult("alias/target", "first", "second", "third")
        .generateIcuData("foo", false);

    assertThat(generated).getPaths().hasSize(5);
    assertThat(generated)
        .hasValuesFor("foo/default", singletonValues("start", "first", "end"));
    assertThat(generated)
        .hasValuesFor("foo/alias-0", singletonValues("start", "first", "end"));
    assertThat(generated)
        .hasValuesFor("foo/alias-1", singletonValues("start", "second", "end"));
    assertThat(generated)
        .hasValuesFor("foo/alias-2", singletonValues("start", "third", "end"));
    assertThat(generated)
        .hasValuesFor("alias/target", singletonValues("first", "second", "third"));
}
// Alias-like values inside grouped results are not resolved; they are emitted verbatim.
@Test
public void testAliases_grouped() {
    IcuData generated = new FakeMapper()
        .addGroupedResult("foo/bar", "grouped", "/alias/target")
        .addGroupedResult("foo/bar", "/alias/target[1]")
        .addUngroupedResult("alias/target", "first", "second")
        .generateIcuData("foo", false);

    assertThat(generated).getPaths().hasSize(2);
    assertThat(generated)
        .hasValuesFor(
            "foo/bar",
            RbValue.of("grouped", "/alias/target"),
            RbValue.of("/alias/target[1]"));
    assertThat(generated).hasValuesFor("alias/target", singletonValues("first", "second"));
}
// A path ending in ":alias" keeps its alias value unresolved, whereas the same value on an
// ordinary path is replaced by the alias target's value.
@Test
public void testAliases_explicit() {
    IcuData generated = new FakeMapper()
        .addUngroupedResult("foo/bar:alias", "/alias/target")
        .addUngroupedResult("foo/bar", "/alias/target")
        .addUngroupedResult("alias/target", "alias-value")
        .generateIcuData("foo", false);

    assertThat(generated).getPaths().hasSize(3);
    assertThat(generated).hasValuesFor("foo/bar:alias", singletonValues("/alias/target"));
    assertThat(generated).hasValuesFor("foo/bar", singletonValues("alias-value"));
    assertThat(generated).hasValuesFor("alias/target", singletonValues("alias-value"));
}
// Alias targets may be added either before or after the result which refers to them.
@Test
public void testAliases_ordering() {
    IcuData generated = new FakeMapper()
        .addUngroupedResult("first/alias", "hello")
        .addUngroupedResult("foo/bar", "/first/alias", "/last/alias")
        .addUngroupedResult("last/alias", "world")
        .generateIcuData("foo", false);

    assertThat(generated).hasValuesFor("foo/bar", singletonValues("hello", "world"));
}
// Alias indices refer to the concatenated values of the target path: here "alias/target" is
// built from two separate results, and [0] and [1] pick one value from each of them.
@Test
public void testAliases_concatenation() {
    IcuData generated = new FakeMapper()
        .addUngroupedResult("alias/target", "hello")
        .addUngroupedResult("foo/bar", "/alias/target[0]", "/alias/target[1]")
        .addUngroupedResult("alias/target", "world")
        .generateIcuData("foo", false);

    assertThat(generated).hasValuesFor("foo/bar", singletonValues("hello", "world"));
}
// Referring to an alias path which has no values is an error that names the bad alias.
@Test
public void testAliases_missing() {
    FakeMapper mapper = new FakeMapper()
        .addUngroupedResult("alias/target", "value")
        .addUngroupedResult("foo/bar", "/no-such-alias/target");

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));

    assertThat(thrown).hasMessageThat().contains("no such alias value");
    assertThat(thrown).hasMessageThat().contains("/no-such-alias/target");
}
// An alias index beyond the end of the target's values is an error that names the bad alias.
@Test
public void testAliases_badIndex() {
    FakeMapper mapper = new FakeMapper()
        .addUngroupedResult("alias/target", "value")
        .addUngroupedResult("foo/bar", "/alias/target[1]");

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));

    assertThat(thrown).hasMessageThat().contains("out of bounds");
    assertThat(thrown).hasMessageThat().contains("/alias/target[1]");
}
// An alias whose target value is itself an alias is rejected rather than being followed.
@Test
public void testAliases_noRecursion() {
    FakeMapper mapper = new FakeMapper()
        .addUngroupedResult("alias/target", "/other/alias")
        .addUngroupedResult("other/alias", "/other/alias")
        .addUngroupedResult("foo/bar", "/alias/target");

    IllegalStateException thrown =
        assertThrows(IllegalStateException.class, () -> mapper.generateIcuData("foo", false));

    assertThat(thrown)
        .hasMessageThat()
        .contains("recursive alias resolution is not supported");
}
// A path ending in ":alias" must hold exactly one value; anything else is an error that names
// the offending path.
@Test
public void testAliases_explicitAliasesAreSingletonOnly() {
    FakeMapper mapper = new FakeMapper().addUngroupedResult("foo/bar:alias", "first", "second");

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));

    assertThat(thrown).hasMessageThat().contains("explicit aliases must be singleton values");
    assertThat(thrown).hasMessageThat().contains("foo/bar:alias");
}
/**
 * Test-only mapper which is fed its results directly (via the {@code addXxxResult()} methods)
 * and replays them, in insertion order, when {@link #addResults()} is called.
 *
 * <p>The CLDR data passed to the superclass throws on any access, so a test fails if the mapper
 * under test unexpectedly tries to read CLDR data.
 */
private static final class FakeMapper extends AbstractPathValueMapper {
    // CLDR data for which every access fails with UnsupportedOperationException.
    private static final CldrData EXPLODING_DATA =
        new CldrData() {
            @Override public void accept(PathOrder pathOrder, ValueVisitor valueVisitor) {
                throw new UnsupportedOperationException("should not be called by test");
            }

            @Override public void accept(PathOrder pathOrder, PrefixVisitor prefixVisitor) {
                throw new UnsupportedOperationException("should not be called by test");
            }

            @Override public CldrValue get(CldrPath cldrPath) {
                throw new UnsupportedOperationException("should not be called by test");
            }
        };

    // This preserves insertion order in a well defined way (good for testing alias order).
    private final List<Result> fakeResults = new ArrayList<>();

    /** Creates a mapper backed by a new, empty {@link FakeTransformer} (i.e. no fallbacks). */
    FakeMapper() {
        this(new FakeTransformer());
    }

    /** Creates a mapper which uses the given transformer (e.g. to supply fallback results). */
    FakeMapper(FakeTransformer transformer) {
        super(EXPLODING_DATA, transformer);
    }

    /** Adds an ungrouped result whose index is its 1-based position in the added results. */
    FakeMapper addUngroupedResult(String path, String... values) {
        int index = fakeResults.size() + 1;
        return addResult(FakeResult.of(path, index, false, values));
    }

    /** Adds a grouped result whose index is its 1-based position in the added results. */
    FakeMapper addGroupedResult(String path, String... values) {
        int index = fakeResults.size() + 1;
        return addResult(FakeResult.of(path, index, true, values));
    }

    /** Adds a pre-built result (with whatever index it already has) and returns this mapper. */
    FakeMapper addResult(Result r) {
        fakeResults.add(r);
        return this;
    }

    // Replays the recorded results into the superclass, keyed by each result's own path.
    @Override void addResults() {
        fakeResults.forEach(result -> addResult(result.getKey(), result));
    }
}
// Wraps each given string in its own single-element RbValue, preserving order.
private static RbValue[] singletonValues(String... values) {
    RbValue[] wrapped = new RbValue[values.length];
    for (int i = 0; i < values.length; i++) {
        wrapped[i] = RbValue.of(values[i]);
    }
    return wrapped;
}
}

View file

@ -0,0 +1,404 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.truth.Truth.assertThat;
import static java.util.Optional.empty;
import static org.unicode.cldr.api.CldrValue.parseValue;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
// Almost all the unit-testing for LocaleMapper is done via AbstractPathValueMapper or
// RegexTransformer (and friends). Very little is left that's special to locale data.
@RunWith(JUnit4.class)
public class LocaleMapperTest {
private final FakeTransformer transformer = new FakeTransformer();
private final FakeDataSupplier src = new FakeDataSupplier();
// A single explicit CLDR value with a single mapping yields a single resource bundle path.
@Test
public void testSimple() {
    //ldml/units/durationUnit[@type="(%A)"]/durationUnitPattern ; /durationUnits/$1
    CldrValue durationPattern =
        ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar");
    addMapping("xx", durationPattern, simpleResult("/durationUnits/foo", "Bar"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/durationUnits/foo", "Bar");
}
// Data is registered for three locales, but only "yy" is processed (and only "yy" has a
// mapping), so only the "yy" result may appear in the output.
@Test
public void testCorrectLocaleIsUsed() {
    String durationPath = "units/durationUnit[@type=\"foo\"]/durationUnitPattern";
    src.addLocaleData("xx", ldml(durationPath, "XX"));
    addMapping("yy", ldml(durationPath, "YY"), simpleResult("/durationUnits/foo", "YY"));
    src.addLocaleData("zz", ldml(durationPath, "ZZ"));

    IcuData generated = LocaleMapper.process("yy", src, empty(), transformer, empty());

    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/durationUnits/foo", "YY");
}
// A value which is only inherited, and maps to an unrelated resource bundle path, is not
// written to the output.
@Test
public void testInheritedValuesNotIncludedByDefault() {
    //ldml/units/durationUnit[@type="(%A)"]/durationUnitPattern ; /durationUnits/$1
    CldrValue explicitValue =
        ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar");
    addMapping("xx", explicitValue, simpleResult("/durationUnits/foo", "Bar"));

    //ldml/localeDisplayNames/keys/key[@type="(%A)"] ; /Keys/$1
    CldrValue inheritedValue =
        ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value");
    addInheritedMapping("xx", inheritedValue, simpleResult("/Keys/sometype", "Value"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    // The 2nd mapping is not used because it does not appear in the unresolved CldrData.
    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/durationUnits/foo", "Bar");
}
// An inherited value is written out if it maps to a resource bundle path which also
// receives an explicit value.
@Test
public void testInheritedValuesIncludedWhenSameResourceBundle() {
    //ldml/numbers/currencies/currency[@type="(%W)"]/symbol ; /Currencies/$1 ; fallback=$1
    //ldml/numbers/currencies/currency[@type="(%W)"]/displayName ; /Currencies/$1 ; fallback=$1
    CldrValue symbol = ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$");
    addMapping("xx", symbol, simpleResult("/Currencies/USD", 1, "US$"));

    // This is included because the resource bundle path is the same as above. Note that we
    // have to use the index to distinguish results here (with the real regex based config the
    // index corresponds to the line number of the rule, and it determines result ordering).
    CldrValue displayName =
        ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar");
    addInheritedMapping("xx", displayName, simpleResult("/Currencies/USD", 2, "US Dollar"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    // Now the inherited mapping is used because the path appeared for the unresolved CldrData.
    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/Currencies/USD", singletonValues("US$", "US Dollar"));
}
@Test
public void testChildPathsNotIncludedByDefault() {
    // Tests that in the case that one path is the child of another path (rare) the existence
    // of the parent path will not trigger the child path to be included.
    //
    //ldml/.../dateTimeFormats/availableFormats/dateFormatItem[@id="(%A)"]
    //   ; /calendar/$1/availableFormats/$2
    //ldml/.../dateTimeFormats/availableFormats/dateFormatItem[@id="(%A)"][@count="(%A)"]
    //   ; /calendar/$1/availableFormats/$2/$3
    String parentItem =
        "dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
            + "/availableFormats/dateFormatItem[@id=\"bar\"]";
    addMapping("xx",
        ldml(parentItem, "Foo"),
        simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
    addInheritedMapping("xx",
        ldml(parentItem + "[@count=\"one\"]", "Bar"),
        simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    // Only the explicit parent path is emitted; the inherited child path is not pulled in.
    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/calendar/foo/availableFormats/bar", "Foo");
}
@Test
public void testParentPathsNotIncludedByDefault() {
    // The mirror image of the child-path case: the parent is inherited and the child is
    // explicit, and the existence of the child must not trigger the parent to be included.
    String parentItem =
        "dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
            + "/availableFormats/dateFormatItem[@id=\"bar\"]";
    addInheritedMapping("xx",
        ldml(parentItem, "Foo"),
        simpleResult("/calendar/foo/availableFormats/bar", "Foo"));
    addMapping("xx",
        ldml(parentItem + "[@count=\"one\"]", "Bar"),
        simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    // Only the explicit child path is emitted; the inherited parent path is not pulled in.
    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/calendar/foo/availableFormats/bar/one", "Bar");
}
// Hidden labels exist so that, once these paths are written into the ICU data file (and the
// hidden labels are removed), the result is a "two layer" array:
//
// {
//   "Parent",
//   { "Child-1", "Child-2" }
// }
//
// This must happen even when only one of the child elements is given explicitly.
@Test
public void testHiddenLabelsIncludeParentPaths() {
    // Testing that the existence of a child element using a hidden label *does* trigger the
    // parent element to be included.
    String parentItem =
        "dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
            + "/availableFormats/dateFormatItem[@id=\"bar\"]";
    addInheritedMapping("xx",
        ldml(parentItem, "Parent"),
        simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
    addInheritedMapping("xx",
        ldml(parentItem + "[@count=\"one\"]", "Child-1"),
        simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
    // This is the only explicit mapping and it triggers the sibling _and_ the parent.
    addMapping("xx",
        ldml(parentItem + "[@count=\"many\"]", "Child-2"),
        simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    assertThat(generated).getPaths().hasSize(2);
    assertThat(generated).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
    assertThat(generated)
        .hasValuesFor(
            "/calendar/foo/availableFormats/bar/<HIDDEN>",
            singletonValues("Child-1", "Child-2"));
}
// This is strange behaviour given the test above, since it means that it's impossible to
// use hidden labels to create a situation where the output ICU data looks like:
//
// {
// "Parent",
// { "Child-1", "Child-2" },
// "Other Parent"
// }
//
// if the child elements can be inherited; since if they are not present, you just get:
//
// {
// "Parent",
// "Other Parent"
// }
//
// Which moves the index of the following elements up by one and makes it impossible to
// define a stable length or index mapping for the array.
//
// However this is relied upon in the /Currencies/XXX case where a child array exists, but
// is optional if none of its values are explicitly present. For example in en_150.txt:
//
// Currencies{
// EUR{
// "",
// "Euro",
// {
// "¤#,##0.00",
// ".",
// ",",
// }
// }
// }
//
// In most cases the formatting/grouping information is omitted if it can all be inherited.
//
// This only really works because the child array is the last element in the parent array, so
// not having it present doesn't affect any later elements.
//
// The "group" instruction in the transformation configuration files is a different way to
// allow grouping of sub-arrays which does not have this behaviour.
@Test
public void testHiddenLabelsAreNotIncludedAutomatically() {
    // Here only the parent path is given explicitly and both hidden-label children are
    // inherited, so the children must not be emitted.
    String parentItem =
        "dates/calendars/calendar[@type=\"foo\"]/dateTimeFormats"
            + "/availableFormats/dateFormatItem[@id=\"bar\"]";
    addMapping("xx",
        ldml(parentItem, "Parent"),
        simpleResult("/calendar/foo/availableFormats/bar", "Parent"));
    addInheritedMapping("xx",
        ldml(parentItem + "[@count=\"one\"]", "Child-1"),
        simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 1, "Child-1"));
    addInheritedMapping("xx",
        ldml(parentItem + "[@count=\"many\"]", "Child-2"),
        simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
}
// A default calendar passed to process() is emitted as "/calendar/default", even when the
// locale has no CLDR data at all.
@Test
public void testDefaultCalendar() {
    Optional<String> defaultCalendar = Optional.of("pastafarian");

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, defaultCalendar);

    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/calendar/default", "pastafarian");
}
// With exactly 12 date/time patterns present, the "medium" dateTime pattern is duplicated
// into the 9th position of the output.
@Test
public void testDateTimeHack() {
    //calendar/$1/DateTimePatterns
    // Each row is { format type, format length, pattern }; result indices run from 1 to 12.
    String[][] formats = {
        {"time", "full", "one"},
        {"time", "long", "two"},
        {"time", "medium", "three"},
        {"time", "short", "four"},
        {"date", "full", "five"},
        {"date", "long", "six"},
        {"date", "medium", "seven"},
        {"date", "short", "eight"},
        {"dateTime", "full", "nine"},
        {"dateTime", "long", "ten"},
        {"dateTime", "medium", "eleven"},
        {"dateTime", "short", "twelve"},
    };
    for (int i = 0; i < formats.length; i++) {
        String[] row = formats[i];
        addMapping("xx",
            format(row[0], row[1], row[2]),
            simpleResult("/calendar/foo/DateTimePatterns", i + 1, row[2]));
    }

    IcuData generated = LocaleMapper.process("xx", src, empty(), transformer, empty());

    assertThat(generated).getPaths().hasSize(1);
    assertThat(generated).hasValuesFor("/calendar/foo/DateTimePatterns",
        singletonValues(
            "one", "two", "three", "four",
            "five", "six", "seven", "eight",
            "eleven",  // <-- legacy reasons, don't ask!
            "nine", "ten", "eleven", "twelve"));
}
// With only 11 of the 12 date/time patterns present, processing fails with an error naming
// the affected resource bundle path.
@Test
public void testDateTimeHack_wrongNumberofElements() {
    // One missing pattern from the start (there is no "time"/"full" row), so the result
    // indices run from 2 to 12.
    String[][] formats = {
        {"time", "long", "two"},
        {"time", "medium", "three"},
        {"time", "short", "four"},
        {"date", "full", "five"},
        {"date", "long", "six"},
        {"date", "medium", "seven"},
        {"date", "short", "eight"},
        {"dateTime", "full", "nine"},
        {"dateTime", "long", "ten"},
        {"dateTime", "medium", "eleven"},
        {"dateTime", "short", "twelve"},
    };
    for (int i = 0; i < formats.length; i++) {
        String[] row = formats[i];
        addMapping("xx",
            format(row[0], row[1], row[2]),
            simpleResult("/calendar/foo/DateTimePatterns", i + 2, row[2]));
    }

    IllegalStateException thrown =
        assertThrows(
            IllegalStateException.class,
            () -> LocaleMapper.process("xx", src, empty(), transformer, empty()));

    assertThat(thrown).hasMessageThat().contains("unexpected");
    assertThat(thrown).hasMessageThat().contains("/calendar/foo/DateTimePatterns");
}
// Builds a (valueless) date/time format path for the "foo" calendar. Note that the given
// pattern is written into the "type" attribute of the final <pattern> element rather than
// being used as the CLDR value.
private static CldrValue format(String type, String length, String pattern) {
    String pathTemplate =
        "dates/calendars/calendar[@type=\"foo\"]"
            + "/%1$sFormats"
            + "/%1$sFormatLength[@type=\"%2$s\"]"
            + "/%1$sFormat[@type=\"standard\"]/pattern[@type=\"%3$s\"]";
    String path = String.format(pathTemplate, type, length, pattern);
    return ldml(path);
}
// Registers the value as explicit data for the locale, and registers the results which the
// fake transformer should return for that value.
private void addMapping(String locale, CldrValue value, Result... results) {
    transformer.addResults(value, results);
    src.addLocaleData(locale, value);
}
// Registers the value as inherited data for the locale, and registers the results which the
// fake transformer should return for that value.
private void addInheritedMapping(String locale, CldrValue value, Result... results) {
    transformer.addResults(value, results);
    src.addInheritedData(locale, value);
}
// Creates an ungrouped, single-valued result with index 1.
private static Result simpleResult(String path, String value) {
    return simpleResult(path, 1, value);
}

// Creates an ungrouped, single-valued result with the given index.
private static Result simpleResult(String path, int index, String value) {
    boolean grouped = false;
    return FakeResult.of(path, index, grouped, value);
}
// Wraps the given values in a CldrData instance.
// NOTE(review): no caller of this helper is visible in this test class - confirm whether it
// is still needed.
private static CldrData cldrData(CldrValue... values) {
return CldrDataSupplier.forValues(Arrays.asList(values));
}
// Parses an LDML path (given without its leading "//ldml/") with an empty value.
private static CldrValue ldml(String path) {
    return ldml(path, "");
}

// Parses an LDML path (given without its leading "//ldml/") with the given value.
private static CldrValue ldml(String path, String value) {
    // The value must be passed through; otherwise values which differ only by their text
    // (e.g. the same path in different locales) become indistinguishable to the fakes.
    return parseValue("//ldml/" + path, value);
}
// Wraps each given string in its own single-element RbValue, preserving order.
private static RbValue[] singletonValues(String... values) {
    RbValue[] wrapped = new RbValue[values.length];
    for (int i = 0; i < values.length; i++) {
        wrapped[i] = RbValue.of(values[i]);
    }
    return wrapped;
}
}

View file

@ -180,7 +180,8 @@ public class PluralsMapperTest {
StringBuilder cldrPath = new StringBuilder("//supplementalData");
appendAttribute(cldrPath.append("/plurals"), "type", type);
appendAttribute(cldrPath.append("/pluralRules"), "locales", Joiner.on(' ').join(locales));
appendAttribute(cldrPath.append("/pluralRule"), "count", count);
// We aren't testing sort index (#N) here, but still need to set it to something.
appendAttribute(cldrPath.append("/pluralRule#0"), "count", count);
return CldrValue.parseValue(cldrPath.toString(), value);
}

View file

@ -49,11 +49,11 @@ public class RbnfMapperTest {
@Test
public void testSingleRuleset() {
int idx = 1;
int idx = 0;
CldrData cldrData = cldrData(
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", idx++),
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", idx++),
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", idx++));
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", ++idx),
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ++idx),
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
@ -71,16 +71,16 @@ public class RbnfMapperTest {
// Note that input order of these paths shouldn't matter since they are ordered (and thus
// grouped) by DTD order (relative order matters for values in the same set, but values
// do not have to grouped together).
int idx = 1;
int idx = 0;
CldrData cldrData = cldrData(
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", idx++),
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", idx++),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", idx++),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", idx++),
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", idx++),
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", idx++),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", idx++),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", idx++));
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", ++idx),
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", ++idx),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", ++idx),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", ++idx),
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", ++idx),
rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", ++idx),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ++idx),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
@ -101,19 +101,19 @@ public class RbnfMapperTest {
@Test
public void testSpecials() {
int idx = 1;
int idx = 0;
CldrData specials = cldrData(
rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", idx++),
rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", idx++),
rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", idx++),
rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", idx++),
rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", idx++));
rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", ++idx),
rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", ++idx),
rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", ++idx),
rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", ++idx),
rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", ++idx));
idx = 1;
idx = 0;
CldrData cldrData = cldrData(
rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", idx++),
rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", ++idx),
rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0",
"=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", idx++));
"=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials));
@ -139,12 +139,12 @@ public class RbnfMapperTest {
// the same, it's not entirely obviously why some of the special cases really exist.
@Test
public void testEscaping() {
int idx = 1;
int idx = 0;
CldrData cldrData = cldrData(
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", idx++),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", idx++),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", idx++),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", idx++));
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", ++idx),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", ++idx),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ++idx),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
@ -173,7 +173,8 @@ public class RbnfMapperTest {
StringBuilder cldrPath = new StringBuilder("//ldml/rbnf");
appendAttribute(cldrPath.append("/rulesetGrouping"), "type", group);
cldrPath.append("/ruleset");
// We aren't testing sort index (#N) here, but still need to set it to something.
cldrPath.append("/ruleset#0");
appendAttribute(cldrPath, "type", setType);
appendAttribute(cldrPath, "access", access);
cldrPath.append("/rbnfrule#").append(ruleIndex);

View file

@ -0,0 +1,95 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.cldr.api.CldrValue.parseValue;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
// SupplementalMapper needs very few tests of its own: almost all of its behaviour is already
// unit-tested via AbstractPathValueMapper or RegexTransformer (and friends), which leaves only
// the small amount that is special to supplemental data.
@RunWith(JUnit4.class)
public class SupplementalMapperTest {
    // Path matcher specification used by the tests which want all supplemental data paths.
    private static final String ALL_SUPPLEMENTAL_DATA = "supplementalData";

    private final FakeTransformer transformer = new FakeTransformer();
    private final FakeDataSupplier src = new FakeDataSupplier();

    @Test
    public void testSimple() {
        addFooToBarMapping();

        IcuData converted = process(ALL_SUPPLEMENTAL_DATA);

        assertThat(converted).getPaths().hasSize(1);
        assertThat(converted).hasValuesFor("/Foo", "Bar");
    }

    @Test
    public void testFifoLabel() {
        // Example:
        // //supplementalData/currencyData/region[@iso3166="(%W)"]/currency[@iso4217="(%W)"]
        //      ; /CurrencyMap/$1/<FIFO>/id ; values=$2
        //
        // The order in which the mappings are added below does not affect the output. Although
        // the "FIFO" mechanism works on encounter-order, the CldrData is sorted before being
        // transformed (and in this case, is resolved on the currency code USD < USN < USS).
        for (String currencyCode : new String[] {"USN", "USS", "USD"}) {
            addUsCurrencyMapping(currencyCode);
        }

        IcuData converted = process(ALL_SUPPLEMENTAL_DATA);

        assertThat(converted).getPaths().hasSize(3);
        assertThat(converted).hasValuesFor("/CurrencyMap/US/<0000>/id", "USD");
        assertThat(converted).hasValuesFor("/CurrencyMap/US/<0001>/id", "USN");
        assertThat(converted).hasValuesFor("/CurrencyMap/US/<0002>/id", "USS");
    }

    @Test
    public void testPathFilter() {
        addFooToBarMapping();
        addUsCurrencyMapping("USN");

        // Only the likely subtags mapping is expected in the output for this matcher.
        IcuData converted = process("supplementalData/likelySubtags");

        assertThat(converted).getPaths().hasSize(1);
        assertThat(converted).hasValuesFor("/Foo", "Bar");
    }

    // Runs the mapper over the fake data and transformer using the given path matcher
    // specification.
    private IcuData process(String pathFilter) {
        PathMatcher matcher = PathMatcher.of(pathFilter);
        return SupplementalMapper.process(src, transformer, "name", matcher);
    }

    // Registers the likely subtags value for "Foo" -> "Bar", mapped to the ICU path "/Foo".
    private void addFooToBarMapping() {
        CldrValue likelySubtag =
            supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]");
        addExpectedMapping(likelySubtag, simpleResult("/Foo", "Bar"));
    }

    // Registers a currency value for the "US" region, mapped to a "<FIFO>" labelled ICU path
    // whose value is the given currency code.
    private void addUsCurrencyMapping(String currencyCode) {
        CldrValue currency = supplementalData(
            "currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"" + currencyCode + "\"]");
        addExpectedMapping(currency, simpleResult("/CurrencyMap/US/<FIFO>/id", currencyCode));
    }

    // Adds the CLDR value to the fake data supplier and tells the fake transformer which
    // results to associate with it.
    private void addExpectedMapping(CldrValue value, Result... results) {
        src.addSupplementalData(value);
        transformer.addResults(value, results);
    }

    // Creates a fake result for the given ICU path holding one value.
    private static Result simpleResult(String path, String value) {
        return FakeResult.of(path, 1, false, value);
    }

    // Parses a CLDR value (with an empty value string) whose path is rooted at
    // "//supplementalData/".
    private static CldrValue supplementalData(String path) {
        String fullPath = "//supplementalData/" + path;
        return parseValue(fullPath, "");
    }
}