ICU-20693 Unit tests for most conversion code and mappers.

See #774
This commit is contained in:
David Beaumont 2019-09-19 13:21:59 +00:00 committed by David Beaumont
parent 417254f66a
commit 791980cf41
34 changed files with 2460 additions and 278 deletions

View file

@ -2,8 +2,6 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Arrays;
@ -12,8 +10,6 @@ import java.util.List;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
@ -24,7 +20,6 @@ import com.google.common.collect.ListMultimap;
*/
public final class IcuData {
private static final RbPath RB_VERSION = RbPath.of("Version");
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
private final String name;
private final boolean hasFallback;
@ -98,48 +93,11 @@ public final class IcuData {
add(rbPath, rbValue);
}
public void setVersion(String versionString) {
add(RB_VERSION, versionString);
}
public void addResults(ListMultimap<RbPath, PathValueTransformer.Result> resultsByRbPath) {
for (RbPath rbPath : resultsByRbPath.keySet()) {
for (PathValueTransformer.Result r : resultsByRbPath.get(rbPath)) {
if (r.isGrouped()) {
// Grouped results have all the values in a single value entry.
add(rbPath, RbValue.of(r.getValues()));
} else {
if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) {
r.getValues().forEach(v -> add(rbPath, RbValue.of(v)));
} else {
// Ungrouped results are one value per entry, but might be expanded into
// grouped results if they are a path referencing a grouped entry.
r.getValues().forEach(v -> add(rbPath, replacePathValues(v)));
}
}
}
}
}
/**
* Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed
* to be a reference to an existing value in a resource bundle. Note that the referenced bundle
* might be grouped (i.e. an array with more than one element).
* Sets the value of the "/Version" path to be the given string, replacing any previous value.
*/
private RbValue replacePathValues(String value) {
Matcher m = ARRAY_INDEX.matcher(value);
if (!m.matches()) {
return RbValue.of(value);
}
// The only constraint is that the "path" value starts with a leading '/', but parsing into
// the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
// captured value contains '/' characters to represent path delimiters.
RbPath replacePath = RbPath.parse(m.group(1));
List<RbValue> replaceValues = get(replacePath);
checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath);
// If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
int replaceIndex = m.groupCount() > 1 ? Integer.parseInt(m.group(2)) : 0;
return replaceValues.get(replaceIndex);
public void setVersion(String versionString) {
replace(RB_VERSION, versionString);
}
/**
@ -155,16 +113,6 @@ public final class IcuData {
return Collections.unmodifiableSet(paths);
}
/** Returns whether the given path is present in this instance. */
public boolean contains(RbPath rbPath) {
return paths.contains(rbPath);
}
/** Returns whether there are any paths in this instance. */
public boolean isEmpty() {
return paths.isEmpty();
}
@Override public String toString() {
StringWriter out = new StringWriter();
PrintWriter w = new PrintWriter(out);

View file

@ -260,14 +260,14 @@ final class IcuDataDumper {
private RbPath getPathFromStack() {
if (pathStack.isEmpty()) {
return RbPath.empty();
return RbPath.of();
}
List<String> segments = new ArrayList<>();
Iterables.addAll(segments, pathStack);
if (segments.get(0).matches("<[0-9]{4}>")) {
segments.remove(0);
}
return segments.isEmpty() ? RbPath.empty() : RbPath.of(Lists.reverse(segments));
return RbPath.of(Lists.reverse(segments));
}
private String getSegment(String segmentOrNull) {

View file

@ -14,6 +14,8 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.collect.Iterables;
/**
* Writes an IcuData object to a text file. A lot of this class was copied directly from the
* original {@code IcuTextWriter} in the CLDR project and has a number of very idiosyncratic
@ -76,7 +78,7 @@ final class IcuTextWriter {
out.print("{");
depth++;
RbPath lastPath = RbPath.empty();
RbPath lastPath = RbPath.of();
for (RbPath path : icuData.getPaths()) {
// Close any blocks up to the common path length. Since paths are all distinct, the
// common length should always be shorter than either path. We add 1 since we must also
@ -166,7 +168,7 @@ final class IcuTextWriter {
onlyValue = values.get(0);
if (onlyValue.isSingleton() && !mustBeArray(false, name, rbPath)) {
// Value has a single element and is not being forced to be an array.
String onlyElement = onlyValue.getElement(0);
String onlyElement = Iterables.getOnlyElement(onlyValue.getElements());
if (quote) {
onlyElement = quoteInside(onlyElement);
}

View file

@ -404,7 +404,7 @@ public final class LdmlConverter {
// Adding a parent locale makes the data non-empty and forces it to be written.
supplementalData.getExplicitParentLocaleOf(splitData.getName())
.ifPresent(p -> splitData.add(RB_PARENT, p));
if (!splitData.isEmpty() || isBaseLanguage || dir.includeEmpty()) {
if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
splitData.setVersion(CldrDataSupplier.getCldrVersionString());
write(splitData, outDir);
writtenLocaleIds.put(dir, id);

View file

@ -54,20 +54,25 @@ public final class RbPath implements Comparable<RbPath> {
private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
QUOTED_SEGMENT_CHARS.and(whitespace().negate());
private static final RbPath EMPTY = new RbPath(ImmutableList.of());
public static RbPath empty() {
return EMPTY;
}
/**
* Returns a path with the specified (possibly empty) sequence of segments. Note that unlike
* {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
* element constructed by this method.
*/
public static RbPath of(String... segments) {
return of(Arrays.asList(segments));
}
/**
* Returns a path with the specified (possibly empty) sequence of segments. Note that unlike
* {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
* element constructed by this method.
*/
public static RbPath of(Iterable<String> segments) {
return new RbPath(segments);
}
/** Parses the given path string, assuming {@code '/'} as a path separator. */
public static RbPath parse(String path) {
checkArgument(!path.isEmpty(), "cannot parse an empty path string");
// Allow leading '/', but don't allow empty segments anywhere else.
@ -77,7 +82,8 @@ public final class RbPath implements Comparable<RbPath> {
return new RbPath(PATH_SPLITTER.split(path));
}
static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
/** Returns the common prefix length of two paths (useful when thinking of path hierarchies). */
public static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
int maxLength = Math.min(lhs.length(), rhs.length());
int n = 0;
while (n < maxLength && lhs.getSegment(n).equals(rhs.getSegment(n))) {
@ -91,6 +97,7 @@ public final class RbPath implements Comparable<RbPath> {
private RbPath(Iterable<String> segments) {
this.segments = ImmutableList.copyOf(segments);
// Use "this.segments" since the incoming list can have a different hash!
this.hashCode = Objects.hash(this.segments);
for (String segment : this.segments) {
checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
@ -122,43 +129,32 @@ public final class RbPath implements Comparable<RbPath> {
}
}
/** Returns the number of segments in this path. */
public int length() {
return segments.size();
}
/** Returns the Nth segment in this path. */
public String getSegment(int n) {
return segments.get(n);
}
public RbPath getParent() {
checkState(length() > 0, "cannot get parent of the empty path");
return length() > 1 ? new RbPath(segments.subList(0, length() - 1)) : EMPTY;
}
public boolean isAnonymous() {
return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
}
/** Returns a new path extended at the end by the specified segments. */
public RbPath extendBy(String... parts) {
return new RbPath(Iterables.concat(segments, Arrays.asList(parts)));
}
public RbPath extendBy(RbPath suffix) {
return new RbPath(Iterables.concat(segments, suffix.segments));
}
public RbPath mapSegments(Function<? super String, String> fn) {
return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
}
/** Returns whether this path starts with the specified prefix. */
public boolean startsWith(RbPath prefix) {
return prefix.length() <= length() && matchesSublist(prefix, 0);
}
/** Returns whether this path ends with the specified suffix. */
public boolean endsWith(RbPath suffix) {
return suffix.length() <= length() && matchesSublist(suffix, length() - suffix.length());
}
/** Returns whether this path contains the specified path. */
public boolean contains(RbPath path) {
int maxOffset = length() - path.length();
for (int i = 0; i <= maxOffset; i++) {
@ -179,6 +175,22 @@ public final class RbPath implements Comparable<RbPath> {
return true;
}
// TODO: Remove this and isAnonymous() since they are only called once each, in the same place.
public RbPath getParent() {
checkState(length() > 0, "cannot get parent of the empty path");
return new RbPath(segments.subList(0, length() - 1));
}
public boolean isAnonymous() {
return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
}
// TODO: Remove this special case code (called exactly once).
public RbPath mapSegments(Function<? super String, String> fn) {
return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
}
// TODO: Remove this in favour of having properly typed paths.
boolean isIntPath() {
String lastElement = segments.get(segments.size() - 1);
return lastElement.endsWith(":int") || lastElement.endsWith(":intvector");

View file

@ -38,22 +38,20 @@ public final class RbValue {
return elements;
}
/** Returns whether this is a single element value. */
/**
* Returns whether this is a single element value. Singleton values are treated differently when
* writing out ICU data files.
*/
public boolean isSingleton() {
return elements.size() == 1;
}
/** Returns the Nth element of this value. */
public String getElement(int n) {
return elements.get(n);
}
@Override public int hashCode() {
return Objects.hashCode(elements);
}
@Override public boolean equals(Object obj) {
return obj instanceof RbValue && elements.equals(((RbValue) obj).elements);
return obj instanceof RbValue && elements.equals(((RbValue) obj).elements);
}
@Override public String toString() {

View file

@ -0,0 +1,100 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.collect.ListMultimap;
/**
* An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures
* that transformation results are correctly processed when being added to IcuData instances.
*/
public abstract class AbstractPathValueMapper {
    // Matches values of the form "/foo/bar" or "/foo/bar[N]". Group 1 captures the path
    // (including the leading '/') and group 2 captures the optional array index digits.
    private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");

    // The data instance which is populated exactly once by transform().
    private final IcuData icuData;

    /**
     * Creates a mapper which populates a new IcuData instance.
     *
     * @param name the name passed to the underlying IcuData instance.
     * @param hasFallback the fallback flag passed to the underlying IcuData instance.
     */
    AbstractPathValueMapper(String name, boolean hasFallback) {
        this.icuData = new IcuData(name, hasFallback);
    }

    /** Implemented by sub-classes to return all results to be added to the IcuData instance. */
    abstract ListMultimap<RbPath, Result> getResults();

    /**
     * Adds results to the IcuData instance according to expected {@code PathValueTransformer}
     * semantics. This method must only be called once per mapper.
     *
     * @return the populated IcuData instance.
     * @throws IllegalStateException if the IcuData instance already contains any paths.
     */
    final IcuData transform() {
        checkState(icuData.getPaths().isEmpty(),
            "transform() method cannot be called multiple times: %s", icuData);

        // This subclass mostly exists to control the fact that results need to be added in one go
        // to the IcuData because of how referenced paths are handled. If results could be added in
        // multiple passes, you could have confusing situations in which values have path
        // references in them but the referenced paths have not been transformed yet. Forcing the
        // subclass to implement a single method to generate all results at once ensures that we
        // control the lifecycle of the data and how results are processed as they are added to the
        // IcuData.
        addResults(getResults());
        return icuData;
    }

    /**
     * Adds transformation results on the specified multi-map to this data instance. Results are
     * handled differently according to whether they are grouped, or represent an alias value. If
     * the value of an ungrouped result is itself a resource bundle path (including possibly having
     * an array index) then the referenced value is assumed to be an existing path whose value is
     * then substituted.
     */
    // TODO: Fix this to NOT implicitly rely on ordering of referenced values.
    private void addResults(ListMultimap<RbPath, Result> resultsByRbPath) {
        for (RbPath rbPath : resultsByRbPath.keySet()) {
            for (Result r : resultsByRbPath.get(rbPath)) {
                if (r.isGrouped()) {
                    // Grouped results have all the values in a single value entry.
                    icuData.add(rbPath, RbValue.of(r.getValues()));
                } else {
                    if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) {
                        // Alias values are added verbatim (never treated as path references).
                        r.getValues().forEach(v -> icuData.add(rbPath, RbValue.of(v)));
                    } else {
                        // Ungrouped results are one value per entry, but might be expanded into
                        // grouped results if they are a path referencing a grouped entry.
                        r.getValues().forEach(v -> icuData.add(rbPath, replacePathValues(v)));
                    }
                }
            }
        }
    }

    /**
     * Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed
     * to be a reference to an existing value in a resource bundle. Note that the referenced bundle
     * might be grouped (i.e. an array with more than one element).
     *
     * @param value the raw value, which is returned (wrapped) unchanged if it is not a reference.
     * @return the referenced value if {@code value} is a path reference, otherwise a value
     *     wrapping the given string.
     * @throws IllegalArgumentException if the referenced path is not present in the IcuData.
     */
    private RbValue replacePathValues(String value) {
        Matcher m = ARRAY_INDEX.matcher(value);
        if (!m.matches()) {
            return RbValue.of(value);
        }
        // The only constraint is that the "path" value starts with a leading '/', but parsing into
        // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the
        // captured value contains '/' characters to represent path delimiters.
        RbPath replacePath = RbPath.parse(m.group(1));
        List<RbValue> replaceValues = icuData.get(replacePath);
        checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath);
        // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]").
        // The optional index group is null when it did not participate in the match. Note that
        // Matcher.groupCount() cannot be used to detect this since it returns the number of
        // capturing groups in the pattern (always 2 here), not the number which matched.
        String indexString = m.group(2);
        int replaceIndex = indexString != null ? Integer.parseInt(indexString) : 0;
        return replaceValues.get(replaceIndex);
    }
}

View file

@ -21,6 +21,7 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
@ -68,9 +69,14 @@ public final class CollationMapper {
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
CollationVisitor visitor = new CollationVisitor(localeId);
icuSpecialData.ifPresent(s -> s.accept(ARBITRARY, visitor));
src.getDataForLocale(localeId, UNRESOLVED).accept(ARBITRARY, visitor);
cldrData.accept(ARBITRARY, visitor);
return visitor.icuData;
}
@ -82,6 +88,7 @@ public final class CollationMapper {
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty <collation> element that's a non-leaf element and thus not
// visited), but we should add an empty sequence to the output data.
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
if (localeId.equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
@ -108,12 +115,13 @@ public final class CollationMapper {
// "short" it can also have other values. This code was copied from CollationMapper
// which has the line;
// isShort = attr.getValue("alt") != null;
// TODO: Raise a ticket to examine this.
boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent();
// Note that it's not clear why there's a check for "contains()" here. The code
// from which this was derived is largely undocumented and this check could have
// been overly defensive (perhaps a duplicate key should be an error?).
if (isShort || !icuData.contains(rbPath)) {
if (isShort || !icuData.getPaths().contains(rbPath)) {
RbValue rules = RbValue.of(
LINE_SPLITTER.splitToList(v.getValue()).stream()
.map(CollationMapper::removeComment)

View file

@ -30,6 +30,7 @@ import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.SetMultimap;
/**
@ -39,7 +40,7 @@ import com.google.common.collect.SetMultimap;
* <p>This is currently driven by the {@code ldml2icu_locale.txt} configuration file via a
* {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
*/
public final class LocaleMapper {
public final class LocaleMapper extends AbstractPathValueMapper {
// The default calendar (only set if different from the inherited parent value).
private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");
@ -62,11 +63,7 @@ public final class LocaleMapper {
PathValueTransformer transformer,
SupplementalData supplementalData) {
IcuData icuData = new IcuData(localeId, true);
// Write out the results into the IcuData class, preserving result grouping and expanding
// path references as necessary.
ResultsCollector collector = new ResultsCollector(transformer);
icuData.addResults(collector.collectResultsFor(localeId, src, icuSpecialData));
IcuData icuData = new LocaleMapper(localeId, src, icuSpecialData, transformer).transform();
doDateTimeHack(icuData);
supplementalData.getDefaultCalendar(icuData.getName())
.ifPresent(c -> icuData.add(RB_CALENDAR, c));
@ -97,79 +94,87 @@ public final class LocaleMapper {
}
}
private static final class ResultsCollector {
private final PathValueTransformer transformer;
private final Set<RbPath> validRbPaths = new HashSet<>();
private final String localeId;
private final CldrDataSupplier src;
private final Optional<CldrData> icuSpecialData;
private final PathValueTransformer transformer;
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
private final Set<RbPath> validRbPaths = new HashSet<>();
ResultsCollector(PathValueTransformer transformer) {
this.transformer = checkNotNull(transformer);
}
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
ImmutableListMultimap<RbPath, Result> collectResultsFor(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
private LocaleMapper(
String localeId,
CldrDataSupplier src,
Optional<CldrData> icuSpecialData,
PathValueTransformer transformer) {
CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
CldrData resolved = src.getDataForLocale(localeId, RESOLVED);
DynamicVars varFn = p -> {
CldrValue cldrValue = resolved.get(p);
return cldrValue != null ? cldrValue.getValue() : null;
};
super(localeId, true);
this.localeId = localeId;
this.src = checkNotNull(src);
this.icuSpecialData = checkNotNull(icuSpecialData);
this.transformer = checkNotNull(transformer);
}
collectPaths(unresolved, varFn);
collectResults(resolved, varFn);
icuSpecialData.ifPresent(s -> collectSpecials(s, varFn));
@Override
ListMultimap<RbPath, Result> getResults() {
CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED);
CldrData resolved = src.getDataForLocale(localeId, RESOLVED);
DynamicVars varFn = p -> {
CldrValue cldrValue = resolved.get(p);
return cldrValue != null ? cldrValue.getValue() : null;
};
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
collectPaths(unresolved, varFn);
collectResults(resolved, varFn);
icuSpecialData.ifPresent(s -> collectSpecials(s, varFn));
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
}
return out.build();
}
return out.build();
}
private void collectPaths(CldrData unresolved, DynamicVars varFn) {
ValueVisitor collectPaths =
v -> transformer.transform(v, varFn).forEach(this::collectResultPath);
unresolved.accept(DTD, collectPaths);
}
private void collectPaths(CldrData unresolved, DynamicVars varFn) {
ValueVisitor collectPaths =
v -> transformer.transform(v, varFn).forEach(this::collectResultPath);
unresolved.accept(DTD, collectPaths);
}
private void collectResultPath(Result result) {
RbPath rbPath = result.getKey();
validRbPaths.add(rbPath);
if (rbPath.isAnonymous()) {
RbPath parent = rbPath.getParent();
checkState(!parent.isAnonymous(),
"anonymous paths should not be nested: %s", rbPath);
validRbPaths.add(parent);
}
}
void collectResults(CldrData resolved, DynamicVars varFn) {
ValueVisitor collectResults =
v -> transformer.transform(v, varFn).stream()
.filter(r -> validRbPaths.contains(r.getKey()))
.forEach(r -> resultsByRbPath.put(r.getKey(), r));
resolved.accept(DTD, collectResults);
}
private void collectSpecials(CldrData cldrData, DynamicVars varFn) {
cldrData.accept(DTD, v ->
transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r)));
private void collectResultPath(Result result) {
RbPath rbPath = result.getKey();
validRbPaths.add(rbPath);
if (rbPath.isAnonymous()) {
RbPath parent = rbPath.getParent();
checkState(!parent.isAnonymous(),
"anonymous paths should not be nested: %s", rbPath);
validRbPaths.add(parent);
}
}
private LocaleMapper() {}
private void collectResults(CldrData resolved, DynamicVars varFn) {
ValueVisitor collectResults =
v -> transformer.transform(v, varFn).stream()
.filter(r -> validRbPaths.contains(r.getKey()))
.forEach(r -> resultsByRbPath.put(r.getKey(), r));
resolved.accept(DTD, collectResults);
}
private void collectSpecials(CldrData cldrData, DynamicVars varFn) {
cldrData.accept(DTD, v ->
transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r)));
}
}

View file

@ -29,7 +29,7 @@ import com.google.common.collect.SetMultimap;
* <p>This is currently driven by the {@code ldml2icu_supplemental.txt} configuration file via a
* {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
*/
public final class SupplementalMapper {
public final class SupplementalMapper extends AbstractPathValueMapper {
private static final RbPath RB_FIFO = RbPath.of("<FIFO>");
/**
@ -38,81 +38,78 @@ public final class SupplementalMapper {
* @param src the CLDR data supplier to process.
* @param transformer the transformer to match and transform each CLDR path/value pair.
* @param icuName the name for the generated IcuData.
* @param includePaths a matcher to select the CLDR paths to be transformed.
* @param paths a matcher to select the CLDR paths to be transformed.
* @return An IcuData instance containing the specified subset of supplemental data with the
* given ICU name.
*/
// TODO: Improve external data splitting and remove need for a PathMatcher here.
public static IcuData process(
CldrDataSupplier src, PathValueTransformer transformer, String icuName,
PathMatcher includePaths) {
ResultsCollector collector = new ResultsCollector(includePaths, transformer);
// Write out the results into the IcuData class, preserving result grouping and expanding
// path references as necessary.
IcuData icuData = new IcuData(icuName, false);
icuData.addResults(collector.getResults(src));
return icuData;
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
return new SupplementalMapper(src, transformer, icuName, paths).transform();
}
private static final class ResultsCollector {
private final PathMatcher pathMatcher;
private final PathValueTransformer transformer;
private final CldrDataSupplier src;
private final PathMatcher paths;
private final PathValueTransformer transformer;
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
private int fifoCounter = 0;
// WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for
// each key. The reason is that result comparison is not "consistent with equals", and
// TreeMultimap uses the comparator to decide if two elements are equal (not the equals()
// method), and it does this even if using the add() method of the sorted set (this is in
// fact in violation of the stated behaviour of Set#add).
private final SetMultimap<RbPath, Result> resultsByRbPath = LinkedHashMultimap.create();
private int fifoCounter = 0;
ResultsCollector(PathMatcher pathMatcher, PathValueTransformer transformer) {
this.pathMatcher = checkNotNull(pathMatcher);
this.transformer = checkNotNull(transformer);
}
private SupplementalMapper(
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
private void visit(CldrValue value) {
if (pathMatcher.matchesPrefixOf(value.getPath())) {
for (Result r : transformer.transform(value)) {
RbPath rbPath = r.getKey();
if (rbPath.contains(RB_FIFO)) {
// The fifo counter needs to be formatted with leading zeros for sorting.
rbPath = rbPath.mapSegments(
s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
}
resultsByRbPath.put(rbPath, r);
}
fifoCounter++;
}
}
ImmutableListMultimap<RbPath, Result> getResults(CldrDataSupplier supplier) {
// DTD and NESTED_GROUPING order differ because of how the magic <FIFO> label works (it
// basically enforces "encounter order" onto things in unlabeled sequences, which matches
// the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order
// to remove any lingering implicit dependencies on the CLDR data behaviour.
CldrData supplementalData = supplier.getDataForType(CldrDataType.SUPPLEMENTAL);
PathValueTransformer.DynamicVars varFn = p -> {
CldrValue cldrValue = supplementalData.get(p);
return cldrValue != null ? cldrValue.getValue() : null;
};
supplementalData.accept(NESTED_GROUPING, this::visit);
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
}
}
return out.build();
}
super(icuName, false);
this.src = checkNotNull(src);
this.paths = checkNotNull(paths);
this.transformer = checkNotNull(transformer);
}
private SupplementalMapper() {}
@Override
ImmutableListMultimap<RbPath, Result> getResults() {
// DTD and NESTED_GROUPING order differ because of how the magic <FIFO> label works (it
// basically enforces "encounter order" onto things in unlabeled sequences, which matches
// the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order
// to remove any lingering implicit dependencies on the CLDR data behaviour.
CldrData supplementalData = src.getDataForType(CldrDataType.SUPPLEMENTAL);
PathValueTransformer.DynamicVars varFn = p -> {
CldrValue cldrValue = supplementalData.get(p);
return cldrValue != null ? cldrValue.getValue() : null;
};
supplementalData.accept(NESTED_GROUPING, this::visit);
ImmutableListMultimap.Builder<RbPath, Result> out = ImmutableListMultimap.builder();
out.orderValuesBy(natural());
for (RbPath rbPath : resultsByRbPath.keySet()) {
Set<Result> existingResults = resultsByRbPath.get(rbPath);
out.putAll(rbPath, existingResults);
for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) {
if (existingResults.stream().noneMatch(fallback::isFallbackFor)) {
out.put(rbPath, fallback);
}
}
}
return out.build();
}
private void visit(CldrValue value) {
if (paths.matchesPrefixOf(value.getPath())) {
for (Result r : transformer.transform(value)) {
RbPath rbPath = r.getKey();
if (rbPath.contains(RB_FIFO)) {
// The fifo counter needs to be formatted with leading zeros for sorting.
rbPath = rbPath.mapSegments(
s -> s.equals("<FIFO>") ? String.format("<%04d>", fifoCounter) : s);
}
resultsByRbPath.put(rbPath, r);
}
fifoCounter++;
}
}
}

View file

@ -19,6 +19,7 @@ import java.util.Optional;
import java.util.function.Function;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
@ -28,6 +29,7 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.annotations.VisibleForTesting;
import com.ibm.icu.text.Transliterator;
/**
@ -77,15 +79,23 @@ public final class TransformsMapper {
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src, Path ruleFileOutputDir) {
RuleVisitor visitor = new RuleVisitor(p -> {
Function<Path, PrintWriter> fileWriterFn = p -> {
Path file = ruleFileOutputDir.resolve(p);
try {
return new PrintWriter(Files.newBufferedWriter(file, CREATE, TRUNCATE_EXISTING));
} catch (IOException e) {
throw new RuntimeException("error opening file: " + file, e);
}
});
src.getDataForType(SUPPLEMENTAL).accept(DTD, visitor);
};
CldrData cldrData = src.getDataForType(SUPPLEMENTAL);
return process(cldrData, fileWriterFn);
}
/**
 * Runs the rule visitor over the given CLDR data in DTD order and returns the resulting ICU
 * data, after the hard-coded special case values have been added to it.
 *
 * @param cldrData the CLDR data to visit.
 * @param fileWriterFn function handed to the rule visitor for opening output writers.
 * @return the ICU data collected by the visitor.
 */
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData cldrData, Function<Path, PrintWriter> fileWriterFn) {
    RuleVisitor ruleVisitor = new RuleVisitor(fileWriterFn);
    cldrData.accept(DTD, ruleVisitor);
    IcuData result = ruleVisitor.icuData;
    addSpecialCaseValues(result);
    return result;
}
@ -96,27 +106,6 @@ public final class TransformsMapper {
RuleVisitor(Function<Path, PrintWriter> outFn) {
this.outFn = checkNotNull(outFn);
icuData.setFileComment("File: root.txt");
// I have _no_ idea what any of this is about, I'm just trying to mimic the original
// (complex and undocumented) code in "ConvertTransforms.java".
// TODO: Understand and document each of the cases below.
icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}");
// Note that this quoting of path segments is almost certainly unnecessary. It matches
// the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so
// it seems very likely that it's not needed here.
// TODO: Once migration done, remove quotes here & check in RbPath for unwanted quotes.
icuData.add(RbPath.of("\"%Translit%Hex\""), "%Translit%Hex");
icuData.add(RbPath.of("\"%Translit%UnicodeName\""), "%Translit%UnicodeName");
icuData.add(RbPath.of("\"%Translit%UnicodeChar\""), "%Translit%UnicodeChar");
// Special case, where Latin is a no-op.
icuData.add(RbPath.of("TransliterateLATIN"), RbValue.of("", ""));
// Some hard-coded special case mappings.
icuData.add(
RB_TRANSLITERATOR_IDS.extendBy("Tone-Digit", "alias"),
"Pinyin-NumericPinyin");
icuData.add(
RB_TRANSLITERATOR_IDS.extendBy("Digit-Tone", "alias"),
"NumericPinyin-Pinyin");
}
@Override public void visit(CldrValue value) {
@ -153,6 +142,7 @@ public final class TransformsMapper {
String status = visibility == Visibility.internal ? "internal" : "file";
Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class);
// TODO: Consider checks for unused data (e.g. forward aliases in a backward rule).
if (dir != Direction.backward) {
String id = getId(source, target, variant);
TRANSFORM_ALIAS.listOfValuesFrom(value)
@ -172,6 +162,33 @@ public final class TransformsMapper {
}
}
/**
 * Adds the fixed set of hard-coded entries to the given ICU data.
 *
 * <p>The values mimic the original (complex and undocumented) "ConvertTransforms.java" code and
 * their purpose is not otherwise understood.
 * TODO: Understand and document each of the cases below.
 *
 * @param icuData the ICU data to which the special case values are added.
 */
private static void addSpecialCaseValues(IcuData icuData) {
    RbPath namePattern = RbPath.of("TransliteratorNamePattern");
    icuData.add(namePattern, "{0,choice,0#|1#{1}|2#{1}-{2}}");

    // The quoted path segments below are almost certainly unnecessary. They match the old
    // "ConvertTransforms" behaviour, but '%' is used unquoted elsewhere, so it is very likely
    // that quoting is not needed here either.
    // TODO: Once migration done, remove quotes here & check in RbPath for unwanted quotes.
    RbPath hex = RbPath.of("\"%Translit%Hex\"");
    icuData.add(hex, "%Translit%Hex");
    RbPath unicodeName = RbPath.of("\"%Translit%UnicodeName\"");
    icuData.add(unicodeName, "%Translit%UnicodeName");
    RbPath unicodeChar = RbPath.of("\"%Translit%UnicodeChar\"");
    icuData.add(unicodeChar, "%Translit%UnicodeChar");

    // Special case, where Latin is a no-op.
    RbPath latin = RbPath.of("TransliterateLATIN");
    icuData.add(latin, RbValue.of("", ""));

    // Some hard-coded special case alias mappings.
    RbPath toneToDigit = RB_TRANSLITERATOR_IDS.extendBy("Tone-Digit", "alias");
    icuData.add(toneToDigit, "Pinyin-NumericPinyin");
    RbPath digitToTone = RB_TRANSLITERATOR_IDS.extendBy("Digit-Tone", "alias");
    icuData.add(digitToTone, "NumericPinyin-Pinyin");
}
// It is important to note that this ID can contain a '/' but this is a literal in the path
// element and does not add an extra layer in the resource bundle path (the use of '/' to
// separate path elements is a purely internal detail for things like LocaleMapper and the
// regex-based configuration).
private static String getId(String from, String to, Optional<String> variant) {
String baseId = from + "-" + to;
return variant.map(v -> baseId + "/" + v).orElse(baseId);

View file

@ -0,0 +1,110 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import java.util.Arrays;
import java.util.Set;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
/** Unit tests for the accessors and mutators of {@link IcuData}. */
public class IcuDataTest {
    /** The name and fallback flag given to the constructor are reported back unchanged. */
    @Test
    public void testSimple() {
        IcuData withFallback = new IcuData("icu-name", true);
        assertThat(withFallback.getName()).isEqualTo("icu-name");
        assertThat(withFallback.hasFallback()).isTrue();

        IcuData withoutFallback = new IcuData("other-name", false);
        assertThat(withoutFallback.getName()).isEqualTo("other-name");
        assertThat(withoutFallback.hasFallback()).isFalse();
    }

    /** The file comment starts empty and each setter call replaces the previous comment. */
    @Test
    public void testFileComment() {
        IcuData data = new IcuData("icu-name", false);
        assertThat(data.getFileComment()).isEmpty();

        // Varargs form.
        data.setFileComment("Hello", "World");
        assertThat(data.getFileComment()).containsExactly("Hello", "World").inOrder();

        // List form.
        data.setFileComment(Arrays.asList("Foo", "Bar"));
        assertThat(data.getFileComment()).containsExactly("Foo", "Bar").inOrder();

        // An empty list clears the comment again.
        data.setFileComment(ImmutableList.of());
        assertThat(data.getFileComment()).isEmpty();
    }

    /** Setting the version adds a single value under the "Version" path. */
    @Test
    public void testSetVersion() {
        IcuData data = new IcuData("icu-name", false);
        data.setVersion("VERSION");

        RbPath versionPath = RbPath.of("Version");
        assertThat(data.getPaths()).containsExactly(versionPath);
        assertThat(data.get(versionPath)).isEqualTo(ImmutableList.of(RbValue.of("VERSION")));
    }

    /** The path set tracks later additions, is ordered, and rejects modification. */
    @Test
    public void testGetPaths() {
        IcuData data = new IcuData("icu-name", false);

        // Obtained before anything is added: getPaths() is a live view, not a snapshot.
        Set<RbPath> livePaths = data.getPaths();
        assertThat(livePaths).isEmpty();

        RbPath firstPath = RbPath.of("foo", "bar");
        data.add(firstPath, "value1");
        assertThat(data.get(firstPath)).contains(RbValue.of("value1"));
        assertThat(livePaths).containsExactly(firstPath);
        assertThat(livePaths).hasSize(1);

        RbPath secondPath = RbPath.of("foo", "baz");
        data.add(secondPath, "value2");
        assertThat(data.get(secondPath)).contains(RbValue.of("value2"));
        assertThat(livePaths).containsExactly(firstPath, secondPath).inOrder();
        assertThat(livePaths).hasSize(2);

        // The view cannot be used to modify the underlying data.
        assertThrows(
            UnsupportedOperationException.class, () -> livePaths.add(RbPath.of("nope")));
        assertThrows(UnsupportedOperationException.class, () -> livePaths.remove(firstPath));
        assertThrows(UnsupportedOperationException.class, livePaths::clear);
    }

    /** Repeated adds to one path accumulate values in call order, keeping duplicates. */
    @Test
    public void addMultiple() {
        IcuData data = new IcuData("icu-name", false);
        RbPath path = RbPath.of("foo", "bar");
        RbValue multiElement = RbValue.of("the", "first", "value");
        RbValue singleElement = RbValue.of("another-value");

        data.add(path, multiElement);
        assertThat(data.get(path)).containsExactly(multiElement);

        // Adding a plain string is expected to be the same as adding a single element value.
        data.add(path, "another-value");
        assertThat(data.get(path)).containsExactly(multiElement, singleElement).inOrder();

        // It's just a list: values are neither sorted nor deduplicated.
        data.add(path, Arrays.asList(singleElement, multiElement));
        assertThat(data.get(path))
            .containsExactly(multiElement, singleElement, singleElement, multiElement)
            .inOrder();
    }

    /** Replacing a path's values discards whatever was previously stored for that path. */
    @Test
    public void replace() {
        IcuData data = new IcuData("icu-name", false);
        RbPath path = RbPath.of("foo", "bar");
        RbValue multiElement = RbValue.of("the", "first", "value");
        RbValue singleElement = RbValue.of("another-value");

        data.replace(path, multiElement);
        assertThat(data.get(path)).containsExactly(multiElement);

        data.replace(path, "another-value");
        assertThat(data.get(path)).containsExactly(singleElement);
    }
}

View file

@ -4,7 +4,6 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.junit.Assert.fail;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
@ -76,16 +75,48 @@ public class PathMatcherTest {
}
@Test
public void testAnyOf() {
PathMatcher monthMatch = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
PathMatcher dayMatch = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]");
PathMatcher combined = PathMatcher.anyOf(monthMatch, dayMatch);
public void testAnyOf_match() {
PathMatcher narrowMonth =
PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/months"
+ "/monthContext[@type=\"format\"]/monthWidth[@type=\"narrow\"]/month[@type=*]");
PathMatcher narrowDay =
PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/days"
+ "/dayContext[@type=\"format\"]/dayWidth[@type=\"narrow\"]/day[@type=*]");
PathMatcher prefix = PathMatcher.anyOf(narrowMonth, narrowDay);
assertThat(combined.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue();
assertThat(combined.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue();
assertThat(prefix.matches(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
assertThat(prefix.matches(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();
assertThat(combined.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse();
assertThat(combined.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse();
assertThat(prefix.matches(monthInfo("hindu", "format", "wide", 1))).isFalse();
assertThat(prefix.matches(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
}
// A combined matcher matches a path suffix if either of its sub-matchers does.
@Test
public void testAnyOf_suffix() {
    PathMatcher narrowMonthOrDay = PathMatcher.anyOf(
        PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]"),
        PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]"));

    // Narrow widths are accepted for both months and days...
    assertThat(narrowMonthOrDay.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1)))
        .isTrue();
    assertThat(narrowMonthOrDay.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun")))
        .isTrue();

    // ... but wide widths are rejected by both sub-matchers.
    assertThat(narrowMonthOrDay.matchesSuffixOf(monthInfo("generic", "format", "wide", 1)))
        .isFalse();
    assertThat(narrowMonthOrDay.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon")))
        .isFalse();
}
// A combined matcher matches a path prefix if either of its sub-matchers does.
@Test
public void testAnyOf_prefix() {
    PathMatcher gregorianMonthsOrBuddhistDays = PathMatcher.anyOf(
        PathMatcher.of("ldml/dates/calendars/calendar[@type=\"gregorian\"]/months"),
        PathMatcher.of("ldml/dates/calendars/calendar[@type=\"buddhist\"]/days"));

    // Each sub-matcher accepts paths for its own calendar type.
    assertThat(
        gregorianMonthsOrBuddhistDays.matchesPrefixOf(
            monthInfo("gregorian", "format", "narrow", 1)))
        .isTrue();
    assertThat(
        gregorianMonthsOrBuddhistDays.matchesPrefixOf(
            dayInfo("buddhist", "format", "narrow", "sun")))
        .isTrue();

    // A calendar type named by neither sub-matcher is rejected.
    assertThat(
        gregorianMonthsOrBuddhistDays.matchesPrefixOf(monthInfo("hindu", "format", "wide", 1)))
        .isFalse();
    assertThat(
        gregorianMonthsOrBuddhistDays.matchesPrefixOf(
            dayInfo("hindu", "format", "wide", "mon")))
        .isFalse();
}
@Test

View file

@ -2,10 +2,9 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static org.unicode.icu.tool.cldrtoicu.testing.RbPathSubjectFactory.assertThat;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static org.unicode.icu.tool.cldrtoicu.testing.RbPathSubjectFactory.assertThat;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -15,27 +14,90 @@ import org.junit.runners.JUnit4;
public class RbPathTest {
@Test
public void testEmpty() {
assertThat(RbPath.empty()).hasSegments();
assertThat(RbPath.empty()).hasLength(0);
assertThat(RbPath.of()).hasSegments();
assertThat(RbPath.of()).hasLength(0);
}
@Test
public void testParseVsOf() {
assertThat(RbPath.of("foo", "bar")).hasSegments("foo", "bar");
assertThat(RbPath.of("foo/bar")).hasSegments("foo/bar");
assertThat(RbPath.parse("foo/bar")).hasSegments("foo", "bar");
assertThat(RbPath.parse("foo/bar/baz")).hasSegments("foo", "bar", "baz");
// Allow and ignore leading '/' for legacy reasons.
assertThat(RbPath.parse("/foo/bar/baz")).hasSegments("foo", "bar", "baz");
assertThat(RbPath.of("foo/bar", "baz")).hasSegments("foo/bar", "baz");
}
@Test
public void testBadArgs() {
assertBadPath("", "empty path string");
assertBadPath("foo//bar", "empty path segment");
assertBadPath("foo//bar", "path segments must not be empty");
assertBadPath("foo/<bar/baz", "mismatched quoting");
assertBadPath("foo/\"bar", "mismatched quoting");
assertBadPath("foo/\"bar\"baz\"", "invalid character");
assertBadPath("foo/bar baz", "invalid character");
}
// A parsed path reports its length and exposes its segments by index, in order.
@Test
public void testIteration() {
    String[] expectedSegments = {"foo", "bar", "baz"};
    RbPath parsed = RbPath.parse("foo/bar/baz");

    assertThat(parsed.length()).isEqualTo(3);
    for (int i = 0; i < expectedSegments.length; i++) {
        assertThat(parsed.getSegment(i)).isEqualTo(expectedSegments[i]);
    }
}
// Extending a path appends the given string as one segment, even if it contains a '/'.
@Test
public void testExtendBy() {
    RbPath simpleExtension = RbPath.of("foo").extendBy("bar");
    assertThat(simpleExtension).hasSegments("foo", "bar");

    RbPath slashExtension = RbPath.of("foo").extendBy("bar/baz");
    assertThat(slashExtension).hasSegments("foo", "bar/baz");

    // Two segments (one holding a literal '/') differ from the three parsed segments.
    RbPath parsedPath = RbPath.parse("foo/bar/baz");
    assertThat(RbPath.of("foo").extendBy("bar/baz")).isNotEqualTo(parsedPath);
}
// Prefix checks are segment based; a path starts with itself, its parent and the empty path.
@Test
public void testStartsWith() {
    RbPath path = RbPath.of("foo", "bar", "baz");

    assertThat(path).startsWith(path).isTrue();
    assertThat(path).startsWith(RbPath.of()).isTrue();
    assertThat(path).startsWith(path.getParent()).isTrue();
    assertThat(path).startsWith(RbPath.of("foo")).isTrue();

    // A segment from the middle of the path is not a prefix.
    assertThat(path).startsWith(RbPath.of("bar")).isFalse();
    // A single segment containing '/' characters is not the same as three segments.
    assertThat(path).startsWith(RbPath.of("foo/bar/baz")).isFalse();
}
// Suffix checks are segment based; a path ends with itself and with the empty path.
@Test
public void testEndsWith() {
    RbPath path = RbPath.of("foo", "bar", "baz");

    assertThat(path).endsWith(path).isTrue();
    assertThat(path).endsWith(RbPath.of()).isTrue();
    assertThat(path).endsWith(RbPath.of("bar", "baz")).isTrue();

    // A segment from the middle of the path is not a suffix.
    assertThat(path).endsWith(RbPath.of("bar")).isFalse();
    // A single segment containing '/' characters is not the same as three segments.
    assertThat(path).endsWith(RbPath.of("foo/bar/baz")).isFalse();
}
// Containment is segment based and accepts the path itself, the empty path and sub-sequences
// taken from either end.
@Test
public void testContains() {
    RbPath path = RbPath.of("foo", "bar", "baz");

    assertThat(path).contains(path).isTrue();
    assertThat(path).contains(RbPath.of()).isTrue();
    assertThat(path).contains(RbPath.of("bar", "baz")).isTrue();
    assertThat(path).contains(RbPath.of("foo", "bar")).isTrue();

    // A single segment containing '/' characters is not the same as three segments.
    assertThat(path).contains(RbPath.of("foo/bar/baz")).isFalse();
}
// The common prefix length counts the leading segments which two paths share.
@Test
public void testCommonPrefixLength() {
    RbPath first = RbPath.of("foo", "bar", "baz");
    RbPath second = RbPath.of("foo", "bar", "quux");

    assertThat(RbPath.getCommonPrefixLength(first, second)).isEqualTo(2);
    assertThat(RbPath.getCommonPrefixLength(first, first)).isEqualTo(3);
    assertThat(RbPath.getCommonPrefixLength(first, RbPath.of())).isEqualTo(0);

    // Sharing trailing segments does not count: only leading segments are compared.
    RbPath suffixOnly = RbPath.of("bar", "baz");
    assertThat(RbPath.getCommonPrefixLength(first, suffixOnly)).isEqualTo(0);
}
private static void assertBadPath(String path, String errorSnippet) {
IllegalArgumentException e =
assertThrows(IllegalArgumentException.class, () -> RbPath.parse(path));

View file

@ -199,10 +199,6 @@ public class SupplementalDataTest {
.that(regressionData.maximize(id).orElse(null))
.isEqualTo(likelySubtags.maximize(id));
}
// ars currently a special case since it's in the ICU data as an alias, but not in the CLDR
// data at all. This while it's a structurally valid language code, it cannot be maximized.
assertThat(regressionData.maximize("ars")).isEmpty();
}
@Test
@ -214,7 +210,7 @@ public class SupplementalDataTest {
try {
ltc.transform(id);
} catch (NullPointerException e) {
System.out.println("--> " + id);
// Occurs for sh_CS and sh_YU.
continue;
}
// Need to maximize to work around:

View file

@ -0,0 +1,283 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
/**
 * Tests for {@link Bcp47Mapper}, driven by small hand-built sets of "//ldmlBCP47/keyword" CLDR
 * values. The mapper returns two outputs: index 0 is checked as "keyTypeData" and index 1 as
 * "timezoneTypes".
 */
@RunWith(JUnit4.class)
public class Bcp47MapperTest {
    // Alias paths which the simple tests expect in the main output in addition to mapped data.
    private static final ImmutableMap<RbPath, RbValue> EXPECTED_ALIAS_MAP = ImmutableMap.of(
        RbPath.of("bcpTypeAlias", "tz:alias"),
        RbValue.of("/ICUDATA/timezoneTypes/bcpTypeAlias/tz"),
        RbPath.of("typeAlias", "timezone:alias"),
        RbValue.of("/ICUDATA/timezoneTypes/typeAlias/timezone"),
        RbPath.of("typeMap", "timezone:alias"),
        RbValue.of("/ICUDATA/timezoneTypes/typeMap/timezone"));

    /** Non-timezone keys and types all end up in the main output with empty values. */
    @Test
    public void testSimple() {
        ImmutableList<IcuData> outputs = Bcp47Mapper.process(cldrData(
            simpleType("foo", "one"),
            simpleType("foo", "two"),
            simpleType("foo", "three"),
            simpleType("bar", "four"),
            simpleType("bar", "five")));

        IcuData keyTypeData = outputs.get(0);
        assertThat(keyTypeData).hasName("keyTypeData");
        assertThat(keyTypeData).hasFallback(false);

        // Two key paths plus five type paths, as well as the special injected alias values.
        assertThat(keyTypeData).getPaths().hasSize(7 + EXPECTED_ALIAS_MAP.size());
        EXPECTED_ALIAS_MAP.forEach(
            (path, value) -> assertThat(keyTypeData).hasValuesFor(path, value));

        assertThat(keyTypeData).hasEmptyValue("/keyMap/foo");
        assertThat(keyTypeData).hasEmptyValue("/keyMap/bar");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/foo/one");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/foo/two");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/foo/three");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/bar/four");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/bar/five");

        // With no "tz" key in the input, the timezone output has no paths.
        IcuData timezoneData = outputs.get(1);
        assertThat(timezoneData).hasName("timezoneTypes");
        assertThat(timezoneData).hasFallback(false);
        assertThat(timezoneData).getPaths().isEmpty();
    }

    /** Types of the "tz" key are split out into the separate timezone output. */
    @Test
    public void testSimpleTimezone() {
        ImmutableList<IcuData> outputs = Bcp47Mapper.process(cldrData(
            simpleType("tz", "one"),
            simpleType("tz", "two"),
            simpleType("tz", "three"),
            simpleType("bar", "four"),
            simpleType("bar", "five")));

        IcuData keyTypeData = outputs.get(0);
        assertThat(keyTypeData).hasName("keyTypeData");
        assertThat(keyTypeData).hasFallback(false);

        // Two key paths plus two type paths, as well as the special injected alias values.
        assertThat(keyTypeData).getPaths().hasSize(4 + EXPECTED_ALIAS_MAP.size());
        EXPECTED_ALIAS_MAP.forEach(
            (path, value) -> assertThat(keyTypeData).hasValuesFor(path, value));

        // The key-map is only ever in the main bcp47 data and contains the timezone key.
        assertThat(keyTypeData).hasEmptyValue("/keyMap/tz");
        assertThat(keyTypeData).hasEmptyValue("/keyMap/bar");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/bar/four");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/bar/five");

        IcuData timezoneData = outputs.get(1);
        assertThat(timezoneData).hasName("timezoneTypes");
        assertThat(timezoneData).hasFallback(false);

        // Only the type-map paths/values are split into the timezone data.
        assertThat(timezoneData).getPaths().hasSize(3);
        assertThat(timezoneData).hasEmptyValue("/typeMap/tz/one");
        assertThat(timezoneData).hasEmptyValue("/typeMap/tz/two");
        assertThat(timezoneData).hasEmptyValue("/typeMap/tz/three");
    }

    /** A key alias becomes the key-map path segment, with the key name as its value. */
    @Test
    public void testKeyAliases() {
        IcuData keyTypeData = Bcp47Mapper.process(cldrData(alias("key", "ALIAS", "type"))).get(0);

        // Key aliases are lower-cased (though it's not entirely obvious as to why).
        assertThat(keyTypeData).hasValuesFor("/keyMap/alias", "key");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/alias/type");
    }

    /** A single type alias becomes the type-map path segment, with the type name as value. */
    @Test
    public void testTypeAliases_single() {
        IcuData keyTypeData =
            Bcp47Mapper.process(cldrData(alias("key", null, "type", "main"))).get(0);

        assertThat(keyTypeData).hasEmptyValue("/keyMap/key");
        assertThat(keyTypeData).hasValuesFor("/typeMap/key/main", "type");
    }

    /** Type aliases after the first are mapped back to the first (main) alias. */
    @Test
    public void testTypeAliases_multiple() {
        CldrValue aliased = alias("key", null, "type", "main", "alias1", "alias2", "alias3");
        IcuData keyTypeData = Bcp47Mapper.process(cldrData(aliased)).get(0);

        assertThat(keyTypeData).hasEmptyValue("/keyMap/key");
        assertThat(keyTypeData).hasValuesFor("/typeMap/key/main", "type");

        // Only aliases after the first (main) one go in the typeAlias set.
        assertThat(keyTypeData).getPaths().doesNotContain(RbPath.parse("typeAlias/key/main"));
        assertThat(keyTypeData).hasValuesFor("/typeAlias/key/alias1", "main");
        assertThat(keyTypeData).hasValuesFor("/typeAlias/key/alias2", "main");
        assertThat(keyTypeData).hasValuesFor("/typeAlias/key/alias3", "main");
    }

    /** Key and type aliases combine: type paths are keyed by the key alias. */
    @Test
    public void testKeyAndTypeAliases() {
        CldrValue aliased = alias("key", "key-alias", "type", "main", "type-alias");
        IcuData keyTypeData = Bcp47Mapper.process(cldrData(aliased)).get(0);

        assertThat(keyTypeData).hasValuesFor("/keyMap/key-alias", "key");
        assertThat(keyTypeData).hasValuesFor("/typeMap/key-alias/main", "type");
        assertThat(keyTypeData).hasValuesFor("/typeAlias/key-alias/type-alias", "main");
    }

    /** A "preferred" attribute is emitted under bcpTypeAlias for the key/type pair. */
    @Test
    public void testPreferredTypeName() {
        IcuData keyTypeData = Bcp47Mapper.process(cldrData(
            deprecated("deprecated-key", true, "type", false, "/preferred/path1"),
            deprecated("key", false, "deprecated-type", true, "/preferred/path2"))).get(0);

        assertThat(keyTypeData)
            .hasValuesFor("/bcpTypeAlias/deprecated-key/type", "/preferred/path1");
        assertThat(keyTypeData)
            .hasValuesFor("/bcpTypeAlias/key/deprecated-type", "/preferred/path2");
    }

    /** Deprecation and value-type attributes are emitted under keyInfo and typeInfo. */
    @Test
    public void testInfoAttributes() {
        IcuData keyTypeData = Bcp47Mapper.process(cldrData(
            // Deprecated without a replacement.
            deprecated("deprecated-key", true, "type", false, null),
            deprecated("key", false, "deprecated-type", true, null),
            valueType("info-key", "info-type", "value-type"))).get(0);

        assertThat(keyTypeData).hasEmptyValue("/keyMap/deprecated-key");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/deprecated-key/type");
        assertThat(keyTypeData).hasValuesFor("/keyInfo/deprecated/deprecated-key", "true");

        assertThat(keyTypeData).hasEmptyValue("/keyMap/key");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/key/deprecated-type");
        assertThat(keyTypeData).hasValuesFor("/typeInfo/deprecated/key/deprecated-type", "true");

        assertThat(keyTypeData).hasEmptyValue("/keyMap/info-key");
        assertThat(keyTypeData).hasEmptyValue("/typeMap/info-key/info-type");
        assertThat(keyTypeData).hasValuesFor("/keyInfo/valueType/info-key", "value-type");
    }

    // This will hopefully one day be the responsibility of the IcuTextWriter.
    /** Timezone aliases containing '/' appear as quoted path segments using ':' instead. */
    @Test
    public void testTimezonePathQuotingForAliases() {
        IcuData timezoneData = Bcp47Mapper.process(cldrData(
            alias("tz", null, "escaped", "foo/bar", "hello/world"),
            alias("tz", null, "unescaped", "foo_bar", "hello_world"))).get(1);

        // Both the type-map and the type-alias paths for "tz" are in the timezone data.
        assertThat(timezoneData).getPaths().hasSize(4);
        assertThat(timezoneData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped");
        assertThat(timezoneData).hasValuesFor("/typeMap/tz/foo_bar", "unescaped");
        // TODO: Check this is correct! Having foo/bar as the value rather than foo:bar seems wrong.
        assertThat(timezoneData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar");
        assertThat(timezoneData).hasValuesFor("/typeAlias/tz/hello_world", "foo_bar");
    }

    // Wraps the given values as a CLDR data instance.
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    // A key/type pair with only "name" attributes.
    private static CldrValue simpleType(String keyName, String typeName) {
        StringBuilder path = startKey(keyName);
        startType(path, typeName);
        return emptyValue(path);
    }

    // A key/type pair with an optional key alias (null to omit) and optional type aliases,
    // which are joined by spaces into a single "alias" attribute.
    private static CldrValue alias(
        String keyName, String keyAlias, String typeName, String... typeAliases) {

        StringBuilder path = startKey(keyName);
        if (keyAlias != null) {
            appendAttribute(path, "alias", keyAlias);
        }
        startType(path, typeName);
        if (typeAliases.length > 0) {
            appendAttribute(path, "alias", Joiner.on(" ").join(typeAliases));
        }
        return emptyValue(path);
    }

    // A key/type pair where the key and/or the type is flagged as deprecated, with an optional
    // "preferred" attribute (null to omit) on the type.
    private static CldrValue deprecated(
        String keyName,
        boolean keyDeprecated,
        String typeName,
        boolean typeDeprecated,
        String preferred) {

        StringBuilder path = startKey(keyName);
        if (keyDeprecated) {
            appendAttribute(path, "deprecated", keyDeprecated);
        }
        startType(path, typeName);
        if (preferred != null) {
            appendAttribute(path, "preferred", preferred);
        }
        if (typeDeprecated) {
            appendAttribute(path, "deprecated", typeDeprecated);
        }
        return emptyValue(path);
    }

    // A key/type pair where the key carries a "valueType" attribute.
    private static CldrValue valueType(String keyName, String typeName, String valueType) {
        StringBuilder path = startKey(keyName);
        appendAttribute(path, "valueType", valueType);
        startType(path, typeName);
        return emptyValue(path);
    }

    // Starts a keyword path with a "key" element carrying the given name.
    private static StringBuilder startKey(String keyName) {
        StringBuilder path = new StringBuilder("//ldmlBCP47/keyword");
        path.append("/key");
        appendAttribute(path, "name", keyName);
        return path;
    }

    // Appends a "type" element carrying the given name.
    private static void startType(StringBuilder path, String typeName) {
        path.append("/type");
        appendAttribute(path, "name", typeName);
    }

    // Parses the finished path into a CLDR value with an empty string value.
    private static CldrValue emptyValue(StringBuilder path) {
        return CldrValue.parseValue(path.toString(), "");
    }

    // Appends an attribute of the form [@k="v"] to the path.
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        out.append(String.format("[@%s=\"%s\"]", k, v));
    }
}

View file

@ -0,0 +1,150 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.GRAPHEME;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.SENTENCE;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.LINE_BREAK;
import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.SENTENCE_BREAK;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.base.Ascii;
import com.google.common.base.CaseFormat;
/**
 * Tests for {@link BreakIteratorMapper}, covering suppression values from the normal locale
 * data and dictionary/boundary values from the optional ICU specials data.
 */
@RunWith(JUnit4.class)
public class BreakIteratorMapperTest {
    // Segmentation types, rendered in upper camel case (e.g. "SentenceBreak") by toString().
    enum SegmentationType {
        GRAPHEME_CLUSTER_BREAK, LINE_BREAK, SENTENCE_BREAK, WORD_BREAK;

        @Override public String toString() {
            return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name());
        }
    }

    // Boundary element names, rendered with an "icu:" prefix (e.g. "icu:grapheme").
    enum BoundaryType {
        GRAPHEME, WORD, LINE, SENTENCE, TITLE;

        @Override public String toString() {
            return "icu:" + Ascii.toLowerCase(name());
        }
    }

    /** Suppressions of one segmentation type are collected, in order, under a single path. */
    @Test
    public void testSingleSuppression() {
        CldrData localeData = cldrData(
            suppression(SENTENCE_BREAK, "L.P.", 1),
            suppression(SENTENCE_BREAK, "Alt.", 2),
            suppression(SENTENCE_BREAK, "Approx.", 3));

        IcuData result = BreakIteratorMapper.process("en", localeData, Optional.empty());

        assertThat(result).getPaths().hasSize(1);
        assertThat(result).hasValuesFor("/exceptions/SentenceBreak:array",
            RbValue.of("L.P."),
            RbValue.of("Alt."),
            RbValue.of("Approx."));
    }

    // In real data, suppression is only a SentenceBreak thing, but we might as well test it for
    // other types.
    /** Suppressions are grouped into one path per segmentation type. */
    @Test
    public void testMultipleSupressionTypes() {
        CldrData localeData = cldrData(
            suppression(SENTENCE_BREAK, "L.P.", 1),
            suppression(SENTENCE_BREAK, "Alt.", 2),
            suppression(SENTENCE_BREAK, "Approx.", 3),
            suppression(LINE_BREAK, "Foo", 4),
            suppression(LINE_BREAK, "Bar", 5),
            suppression(LINE_BREAK, "Baz", 6));

        IcuData result = BreakIteratorMapper.process("en", localeData, Optional.empty());

        assertThat(result).getPaths().hasSize(2);
        assertThat(result).hasValuesFor("/exceptions/SentenceBreak:array",
            RbValue.of("L.P."),
            RbValue.of("Alt."),
            RbValue.of("Approx."));
        assertThat(result).hasValuesFor("/exceptions/LineBreak:array",
            RbValue.of("Foo"),
            RbValue.of("Bar"),
            RbValue.of("Baz"));
    }

    /** Dictionary specials map to one path per dictionary type, valued by the dependency. */
    @Test
    public void testSpecials_dictionary() {
        CldrData specialsData = cldrData(
            dictionary("foo", "<foo deps>"),
            dictionary("bar", "<bar deps>"));

        IcuData result =
            BreakIteratorMapper.process("en", cldrData(), Optional.of(specialsData));

        assertThat(result).getPaths().hasSize(2);
        assertThat(result).hasValuesFor("/dictionaries/foo:process(dependency)", "<foo deps>");
        assertThat(result).hasValuesFor("/dictionaries/bar:process(dependency)", "<bar deps>");
    }

    /** Boundary specials map to one path per boundary, suffixed by any "alt" name. */
    @Test
    public void testSpecials_boundaries() {
        CldrData specialsData = cldrData(
            boundaries(GRAPHEME, "<grapheme deps>", null),
            boundaries(SENTENCE, "<sentence deps>", "altName"));

        IcuData result =
            BreakIteratorMapper.process("en", cldrData(), Optional.of(specialsData));

        assertThat(result).getPaths().hasSize(2);
        assertThat(result)
            .hasValuesFor("/boundaries/grapheme:process(dependency)", "<grapheme deps>");
        assertThat(result)
            .hasValuesFor("/boundaries/sentence_altName:process(dependency)", "<sentence deps>");
    }

    // Wraps the given values as a CLDR data instance.
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    // A "standard" suppression value for the given segmentation type.
    private static CldrValue suppression(SegmentationType type, String value, int index) {
        StringBuilder path = new StringBuilder("//ldml/segmentations");
        path.append("/segmentation");
        appendAttribute(path, "type", type);
        path.append("/suppressions[@type=\"standard\"]");
        // Suppression is an ordered element, so needs a sort index.
        path.append("/suppression#");
        path.append(index);
        return CldrValue.parseValue(path.toString(), value);
    }

    // An ICU specials dictionary element with the given type and dependency attributes.
    private static CldrValue dictionary(String type, String dependency) {
        StringBuilder path = breakIteratorDataPath();
        path.append("/icu:dictionaries/icu:dictionary");
        appendAttribute(path, "type", type);
        appendAttribute(path, "icu:dependency", dependency);
        return CldrValue.parseValue(path.toString(), "");
    }

    // An ICU specials boundary element with a dependency and an optional "alt" attribute
    // (null to omit).
    private static CldrValue boundaries(BoundaryType type, String dependency, String alt) {
        StringBuilder path = breakIteratorDataPath();
        path.append("/icu:boundaries/");
        path.append(type);
        appendAttribute(path, "icu:dependency", dependency);
        if (alt != null) {
            appendAttribute(path, "alt", alt);
        }
        return CldrValue.parseValue(path.toString(), "");
    }

    // Starts a new path at the element shared by the dictionary and boundary specials.
    private static StringBuilder breakIteratorDataPath() {
        return new StringBuilder("//ldml/special/icu:breakIteratorData");
    }

    // Appends an attribute of the form [@k="v"] to the path.
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        out.append(String.format("[@%s=\"%s\"]", k, v));
    }
}

View file

@ -0,0 +1,157 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import com.google.common.base.Joiner;
@RunWith(JUnit4.class)
public class CollationMapperTest {
// With no input data a normal locale produces no paths, while root still gets two.
@Test
public void testEmpty() {
    IcuData nonRoot = CollationMapper.process("xx", cldrData(), Optional.empty());
    assertThat(nonRoot).hasName("xx");
    assertThat(nonRoot).hasFallback(true);
    assertThat(nonRoot).getPaths().isEmpty();

    // Root gets a couple of special paths added to it due to the need to work around a CLDR
    // data bug.
    IcuData root = CollationMapper.process("root", cldrData(), Optional.empty());
    assertThat(root).hasName("root");
    assertThat(root).hasFallback(true);
    assertThat(root).getPaths().hasSize(2);
    assertThat(root).hasValuesFor("/collations/standard/Version", getCldrVersionString());
    assertThat(root).hasEmptyValue("/collations/standard/Sequence");
}
@Test
public void testDefault() {
CldrData cldrData =
cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/collations/default", "any value");
}
// This tests legacy behaviour which mimics the original converter code. There's no promise
// that it's semantically correct though.
@Test
public void testLastAltRuleOverridesExisting() {
// Note that in DTD order (which is what the paths are processed in) the path with no "alt"
// attribute comes after everything else, but the first "alt" path is overwritten by the
// second. It's not even clear there should ever be two alt paths, or what the paths mean
// (the original code seems to suggest it's looking for the "short" alternate form, but
// the "alt" attribute can have more that the value "short"...)
CldrData cldrData = cldrData(
collationRule("foo", "alt1", "First alt rule"),
collationRule("foo", "alt2", "Second alt rule"),
collationRule("foo", null, "First rule"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString());
assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
}
@Test
public void testCommentAndWhitespaceStripping() {
CldrData cldrData = cldrData(
collationRule("foo", null,
"# Comments are stripped",
"",
" # As are empty lines and leading/trailing spaces",
" Here is a value ",
"# And more comments to be stripped",
"And another value"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
"Here is a value",
"And another value");
}
// Just in case anything weird happens with non-BMP char sequences:
// <collation type='emoji'>
// <cr><![CDATA[
// # START AUTOGENERATED EMOJI ORDER
// & [last primary ignorable]<<*🦰🦱🦳🦲🏻🏼🏽🏾🏿
// & [before 1]\uFDD1
// <*😀😃😄😁😆😅🤣😂🙂🙃😉😊😇
// <*🥰😍🤩😘😗😚😙
// <*😋😛😜🤪😝🤑
// ...
@Test
public void testEmoji() {
CldrData cldrData = cldrData(
collationRule("emoji", null,
" # START AUTOGENERATED EMOJI ORDER",
" & [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
+ "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
" & [before 1]\uFDD1€",
" <*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
+ "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
+ "\uD83D\uDE07",
" <*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
+ "\uD83D\uDE19",
" <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
assertThat(icuData).hasValuesFor("/collations/emoji/Sequence",
"& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
+ "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
"& [before 1]\uFDD1€",
"<*\uD83D\uDE00\uD83D\uDE03\uD83D\uDE04\uD83D\uDE01\uD83D\uDE06\uD83D\uDE05"
+ "\uD83E\uDD23\uD83D\uDE02\uD83D\uDE42\uD83D\uDE43\uD83D\uDE09\uD83D\uDE0A"
+ "\uD83D\uDE07",
"<*\uD83E\uDD70\uD83D\uDE0D\uD83E\uDD29\uD83D\uDE18\uD83D\uDE17☺\uD83D\uDE1A"
+ "\uD83D\uDE19",
"<*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11");
}
@Test
public void testSpecials() {
CldrData specials = cldrData(
CldrValue.parseValue("//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""),
CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));
IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");
}
private static CldrData cldrData(CldrValue... values) {
return CldrDataSupplier.forValues(Arrays.asList(values));
}
private static CldrValue collationRule(String type, String alt, String... lines) {
StringBuilder cldrPath = new StringBuilder("//ldml/collations");
appendAttribute(cldrPath.append("/collation"), "type", type);
cldrPath.append("/cr");
if (alt != null) {
appendAttribute(cldrPath, "alt", alt);
}
return CldrValue.parseValue(cldrPath.toString(), Joiner.on('\n').join(lines));
}
private static void appendAttribute(StringBuilder out, String k, Object v) {
out.append(String.format("[@%s=\"%s\"]", k, v));
}
}

View file

@ -0,0 +1,183 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.AFTERNOON1;
import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.EVENING1;
import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.MIDNIGHT;
import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.MORNING1;
import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.NIGHT1;
import static org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapperTest.RuleType.NOON;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import com.google.common.base.Ascii;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
/**
 * Tests for {@link DayPeriodsMapper}: a single rule set, multiple rule sets, labelled rule sets
 * and the absence of any validation of the mapped values.
 */
@RunWith(JUnit4.class)
public class DayPeriodsMapperTest {
    // A subset of rule types for testing.
    enum RuleType {
        MORNING1,
        NOON,
        AFTERNOON1,
        EVENING1,
        NIGHT1,
        MIDNIGHT;

        @Override
        public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    // Possible rule names (these are the names of the attributes holding the rule's values).
    enum RuleName {
        AT,
        BEFORE,
        FROM;

        @Override
        public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    @Test
    public void testSimple() {
        Set<String> locales = ImmutableSet.of("en_GB", "en_AU", "en_NZ");
        CldrData input = cldrData(
            dayPeriodRule(locales, MORNING1, isBetween("04:00", "12:00")),
            dayPeriodRule(locales, NOON, isAt("12:00")),
            dayPeriodRule(locales, AFTERNOON1, isBetween("12:00", "18:00")),
            dayPeriodRule(locales, EVENING1, isBetween("18:00", "21:00")),
            dayPeriodRule(locales, NIGHT1, isBetween("21:00", "04:00")),
            dayPeriodRule(locales, MIDNIGHT, isAt("00:00")));

        IcuData mapped = DayPeriodsMapper.process(input);

        assertThat(mapped).hasName("dayPeriods");
        assertThat(mapped).hasFallback(false);

        // Every locale shares the one rule set.
        assertThat(mapped).hasValuesFor("/locales/en_AU", "set1");
        assertThat(mapped).hasValuesFor("/locales/en_GB", "set1");
        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set1");

        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "04:00");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "12:00");
        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "12:00");
        assertThat(mapped).hasValuesFor("/rules/set1/afternoon1/from", "12:00");
        assertThat(mapped).hasValuesFor("/rules/set1/afternoon1/before", "18:00");
        assertThat(mapped).hasValuesFor("/rules/set1/evening1/from", "18:00");
        assertThat(mapped).hasValuesFor("/rules/set1/evening1/before", "21:00");
        assertThat(mapped).hasValuesFor("/rules/set1/night1/from", "21:00");
        assertThat(mapped).hasValuesFor("/rules/set1/night1/before", "04:00");
        assertThat(mapped).hasValuesFor("/rules/set1/midnight/at", "00:00");
    }

    @Test
    public void testMultipleRuleSets() {
        Set<String> british = ImmutableSet.of("en_GB");
        Set<String> antipodean = ImmutableSet.of("en_AU", "en_NZ");
        CldrData input = cldrData(
            dayPeriodRule(british, MORNING1, isBetween("04:00", "12:00")),
            dayPeriodRule(british, NOON, isAt("12:00")),
            dayPeriodRule(antipodean, MORNING1, isBetween("06:00", "13:00")),
            dayPeriodRule(antipodean, NOON, isAt("13:00")));

        IcuData mapped = DayPeriodsMapper.process(input);

        // This reversal of the set ordering (as compared to the order of the input paths) is
        // because visitation requires nested path ordering, which is achieved by lexicographical
        // ordering of path strings ("en_AU" < "en_GB"). This is an implementation detail however
        // and might one day change. If this were switched to use DTD order, then it would be
        // stable (but also affect the ordering of paths in the released ICU data).
        assertThat(mapped).hasValuesFor("/locales/en_AU", "set1");
        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set1");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "06:00");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "13:00");
        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "13:00");

        assertThat(mapped).hasValuesFor("/locales/en_GB", "set2");
        assertThat(mapped).hasValuesFor("/rules/set2/morning1/from", "04:00");
        assertThat(mapped).hasValuesFor("/rules/set2/morning1/before", "12:00");
        assertThat(mapped).hasValuesFor("/rules/set2/noon/at", "12:00");
    }

    @Test
    public void testRulesetLabels() {
        Set<String> locales = ImmutableSet.of("en_GB");
        // Note that there's an implicit assumption in the mapper that the ruleset label is the
        // same for all of the rules of any given locale (since it comes from the parent element).
        CldrData input = cldrData(
            dayPeriodRule(locales, MORNING1, isBetween("04:00", "12:00"), "foo"),
            dayPeriodRule(locales, NOON, isAt("12:00"), "foo"));

        IcuData mapped = DayPeriodsMapper.process(input);

        // The label shows up as a suffix on the "locales" path element.
        assertThat(mapped).hasValuesFor("/locales_foo/en_GB", "set1");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "04:00");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "12:00");
        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "12:00");
    }

    // Just demonstrating that the mapper does no data validation.
    @Test
    public void testNoDataValidation() {
        Set<String> locales = ImmutableSet.of("foo", "bar");
        CldrData input = cldrData(
            dayPeriodRule(locales, MORNING1, isBetween("start", "end")),
            dayPeriodRule(locales, NOON, isAt("moment")));

        IcuData mapped = DayPeriodsMapper.process(input);

        // Neither the locale IDs nor the "times" are well formed, but they are mapped verbatim.
        assertThat(mapped).hasValuesFor("/locales/foo", "set1");
        assertThat(mapped).hasValuesFor("/locales/bar", "set1");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/from", "start");
        assertThat(mapped).hasValuesFor("/rules/set1/morning1/before", "end");
        assertThat(mapped).hasValuesFor("/rules/set1/noon/at", "moment");
    }

    /** Wraps the given values as a CLDR data instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /** Builds a day period rule in a rule set which has no label. */
    private static CldrValue dayPeriodRule(
        Set<String> locales, RuleType type, Map<RuleName, String> rules) {

        return dayPeriodRule(locales, type, rules, null);
    }

    /**
     * Builds a day period rule for the given locales. The rule's values are carried as path
     * attributes and the CLDR value itself is empty.
     *
     * @param rules the rule attributes, appended to the path in the map's iteration order.
     * @param label the "type" attribute of the enclosing rule set, or {@code null} for none.
     */
    private static CldrValue dayPeriodRule(
        Set<String> locales, RuleType type, Map<RuleName, String> rules, String label) {

        StringBuilder path = new StringBuilder("//supplementalData/dayPeriodRuleSet");
        if (label != null) {
            appendAttribute(path, "type", label);
        }
        path.append("/dayPeriodRules");
        appendAttribute(path, "locales", Joiner.on(' ').join(locales));
        path.append("/dayPeriodRule");
        appendAttribute(path, "type", type);
        for (Map.Entry<RuleName, String> rule : rules.entrySet()) {
            appendAttribute(path, rule.getKey().toString(), rule.getValue());
        }
        return CldrValue.parseValue(path.toString(), "");
    }

    /** Rule attributes for a period which occurs at a single moment. */
    private static Map<RuleName, String> isAt(String time) {
        return ImmutableMap.of(RuleName.AT, time);
    }

    /** Rule attributes for a period which runs from one time until just before another. */
    private static Map<RuleName, String> isBetween(String from, String to) {
        return ImmutableMap.of(RuleName.FROM, from, RuleName.BEFORE, to);
    }

    /** Appends a path attribute of the form {@code [@k="v"]} to the given builder. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        String attribute = String.format("[@%s=\"%s\"]", k, v);
        out.append(attribute);
    }
}

View file

@ -0,0 +1,114 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.FEW;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.MANY;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.ONE;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.OTHER;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.TWO;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.ZERO;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.base.Ascii;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
/** Tests for {@link PluralRangesMapper}, covering a single rule set and multiple rule sets. */
@RunWith(JUnit4.class)
public class PluralRangesMapperTest {
    // Plural categories, used for the "start", "end" and "result" attributes of a range.
    enum PluralCount {
        ZERO,
        ONE,
        TWO,
        FEW,
        MANY,
        OTHER;

        @Override
        public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    @Test
    public void testSimple() {
        Set<String> locales = ImmutableSet.of("en_GB", "en_NZ");
        CldrData input = cldrData(
            pluralRange(locales, ZERO, ONE, ZERO),
            pluralRange(locales, ZERO, FEW, FEW),
            pluralRange(locales, ONE, TWO, OTHER),
            pluralRange(locales, ONE, MANY, MANY));

        IcuData mapped = PluralRangesMapper.process(input);

        assertThat(mapped).hasName("pluralRanges");
        assertThat(mapped).hasFallback(false);
        assertThat(mapped).hasValuesFor("/locales/en_GB", "set00");
        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set00");

        // Note that ordering is based on incoming CLDR path ordering, which is reordered by virtue
        // of being processed in "nested grouping" order. This should probably be made to use DTD
        // order to make output more isolated once it's clear that this doesn't affect output.
        assertThat(mapped)
            .hasValuesFor("/rules/set00",
                RbValue.of("one", "many", "many"),
                RbValue.of("one", "two", "other"),
                RbValue.of("zero", "few", "few"),
                RbValue.of("zero", "one", "zero"));
    }

    @Test
    public void testMultipleSets() {
        Set<String> british = ImmutableSet.of("en_GB");
        Set<String> australian = ImmutableSet.of("en_AU");
        CldrData input = cldrData(
            pluralRange(british, ZERO, ONE, ZERO),
            pluralRange(british, ZERO, FEW, FEW),
            pluralRange(australian, ONE, TWO, OTHER),
            pluralRange(australian, ONE, MANY, MANY));

        IcuData mapped = PluralRangesMapper.process(input);

        assertThat(mapped).hasName("pluralRanges");
        assertThat(mapped).hasFallback(false);

        assertThat(mapped).hasValuesFor("/locales/en_AU", "set00");
        assertThat(mapped)
            .hasValuesFor("/rules/set00",
                RbValue.of("one", "many", "many"),
                RbValue.of("one", "two", "other"));

        assertThat(mapped).hasValuesFor("/locales/en_GB", "set01");
        assertThat(mapped)
            .hasValuesFor("/rules/set01",
                RbValue.of("zero", "few", "few"),
                RbValue.of("zero", "one", "zero"));
    }

    /** Wraps the given values as a CLDR data instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /**
     * Builds a plural range value for the given locales. The range is carried entirely by path
     * attributes and the CLDR value itself is empty.
     */
    private static CldrValue pluralRange(
        Set<String> locales, PluralCount start, PluralCount end, PluralCount result) {

        StringBuilder path = new StringBuilder("//supplementalData/plurals");
        path.append("/pluralRanges");
        appendAttribute(path, "locales", Joiner.on(' ').join(locales));
        path.append("/pluralRange");
        appendAttribute(path, "start", start);
        appendAttribute(path, "end", end);
        appendAttribute(path, "result", result);
        return CldrValue.parseValue(path.toString(), "");
    }

    /** Appends a path attribute of the form {@code [@k="v"]} to the given builder. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        String attribute = String.format("[@%s=\"%s\"]", k, v);
        out.append(attribute);
    }
}

View file

@ -0,0 +1,190 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.FEW;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.MANY;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.ONE;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.OTHER;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.TWO;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.ZERO;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralType.CARDINAL;
import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralType.ORDINAL;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import com.google.common.base.Ascii;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
/**
 * Tests for {@link PluralsMapper}: basic cardinal/ordinal mapping and the cases in which
 * identical rule groups are (or are not) deduplicated into a shared set.
 */
@RunWith(JUnit4.class)
public class PluralsMapperTest {
    // The "type" attribute of the plurals element.
    enum PluralType {
        ORDINAL,
        CARDINAL;

        @Override
        public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    // Plural categories, used for the "count" attribute of a plural rule.
    enum PluralCount {
        ZERO,
        ONE,
        TWO,
        FEW,
        MANY,
        OTHER;

        @Override
        public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    @Test
    public void testSimple() {
        Set<String> locales = ImmutableSet.of("en_GB", "en_NZ");
        CldrData input = cldrData(
            pluralRule(ORDINAL, locales, ZERO, "zero"),
            pluralRule(ORDINAL, locales, ONE, "one"),
            pluralRule(ORDINAL, locales, TWO, "two"),
            pluralRule(ORDINAL, locales, FEW, "few"),
            pluralRule(ORDINAL, locales, MANY, "many"),
            pluralRule(ORDINAL, locales, OTHER, "other"),
            pluralRule(CARDINAL, locales, ZERO, "!zero!"),
            pluralRule(CARDINAL, locales, ONE, "!one!"),
            pluralRule(CARDINAL, locales, TWO, "!two!"),
            pluralRule(CARDINAL, locales, FEW, "!few!"),
            pluralRule(CARDINAL, locales, MANY, "!many!"),
            pluralRule(CARDINAL, locales, OTHER, "!other!"));

        IcuData mapped = PluralsMapper.process(input);

        assertThat(mapped).hasName("plurals");
        assertThat(mapped).hasFallback(false);

        // Cardinals are assigned first, regardless of the CLDR path order (this could change).
        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set0");
        assertThat(mapped).hasValuesFor("/locales_ordinals/en_GB", "set1");
        assertThat(mapped).hasValuesFor("/locales_ordinals/en_NZ", "set1");

        assertThat(mapped).hasValuesFor("/rules/set0/zero", "!zero!");
        assertThat(mapped).hasValuesFor("/rules/set0/one", "!one!");
        assertThat(mapped).hasValuesFor("/rules/set0/two", "!two!");
        assertThat(mapped).hasValuesFor("/rules/set0/few", "!few!");
        assertThat(mapped).hasValuesFor("/rules/set0/many", "!many!");
        assertThat(mapped).hasValuesFor("/rules/set0/other", "!other!");

        assertThat(mapped).hasValuesFor("/rules/set1/zero", "zero");
        assertThat(mapped).hasValuesFor("/rules/set1/one", "one");
        assertThat(mapped).hasValuesFor("/rules/set1/two", "two");
        assertThat(mapped).hasValuesFor("/rules/set1/few", "few");
        assertThat(mapped).hasValuesFor("/rules/set1/many", "many");
        assertThat(mapped).hasValuesFor("/rules/set1/other", "other");
    }

    @Test
    public void testGroupDeduplication_subsets() {
        Set<String> british = ImmutableSet.of("en_GB");
        Set<String> kiwi = ImmutableSet.of("en_NZ");
        CldrData input = cldrData(
            // One group is a subset of the other, but this does not trigger deduplication.
            pluralRule(CARDINAL, british, ZERO, "zero"),
            pluralRule(CARDINAL, british, ONE, "one"),
            pluralRule(CARDINAL, british, TWO, "two"),
            pluralRule(CARDINAL, kiwi, ZERO, "zero"),
            pluralRule(CARDINAL, kiwi, ONE, "one"),
            pluralRule(CARDINAL, kiwi, TWO, "two"),
            pluralRule(CARDINAL, kiwi, FEW, "few"));

        IcuData mapped = PluralsMapper.process(input);

        // Each locale keeps its own set.
        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set1");

        assertThat(mapped).hasValuesFor("/rules/set0/zero", "zero");
        assertThat(mapped).hasValuesFor("/rules/set0/one", "one");
        assertThat(mapped).hasValuesFor("/rules/set0/two", "two");

        assertThat(mapped).hasValuesFor("/rules/set1/zero", "zero");
        assertThat(mapped).hasValuesFor("/rules/set1/one", "one");
        assertThat(mapped).hasValuesFor("/rules/set1/two", "two");
        assertThat(mapped).hasValuesFor("/rules/set1/few", "few");
    }

    @Test
    public void testGroupDeduplication_type() {
        Set<String> locales = ImmutableSet.of("en_GB");
        CldrData input = cldrData(
            // Groups are the same, but assigned separately to different types.
            pluralRule(CARDINAL, locales, ZERO, "zero"),
            pluralRule(CARDINAL, locales, ONE, "one"),
            pluralRule(CARDINAL, locales, TWO, "two"),
            pluralRule(ORDINAL, locales, ZERO, "zero"),
            pluralRule(ORDINAL, locales, ONE, "one"),
            pluralRule(ORDINAL, locales, TWO, "two"));

        IcuData mapped = PluralsMapper.process(input);

        // Group is deduplicated!
        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
        assertThat(mapped).hasValuesFor("/locales_ordinals/en_GB", "set0");

        assertThat(mapped).hasValuesFor("/rules/set0/zero", "zero");
        assertThat(mapped).hasValuesFor("/rules/set0/one", "one");
        assertThat(mapped).hasValuesFor("/rules/set0/two", "two");
    }

    @Test
    public void testGroupDeduplication_locales() {
        Set<String> british = ImmutableSet.of("en_GB");
        Set<String> kiwi = ImmutableSet.of("en_NZ");
        CldrData input = cldrData(
            // Groups are the same, but assigned separately to different locales.
            pluralRule(CARDINAL, british, ZERO, "zero"),
            pluralRule(CARDINAL, british, ONE, "one"),
            pluralRule(CARDINAL, british, TWO, "two"),
            pluralRule(CARDINAL, kiwi, ZERO, "zero"),
            pluralRule(CARDINAL, kiwi, ONE, "one"),
            pluralRule(CARDINAL, kiwi, TWO, "two"));

        IcuData mapped = PluralsMapper.process(input);

        // Group is deduplicated!
        assertThat(mapped).hasValuesFor("/locales/en_GB", "set0");
        assertThat(mapped).hasValuesFor("/locales/en_NZ", "set0");

        assertThat(mapped).hasValuesFor("/rules/set0/zero", "zero");
        assertThat(mapped).hasValuesFor("/rules/set0/one", "one");
        assertThat(mapped).hasValuesFor("/rules/set0/two", "two");
    }

    /** Wraps the given values as a CLDR data instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    /** Builds a plural rule value of the given type and count for the given locales. */
    private static CldrValue pluralRule(
        PluralType type, Set<String> locales, PluralCount count, String value) {

        StringBuilder path = new StringBuilder("//supplementalData");
        path.append("/plurals");
        appendAttribute(path, "type", type);
        path.append("/pluralRules");
        appendAttribute(path, "locales", Joiner.on(' ').join(locales));
        path.append("/pluralRule");
        appendAttribute(path, "count", count);
        return CldrValue.parseValue(path.toString(), value);
    }

    /** Appends a path attribute of the form {@code [@k="v"]} to the given builder. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        String attribute = String.format("[@%s=\"%s\"]", k, v);
        out.append(attribute);
    }
}

View file

@ -0,0 +1,187 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Access.PRIVATE;
import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Access.PUBLIC;
import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.DURATION_RULES;
import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.ORDINAL_RULES;
import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.SPELLOUT_RULES;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.base.Ascii;
import com.google.common.base.CaseFormat;
/**
 * Tests for {@link RbnfMapper}: single and multiple rule sets, merging of ICU "specials" data
 * and escaping of rule text.
 */
@RunWith(JUnit4.class)
public class RbnfMapperTest {
    // The "access" attribute of a ruleset.
    enum Access {
        PUBLIC,
        PRIVATE;

        @Override
        public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    // IMPORTANT: The ldml.dtd only defines 3 groups:
    // NumberingSystemRules, OrdinalRules, SpelloutRules
    // but the "specials" files used by ICU introduce additional group names (e.g. DurationRules)
    // which are strictly speaking invalid according to the DTD.
    enum Group {
        NUMBERING_SYSTEM_RULES,
        ORDINAL_RULES,
        SPELLOUT_RULES,
        DURATION_RULES;

        @Override
        public String toString() {
            // It's "NumberingSystemRules" not "numberingSystemRules"
            return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name());
        }
    }

    @Test
    public void testSingleRuleset() {
        int ruleIdx = 1;
        CldrData input = cldrData(
            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ruleIdx++));

        IcuData mapped = RbnfMapper.process("en", input, Optional.empty());

        assertThat(mapped).hasValuesFor("/RBNFRules/SpelloutRules",
            // Double-% prefix for "private" access.
            RbValue.of("%%2d-year:"),
            RbValue.of("0: hundred;"),
            RbValue.of("1: oh-=%first-set=;"),
            RbValue.of("10: =%first-set=;"));
    }

    @Test
    public void testMultipleRulesets() {
        // Note that input order of these paths shouldn't matter since they are ordered (and thus
        // grouped) by DTD order (relative order matters for values in the same set, but values
        // do not have to be grouped together).
        int ruleIdx = 1;
        CldrData input = cldrData(
            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ruleIdx++));

        IcuData mapped = RbnfMapper.process("en", input, Optional.empty());

        assertThat(mapped).hasValuesFor("/RBNFRules/SpelloutRules",
            // Single-% prefix for "public" access.
            RbValue.of("%first-set:"),
            RbValue.of("-x: one;"),
            RbValue.of("Inf: two;"),
            RbValue.of("NaN: three;"),
            RbValue.of("0: four;"),
            // Each "heading" appears once at the start of the section.
            RbValue.of("%second-set:"),
            RbValue.of("-x: five;"),
            RbValue.of("Inf: six;"),
            RbValue.of("NaN: seven;"),
            RbValue.of("0: eight;"));
    }

    @Test
    public void testSpecials() {
        // The sort index restarts at 1 for each of the two data sources.
        int specialsIdx = 1;
        CldrData specials = cldrData(
            rbnfRule(DURATION_RULES, "min", PRIVATE, "0",
                "0 minutes; 1 minute; =0= minutes;", specialsIdx++),
            rbnfRule(DURATION_RULES, "hr", PRIVATE, "0",
                "0 hours; 1 hour; =0= hours;", specialsIdx++),
            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", specialsIdx++),
            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", specialsIdx++),
            rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600",
                "=%%hr-min-sec=;", specialsIdx++));

        int ruleIdx = 1;
        CldrData input = cldrData(
            rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", ruleIdx++),
            rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0",
                "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ruleIdx++));

        IcuData mapped = RbnfMapper.process("en", input, Optional.of(specials));

        assertThat(mapped).hasValuesFor("/RBNFRules/OrdinalRules",
            RbValue.of("%digits-ordinal:"),
            RbValue.of("-x: \\u2212>>;"),
            RbValue.of("0: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;"));

        // The headings are sorted in the output ("hr" < "in-numerals" < "min").
        assertThat(mapped).hasValuesFor("/RBNFRules/DurationRules",
            RbValue.of("%%hr:"),
            RbValue.of("0: 0 hours; 1 hour; =0= hours;"),
            RbValue.of("%in-numerals:"),
            RbValue.of("0: =0= sec.;"),
            RbValue.of("60: =%%min-sec=;"),
            RbValue.of("3600: =%%hr-min-sec=;"),
            RbValue.of("%%min:"),
            RbValue.of("0: 0 minutes; 1 minute; =0= minutes;"));
    }

    // Note that while this is testing the escaping behaviour, the implementation was largely
    // derived from a mostly undocumented method in the previous converter, and while it behaves
    // the same, it's not entirely obvious why some of the special cases really exist.
    @Test
    public void testEscaping() {
        int ruleIdx = 1;
        CldrData input = cldrData(
            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ruleIdx++),
            rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ruleIdx++));

        IcuData mapped = RbnfMapper.process("en", input, Optional.empty());

        assertThat(mapped).hasValuesFor("/RBNFRules/SpelloutRules",
            RbValue.of("%escaping:"),
            RbValue.of("k1: \\\\ Backslash"),
            RbValue.of("k2: << Arrows >>"),
            RbValue.of("k3: \\u00DC Umlaut"),
            RbValue.of("k4: \\U0001F603 Smiley"));
    }

    /** Wraps the given values as a CLDR data instance. */
    private static CldrData cldrData(CldrValue... values) {
        return CldrDataSupplier.forValues(Arrays.asList(values));
    }

    // Both ruleset and rbnfrule are "ordered" elements, but to mimic the XML below, it's the
    // rbnfrule which needs to have an incrementing sort index:
    //
    // <ruleset type="<set-type>" access="<access>">
    // <rbnfrule value="<key-1>">value-1</rbnfrule>
    // <rbnfrule value="<key-2>">value-2</rbnfrule>
    // <rbnfrule value="<key-3>">value-3</rbnfrule>
    // </ruleset>
    private static CldrValue rbnfRule(
        Group group, String setType, Access access, String key, String value, int ruleIndex) {

        StringBuilder path = new StringBuilder("//ldml/rbnf");
        path.append("/rulesetGrouping");
        appendAttribute(path, "type", group);
        path.append("/ruleset");
        appendAttribute(path, "type", setType);
        appendAttribute(path, "access", access);
        // The sort index goes directly after the element name, ahead of any attributes.
        path.append("/rbnfrule#").append(ruleIndex);
        appendAttribute(path, "value", key);
        return CldrValue.parseValue(path.toString(), value);
    }

    /** Appends a path attribute of the form {@code [@k="v"]} to the given builder. */
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        String attribute = String.format("[@%s=\"%s\"]", k, v);
        out.append(attribute);
    }
}

View file

@ -0,0 +1,265 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.truth.Truth.assertThat;
import static java.util.stream.Collectors.joining;
import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.BACKWARD;
import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.BOTH;
import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.FORWARD;
import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Visibility.EXTERNAL;
import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Visibility.INTERNAL;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableList;
/**
 * Tests for the transforms mapper, checking both the resource bundle paths in the returned ICU
 * data and the contents of the rule files written via the supplied writer function.
 */
@RunWith(JUnit4.class)
public class TransformsMapperTest {
    // Lines expected at the start of every written rule file (the first begins with U+FEFF).
    private static final ImmutableList<String> FILE_HEADER = ImmutableList.of(
        "\uFEFF# © 2016 and later: Unicode, Inc. and others.",
        "# License & terms of use: http://www.unicode.org/copyright.html#License",
        "#");

    // The number of paths in the ICU data when no transform values are supplied at all.
    private static final int DEFAULT_PATH_COUNT = 7;

    /** Transform direction; the string form is the lower-case name used in CLDR paths. */
    enum Direction {
        FORWARD, BACKWARD, BOTH;

        @Override public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    /** Transform visibility; the string form is the lower-case name used in CLDR paths. */
    enum Visibility {
        INTERNAL, EXTERNAL;

        @Override public String toString() {
            return Ascii.toLowerCase(name());
        }
    }

    @Test
    public void testDefaultContent() {
        Map<String, String> writtenFiles = new TreeMap<>();
        IcuData icuData = TransformsMapper.process(cldrData(), wrap(writtenFiles));

        // Without any transform values, no files are written and only default paths exist.
        assertThat(writtenFiles).isEmpty();
        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT);

        // The special "%Translit%..." entries use a quoted path element equal to their value.
        ImmutableList<String> specials =
            ImmutableList.of("%Translit%Hex", "%Translit%UnicodeChar", "%Translit%UnicodeName");
        for (String special : specials) {
            assertThat(icuData).hasValuesFor("/\"" + special + "\"", special);
        }

        assertThat(icuData)
            .hasValuesFor("/RuleBasedTransliteratorIDs/Digit-Tone/alias", "NumericPinyin-Pinyin");
        assertThat(icuData)
            .hasValuesFor("/RuleBasedTransliteratorIDs/Tone-Digit/alias", "Pinyin-NumericPinyin");
        assertThat(icuData).hasValuesFor("TransliterateLATIN", "", "");
        assertThat(icuData)
            .hasValuesFor("TransliteratorNamePattern", "{0,choice,0#|1#{1}|2#{1}-{2}}");
    }

    @Test
    public void testForward() {
        CldrData cldrData =
            cldrData(oneWay("foo", "bar", FORWARD, null, INTERNAL, "first second third", 1));
        Map<String, String> writtenFiles = new TreeMap<>();
        IcuData icuData = TransformsMapper.process(cldrData, wrap(writtenFiles));

        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5);

        // Each space separated alias gets its own path.
        for (String alias : ImmutableList.of("first", "second", "third")) {
            assertThat(icuData)
                .hasValuesFor("RuleBasedTransliteratorIDs/" + alias + "/alias", "foo-bar");
        }

        String transformPrefix = "RuleBasedTransliteratorIDs/foo-bar/internal/";
        assertThat(icuData).hasValuesFor(transformPrefix + "direction", "FORWARD");
        assertThat(icuData)
            .hasValuesFor(transformPrefix + "resource:process(transliterator)", "foo_bar.txt");

        String expectedContents = headerPlusLines(
            "# File: foo_bar.txt",
            "# Generated from CLDR",
            "#",
            "",
            "foo --> bar [internal]:",
            "first second third");
        assertThat(writtenFiles).hasSize(1);
        assertThat(writtenFiles).containsEntry("foo_bar.txt", expectedContents);
    }

    @Test
    public void testBackward() {
        CldrData cldrData =
            cldrData(oneWay("foo", "bar", BACKWARD, "variant", EXTERNAL, "one two three", 1));
        Map<String, String> writtenFiles = new TreeMap<>();
        IcuData icuData = TransformsMapper.process(cldrData, wrap(writtenFiles));

        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5);

        for (String alias : ImmutableList.of("one", "two", "three")) {
            assertThat(icuData)
                .hasValuesFor("RuleBasedTransliteratorIDs/" + alias + "/alias", "bar-foo/variant");
        }

        // The variant puts a '/' inside a path element (where it is not a path separator), so
        // the expected path cannot be parsed from a string and is built from its elements.
        RbPath prefix = RbPath.of("RuleBasedTransliteratorIDs", "bar-foo/variant", "file");
        RbPath directionPath = prefix.extendBy("direction");
        RbPath resourcePath = prefix.extendBy("resource:process(transliterator)");
        assertThat(icuData).hasValuesFor(directionPath, "REVERSE");
        assertThat(icuData).hasValuesFor(resourcePath, "foo_bar_variant.txt");

        String expectedContents = headerPlusLines(
            "# File: foo_bar_variant.txt",
            "# Generated from CLDR",
            "#",
            "",
            "foo <-- bar [external]:",
            "one two three");
        assertThat(writtenFiles).hasSize(1);
        assertThat(writtenFiles).containsEntry("foo_bar_variant.txt", expectedContents);
    }

    @Test
    public void testBoth() {
        CldrData cldrData = cldrData(
            both("foo", "bar", null, INTERNAL, "forward-alias", "backward-alias", 1));
        Map<String, String> writtenFiles = new TreeMap<>();
        IcuData icuData = TransformsMapper.process(cldrData, wrap(writtenFiles));

        // Three additional paths for each direction.
        assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 6);

        String forwardPrefix = "RuleBasedTransliteratorIDs/foo-bar/internal/";
        String reversePrefix = "RuleBasedTransliteratorIDs/bar-foo/internal/";

        // Both directions are present ...
        assertThat(icuData).hasValuesFor(forwardPrefix + "direction", "FORWARD");
        assertThat(icuData).hasValuesFor(reversePrefix + "direction", "REVERSE");

        // ... as are both aliases ...
        assertThat(icuData)
            .hasValuesFor("RuleBasedTransliteratorIDs/forward-alias/alias", "foo-bar");
        assertThat(icuData)
            .hasValuesFor("RuleBasedTransliteratorIDs/backward-alias/alias", "bar-foo");

        // ... but they share the same file (there is only one).
        assertThat(icuData)
            .hasValuesFor(forwardPrefix + "resource:process(transliterator)", "foo_bar.txt");
        assertThat(icuData)
            .hasValuesFor(reversePrefix + "resource:process(transliterator)", "foo_bar.txt");

        String expectedContents = headerPlusLines(
            "# File: foo_bar.txt",
            "# Generated from CLDR",
            "#",
            "",
            "foo <-> bar [internal]:",
            "forward-alias",
            "backward-alias");
        assertThat(writtenFiles).hasSize(1);
        assertThat(writtenFiles).containsEntry("foo_bar.txt", expectedContents);
    }

    // Returns the expected file contents: the standard header followed by the given lines.
    private String headerPlusLines(String... lines) {
        Stream<String> allLines = Stream.concat(FILE_HEADER.stream(), Arrays.stream(lines));
        // For now the files always contain a blank line at the end (to match legacy behaviour),
        // but this can, and probably should, be changed.
        return allLines.collect(joining("\n", "", "\n\n"));
    }

    // Wraps the given values, in the order given, as a CLDR data instance.
    private static CldrData cldrData(CldrValue... values) {
        Iterable<CldrValue> cldrValues = Arrays.asList(values);
        return CldrDataSupplier.forValues(cldrValues);
    }

    // Creates a single direction transform rule. The alias is attached as "alias" for forward
    // transforms and "backwardAlias" otherwise, and is also used as the body of the rule text.
    private static CldrValue oneWay(
        String src, String dst, Direction dir, String var, Visibility vis, String alias, int idx) {

        checkArgument(dir != BOTH, "use both() for bidirectional transforms");
        boolean isForward = dir == FORWARD;

        StringBuilder cldrPath = transformPath(src, dst, dir, var, vis);
        appendAttribute(cldrPath, isForward ? "alias" : "backwardAlias", alias);
        cldrPath.append("/tRule#").append(idx);

        String arrow = isForward ? "-->" : "<--";
        String ruleText = String.format("%s %s %s [%s]:\n%s", src, arrow, dst, vis, alias);
        return CldrValue.parseValue(cldrPath.toString(), ruleText);
    }

    // Creates a bidirectional transform rule with both a forward and a backward alias, each of
    // which appears on its own line in the rule text.
    private static CldrValue both(
        String src, String dst, String var, Visibility vis, String alias, String backAlias, int idx) {

        StringBuilder cldrPath = transformPath(src, dst, BOTH, var, vis);
        appendAttribute(cldrPath, "alias", alias);
        appendAttribute(cldrPath, "backwardAlias", backAlias);
        cldrPath.append("/tRule#").append(idx);

        String ruleText =
            String.format("%s <-> %s [%s]:\n%s\n%s", src, dst, vis, alias, backAlias);
        return CldrValue.parseValue(cldrPath.toString(), ruleText);
    }

    // Builds the part of a transform path shared by all directions: the source, target,
    // direction, optional variant (omitted when null) and visibility attributes, in that order.
    private static StringBuilder transformPath(
        String src, String dst, Direction dir, String var, Visibility vis) {

        StringBuilder cldrPath = new StringBuilder("//supplementalData/transforms/transform");
        appendAttribute(cldrPath, "source", src);
        appendAttribute(cldrPath, "target", dst);
        appendAttribute(cldrPath, "direction", dir);
        if (var != null) {
            appendAttribute(cldrPath, "variant", var);
        }
        appendAttribute(cldrPath, "visibility", vis);
        return cldrPath;
    }

    // Appends a single path attribute of the form [@key="value"] to the given buffer.
    private static void appendAttribute(StringBuilder out, String k, Object v) {
        String attribute = String.format("[@%s=\"%s\"]", k, v);
        out.append(attribute);
    }

    // Returns a function which creates writers that record whatever was written to them in the
    // given map (keyed by the string form of the path) once they are closed.
    private static Function<Path, PrintWriter> wrap(Map<String, String> data) {
        return path -> new PrintWriter(new CapturingWriter(data, path));
    }

    // A writer which buffers its output and stores it in a map when closed.
    private static final class CapturingWriter extends Writer {
        private final Map<String, String> sink;
        private final Path path;
        private final StringWriter buffer = new StringWriter();

        CapturingWriter(Map<String, String> sink, Path path) {
            this.sink = sink;
            this.path = path;
        }

        @Override public void write(char[] chars, int offset, int length) {
            buffer.write(chars, offset, length);
        }

        @Override public void flush() {
            buffer.flush();
        }

        @Override public void close() throws IOException {
            buffer.close();
            // Only on close is the captured text made visible to the test.
            sink.put(path.toString(), buffer.toString());
        }
    }
}

View file

@ -9,8 +9,6 @@ import static org.unicode.icu.tool.cldrtoicu.testing.ResultSubjectFactory.assert
import java.util.List;
import javax.annotation.concurrent.Immutable;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@ -21,7 +19,6 @@ import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
/**
* Tests for the regex transformer class. Note that in most cases, the rules used here are taken

View file

@ -0,0 +1,70 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import static com.google.common.base.Preconditions.checkArgument;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.Iterables;
import com.google.common.collect.Table;
import com.google.common.collect.TreeBasedTable;
/**
 * Fake data supplier for testing (especially the path value mappers).
 *
 * <p>Locale data is held in two tables, one for unresolved and one for resolved data. Values
 * added via {@link #addLocaleData} appear in both, while values added via
 * {@link #addInheritedData} appear only in the resolved data. Supplemental (non-locale) data is
 * held separately and is filtered by data type when requested.
 */
public final class FakeDataSupplier extends CldrDataSupplier {
    private final Map<CldrPath, CldrValue> nonLocaleData = new LinkedHashMap<>();
    private final Table<String, CldrPath, CldrValue> unresolvedData = TreeBasedTable.create();
    private final Table<String, CldrPath, CldrValue> resolvedData = TreeBasedTable.create();

    /**
     * Adds values for a locale to both the unresolved and resolved data, replacing any values
     * already present for the same paths.
     *
     * @param localeId the locale to which the values belong.
     * @param values the values to add.
     * @return this instance (for chaining).
     */
    public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) {
        Arrays.stream(values).forEach(v -> {
            unresolvedData.put(localeId, v.getPath(), v);
            resolvedData.put(localeId, v.getPath(), v);
        });
        return this;
    }

    /**
     * Adds values for a locale to the resolved data only. Unlike {@link #addLocaleData}, this
     * method will not replace existing values.
     *
     * @param localeId the locale to which the values belong.
     * @param values the values to add.
     * @return this instance (for chaining).
     * @throws IllegalArgumentException if a value's path already exists in the resolved data
     *     for the locale (in which case the existing value is left untouched).
     */
    public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) {
        for (CldrValue value : values) {
            CldrPath path = value.getPath();
            // Check before mutating so that a failure does not overwrite the existing value.
            checkArgument(!resolvedData.contains(localeId, path),
                "path already present in resolved CLDR data: %s", path);
            resolvedData.put(localeId, path, value);
        }
        return this;
    }

    /**
     * Adds non-locale data, replacing any values already present for the same paths.
     *
     * @param values the values to add.
     * @return this instance (for chaining).
     */
    public FakeDataSupplier addSupplementalData(CldrValue... values) {
        Arrays.stream(values).forEach(v -> nonLocaleData.put(v.getPath(), v));
        return this;
    }

    @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
        // Any resolution other than UNRESOLVED is served from the resolved table.
        Table<String, CldrPath, CldrValue> data =
            resolution == CldrResolution.UNRESOLVED ? unresolvedData : resolvedData;
        return CldrDataSupplier.forValues(data.row(localeId).values());
    }

    @Override public CldrData getDataForType(CldrDataType type) {
        return CldrDataSupplier.forValues(
            Iterables.filter(nonLocaleData.values(), v -> v.getPath().getDataType() == type));
    }

    @Override public Set<String> getAvailableLocaleIds() {
        // Every locale with any data has an entry in the resolved table.
        return Collections.unmodifiableSet(resolvedData.rowKeySet());
    }

    @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) {
        throw new UnsupportedOperationException("not supported in fake data supplier");
    }
}

View file

@ -0,0 +1,106 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import static com.google.common.base.Preconditions.checkState;
import java.util.Comparator;
import java.util.Objects;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableList;
/**
 * A fake result, primarily for testing mappers. Instances are ordered and compared by key, then
 * index, then fallback state. This implementation does not "play well" with other result
 * implementations, and casts any result it is given to {@code FakeResult} unconditionally.
 */
public final class FakeResult extends Result {
    private static final Comparator<FakeResult> ORDERING =
        Comparator.comparing(FakeResult::getKey)
            .thenComparingInt(result -> result.index)
            .thenComparing(result -> result.isFallback);

    /**
     * Returns a primary (non-fallback) result. Care must be taken to ensure that multiple
     * "equal()" results are not used in the same test (results are equal if their path and
     * index are equal, and they share the same fallback state).
     *
     * @param path the path of the result
     * @param index the sort index of the result (to distinguish results with the same path).
     * @param isGrouped whether values in the result should be grouped into a separate sub-array.
     * @param values the result values.
     */
    public static Result of(String path, int index, boolean isGrouped, String... values) {
        RbPath rbPath = RbPath.parse(path);
        ImmutableList<String> valueList = ImmutableList.copyOf(values);
        return new FakeResult(rbPath, valueList, isGrouped, index, false);
    }

    /**
     * Returns a fallback result. Note that currently fallbacks are never "grouped".
     *
     * @param path the path of the result
     * @param index the sort index of the result (to distinguish results with the same path).
     * @param values the result values.
     */
    public static Result fallback(String path, int index, String... values) {
        RbPath rbPath = RbPath.parse(path);
        ImmutableList<String> valueList = ImmutableList.copyOf(values);
        return new FakeResult(rbPath, valueList, false, index, true);
    }

    private final boolean grouped;
    private final ImmutableList<String> values;
    private final boolean isFallback;
    private final int index;

    private FakeResult(
        RbPath path, ImmutableList<String> values, boolean grouped, int index, boolean isFallback) {
        super(path);
        this.grouped = grouped;
        this.values = values;
        this.isFallback = isFallback;
        this.index = index;
    }

    boolean isFallback() {
        return isFallback;
    }

    @Override public boolean isGrouped() {
        return grouped;
    }

    @Override public ImmutableList<String> getValues() {
        return values;
    }

    @Override public boolean isFallbackFor(Result r) {
        FakeResult primary = (FakeResult) r;
        // Only a fallback can stand in for something, and only for a non-fallback result.
        if (!isFallback || primary.isFallback) {
            return false;
        }
        return getKey().equals(primary.getKey()) && index == primary.index;
    }

    @Override public int compareTo(Result other) {
        FakeResult that = (FakeResult) other;
        int signum = ORDERING.compare(this, that);
        // Two distinct instances which compare as equal indicate bad test data.
        boolean isDistinct = signum != 0;
        checkState(isDistinct || this == other,
            "equivalent but non-identical results found in test data: %s / %s", this, other);
        return signum;
    }

    // We really don't want to pretend to support mixing implementations of Result in tests.
    @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
    @Override public boolean equals(Object obj) {
        FakeResult that = (FakeResult) obj;
        if (!getKey().equals(that.getKey())
            || index != that.index
            || isFallback != that.isFallback) {
            return false;
        }
        // Equal results are only permitted when they are the very same instance.
        checkState(this == that,
            "equivalent but non-identical results found in test data: %s / %s", this, that);
        return true;
    }

    @Override public int hashCode() {
        return Objects.hash(getKey(), index, isFallback);
    }
}

View file

@ -0,0 +1,37 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import java.util.HashMap;
import java.util.Map;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableList;
/**
 * A fake transformer for tests which simply returns results registered in advance. Values or
 * paths with no registered results produce an empty list.
 */
public final class FakeTransformer extends PathValueTransformer {
    private final Map<CldrValue, ImmutableList<Result>> resultMap = new HashMap<>();
    private final Map<RbPath, ImmutableList<Result>> fallbackMap = new HashMap<>();

    /** Registers the results to return for the given value, replacing any existing ones. */
    public void addResults(CldrValue value, Result... results) {
        ImmutableList<Result> resultList = ImmutableList.copyOf(results);
        resultMap.put(value, resultList);
    }

    /** Registers the fallback results for the given path, replacing any existing ones. */
    public void addFallbacks(String path, Result... results) {
        RbPath rbPath = RbPath.parse(path);
        fallbackMap.put(rbPath, ImmutableList.copyOf(results));
    }

    @Override public ImmutableList<Result> transform(CldrValue value) {
        ImmutableList<Result> registered = resultMap.get(value);
        return registered != null ? registered : ImmutableList.of();
    }

    @Override public ImmutableList<Result> transform(CldrValue value, DynamicVars ignored) {
        // Dynamic variables play no part in the fake, so this is the same as the simple case.
        return transform(value);
    }

    @Override public ImmutableList<Result> getFallbackResultsFor(RbPath key, DynamicVars ignored) {
        ImmutableList<Result> registered = fallbackMap.get(key);
        return registered != null ? registered : ImmutableList.of();
    }
}

View file

@ -0,0 +1,56 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import java.util.List;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.IterableSubject;
import com.google.common.truth.Subject;
/** Truth subject for making assertions about ICU data instances. */
public final class IcuDataSubject extends Subject {
    private final IcuData actual;

    protected IcuDataSubject(FailureMetadata metadata, IcuData actual) {
        super(metadata, actual);
        this.actual = actual;
    }

    /** Asserts that the ICU data has the given name. */
    public void hasName(String name) {
        String actualName = actual.getName();
        check("getName()").that(actualName).isEqualTo(name);
    }

    /** Asserts that the fallback flag of the ICU data is as given. */
    public void hasFallback(boolean fallback) {
        boolean actualFallback = actual.hasFallback();
        check("hasFallback()").that(actualFallback).isEqualTo(fallback);
    }

    /** Returns a subject for making assertions about the paths of the ICU data. */
    public IterableSubject getPaths() {
        return check("getPaths()").that(actual.getPaths());
    }

    /** Asserts that the given (parsed) path has exactly one value with one empty element. */
    public void hasEmptyValue(String rbPath) {
        RbValue emptyValue = RbValue.of("");
        hasValuesFor(rbPath, emptyValue);
    }

    /** Asserts that the given (parsed) path has a single value with the given elements. */
    public void hasValuesFor(String rbPath, String... values) {
        RbValue expected = RbValue.of(values);
        hasValuesFor(rbPath, expected);
    }

    /** Asserts that the given (parsed) path has exactly the given values, in order. */
    public void hasValuesFor(String rbPath, RbValue... values) {
        RbPath parsedPath = RbPath.parse(rbPath);
        hasValuesFor(parsedPath, values);
    }

    /** Asserts that the given path has a single value with the given elements. */
    public void hasValuesFor(RbPath p, String... values) {
        RbValue expected = RbValue.of(values);
        hasValuesFor(p, expected);
    }

    /** Asserts that the given path is present and has exactly the given values, in order. */
    public void hasValuesFor(RbPath p, RbValue... values) {
        List<RbValue> actualValues = actual.get(p);
        // Check for presence first so a missing path is reported as such.
        check("get('%s')", p).that(actualValues).isNotNull();
        check("get('%s')", p).that(actualValues).containsExactlyElementsIn(values).inOrder();
    }
}

View file

@ -0,0 +1,23 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.common.truth.Truth;
/**
 * Factory for Truth subjects which assert about ICU data instances (makes tests much more
 * readable). Tests normally just statically import {@link #assertThat(IcuData)}.
 */
public final class IcuDataSubjectFactory implements Subject.Factory<IcuDataSubject, IcuData> {
    IcuDataSubjectFactory() {}

    /** Begins an assertion about the given ICU data. */
    public static IcuDataSubject assertThat(IcuData result) {
        IcuDataSubjectFactory factory = new IcuDataSubjectFactory();
        return Truth.assertAbout(factory).that(result);
    }

    @Override
    public IcuDataSubject createSubject(FailureMetadata failureMetadata, IcuData that) {
        return new IcuDataSubject(failureMetadata, that);
    }
}

View file

@ -4,9 +4,11 @@ package org.unicode.icu.tool.cldrtoicu.testing;
import static com.google.common.base.Preconditions.checkArgument;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.truth.BooleanSubject;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import org.unicode.icu.tool.cldrtoicu.RbPath;
public final class RbPathSubject extends Subject {
// For use when chaining from other subjects.
@ -30,4 +32,16 @@ public final class RbPathSubject extends Subject {
checkArgument(n >= 0, "invalid path length: %s", n);
check("length()").that(actual.length()).isEqualTo(n);
}
/** Returns a subject for the result of testing whether the path starts with the given path. */
public final BooleanSubject startsWith(RbPath path) {
    boolean hasPrefix = actual.startsWith(path);
    return check("startsWith('%s')", path).that(hasPrefix);
}
/** Returns a subject for the result of testing whether the path ends with the given path. */
public final BooleanSubject endsWith(RbPath path) {
    boolean hasSuffix = actual.endsWith(path);
    return check("endsWith('%s')", path).that(hasSuffix);
}
/** Returns a subject for the result of testing whether the path contains the given path. */
public final BooleanSubject contains(RbPath path) {
    boolean isContained = actual.contains(path);
    return check("contains('%s')", path).that(isContained);
}
}

View file

@ -2,10 +2,11 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.common.truth.Truth;
import org.unicode.icu.tool.cldrtoicu.RbPath;
/** Truth subject for asserting about resource bundle paths (makes tests much more readable). */
public final class RbPathSubjectFactory implements Subject.Factory<RbPathSubject, RbPath> {

View file

@ -0,0 +1,39 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import static com.google.common.base.Preconditions.checkArgument;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
/** Truth subject for making assertions about the elements of a resource bundle value. */
public final class RbValueSubject extends Subject {
    // For use when chaining from other subjects.
    public static Factory<RbValueSubject, RbValue> rbValues() {
        return (metadata, value) -> new RbValueSubject(metadata, value);
    }

    private final RbValue actual;

    protected RbValueSubject(FailureMetadata metadata, RbValue actual) {
        super(metadata, actual);
        this.actual = actual;
    }

    /** Asserts that the value has exactly one element, equal to the given string. */
    public final void hasValue(String value) {
        check("getElements()")
            .that(actual.getElements())
            .containsExactly(value);
    }

    /** Asserts that the elements of the value are exactly the given strings, in order. */
    public final void hasValues(String... values) {
        check("getElements()")
            .that(actual.getElements())
            .containsExactlyElementsIn(values)
            .inOrder();
    }

    /** Asserts the number of elements in the value (the expected count must be positive). */
    public final void hasSize(int n) {
        checkArgument(n > 0, "invalid element count: %s", n);
        int elementCount = actual.getElements().size();
        check("getElements().size()").that(elementCount).isEqualTo(n);
    }
}

View file

@ -0,0 +1,23 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.common.truth.Truth;
/**
 * Factory for Truth subjects which assert about resource bundle values (makes tests much more
 * readable). Tests normally just statically import {@link #assertThat(RbValue)}.
 */
public final class RbValueSubjectFactory implements Subject.Factory<RbValueSubject, RbValue> {
    RbValueSubjectFactory() {}

    /** Begins an assertion about the given resource bundle value. */
    public static RbValueSubject assertThat(RbValue result) {
        RbValueSubjectFactory factory = new RbValueSubjectFactory();
        return Truth.assertAbout(factory).that(result);
    }

    @Override
    public RbValueSubject createSubject(FailureMetadata failureMetadata, RbValue that) {
        return new RbValueSubject(failureMetadata, that);
    }
}

View file

@ -2,10 +2,11 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.testing;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.common.truth.Truth;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
/** Truth subject for asserting about transformation results (makes tests much more readable). */
public class ResultSubjectFactory implements Subject.Factory<ResultSubject, Result> {