diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java index 6f736f8b14c..c6bc4b7c9a0 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuData.java @@ -2,8 +2,6 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu; -import static com.google.common.base.Preconditions.checkArgument; - import java.io.PrintWriter; import java.io.StringWriter; import java.util.Arrays; @@ -12,8 +10,6 @@ import java.util.List; import java.util.NavigableSet; import java.util.Set; import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; @@ -24,7 +20,6 @@ import com.google.common.collect.ListMultimap; */ public final class IcuData { private static final RbPath RB_VERSION = RbPath.of("Version"); - private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$"); private final String name; private final boolean hasFallback; @@ -98,48 +93,11 @@ public final class IcuData { add(rbPath, rbValue); } - public void setVersion(String versionString) { - add(RB_VERSION, versionString); - } - - public void addResults(ListMultimap resultsByRbPath) { - for (RbPath rbPath : resultsByRbPath.keySet()) { - for (PathValueTransformer.Result r : resultsByRbPath.get(rbPath)) { - if (r.isGrouped()) { - // Grouped results have all the values in a single value entry. 
- add(rbPath, RbValue.of(r.getValues())); - } else { - if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) { - r.getValues().forEach(v -> add(rbPath, RbValue.of(v))); - } else { - // Ungrouped results are one value per entry, but might be expanded into - // grouped results if they are a path referencing a grouped entry. - r.getValues().forEach(v -> add(rbPath, replacePathValues(v))); - } - } - } - } - } - /** - * Replaces an ungrouped CLDR value for the form "/foo/bar" or "/foo/bar[N]" which is assumed - * to be a reference to an existing value in a resource bundle. Note that the referenced bundle - * might be grouped (i.e. an array with more than one element). + * Sets the value of the "/Version" path to be the given string, replacing any previous value. */ - private RbValue replacePathValues(String value) { - Matcher m = ARRAY_INDEX.matcher(value); - if (!m.matches()) { - return RbValue.of(value); - } - // The only constraint is that the "path" value starts with a leading '/', but parsing into - // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the - // captured value contains '/' characters to represent path delimiters. - RbPath replacePath = RbPath.parse(m.group(1)); - List replaceValues = get(replacePath); - checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath); - // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]"). - int replaceIndex = m.groupCount() > 1 ? Integer.parseInt(m.group(2)) : 0; - return replaceValues.get(replaceIndex); + public void setVersion(String versionString) { + replace(RB_VERSION, versionString); } /** @@ -155,16 +113,6 @@ public final class IcuData { return Collections.unmodifiableSet(paths); } - /** Returns whether the given path is present in this instance. */ - public boolean contains(RbPath rbPath) { - return paths.contains(rbPath); - } - - /** Returns whether there are any paths in this instance. 
*/ - public boolean isEmpty() { - return paths.isEmpty(); - } - @Override public String toString() { StringWriter out = new StringWriter(); PrintWriter w = new PrintWriter(out); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java index 13dbcd33dd4..5df53606ba3 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java @@ -260,14 +260,14 @@ final class IcuDataDumper { private RbPath getPathFromStack() { if (pathStack.isEmpty()) { - return RbPath.empty(); + return RbPath.of(); } List segments = new ArrayList<>(); Iterables.addAll(segments, pathStack); if (segments.get(0).matches("<[0-9]{4}>")) { segments.remove(0); } - return segments.isEmpty() ? RbPath.empty() : RbPath.of(Lists.reverse(segments)); + return RbPath.of(Lists.reverse(segments)); } private String getSegment(String segmentOrNull) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java index 66e77ec9313..341731318b4 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuTextWriter.java @@ -14,6 +14,8 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.google.common.collect.Iterables; + /** * Writes an IcuData object to a text file. 
A lot of this class was copied directly from the * original {@code IcuTextWriter} in the CLDR project and has a number of very idiosyncratic @@ -76,7 +78,7 @@ final class IcuTextWriter { out.print("{"); depth++; - RbPath lastPath = RbPath.empty(); + RbPath lastPath = RbPath.of(); for (RbPath path : icuData.getPaths()) { // Close any blocks up to the common path length. Since paths are all distinct, the // common length should always be shorter than either path. We add 1 since we must also @@ -166,7 +168,7 @@ final class IcuTextWriter { onlyValue = values.get(0); if (onlyValue.isSingleton() && !mustBeArray(false, name, rbPath)) { // Value has a single element and is not being forced to be an array. - String onlyElement = onlyValue.getElement(0); + String onlyElement = Iterables.getOnlyElement(onlyValue.getElements()); if (quote) { onlyElement = quoteInside(onlyElement); } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java index 8ec1c36106f..563e1efb41d 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java @@ -404,7 +404,7 @@ public final class LdmlConverter { // Adding a parent locale makes the data non-empty and forces it to be written. 
supplementalData.getExplicitParentLocaleOf(splitData.getName()) .ifPresent(p -> splitData.add(RB_PARENT, p)); - if (!splitData.isEmpty() || isBaseLanguage || dir.includeEmpty()) { + if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) { splitData.setVersion(CldrDataSupplier.getCldrVersionString()); write(splitData, outDir); writtenLocaleIds.put(dir, id); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java index 0adeeb5fd4b..b6843134b57 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java @@ -54,20 +54,25 @@ public final class RbPath implements Comparable { private static final CharMatcher UNQUOTED_SEGMENT_CHARS = QUOTED_SEGMENT_CHARS.and(whitespace().negate()); - private static final RbPath EMPTY = new RbPath(ImmutableList.of()); - - public static RbPath empty() { - return EMPTY; - } - + /** + * Returns a path with the specified segments in (possibly empty). Note that unlike + * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path + * element constructed by this method. + */ public static RbPath of(String... segments) { return of(Arrays.asList(segments)); } + /** + * Returns a path with the specified segments in (possibly empty). Note that unlike + * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path + * element constructed by this method. + */ public static RbPath of(Iterable segments) { return new RbPath(segments); } + /** Parses the given path string, assuming {@code '/'} as a path separator. */ public static RbPath parse(String path) { checkArgument(!path.isEmpty(), "cannot parse an empty path string"); // Allow leading '/', but don't allow empty segments anywhere else. 
@@ -77,7 +82,8 @@ public final class RbPath implements Comparable { return new RbPath(PATH_SPLITTER.split(path)); } - static int getCommonPrefixLength(RbPath lhs, RbPath rhs) { + /** Returns the common prefix length of two paths (useful when thinking of path hierarchies). */ + public static int getCommonPrefixLength(RbPath lhs, RbPath rhs) { int maxLength = Math.min(lhs.length(), rhs.length()); int n = 0; while (n < maxLength && lhs.getSegment(n).equals(rhs.getSegment(n))) { @@ -91,6 +97,7 @@ public final class RbPath implements Comparable { private RbPath(Iterable segments) { this.segments = ImmutableList.copyOf(segments); + // Use "this.segments" since the incoming list can have a different hash! this.hashCode = Objects.hash(this.segments); for (String segment : this.segments) { checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments); @@ -122,43 +129,32 @@ public final class RbPath implements Comparable { } } + /** Returns the number of segments in this path. */ public int length() { return segments.size(); } + /** Returns the Nth segment in this path. */ public String getSegment(int n) { return segments.get(n); } - public RbPath getParent() { - checkState(length() > 0, "cannot get parent of the empty path"); - return length() > 1 ? new RbPath(segments.subList(0, length() - 1)) : EMPTY; - } - - public boolean isAnonymous() { - return length() > 0 && segments.get(length() - 1).charAt(0) == '<'; - } - + /** Returns a new path extended at the end by the specified segments. */ public RbPath extendBy(String... parts) { return new RbPath(Iterables.concat(segments, Arrays.asList(parts))); } - public RbPath extendBy(RbPath suffix) { - return new RbPath(Iterables.concat(segments, suffix.segments)); - } - - public RbPath mapSegments(Function fn) { - return new RbPath(segments.stream().map(fn).collect(toImmutableList())); - } - + /** Returns whether this path starts with the specified prefix. 
*/ public boolean startsWith(RbPath prefix) { return prefix.length() <= length() && matchesSublist(prefix, 0); } + /** Returns whether this path ends with the specified suffix. */ public boolean endsWith(RbPath suffix) { return suffix.length() <= length() && matchesSublist(suffix, length() - suffix.length()); } + /** Returns whether this path contains the specified path. */ public boolean contains(RbPath path) { int maxOffset = length() - path.length(); for (int i = 0; i <= maxOffset; i++) { @@ -179,6 +175,22 @@ public final class RbPath implements Comparable { return true; } + // TODO: Remove this and isAnonymous() since they are only called once each, in the same place. + public RbPath getParent() { + checkState(length() > 0, "cannot get parent of the empty path"); + return new RbPath(segments.subList(0, length() - 1)); + } + + public boolean isAnonymous() { + return length() > 0 && segments.get(length() - 1).charAt(0) == '<'; + } + + // TODO: Remove this special case code (called exactly once). + public RbPath mapSegments(Function fn) { + return new RbPath(segments.stream().map(fn).collect(toImmutableList())); + } + + // TODO: Remove this in favour of having properly typed paths. boolean isIntPath() { String lastElement = segments.get(segments.size() - 1); return lastElement.endsWith(":int") || lastElement.endsWith(":intvector"); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java index af1020f93f7..886ff18a266 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbValue.java @@ -38,22 +38,20 @@ public final class RbValue { return elements; } - /** Returns whether this is a single element value. */ + /** + * Returns whether this is a single element value. 
Singleton values are treated differently when + * writing out ICU data files. + */ public boolean isSingleton() { return elements.size() == 1; } - /** Returns the Nth element of this value. */ - public String getElement(int n) { - return elements.get(n); - } - @Override public int hashCode() { return Objects.hashCode(elements); } @Override public boolean equals(Object obj) { - return obj instanceof RbValue && elements.equals(((RbValue) obj).elements); + return obj instanceof RbValue && elements.equals(((RbValue) obj).elements); } @Override public String toString() { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java new file mode 100644 index 00000000000..66781a5c5f6 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/AbstractPathValueMapper.java @@ -0,0 +1,100 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; +import org.unicode.icu.tool.cldrtoicu.RbPath; +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.collect.ListMultimap; + +/** + * An abstract parent class for any mappers based on {@code PathValueTransformer}. This ensures + * that transformation results are correctly processed when being added to IcuData instances. 
+ */ +public abstract class AbstractPathValueMapper { + private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$"); + + private final IcuData icuData; + + AbstractPathValueMapper(String name, boolean hasFallback) { + this.icuData = new IcuData(name, hasFallback); + } + + /** Implemented by sub-classes to return all results to be added to the IcuData instance. */ + abstract ListMultimap getResults(); + + /** + * Adds results to the IcuData instance according to expected {@code PathValueTransformer} + * semantics. This method must only be called once per mapper. + */ + final IcuData transform() { + checkState(icuData.getPaths().isEmpty(), + "transform() method cannot be called multiple times: %s", icuData); + + // This class mostly exists to control the fact that results need to be added in one go + // to the IcuData because of how referenced paths are handled. If results could be added in + // multiple passes, you could have confusing situations in which values have path references + // in them but the referenced paths have not been transformed yet. Forcing the subclass to + // implement a single method to generate all results at once ensures that we control the + // lifecycle of the data and how results are processed as they are added to the IcuData. + addResults(getResults()); + return icuData; + } + + /** + * Adds transformation results on the specified multi-map to this data instance. Results are + * handled differently according to whether they are grouped, or represent an alias value. If + * the value of an ungrouped result is itself a resource bundle path (including possibly having + * an array index) then the referenced value is assumed to be an existing path whose value is + * then substituted. + */ + // TODO: Fix this to NOT implicitly rely on the ordering of referenced values. 
+ private void addResults(ListMultimap resultsByRbPath) { + for (RbPath rbPath : resultsByRbPath.keySet()) { + for (Result r : resultsByRbPath.get(rbPath)) { + if (r.isGrouped()) { + // Grouped results have all the values in a single value entry. + icuData.add(rbPath, RbValue.of(r.getValues())); + } else { + if (rbPath.getSegment(rbPath.length() - 1).endsWith(":alias")) { + r.getValues().forEach(v -> icuData.add(rbPath, RbValue.of(v))); + } else { + // Ungrouped results are one value per entry, but might be expanded into + // grouped results if they are a path referencing a grouped entry. + r.getValues().forEach(v -> icuData.add(rbPath, replacePathValues(v))); + } + } + } + } + } + + /** + * Replaces an ungrouped CLDR value of the form "/foo/bar" or "/foo/bar[N]" which is assumed + * to be a reference to an existing value in a resource bundle. Note that the referenced bundle + * might be grouped (i.e. an array with more than one element). + */ + private RbValue replacePathValues(String value) { + Matcher m = ARRAY_INDEX.matcher(value); + if (!m.matches()) { + return RbValue.of(value); + } + // The only constraint is that the "path" value starts with a leading '/', but parsing into + // the RbPath ignores this. We must use "parse()" here, rather than RbPath.of(), since the + // captured value contains '/' characters to represent path delimiters. + RbPath replacePath = RbPath.parse(m.group(1)); + List replaceValues = icuData.get(replacePath); + checkArgument(replaceValues != null, "Path %s is missing from IcuData", replacePath); + // If no index is given (e.g. "/foo/bar") then treat it as index 0 (i.e. "/foo/bar[0]"). NOTE(review): Matcher.groupCount() counts the groups in the pattern (always 2 here), so when no index is given m.group(2) is null and parseInt throws; the check should be m.group(2) != null. + int replaceIndex = m.groupCount() > 1 ? 
Integer.parseInt(m.group(2)) : 0; + return replaceValues.get(replaceIndex); + } +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java index c64c5ae9d61..2de90b12360 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapper.java @@ -21,6 +21,7 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.CharMatcher; import com.google.common.base.Splitter; @@ -68,9 +69,14 @@ public final class CollationMapper { public static IcuData process( String localeId, CldrDataSupplier src, Optional icuSpecialData) { + return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData); + } + + @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. + static IcuData process(String localeId, CldrData cldrData, Optional icuSpecialData) { CollationVisitor visitor = new CollationVisitor(localeId); icuSpecialData.ifPresent(s -> s.accept(ARBITRARY, visitor)); - src.getDataForLocale(localeId, UNRESOLVED).accept(ARBITRARY, visitor); + cldrData.accept(ARBITRARY, visitor); return visitor.icuData; } @@ -82,6 +88,7 @@ public final class CollationMapper { // Super special hack case because the XML data is a bit broken for the root collation // data (there's an empty element that's a non-leaf element and thus not // visited, but we should add an empty sequence to the output data. 
+ // TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131) if (localeId.equals("root")) { icuData.replace(RB_STANDARD_SEQUENCE, ""); // TODO: Collation versioning probably needs to be improved. @@ -108,12 +115,13 @@ public final class CollationMapper { // "short" it can also have other values. This code was copied from CollationMapper // which has the line; // isShort = attr.getValue("alt") != null; + // TODO: Raise a ticket to examine this. boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent(); // Note that it's not clear why there's a check for "contains()" here. The code // from which this was derived is largely undocumented and this check could have // been overly defensive (perhaps a duplicate key should be an error?). - if (isShort || !icuData.contains(rbPath)) { + if (isShort || !icuData.getPaths().contains(rbPath)) { RbValue rules = RbValue.of( LINE_SPLITTER.splitToList(v.getValue()).stream() .map(CollationMapper::removeComment) diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java index 88273a3a5ff..3cb20a4cf05 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/LocaleMapper.java @@ -30,6 +30,7 @@ import org.unicode.icu.tool.cldrtoicu.SupplementalData; import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.LinkedHashMultimap; +import com.google.common.collect.ListMultimap; import com.google.common.collect.SetMultimap; /** @@ -39,7 +40,7 @@ import com.google.common.collect.SetMultimap; *

This is currently driven by the {@code ldml2icu_locale.txt} configuration file via a * {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation. */ -public final class LocaleMapper { +public final class LocaleMapper extends AbstractPathValueMapper { // The default calendar (only set is different from inherited parent value). private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default"); @@ -62,11 +63,7 @@ public final class LocaleMapper { PathValueTransformer transformer, SupplementalData supplementalData) { - IcuData icuData = new IcuData(localeId, true); - // Write out the results into the IcuData class, preserving result grouping and expanding - // path references as necessary. - ResultsCollector collector = new ResultsCollector(transformer); - icuData.addResults(collector.collectResultsFor(localeId, src, icuSpecialData)); + IcuData icuData = new LocaleMapper(localeId, src, icuSpecialData, transformer).transform(); doDateTimeHack(icuData); supplementalData.getDefaultCalendar(icuData.getName()) .ifPresent(c -> icuData.add(RB_CALENDAR, c)); @@ -97,79 +94,87 @@ public final class LocaleMapper { } } - private static final class ResultsCollector { - private final PathValueTransformer transformer; - private final Set validRbPaths = new HashSet<>(); + private final String localeId; + private final CldrDataSupplier src; + private final Optional icuSpecialData; + private final PathValueTransformer transformer; - // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for - // each key. The reason is that result comparison is not "consistent with equals", and - // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() - // method), and it does this even if using the add() method of the sorted set (this is in - // fact in violation of the stated behaviour of Set#add). 
- private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); + private final Set validRbPaths = new HashSet<>(); - ResultsCollector(PathValueTransformer transformer) { - this.transformer = checkNotNull(transformer); - } + // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for + // each key. The reason is that result comparison is not "consistent with equals", and + // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() + // method), and it does this even if using the add() method of the sorted set (this is in + // fact in violation of the stated behaviour of Set#add). + private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); - ImmutableListMultimap collectResultsFor( - String localeId, CldrDataSupplier src, Optional icuSpecialData) { + private LocaleMapper( + String localeId, + CldrDataSupplier src, + Optional icuSpecialData, + PathValueTransformer transformer) { - CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED); - CldrData resolved = src.getDataForLocale(localeId, RESOLVED); - DynamicVars varFn = p -> { - CldrValue cldrValue = resolved.get(p); - return cldrValue != null ? cldrValue.getValue() : null; - }; + super(localeId, true); + this.localeId = localeId; + this.src = checkNotNull(src); + this.icuSpecialData = checkNotNull(icuSpecialData); + this.transformer = checkNotNull(transformer); + } - collectPaths(unresolved, varFn); - collectResults(resolved, varFn); - icuSpecialData.ifPresent(s -> collectSpecials(s, varFn)); + @Override + ListMultimap getResults() { + CldrData unresolved = src.getDataForLocale(localeId, UNRESOLVED); + CldrData resolved = src.getDataForLocale(localeId, RESOLVED); + DynamicVars varFn = p -> { + CldrValue cldrValue = resolved.get(p); + return cldrValue != null ? 
cldrValue.getValue() : null; + }; - ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); - out.orderValuesBy(natural()); - for (RbPath rbPath : resultsByRbPath.keySet()) { - Set existingResults = resultsByRbPath.get(rbPath); - out.putAll(rbPath, existingResults); - for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) { - if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { - out.put(rbPath, fallback); - } + collectPaths(unresolved, varFn); + collectResults(resolved, varFn); + icuSpecialData.ifPresent(s -> collectSpecials(s, varFn)); + + ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); + out.orderValuesBy(natural()); + for (RbPath rbPath : resultsByRbPath.keySet()) { + Set existingResults = resultsByRbPath.get(rbPath); + out.putAll(rbPath, existingResults); + for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) { + if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { + out.put(rbPath, fallback); } } - return out.build(); } + return out.build(); + } - private void collectPaths(CldrData unresolved, DynamicVars varFn) { - ValueVisitor collectPaths = - v -> transformer.transform(v, varFn).forEach(this::collectResultPath); - unresolved.accept(DTD, collectPaths); - } + private void collectPaths(CldrData unresolved, DynamicVars varFn) { + ValueVisitor collectPaths = + v -> transformer.transform(v, varFn).forEach(this::collectResultPath); + unresolved.accept(DTD, collectPaths); + } - private void collectResultPath(Result result) { - RbPath rbPath = result.getKey(); - validRbPaths.add(rbPath); - if (rbPath.isAnonymous()) { - RbPath parent = rbPath.getParent(); - checkState(!parent.isAnonymous(), - "anonymous paths should not be nested: %s", rbPath); - validRbPaths.add(parent); - } - } - - void collectResults(CldrData resolved, DynamicVars varFn) { - ValueVisitor collectResults = - v -> transformer.transform(v, varFn).stream() - .filter(r -> 
validRbPaths.contains(r.getKey())) - .forEach(r -> resultsByRbPath.put(r.getKey(), r)); - resolved.accept(DTD, collectResults); - } - - private void collectSpecials(CldrData cldrData, DynamicVars varFn) { - cldrData.accept(DTD, v -> - transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r))); + private void collectResultPath(Result result) { + RbPath rbPath = result.getKey(); + validRbPaths.add(rbPath); + if (rbPath.isAnonymous()) { + RbPath parent = rbPath.getParent(); + checkState(!parent.isAnonymous(), + "anonymous paths should not be nested: %s", rbPath); + validRbPaths.add(parent); } } - private LocaleMapper() {} + private void collectResults(CldrData resolved, DynamicVars varFn) { + ValueVisitor collectResults = + v -> transformer.transform(v, varFn).stream() + .filter(r -> validRbPaths.contains(r.getKey())) + .forEach(r -> resultsByRbPath.put(r.getKey(), r)); + resolved.accept(DTD, collectResults); + } + + private void collectSpecials(CldrData cldrData, DynamicVars varFn) { + cldrData.accept(DTD, v -> + transformer.transform(v, varFn).forEach(r -> resultsByRbPath.put(r.getKey(), r))); + } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java index 20847541ee5..0f6885b9637 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/SupplementalMapper.java @@ -29,7 +29,7 @@ import com.google.common.collect.SetMultimap; *

This is currently driven by the {@code ldml2icu_supplemental.txt} configuration file via a * {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation. */ -public final class SupplementalMapper { +public final class SupplementalMapper extends AbstractPathValueMapper { private static final RbPath RB_FIFO = RbPath.of(""); /** @@ -38,81 +38,78 @@ public final class SupplementalMapper { * @param src the CLDR data supplier to process. * @param transformer the transformer to match and transform each CLDR path/value pair. * @param icuName the name for the generated IcuData. - * @param includePaths a matcher to select the CLDR paths to be transformed. + * @param paths a matcher to select the CLDR paths to be transformed. * @return An IcuData instance containing the specified subset of supplemental data with the * given ICU name. */ // TODO: Improve external data splitting and remove need for a PathMatcher here. public static IcuData process( - CldrDataSupplier src, PathValueTransformer transformer, String icuName, - PathMatcher includePaths) { - ResultsCollector collector = new ResultsCollector(includePaths, transformer); - // Write out the results into the IcuData class, preserving result grouping and expanding - // path references as necessary. - IcuData icuData = new IcuData(icuName, false); - icuData.addResults(collector.getResults(src)); - return icuData; + CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) { + + return new SupplementalMapper(src, transformer, icuName, paths).transform(); } - private static final class ResultsCollector { - private final PathMatcher pathMatcher; - private final PathValueTransformer transformer; + private final CldrDataSupplier src; + private final PathMatcher paths; + private final PathValueTransformer transformer; - // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for - // each key. 
The reason is that result comparison is not "consistent with equals", and - // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() - // method), and it does this even if using the add() method of the sorted set (this is in - // fact in violation of the stated behaviour of Set#add). - private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); - private int fifoCounter = 0; + // WARNING: TreeMultimap() is NOT suitable here, even though it would sort the values for + // each key. The reason is that result comparison is not "consistent with equals", and + // TreeMultimap uses the comparator to decide if two elements are equal (not the equals() + // method), and it does this even if using the add() method of the sorted set (this is in + // fact in violation of the stated behaviour of Set#add). + private final SetMultimap resultsByRbPath = LinkedHashMultimap.create(); + private int fifoCounter = 0; - ResultsCollector(PathMatcher pathMatcher, PathValueTransformer transformer) { - this.pathMatcher = checkNotNull(pathMatcher); - this.transformer = checkNotNull(transformer); - } + private SupplementalMapper( + CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) { - private void visit(CldrValue value) { - if (pathMatcher.matchesPrefixOf(value.getPath())) { - for (Result r : transformer.transform(value)) { - RbPath rbPath = r.getKey(); - if (rbPath.contains(RB_FIFO)) { - // The fifo counter needs to be formatted with leading zeros for sorting. - rbPath = rbPath.mapSegments( - s -> s.equals("") ? String.format("<%04d>", fifoCounter) : s); - } - resultsByRbPath.put(rbPath, r); - } - fifoCounter++; - } - } - - ImmutableListMultimap getResults(CldrDataSupplier supplier) { - // DTD and NESTED_GROUPING order differ because of how the magic label works (it - // basically enforces "encounter order" onto things in unlabeled sequences, which matches - // the old behaviour). 
If it wouldn't break anything, it might be worth moving to DTD order - // to remove any lingering implicit dependencies on the CLDR data behaviour. - CldrData supplementalData = supplier.getDataForType(CldrDataType.SUPPLEMENTAL); - PathValueTransformer.DynamicVars varFn = p -> { - CldrValue cldrValue = supplementalData.get(p); - return cldrValue != null ? cldrValue.getValue() : null; - }; - - supplementalData.accept(NESTED_GROUPING, this::visit); - - ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); - out.orderValuesBy(natural()); - for (RbPath rbPath : resultsByRbPath.keySet()) { - Set existingResults = resultsByRbPath.get(rbPath); - out.putAll(rbPath, existingResults); - for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) { - if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { - out.put(rbPath, fallback); - } - } - } - return out.build(); - } + super(icuName, false); + this.src = checkNotNull(src); + this.paths = checkNotNull(paths); + this.transformer = checkNotNull(transformer); } - private SupplementalMapper() {} + @Override + ImmutableListMultimap getResults() { + // DTD and NESTED_GROUPING order differ because of how the magic label works (it + // basically enforces "encounter order" onto things in unlabeled sequences, which matches + // the old behaviour). If it wouldn't break anything, it might be worth moving to DTD order + // to remove any lingering implicit dependencies on the CLDR data behaviour. + CldrData supplementalData = src.getDataForType(CldrDataType.SUPPLEMENTAL); + PathValueTransformer.DynamicVars varFn = p -> { + CldrValue cldrValue = supplementalData.get(p); + return cldrValue != null ? 
cldrValue.getValue() : null; + }; + + supplementalData.accept(NESTED_GROUPING, this::visit); + + ImmutableListMultimap.Builder out = ImmutableListMultimap.builder(); + out.orderValuesBy(natural()); + for (RbPath rbPath : resultsByRbPath.keySet()) { + Set existingResults = resultsByRbPath.get(rbPath); + out.putAll(rbPath, existingResults); + for (Result fallback : transformer.getFallbackResultsFor(rbPath, varFn)) { + if (existingResults.stream().noneMatch(fallback::isFallbackFor)) { + out.put(rbPath, fallback); + } + } + } + return out.build(); + } + + private void visit(CldrValue value) { + if (paths.matchesPrefixOf(value.getPath())) { + for (Result r : transformer.transform(value)) { + RbPath rbPath = r.getKey(); + if (rbPath.contains(RB_FIFO)) { + // The fifo counter needs to be formatted with leading zeros for sorting. + rbPath = rbPath.mapSegments( + s -> s.equals("") ? String.format("<%04d>", fifoCounter) : s); + } + resultsByRbPath.put(rbPath, r); + } + fifoCounter++; + } + } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java index 18e189d6d7c..14c4d340bce 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapper.java @@ -19,6 +19,7 @@ import java.util.Optional; import java.util.function.Function; import org.unicode.cldr.api.AttributeKey; +import org.unicode.cldr.api.CldrData; import org.unicode.cldr.api.CldrData.ValueVisitor; import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDataType; @@ -28,6 +29,7 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher; import org.unicode.icu.tool.cldrtoicu.RbPath; import org.unicode.icu.tool.cldrtoicu.RbValue; +import com.google.common.annotations.VisibleForTesting; import 
com.ibm.icu.text.Transliterator; /** @@ -77,15 +79,23 @@ public final class TransformsMapper { * @return the IcuData instance to be written to a file. */ public static IcuData process(CldrDataSupplier src, Path ruleFileOutputDir) { - RuleVisitor visitor = new RuleVisitor(p -> { + Function fileWriterFn = p -> { Path file = ruleFileOutputDir.resolve(p); try { return new PrintWriter(Files.newBufferedWriter(file, CREATE, TRUNCATE_EXISTING)); } catch (IOException e) { throw new RuntimeException("error opening file: " + file, e); } - }); - src.getDataForType(SUPPLEMENTAL).accept(DTD, visitor); + }; + CldrData cldrData = src.getDataForType(SUPPLEMENTAL); + return process(cldrData, fileWriterFn); + } + + @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier. + static IcuData process(CldrData cldrData, Function fileWriterFn) { + RuleVisitor visitor = new RuleVisitor(fileWriterFn); + cldrData.accept(DTD, visitor); + addSpecialCaseValues(visitor.icuData); return visitor.icuData; } @@ -96,27 +106,6 @@ public final class TransformsMapper { RuleVisitor(Function outFn) { this.outFn = checkNotNull(outFn); icuData.setFileComment("File: root.txt"); - - // I have _no_ idea what any of this is about, I'm just trying to mimic the original - // (complex and undocumented) code in "ConvertTransforms.java". - // TODO: Understand and document each of the cases below. - icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}"); - // Note that this quoting of path segments is almost certainly unnecessary. It matches - // the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so - // it seems very likely that it's not needed here. - // TODO: Once migration done, remove quotes here & check in RbPath for unwanted quotes. 
- icuData.add(RbPath.of("\"%Translit%Hex\""), "%Translit%Hex"); - icuData.add(RbPath.of("\"%Translit%UnicodeName\""), "%Translit%UnicodeName"); - icuData.add(RbPath.of("\"%Translit%UnicodeChar\""), "%Translit%UnicodeChar"); - // Special case, where Latin is a no-op. - icuData.add(RbPath.of("TransliterateLATIN"), RbValue.of("", "")); - // Some hard-coded special case mappings. - icuData.add( - RB_TRANSLITERATOR_IDS.extendBy("Tone-Digit", "alias"), - "Pinyin-NumericPinyin"); - icuData.add( - RB_TRANSLITERATOR_IDS.extendBy("Digit-Tone", "alias"), - "NumericPinyin-Pinyin"); } @Override public void visit(CldrValue value) { @@ -153,6 +142,7 @@ public final class TransformsMapper { String status = visibility == Visibility.internal ? "internal" : "file"; Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class); + // TODO: Consider checks for unused data (e.g. forward aliases in a backward rule). if (dir != Direction.backward) { String id = getId(source, target, variant); TRANSFORM_ALIAS.listOfValuesFrom(value) @@ -172,6 +162,33 @@ public final class TransformsMapper { } } + private static void addSpecialCaseValues(IcuData icuData) { + // I have _no_ idea what any of this is about, I'm just trying to mimic the original + // (complex and undocumented) code in "ConvertTransforms.java". + // TODO: Understand and document each of the cases below. + icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}"); + // Note that this quoting of path segments is almost certainly unnecessary. It matches + // the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so + // it seems very likely that it's not needed here. + // TODO: Once migration done, remove quotes here & check in RbPath for unwanted quotes. 
+ icuData.add(RbPath.of("\"%Translit%Hex\""), "%Translit%Hex"); + icuData.add(RbPath.of("\"%Translit%UnicodeName\""), "%Translit%UnicodeName"); + icuData.add(RbPath.of("\"%Translit%UnicodeChar\""), "%Translit%UnicodeChar"); + // Special case, where Latin is a no-op. + icuData.add(RbPath.of("TransliterateLATIN"), RbValue.of("", "")); + // Some hard-coded special case mappings. + icuData.add( + RB_TRANSLITERATOR_IDS.extendBy("Tone-Digit", "alias"), + "Pinyin-NumericPinyin"); + icuData.add( + RB_TRANSLITERATOR_IDS.extendBy("Digit-Tone", "alias"), + "NumericPinyin-Pinyin"); + } + + // It is important to note that this ID contains a '/' but this is a literal in the path + // element and does not add an extra layer in the resource bundle path (the use of '/' to + // separate path elements is a purely internal detail for things like LocaleMapper and the + // regex-based configuration). private static String getId(String from, String to, Optional variant) { String baseId = from + "-" + to; return variant.map(v -> baseId + "/" + v).orElse(baseId); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java new file mode 100644 index 00000000000..85f6ef2ad2a --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/IcuDataTest.java @@ -0,0 +1,110 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static com.google.common.truth.Truth.assertThat; +import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; + +import java.util.Arrays; +import java.util.Set; + +import org.junit.Test; + +import com.google.common.collect.ImmutableList; + +public class IcuDataTest { + @Test + public void testSimple() { + IcuData icuData = new IcuData("icu-name", true); + assertThat(icuData.getName()).isEqualTo("icu-name"); + assertThat(icuData.hasFallback()).isTrue(); + + IcuData otherData = new IcuData("other-name", false); + assertThat(otherData.getName()).isEqualTo("other-name"); + assertThat(otherData.hasFallback()).isFalse(); + } + + @Test + public void testFileComment() { + IcuData icuData = new IcuData("icu-name", false); + assertThat(icuData.getFileComment()).isEmpty(); + + icuData.setFileComment("Hello", "World"); + assertThat(icuData.getFileComment()).containsExactly("Hello", "World").inOrder(); + + icuData.setFileComment(Arrays.asList("Foo", "Bar")); + assertThat(icuData.getFileComment()).containsExactly("Foo", "Bar").inOrder(); + + icuData.setFileComment(ImmutableList.of()); + assertThat(icuData.getFileComment()).isEmpty(); + } + + @Test + public void testSetVersion() { + IcuData icuData = new IcuData("icu-name", false); + icuData.setVersion("VERSION"); + + RbPath rbPath = RbPath.of("Version"); + assertThat(icuData.getPaths()).containsExactly(rbPath); + assertThat(icuData.get(rbPath)).isEqualTo(ImmutableList.of(RbValue.of("VERSION"))); + } + + @Test + public void testGetPaths() { + IcuData icuData = new IcuData("icu-name", false); + // getPaths() is a live view on the data, not a snapshot. 
+ Set paths = icuData.getPaths(); + assertThat(paths).isEmpty(); + + RbPath fooBar = RbPath.of("foo", "bar"); + icuData.add(fooBar, "value1"); + assertThat(icuData.get(fooBar)).contains(RbValue.of("value1")); + assertThat(paths).containsExactly(fooBar); + assertThat(paths).hasSize(1); + + RbPath fooBaz = RbPath.of("foo", "baz"); + icuData.add(fooBaz, "value2"); + assertThat(icuData.get(fooBaz)).contains(RbValue.of("value2")); + assertThat(paths).containsExactly(fooBar, fooBaz).inOrder(); + assertThat(paths).hasSize(2); + + // Paths is not modifiable. + assertThrows(UnsupportedOperationException.class, () -> paths.add(RbPath.of("nope"))); + assertThrows(UnsupportedOperationException.class, () -> paths.remove(fooBar)); + assertThrows(UnsupportedOperationException.class, paths::clear); + } + + @Test + public void addMultiple() { + IcuData icuData = new IcuData("icu-name", false); + RbPath fooBar = RbPath.of("foo", "bar"); + + RbValue value1 = RbValue.of("the", "first", "value"); + RbValue value2 = RbValue.of("another-value"); + + icuData.add(fooBar, value1); + assertThat(icuData.get(fooBar)).containsExactly(value1); + + icuData.add(fooBar, "another-value"); + assertThat(icuData.get(fooBar)).containsExactly(value1, value2).inOrder(); + + // It's just a list, with no ordering and no deduplication. 
+ icuData.add(fooBar, Arrays.asList(value2, value1)); + assertThat(icuData.get(fooBar)).containsExactly(value1, value2, value2, value1).inOrder(); + } + + @Test + public void replace() { + IcuData icuData = new IcuData("icu-name", false); + RbPath fooBar = RbPath.of("foo", "bar"); + + RbValue value1 = RbValue.of("the", "first", "value"); + RbValue value2 = RbValue.of("another-value"); + + icuData.replace(fooBar, value1); + assertThat(icuData.get(fooBar)).containsExactly(value1); + + icuData.replace(fooBar, "another-value"); + assertThat(icuData.get(fooBar)).containsExactly(value2); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PathMatcherTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PathMatcherTest.java index dd81a2b3a3b..ff9c54cdc21 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PathMatcherTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/PathMatcherTest.java @@ -4,7 +4,6 @@ package org.unicode.icu.tool.cldrtoicu; import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth8.assertThat; -import static org.junit.Assert.fail; import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath; import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; @@ -76,16 +75,48 @@ public class PathMatcherTest { } @Test - public void testAnyOf() { - PathMatcher monthMatch = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]"); - PathMatcher dayMatch = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]"); - PathMatcher combined = PathMatcher.anyOf(monthMatch, dayMatch); + public void testAnyOf_match() { + PathMatcher narrowMonth = + PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/months" + + "/monthContext[@type=\"format\"]/monthWidth[@type=\"narrow\"]/month[@type=*]"); + PathMatcher narrowDay = + 
PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/days" + + "/dayContext[@type=\"format\"]/dayWidth[@type=\"narrow\"]/day[@type=*]"); + PathMatcher prefix = PathMatcher.anyOf(narrowMonth, narrowDay); - assertThat(combined.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue(); - assertThat(combined.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue(); + assertThat(prefix.matches(monthInfo("gregorian", "format", "narrow", 1))).isTrue(); + assertThat(prefix.matches(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue(); - assertThat(combined.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse(); - assertThat(combined.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse(); + assertThat(prefix.matches(monthInfo("hindu", "format", "wide", 1))).isFalse(); + assertThat(prefix.matches(dayInfo("hindu", "format", "wide", "mon"))).isFalse(); + } + + @Test + public void testAnyOf_suffix() { + PathMatcher monthSuffix = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]"); + PathMatcher daySuffix = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]"); + PathMatcher suffix = PathMatcher.anyOf(monthSuffix, daySuffix); + + assertThat(suffix.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue(); + assertThat(suffix.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue(); + + assertThat(suffix.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse(); + assertThat(suffix.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse(); + } + + @Test + public void testAnyOf_prefix() { + PathMatcher monthPrefix = + PathMatcher.of("ldml/dates/calendars/calendar[@type=\"gregorian\"]/months"); + PathMatcher dayPrefix = + PathMatcher.of("ldml/dates/calendars/calendar[@type=\"buddhist\"]/days"); + PathMatcher prefix = PathMatcher.anyOf(monthPrefix, dayPrefix); + + assertThat(prefix.matchesPrefixOf(monthInfo("gregorian", "format", "narrow", 
1))).isTrue(); + assertThat(prefix.matchesPrefixOf(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue(); + + assertThat(prefix.matchesPrefixOf(monthInfo("hindu", "format", "wide", 1))).isFalse(); + assertThat(prefix.matchesPrefixOf(dayInfo("hindu", "format", "wide", "mon"))).isFalse(); } @Test diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/RbPathTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/RbPathTest.java index d7aaf80919b..57cec80a9cc 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/RbPathTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/RbPathTest.java @@ -2,10 +2,9 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu; -import static org.unicode.icu.tool.cldrtoicu.testing.RbPathSubjectFactory.assertThat; -import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; import static com.google.common.truth.Truth.assertThat; -import static com.google.common.truth.Truth8.assertThat; +import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows; +import static org.unicode.icu.tool.cldrtoicu.testing.RbPathSubjectFactory.assertThat; import org.junit.Test; import org.junit.runner.RunWith; @@ -15,27 +14,90 @@ import org.junit.runners.JUnit4; public class RbPathTest { @Test public void testEmpty() { - assertThat(RbPath.empty()).hasSegments(); - assertThat(RbPath.empty()).hasLength(0); + assertThat(RbPath.of()).hasSegments(); + assertThat(RbPath.of()).hasLength(0); } @Test public void testParseVsOf() { assertThat(RbPath.of("foo", "bar")).hasSegments("foo", "bar"); - assertThat(RbPath.of("foo/bar")).hasSegments("foo/bar"); - assertThat(RbPath.parse("foo/bar")).hasSegments("foo", "bar"); + assertThat(RbPath.parse("foo/bar/baz")).hasSegments("foo", "bar", "baz"); + // Allow and ignore leading '/' for legacy reasons. 
+ assertThat(RbPath.parse("/foo/bar/baz")).hasSegments("foo", "bar", "baz"); + assertThat(RbPath.of("foo/bar", "baz")).hasSegments("foo/bar", "baz"); } @Test public void testBadArgs() { assertBadPath("", "empty path string"); - assertBadPath("foo//bar", "empty path segment"); + assertBadPath("foo//bar", "path segments must not be empty"); assertBadPath("foo/ RbPath.parse(path)); diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java index af10861a1fd..17368f411d4 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java @@ -199,10 +199,6 @@ public class SupplementalDataTest { .that(regressionData.maximize(id).orElse(null)) .isEqualTo(likelySubtags.maximize(id)); } - - // ars currently a special case since it's in the ICU data as an alias, but not in the CLDR - // data at all. This while it's a structurally valid language code, it cannot be maximized. - assertThat(regressionData.maximize("ars")).isEmpty(); } @Test @@ -214,7 +210,7 @@ public class SupplementalDataTest { try { ltc.transform(id); } catch (NullPointerException e) { - System.out.println("--> " + id); + // Occurs for sh_CS and sh_YU. continue; } // Need to maximize to work around: diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java new file mode 100644 index 00000000000..f221964011a --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java @@ -0,0 +1,283 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.RbPath; +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +@RunWith(JUnit4.class) +public class Bcp47MapperTest { + private static final ImmutableMap EXPECTED_ALIAS_MAP = ImmutableMap.of( + RbPath.of("bcpTypeAlias", "tz:alias"), + RbValue.of("/ICUDATA/timezoneTypes/bcpTypeAlias/tz"), + RbPath.of("typeAlias", "timezone:alias"), + RbValue.of("/ICUDATA/timezoneTypes/typeAlias/timezone"), + RbPath.of("typeMap", "timezone:alias"), + RbValue.of("/ICUDATA/timezoneTypes/typeMap/timezone")); + + @Test + public void testSimple() { + CldrData cldrData = cldrData( + simpleType("foo", "one"), + simpleType("foo", "two"), + simpleType("foo", "three"), + simpleType("bar", "four"), + simpleType("bar", "five")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + + IcuData bcp47Data = icuData.get(0); + assertThat(bcp47Data).hasName("keyTypeData"); + assertThat(bcp47Data).hasFallback(false); + + // Check the number of paths and verify the special injected values. 
+ assertThat(bcp47Data).getPaths().hasSize(7 + EXPECTED_ALIAS_MAP.size()); + EXPECTED_ALIAS_MAP.forEach((p, v) -> assertThat(bcp47Data).hasValuesFor(p, v)); + + assertThat(bcp47Data).hasEmptyValue("/keyMap/foo"); + assertThat(bcp47Data).hasEmptyValue("/keyMap/bar"); + + assertThat(bcp47Data).hasEmptyValue("/typeMap/foo/one"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/foo/two"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/foo/three"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/four"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/five"); + + IcuData tzData = icuData.get(1); + assertThat(tzData).hasName("timezoneTypes"); + assertThat(tzData).hasFallback(false); + assertThat(tzData).getPaths().isEmpty(); + } + + @Test + public void testSimpleTimezone() { + CldrData cldrData = cldrData( + simpleType("tz", "one"), + simpleType("tz", "two"), + simpleType("tz", "three"), + simpleType("bar", "four"), + simpleType("bar", "five")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + + IcuData bcp47Data = icuData.get(0); + assertThat(bcp47Data).hasName("keyTypeData"); + assertThat(bcp47Data).hasFallback(false); + + // Check the number of paths and verify the special injected values. + assertThat(bcp47Data).getPaths().hasSize(4 + EXPECTED_ALIAS_MAP.size()); + EXPECTED_ALIAS_MAP.forEach((p, v) -> assertThat(bcp47Data).hasValuesFor(p, v)); + + // The key-map is only ever in the main bcp47 data and contains the timezone key. + assertThat(bcp47Data).hasEmptyValue("/keyMap/tz"); + assertThat(bcp47Data).hasEmptyValue("/keyMap/bar"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/four"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/bar/five"); + + IcuData tzData = icuData.get(1); + assertThat(tzData).hasName("timezoneTypes"); + assertThat(tzData).hasFallback(false); + + // Only the type-map paths/values are split into the timezone data. 
+ assertThat(tzData).getPaths().hasSize(3); + assertThat(tzData).hasEmptyValue("/typeMap/tz/one"); + assertThat(tzData).hasEmptyValue("/typeMap/tz/two"); + assertThat(tzData).hasEmptyValue("/typeMap/tz/three"); + } + + @Test + public void testKeyAliases() { + CldrData cldrData = cldrData( + alias("key", "ALIAS", "type")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData bcp47Data = icuData.get(0); + + // Key aliases are lower-cased (though it's not entirely obvious as to why). + assertThat(bcp47Data).hasValuesFor("/keyMap/alias", "key"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/alias/type"); + } + + @Test + public void testTypeAliases_single() { + CldrData cldrData = cldrData( + alias("key", null, "type", "main")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData bcp47Data = icuData.get(0); + + assertThat(bcp47Data).hasEmptyValue("/keyMap/key"); + assertThat(bcp47Data).hasValuesFor("/typeMap/key/main", "type"); + } + + @Test + public void testTypeAliases_multiple() { + CldrData cldrData = cldrData( + alias("key", null, "type", "main", "alias1", "alias2", "alias3")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData bcp47Data = icuData.get(0); + + assertThat(bcp47Data).hasEmptyValue("/keyMap/key"); + assertThat(bcp47Data).hasValuesFor("/typeMap/key/main", "type"); + + // Only aliases after the first (main) one go in the typeAlias set. 
+ assertThat(bcp47Data).getPaths().doesNotContain(RbPath.parse("typeAlias/key/main")); + assertThat(bcp47Data).hasValuesFor("/typeAlias/key/alias1", "main"); + assertThat(bcp47Data).hasValuesFor("/typeAlias/key/alias2", "main"); + assertThat(bcp47Data).hasValuesFor("/typeAlias/key/alias3", "main"); + } + + @Test + public void testKeyAndTypeAliases() { + CldrData cldrData = cldrData( + alias("key", "key-alias", "type", "main", "type-alias")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData bcp47Data = icuData.get(0); + + assertThat(bcp47Data).hasValuesFor("/keyMap/key-alias", "key"); + assertThat(bcp47Data).hasValuesFor("/typeMap/key-alias/main", "type"); + assertThat(bcp47Data).hasValuesFor("/typeAlias/key-alias/type-alias", "main"); + } + + @Test + public void testPreferredTypeName() { + CldrData cldrData = cldrData( + deprecated("deprecated-key", true, "type", false, "/preferred/path1"), + deprecated("key", false, "deprecated-type", true, "/preferred/path2")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData bcp47Data = icuData.get(0); + + assertThat(bcp47Data).hasValuesFor("/bcpTypeAlias/deprecated-key/type", "/preferred/path1"); + assertThat(bcp47Data).hasValuesFor("/bcpTypeAlias/key/deprecated-type", "/preferred/path2"); + } + + @Test + public void testInfoAttributes() { + CldrData cldrData = cldrData( + // Deprecated without a replacement. 
+ deprecated("deprecated-key", true, "type", false, null), + deprecated("key", false, "deprecated-type", true, null), + valueType("info-key", "info-type", "value-type")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData bcp47Data = icuData.get(0); + + assertThat(bcp47Data).hasEmptyValue("/keyMap/deprecated-key"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/deprecated-key/type"); + assertThat(bcp47Data).hasValuesFor("/keyInfo/deprecated/deprecated-key", "true"); + + assertThat(bcp47Data).hasEmptyValue("/keyMap/key"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/key/deprecated-type"); + assertThat(bcp47Data).hasValuesFor("/typeInfo/deprecated/key/deprecated-type", "true"); + + assertThat(bcp47Data).hasEmptyValue("/keyMap/info-key"); + assertThat(bcp47Data).hasEmptyValue("/typeMap/info-key/info-type"); + assertThat(bcp47Data).hasValuesFor("/keyInfo/valueType/info-key", "value-type"); + } + + // This will hopefully one day be the responsibility of the IcuTextWriter. + @Test + public void testTimezonePathQuotingForAliases() { + CldrData cldrData = cldrData( + alias("tz", null, "escaped", "foo/bar", "hello/world"), + alias("tz", null, "unescaped", "foo_bar", "hello_world")); + + ImmutableList icuData = Bcp47Mapper.process(cldrData); + IcuData tzData = icuData.get(1); + + // Only the type-map paths/values are split into the timezone data. + assertThat(tzData).getPaths().hasSize(4); + assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped"); + assertThat(tzData).hasValuesFor("/typeMap/tz/foo_bar", "unescaped"); + + // TODO: Check this is correct! Having foo/bar as the value rather than foo:bar seems wrong. + assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar"); + assertThat(tzData).hasValuesFor("/typeAlias/tz/hello_world", "foo_bar"); + } + + private static CldrData cldrData(CldrValue... 
values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue simpleType(String keyName, String typeName) { + StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword"); + cldrPath.append("/key"); + appendAttribute(cldrPath, "name", keyName); + cldrPath.append("/type"); + appendAttribute(cldrPath, "name", typeName); + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static CldrValue alias( + String keyName, String keyAlias, String typeName, String... typeAliases) { + + StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword"); + cldrPath.append("/key"); + appendAttribute(cldrPath, "name", keyName); + if (keyAlias != null) { + appendAttribute(cldrPath, "alias", keyAlias); + } + cldrPath.append("/type"); + appendAttribute(cldrPath, "name", typeName); + if (typeAliases.length > 0) { + appendAttribute(cldrPath, "alias", Joiner.on(" ").join(typeAliases)); + } + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static CldrValue deprecated( + String keyName, + boolean keyDeprecated, + String typeName, + boolean typeDeprecated, + String preferred) { + + StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword"); + cldrPath.append("/key"); + appendAttribute(cldrPath, "name", keyName); + if (keyDeprecated) { + appendAttribute(cldrPath, "deprecated", keyDeprecated); + } + cldrPath.append("/type"); + appendAttribute(cldrPath, "name", typeName); + if (preferred != null) { + appendAttribute(cldrPath, "preferred", preferred); + } + if (typeDeprecated) { + appendAttribute(cldrPath, "deprecated", typeDeprecated); + } + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static CldrValue valueType(String keyName, String typeName, String valueType) { + StringBuilder cldrPath = new StringBuilder("//ldmlBCP47/keyword"); + cldrPath.append("/key"); + appendAttribute(cldrPath, "name", keyName); + appendAttribute(cldrPath, "valueType", valueType); + 
cldrPath.append("/type"); + appendAttribute(cldrPath, "name", typeName); + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java new file mode 100644 index 00000000000..d01bf74712b --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapperTest.java @@ -0,0 +1,150 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.GRAPHEME; +import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.BoundaryType.SENTENCE; +import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.LINE_BREAK; +import static org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapperTest.SegmentationType.SENTENCE_BREAK; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; +import java.util.Optional; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.base.Ascii; +import com.google.common.base.CaseFormat; + +@RunWith(JUnit4.class) +public class BreakIteratorMapperTest { + enum SegmentationType { + GRAPHEME_CLUSTER_BREAK, LINE_BREAK, SENTENCE_BREAK, WORD_BREAK; + 
+ @Override public String toString() { + return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name()); + } + } + + enum BoundaryType { + GRAPHEME, WORD, LINE, SENTENCE, TITLE; + + // E.g. "icu:grapheme" + @Override public String toString() { + return "icu:" + Ascii.toLowerCase(name()); + } + } + + @Test + public void testSingleSuppression() { + int idx = 0; + CldrData cldrData = cldrData( + suppression(SENTENCE_BREAK, "L.P.", ++idx), + suppression(SENTENCE_BREAK, "Alt.", ++idx), + suppression(SENTENCE_BREAK, "Approx.", ++idx)); + + IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty()); + + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array", + RbValue.of("L.P."), + RbValue.of("Alt."), + RbValue.of("Approx.")); + } + + // In real data, suppression is only a SentenceBreak thing, but we might as well test it for + // other types. + @Test + public void testMultipleSupressionTypes() { + int idx = 0; + CldrData cldrData = cldrData( + suppression(SENTENCE_BREAK, "L.P.", ++idx), + suppression(SENTENCE_BREAK, "Alt.", ++idx), + suppression(SENTENCE_BREAK, "Approx.", ++idx), + suppression(LINE_BREAK, "Foo", ++idx), + suppression(LINE_BREAK, "Bar", ++idx), + suppression(LINE_BREAK, "Baz", ++idx)); + + IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty()); + + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array", + RbValue.of("L.P."), + RbValue.of("Alt."), + RbValue.of("Approx.")); + assertThat(icuData).hasValuesFor("/exceptions/LineBreak:array", + RbValue.of("Foo"), + RbValue.of("Bar"), + RbValue.of("Baz")); + } + + @Test + public void testSpecials_dictionary() { + CldrData specials = cldrData( + dictionary("foo", ""), + dictionary("bar", "")); + + IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials)); + + assertThat(icuData).getPaths().hasSize(2); + 
assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", ""); + assertThat(icuData).hasValuesFor("/dictionaries/bar:process(dependency)", ""); + } + + @Test + public void testSpecials_boundaries() { + CldrData specials = cldrData( + boundaries(GRAPHEME, "", null), + boundaries(SENTENCE, "", "altName")); + + IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials)); + + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData) + .hasValuesFor("/boundaries/grapheme:process(dependency)", ""); + assertThat(icuData) + .hasValuesFor("/boundaries/sentence_altName:process(dependency)", ""); + } + + private static CldrData cldrData(CldrValue... values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue suppression(SegmentationType type, String value, int index) { + StringBuilder cldrPath = new StringBuilder("//ldml/segmentations"); + appendAttribute(cldrPath.append("/segmentation"), "type", type); + cldrPath.append("/suppressions[@type=\"standard\"]"); + // Suppression is an ordered element, so needs a sort index. 
+ cldrPath.append("/suppression#").append(index); + return CldrValue.parseValue(cldrPath.toString(), value); + } + + private static CldrValue dictionary(String type, String dependency) { + StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData"); + cldrPath.append("/icu:dictionaries/icu:dictionary"); + appendAttribute(cldrPath, "type", type); + appendAttribute(cldrPath, "icu:dependency", dependency); + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static CldrValue boundaries(BoundaryType type, String dependency, String alt) { + StringBuilder cldrPath = new StringBuilder("//ldml/special/icu:breakIteratorData"); + cldrPath.append("/icu:boundaries/").append(type); + appendAttribute(cldrPath, "icu:dependency", dependency); + if (alt != null) { + appendAttribute(cldrPath, "alt", alt); + } + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java new file mode 100644 index 00000000000..7af7422f2cf --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/CollationMapperTest.java @@ -0,0 +1,157 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; +import java.util.Optional; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; + +import com.google.common.base.Joiner; + +@RunWith(JUnit4.class) +public class CollationMapperTest { + @Test + public void testEmpty() { + IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty()); + assertThat(icuData).hasName("xx"); + assertThat(icuData).hasFallback(true); + assertThat(icuData).getPaths().isEmpty(); + + // Root gets a couple of special paths added to it due to the need to work around a CLDR + // data bug. + IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty()); + assertThat(rootData).hasName("root"); + assertThat(rootData).hasFallback(true); + assertThat(rootData).getPaths().hasSize(2); + assertThat(rootData).hasValuesFor("/collations/standard/Version", getCldrVersionString()); + assertThat(rootData).hasEmptyValue("/collations/standard/Sequence"); + } + + @Test + public void testDefault() { + CldrData cldrData = + cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value")); + + IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + assertThat(icuData).getPaths().hasSize(1); + assertThat(icuData).hasValuesFor("/collations/default", "any value"); + } + + // This tests legacy behaviour which mimics the original converter code. There's no promise + // that it's semantically correct though. 
+ @Test + public void testLastAltRuleOverridesExisting() { + // Note that in DTD order (which is what the paths are processed in) the path with no "alt" + // attribute comes after everything else, but the first "alt" path is overwritten by the + // second. It's not even clear there should ever be two alt paths, or what the paths mean + // (the original code seems to suggest it's looking for the "short" alternate form, but + // the "alt" attribute can have more than the value "short"...) + CldrData cldrData = cldrData( + collationRule("foo", "alt1", "First alt rule"), + collationRule("foo", "alt2", "Second alt rule"), + collationRule("foo", null, "First rule")); + + IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + assertThat(icuData).getPaths().hasSize(2); + assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString()); + assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule"); + } + + @Test + public void testCommentAndWhitespaceStripping() { + CldrData cldrData = cldrData( + collationRule("foo", null, + "# Comments are stripped", + "", + " # As are empty lines and leading/trailing spaces", + " Here is a value ", + "# And more comments to be stripped", + "And another value")); + + IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty()); + assertThat(icuData).hasValuesFor("/collations/foo/Sequence", + "Here is a value", + "And another value"); + } + + // Just in case anything weird happens with non-BMP char sequences: + // + // locales = ImmutableSet.of("en_GB", "en_AU", "en_NZ"); + CldrData cldrData = cldrData( + dayPeriodRule(locales, MORNING1, isBetween("04:00", "12:00")), + dayPeriodRule(locales, NOON, isAt("12:00")), + dayPeriodRule(locales, AFTERNOON1, isBetween("12:00", "18:00")), + dayPeriodRule(locales, EVENING1, isBetween("18:00", "21:00")), + dayPeriodRule(locales, NIGHT1, isBetween("21:00", "04:00")), + dayPeriodRule(locales, MIDNIGHT, isAt("00:00"))); + + 
IcuData icuData = DayPeriodsMapper.process(cldrData); + + assertThat(icuData).hasName("dayPeriods"); + assertThat(icuData).hasFallback(false); + assertThat(icuData).hasValuesFor("/locales/en_AU", "set1"); + assertThat(icuData).hasValuesFor("/locales/en_GB", "set1"); + assertThat(icuData).hasValuesFor("/locales/en_NZ", "set1"); + + assertThat(icuData).hasValuesFor("/rules/set1/morning1/from", "04:00"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/before", "12:00"); + assertThat(icuData).hasValuesFor("/rules/set1/noon/at", "12:00"); + assertThat(icuData).hasValuesFor("/rules/set1/afternoon1/from", "12:00"); + assertThat(icuData).hasValuesFor("/rules/set1/afternoon1/before", "18:00"); + assertThat(icuData).hasValuesFor("/rules/set1/evening1/from", "18:00"); + assertThat(icuData).hasValuesFor("/rules/set1/evening1/before", "21:00"); + assertThat(icuData).hasValuesFor("/rules/set1/night1/from", "21:00"); + assertThat(icuData).hasValuesFor("/rules/set1/night1/before", "04:00"); + assertThat(icuData).hasValuesFor("/rules/set1/midnight/at", "00:00"); + } + + @Test + public void testMultipleRuleSets() { + Set locales1 = ImmutableSet.of("en_GB"); + Set locales2 = ImmutableSet.of("en_AU", "en_NZ"); + CldrData cldrData = cldrData( + dayPeriodRule(locales1, MORNING1, isBetween("04:00", "12:00")), + dayPeriodRule(locales1, NOON, isAt("12:00")), + dayPeriodRule(locales2, MORNING1, isBetween("06:00", "13:00")), + dayPeriodRule(locales2, NOON, isAt("13:00"))); + + IcuData icuData = DayPeriodsMapper.process(cldrData); + + // This reversal of the set ordering (as compared to the order of the input paths) is + // because visitation requires nested path ordering, which is achieved by lexicographical + // ordering of path strings ("en_AU" < "en_GB"). This is an implementation detail however + // and might one day change. If this were switched to use DTD order, then it would be + // stable (but also affect the ordering of paths in the released ICU data). 
+ assertThat(icuData).hasValuesFor("/locales/en_AU", "set1"); + assertThat(icuData).hasValuesFor("/locales/en_NZ", "set1"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/from", "06:00"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/before", "13:00"); + assertThat(icuData).hasValuesFor("/rules/set1/noon/at", "13:00"); + + assertThat(icuData).hasValuesFor("/locales/en_GB", "set2"); + assertThat(icuData).hasValuesFor("/rules/set2/morning1/from", "04:00"); + assertThat(icuData).hasValuesFor("/rules/set2/morning1/before", "12:00"); + assertThat(icuData).hasValuesFor("/rules/set2/noon/at", "12:00"); + } + + @Test + public void testRulesetLabels() { + Set locales = ImmutableSet.of("en_GB"); + // Note that there's an implicit assumption in the mapper that the ruleset label is the + // same for all of the rules of any given locale (since it comes from the parent element). + CldrData cldrData = cldrData( + dayPeriodRule(locales, MORNING1, isBetween("04:00", "12:00"), "foo"), + dayPeriodRule(locales, NOON, isAt("12:00"), "foo")); + + IcuData icuData = DayPeriodsMapper.process(cldrData); + + assertThat(icuData).hasValuesFor("/locales_foo/en_GB", "set1"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/from", "04:00"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/before", "12:00"); + assertThat(icuData).hasValuesFor("/rules/set1/noon/at", "12:00"); + } + + // Just demonstrating that the mapper does no data validation. + @Test + public void testNoDataValidation() { + Set locales = ImmutableSet.of("foo", "bar"); + CldrData cldrData = cldrData( + dayPeriodRule(locales, MORNING1, isBetween("start", "end")), + dayPeriodRule(locales, NOON, isAt("moment"))); + + IcuData icuData = DayPeriodsMapper.process(cldrData); + + // This reversal of the set ordering (as compared to the order of the input paths) is + // because visitation requires nested path ordering, which is achieved by lexicographical + // ordering of path strings. 
This is an implementation detail however and might one day + // change. If this were switched to use DTD order, then it would be stable (but also + // affect the ordering of paths in the released ICU data). + assertThat(icuData).hasValuesFor("/locales/foo", "set1"); + assertThat(icuData).hasValuesFor("/locales/bar", "set1"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/from", "start"); + assertThat(icuData).hasValuesFor("/rules/set1/morning1/before", "end"); + assertThat(icuData).hasValuesFor("/rules/set1/noon/at", "moment"); + } + + private static CldrData cldrData(CldrValue... values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue dayPeriodRule( + Set locales, RuleType type, Map rules) { + + return dayPeriodRule(locales, type, rules, null); + } + + private static CldrValue dayPeriodRule( + Set locales, RuleType type, Map rules, String label) { + + StringBuilder cldrPath = new StringBuilder("//supplementalData/dayPeriodRuleSet"); + if (label != null) { + appendAttribute(cldrPath, "type", label); + } + appendAttribute(cldrPath.append("/dayPeriodRules"), "locales", Joiner.on(' ').join(locales)); + appendAttribute(cldrPath.append("/dayPeriodRule"), "type", type); + rules.forEach((k, v) -> cldrPath.append(String.format("[@%s=\"%s\"]", k, v))); + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static Map isAt(String time) { + return ImmutableMap.of(RuleName.AT, time); + } + + private static Map isBetween(String from, String to) { + return ImmutableMap.of(RuleName.FROM, from, RuleName.BEFORE, to); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapperTest.java new 
file mode 100644 index 00000000000..63c9df57f31 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralRangesMapperTest.java @@ -0,0 +1,114 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.FEW; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.MANY; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.ONE; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.OTHER; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.TWO; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapperTest.PluralCount.ZERO; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.base.Ascii; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; + +@RunWith(JUnit4.class) +public class PluralRangesMapperTest { + // Possible rule names (these are the value attributes). 
+ enum PluralCount { + ZERO, ONE, TWO, FEW, MANY, OTHER; + + @Override public String toString() { + return Ascii.toLowerCase(name()); + } + } + + @Test + public void testSimple() { + Set locales = ImmutableSet.of("en_GB", "en_NZ"); + CldrData cldrData = cldrData( + pluralRange(locales, ZERO, ONE, ZERO), + pluralRange(locales, ZERO, FEW, FEW), + pluralRange(locales, ONE, TWO, OTHER), + pluralRange(locales, ONE, MANY, MANY)); + + IcuData icuData = PluralRangesMapper.process(cldrData); + + assertThat(icuData).hasName("pluralRanges"); + assertThat(icuData).hasFallback(false); + + assertThat(icuData).hasValuesFor("/locales/en_GB", "set00"); + assertThat(icuData).hasValuesFor("/locales/en_NZ", "set00"); + // Note that ordering is based on incoming CLDR path ordering, which is reordered by virtue + // of being processed in "nested grouping" order. This should probably be made to use DTD + // order to make output more isolated once it's clear that this doesn't affect output. + assertThat(icuData) + .hasValuesFor("/rules/set00", + RbValue.of("one", "many", "many"), + RbValue.of("one", "two", "other"), + RbValue.of("zero", "few", "few"), + RbValue.of("zero", "one", "zero")); + } + + @Test + public void testMultipleSets() { + Set locales1 = ImmutableSet.of("en_GB"); + Set locales2 = ImmutableSet.of("en_AU"); + CldrData cldrData = cldrData( + pluralRange(locales1, ZERO, ONE, ZERO), + pluralRange(locales1, ZERO, FEW, FEW), + pluralRange(locales2, ONE, TWO, OTHER), + pluralRange(locales2, ONE, MANY, MANY)); + + IcuData icuData = PluralRangesMapper.process(cldrData); + + assertThat(icuData).hasName("pluralRanges"); + assertThat(icuData).hasFallback(false); + + assertThat(icuData).hasValuesFor("/locales/en_AU", "set00"); + assertThat(icuData) + .hasValuesFor("/rules/set00", + RbValue.of("one", "many", "many"), + RbValue.of("one", "two", "other")); + + assertThat(icuData).hasValuesFor("/locales/en_GB", "set01"); + assertThat(icuData) + .hasValuesFor("/rules/set01", + 
RbValue.of("zero", "few", "few"), + RbValue.of("zero", "one", "zero")); + } + + private static CldrData cldrData(CldrValue... values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue pluralRange( + Set locales, PluralCount start, PluralCount end, PluralCount result) { + + StringBuilder cldrPath = new StringBuilder("//supplementalData/plurals"); + appendAttribute(cldrPath.append("/pluralRanges"), "locales", Joiner.on(' ').join(locales)); + cldrPath.append("/pluralRange"); + appendAttribute(cldrPath, "start", start); + appendAttribute(cldrPath, "end", end); + appendAttribute(cldrPath, "result", result); + return CldrValue.parseValue(cldrPath.toString(), ""); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java new file mode 100644 index 00000000000..a62e3fe86a2 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/PluralsMapperTest.java @@ -0,0 +1,190 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.FEW; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.MANY; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.ONE; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.OTHER; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.TWO; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralCount.ZERO; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralType.CARDINAL; +import static org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapperTest.PluralType.ORDINAL; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; + +import com.google.common.base.Ascii; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; + +@RunWith(JUnit4.class) +public class PluralsMapperTest { + enum PluralType { + ORDINAL, CARDINAL; + + @Override public String toString() { + return Ascii.toLowerCase(name()); + } + } + + // Possible rule names (these are the value attributes). 
+ enum PluralCount { + ZERO, ONE, TWO, FEW, MANY, OTHER; + + @Override public String toString() { + return Ascii.toLowerCase(name()); + } + } + + @Test + public void testSimple() { + Set locales = ImmutableSet.of("en_GB", "en_NZ"); + CldrData cldrData = cldrData( + pluralRule(ORDINAL, locales, ZERO, "zero"), + pluralRule(ORDINAL, locales, ONE, "one"), + pluralRule(ORDINAL, locales, TWO, "two"), + pluralRule(ORDINAL, locales, FEW, "few"), + pluralRule(ORDINAL, locales, MANY, "many"), + pluralRule(ORDINAL, locales, OTHER, "other"), + + pluralRule(CARDINAL, locales, ZERO, "!zero!"), + pluralRule(CARDINAL, locales, ONE, "!one!"), + pluralRule(CARDINAL, locales, TWO, "!two!"), + pluralRule(CARDINAL, locales, FEW, "!few!"), + pluralRule(CARDINAL, locales, MANY, "!many!"), + pluralRule(CARDINAL, locales, OTHER, "!other!")); + + IcuData icuData = PluralsMapper.process(cldrData); + + assertThat(icuData).hasName("plurals"); + assertThat(icuData).hasFallback(false); + + // Cardinals are assigned first, regardless of the CLDR path order (this could change). 
+ assertThat(icuData).hasValuesFor("/locales/en_GB", "set0"); + assertThat(icuData).hasValuesFor("/locales/en_NZ", "set0"); + assertThat(icuData).hasValuesFor("/locales_ordinals/en_GB", "set1"); + assertThat(icuData).hasValuesFor("/locales_ordinals/en_NZ", "set1"); + + assertThat(icuData).hasValuesFor("/rules/set0/zero", "!zero!"); + assertThat(icuData).hasValuesFor("/rules/set0/one", "!one!"); + assertThat(icuData).hasValuesFor("/rules/set0/two", "!two!"); + assertThat(icuData).hasValuesFor("/rules/set0/few", "!few!"); + assertThat(icuData).hasValuesFor("/rules/set0/many", "!many!"); + assertThat(icuData).hasValuesFor("/rules/set0/other", "!other!"); + + assertThat(icuData).hasValuesFor("/rules/set1/zero", "zero"); + assertThat(icuData).hasValuesFor("/rules/set1/one", "one"); + assertThat(icuData).hasValuesFor("/rules/set1/two", "two"); + assertThat(icuData).hasValuesFor("/rules/set1/few", "few"); + assertThat(icuData).hasValuesFor("/rules/set1/many", "many"); + assertThat(icuData).hasValuesFor("/rules/set1/other", "other"); + } + + @Test + public void testGroupDeduplication_subsets() { + Set locales1 = ImmutableSet.of("en_GB"); + Set locales2 = ImmutableSet.of("en_NZ"); + CldrData cldrData = cldrData( + // One group is a subset of the other, but this does not trigger deduplication. 
+ pluralRule(CARDINAL, locales1, ZERO, "zero"), + pluralRule(CARDINAL, locales1, ONE, "one"), + pluralRule(CARDINAL, locales1, TWO, "two"), + + pluralRule(CARDINAL, locales2, ZERO, "zero"), + pluralRule(CARDINAL, locales2, ONE, "one"), + pluralRule(CARDINAL, locales2, TWO, "two"), + pluralRule(CARDINAL, locales2, FEW, "few")); + + IcuData icuData = PluralsMapper.process(cldrData); + + assertThat(icuData).hasValuesFor("/locales/en_GB", "set0"); + assertThat(icuData).hasValuesFor("/locales/en_NZ", "set1"); + + assertThat(icuData).hasValuesFor("/rules/set0/zero", "zero"); + assertThat(icuData).hasValuesFor("/rules/set0/one", "one"); + assertThat(icuData).hasValuesFor("/rules/set0/two", "two"); + + assertThat(icuData).hasValuesFor("/rules/set1/zero", "zero"); + assertThat(icuData).hasValuesFor("/rules/set1/one", "one"); + assertThat(icuData).hasValuesFor("/rules/set1/two", "two"); + assertThat(icuData).hasValuesFor("/rules/set1/few", "few"); + } + + @Test + public void testGroupDeduplication_type() { + Set locales = ImmutableSet.of("en_GB"); + CldrData cldrData = cldrData( + // Groups are the same, but assigned separately to different types. + pluralRule(CARDINAL, locales, ZERO, "zero"), + pluralRule(CARDINAL, locales, ONE, "one"), + pluralRule(CARDINAL, locales, TWO, "two"), + + pluralRule(ORDINAL, locales, ZERO, "zero"), + pluralRule(ORDINAL, locales, ONE, "one"), + pluralRule(ORDINAL, locales, TWO, "two")); + + IcuData icuData = PluralsMapper.process(cldrData); + + // Group is deduplicated! 
+ assertThat(icuData).hasValuesFor("/locales/en_GB", "set0"); + assertThat(icuData).hasValuesFor("/locales_ordinals/en_GB", "set0"); + + assertThat(icuData).hasValuesFor("/rules/set0/zero", "zero"); + assertThat(icuData).hasValuesFor("/rules/set0/one", "one"); + assertThat(icuData).hasValuesFor("/rules/set0/two", "two"); + } + + + @Test + public void testGroupDeduplication_locales() { + Set locales1 = ImmutableSet.of("en_GB"); + Set locales2 = ImmutableSet.of("en_NZ"); + CldrData cldrData = cldrData( + // Groups are the same, but assigned separately to different locales. + pluralRule(CARDINAL, locales1, ZERO, "zero"), + pluralRule(CARDINAL, locales1, ONE, "one"), + pluralRule(CARDINAL, locales1, TWO, "two"), + + pluralRule(CARDINAL, locales2, ZERO, "zero"), + pluralRule(CARDINAL, locales2, ONE, "one"), + pluralRule(CARDINAL, locales2, TWO, "two")); + + IcuData icuData = PluralsMapper.process(cldrData); + + // Group is deduplicated! + assertThat(icuData).hasValuesFor("/locales/en_GB", "set0"); + assertThat(icuData).hasValuesFor("/locales/en_NZ", "set0"); + + assertThat(icuData).hasValuesFor("/rules/set0/zero", "zero"); + assertThat(icuData).hasValuesFor("/rules/set0/one", "one"); + assertThat(icuData).hasValuesFor("/rules/set0/two", "two"); + } + + private static CldrData cldrData(CldrValue... 
values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue pluralRule( + PluralType type, Set locales, PluralCount count, String value) { + + StringBuilder cldrPath = new StringBuilder("//supplementalData"); + appendAttribute(cldrPath.append("/plurals"), "type", type); + appendAttribute(cldrPath.append("/pluralRules"), "locales", Joiner.on(' ').join(locales)); + appendAttribute(cldrPath.append("/pluralRule"), "count", count); + return CldrValue.parseValue(cldrPath.toString(), value); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java new file mode 100644 index 00000000000..38ba97185f0 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/RbnfMapperTest.java @@ -0,0 +1,187 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Access.PRIVATE; +import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Access.PUBLIC; +import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.DURATION_RULES; +import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.ORDINAL_RULES; +import static org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapperTest.Group.SPELLOUT_RULES; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.util.Arrays; +import java.util.Optional; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.base.Ascii; +import com.google.common.base.CaseFormat; + +@RunWith(JUnit4.class) +public class RbnfMapperTest { + enum Access { + PUBLIC, PRIVATE; + + @Override public String toString() { + return Ascii.toLowerCase(name()); + } + } + + // IMPORTANT: The ldml.dtd only defines 3 groups: + // NumberingSystemRules, OrdinalRules, SpelloutRules + // but the "specials" files used by ICU introduce additional group names (e.g. DurationRules) + // which are strictly speaking invalid according to the DTD. 
+ enum Group { + NUMBERING_SYSTEM_RULES, ORDINAL_RULES, SPELLOUT_RULES, DURATION_RULES; + + @Override public String toString() { + // It's "NumberingSystemRules" not "numberingSystemRules" + return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name()); + } + } + + @Test + public void testSingleRuleset() { + int idx = 1; + CldrData cldrData = cldrData( + rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "0", "hundred;", idx++), + rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", idx++), + rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", idx++)); + + IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); + + assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules", + // Double-% prefix for "private" access. + RbValue.of("%%2d-year:"), + RbValue.of("0: hundred;"), + RbValue.of("1: oh-=%first-set=;"), + RbValue.of("10: =%first-set=;")); + } + + + @Test + public void testMultipleRulesets() { + // Note that input order of these paths shouldn't matter since they are ordered (and thus + // grouped) by DTD order (relative order matters for values in the same set, but values + // do not have to be grouped together). + int idx = 1; + CldrData cldrData = cldrData( + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "-x", "one;", idx++), + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "Inf", "two;", idx++), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "-x", "five;", idx++), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "Inf", "six;", idx++), + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "NaN", "three;", idx++), + rbnfRule(SPELLOUT_RULES, "first-set", PUBLIC, "0", "four;", idx++), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", idx++), + rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", idx++)); + + IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); + + assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules", + // Single-% prefix for "public" access. 
+ RbValue.of("%first-set:"), + RbValue.of("-x: one;"), + RbValue.of("Inf: two;"), + RbValue.of("NaN: three;"), + RbValue.of("0: four;"), + // Each "heading" appears once at the start of the section. + RbValue.of("%second-set:"), + RbValue.of("-x: five;"), + RbValue.of("Inf: six;"), + RbValue.of("NaN: seven;"), + RbValue.of("0: eight;")); + } + + @Test + public void testSpecials() { + int idx = 1; + CldrData specials = cldrData( + rbnfRule(DURATION_RULES, "min", PRIVATE, "0", "0 minutes; 1 minute; =0= minutes;", idx++), + rbnfRule(DURATION_RULES, "hr", PRIVATE, "0", "0 hours; 1 hour; =0= hours;", idx++), + rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "0", "=0= sec.;", idx++), + rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "60", "=%%min-sec=;", idx++), + rbnfRule(DURATION_RULES, "in-numerals", PUBLIC, "3600", "=%%hr-min-sec=;", idx++)); + + idx = 1; + CldrData cldrData = cldrData( + rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "-x", "−→→;", idx++), + rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0", + "=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", idx++)); + + IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials)); + + assertThat(icuData).hasValuesFor("/RBNFRules/OrdinalRules", + RbValue.of("%digits-ordinal:"), + RbValue.of("-x: \\u2212>>;"), + RbValue.of("0: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;")); + + // The headings are sorted in the output ("hr" < "in-numerals" < "min"). 
+ assertThat(icuData).hasValuesFor("/RBNFRules/DurationRules", + RbValue.of("%%hr:"), + RbValue.of("0: 0 hours; 1 hour; =0= hours;"), + RbValue.of("%in-numerals:"), + RbValue.of("0: =0= sec.;"), + RbValue.of("60: =%%min-sec=;"), + RbValue.of("3600: =%%hr-min-sec=;"), + RbValue.of("%%min:"), + RbValue.of("0: 0 minutes; 1 minute; =0= minutes;")); + } + + // Note that while this is testing the escaping behaviour, the implementation was largely + // derived from a mostly undocumented method in the previous converter, and while it behaves + // the same, it's not entirely obvious why some of the special cases really exist. + @Test + public void testEscaping() { + int idx = 1; + CldrData cldrData = cldrData( + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k1", "\\ Backslash", idx++), + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k2", "←← Arrows →→", idx++), + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", idx++), + rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", idx++)); + + IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty()); + + assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules", + RbValue.of("%escaping:"), + RbValue.of("k1: \\\\ Backslash"), + RbValue.of("k2: << Arrows >>"), + RbValue.of("k3: \\u00DC Umlaut"), + RbValue.of("k4: \\U0001F603 Smiley")); + } + + private static CldrData cldrData(CldrValue... 
values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + // Both ruleset and rbnfrule are "ordered" elements, but to mimic the XML below, it's the + // rbnfrule which needs to have an incrementing sort index: + // + // + // value-1 + // value-2 + // value-3 + // + private static CldrValue rbnfRule( + Group group, String setType, Access access, String key, String value, int ruleIndex) { + + StringBuilder cldrPath = new StringBuilder("//ldml/rbnf"); + appendAttribute(cldrPath.append("/rulesetGrouping"), "type", group); + cldrPath.append("/ruleset"); + appendAttribute(cldrPath, "type", setType); + appendAttribute(cldrPath, "access", access); + cldrPath.append("/rbnfrule#").append(ruleIndex); + appendAttribute(cldrPath, "value", key); + return CldrValue.parseValue(cldrPath.toString(), value); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapperTest.java new file mode 100644 index 00000000000..7ba5ef918de --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/TransformsMapperTest.java @@ -0,0 +1,265 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.mapper; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.truth.Truth.assertThat; +import static java.util.stream.Collectors.joining; +import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.BACKWARD; +import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.BOTH; +import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Direction.FORWARD; +import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Visibility.EXTERNAL; +import static org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapperTest.Visibility.INTERNAL; +import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Map; +import java.util.TreeMap; +import java.util.function.Function; +import java.util.stream.Stream; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.RbPath; + +import com.google.common.base.Ascii; +import com.google.common.collect.ImmutableList; + +@RunWith(JUnit4.class) +public class TransformsMapperTest { + + private static final ImmutableList FILE_HEADER = ImmutableList.of( + "\uFEFF# © 2016 and later: Unicode, Inc. 
and others.", + "# License & terms of use: http://www.unicode.org/copyright.html#License", + "#"); + + private static final int DEFAULT_PATH_COUNT = 7; + + enum Direction { + FORWARD, BACKWARD, BOTH; + + @Override public String toString() { + return Ascii.toLowerCase(name()); + } + } + + enum Visibility { + INTERNAL, EXTERNAL; + + @Override public String toString() { + return Ascii.toLowerCase(name()); + } + } + + @Test + public void testDefaultContent() { + Map fileMap = new TreeMap<>(); + IcuData icuData = TransformsMapper.process(cldrData(), wrap(fileMap)); + + assertThat(fileMap).isEmpty(); + + assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT); + assertThat(icuData).hasValuesFor("/\"%Translit%Hex\"", "%Translit%Hex"); + assertThat(icuData).hasValuesFor("/\"%Translit%UnicodeChar\"", "%Translit%UnicodeChar"); + assertThat(icuData).hasValuesFor("/\"%Translit%UnicodeName\"", "%Translit%UnicodeName"); + assertThat(icuData) + .hasValuesFor("/RuleBasedTransliteratorIDs/Digit-Tone/alias", "NumericPinyin-Pinyin"); + assertThat(icuData) + .hasValuesFor("/RuleBasedTransliteratorIDs/Tone-Digit/alias", "Pinyin-NumericPinyin"); + assertThat(icuData).hasValuesFor("TransliterateLATIN", "", ""); + assertThat(icuData) + .hasValuesFor("TransliteratorNamePattern", "{0,choice,0#|1#{1}|2#{1}-{2}}"); + } + + @Test + public void testForward() { + int idx = 0; + CldrData cldrData = + cldrData(oneWay("foo", "bar", FORWARD, null, INTERNAL, "first second third", ++idx)); + + Map fileMap = new TreeMap<>(); + IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap)); + + assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5); + assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/first/alias", "foo-bar"); + assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/second/alias", "foo-bar"); + assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/third/alias", "foo-bar"); + assertThat(icuData) + 
.hasValuesFor("RuleBasedTransliteratorIDs/foo-bar/internal/direction", "FORWARD"); + assertThat(icuData) + .hasValuesFor( + "RuleBasedTransliteratorIDs/foo-bar/internal/resource:process(transliterator)", + "foo_bar.txt"); + + assertThat(fileMap).hasSize(1); + assertThat(fileMap).containsEntry("foo_bar.txt", headerPlusLines( + "# File: foo_bar.txt", + "# Generated from CLDR", + "#", + "", + "foo --> bar [internal]:", + "first second third")); + } + + @Test + public void testBackward() { + int idx = 0; + CldrData cldrData = + cldrData(oneWay("foo", "bar", BACKWARD, "variant", EXTERNAL, "one two three", ++idx)); + + Map fileMap = new TreeMap<>(); + IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap)); + + assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5); + assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/one/alias", "bar-foo/variant"); + assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/two/alias", "bar-foo/variant"); + assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/three/alias", "bar-foo/variant"); + + // Since the variant uses a '/' in the path element (not a path separator) we cannot just + // parse a string to get the expected path, so we do it the "hard way". 
+ RbPath prefix = RbPath.of("RuleBasedTransliteratorIDs", "bar-foo/variant", "file"); + assertThat(icuData).hasValuesFor(prefix.extendBy("direction"), "REVERSE"); + assertThat(icuData) + .hasValuesFor(prefix.extendBy("resource:process(transliterator)"), "foo_bar_variant.txt"); + + assertThat(fileMap).hasSize(1); + assertThat(fileMap).containsEntry("foo_bar_variant.txt", headerPlusLines( + "# File: foo_bar_variant.txt", + "# Generated from CLDR", + "#", + "", + "foo <-- bar [external]:", + "one two three")); + } + + @Test + public void testBoth() { + int idx = 0; + CldrData cldrData = cldrData( + both("foo", "bar", null, INTERNAL, "forward-alias", "backward-alias", ++idx)); + + Map fileMap = new TreeMap<>(); + IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap)); + + // 3 for each direction. + assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 6); + + // Both directions. + assertThat(icuData) + .hasValuesFor("RuleBasedTransliteratorIDs/foo-bar/internal/direction", "FORWARD"); + assertThat(icuData) + .hasValuesFor("RuleBasedTransliteratorIDs/bar-foo/internal/direction", "REVERSE"); + + // Both aliases. + assertThat(icuData) + .hasValuesFor("RuleBasedTransliteratorIDs/forward-alias/alias", "foo-bar"); + assertThat(icuData) + .hasValuesFor("RuleBasedTransliteratorIDs/backward-alias/alias", "bar-foo"); + + // But the file is the same (obvious really since there's only one). + assertThat(icuData).hasValuesFor( + "RuleBasedTransliteratorIDs/foo-bar/internal/resource:process(transliterator)", + "foo_bar.txt"); + assertThat(icuData).hasValuesFor( + "RuleBasedTransliteratorIDs/bar-foo/internal/resource:process(transliterator)", + "foo_bar.txt"); + + assertThat(fileMap).hasSize(1); + assertThat(fileMap).containsEntry("foo_bar.txt", headerPlusLines( + "# File: foo_bar.txt", + "# Generated from CLDR", + "#", + "", + "foo <-> bar [internal]:", + "forward-alias", + "backward-alias")); + } + + private String headerPlusLines(String... 
lines) { + // For now the files always contain a blank line at the end (to match legacy behaviour) but + // this can, and probably should be changed. + return Stream + .concat(FILE_HEADER.stream(), Arrays.stream(lines)) + .collect(joining("\n", "", "\n\n")); + } + + private static CldrData cldrData(CldrValue... values) { + return CldrDataSupplier.forValues(Arrays.asList(values)); + } + + private static CldrValue oneWay( + String src, String dst, Direction dir, String var, Visibility vis, String alias, int idx) { + + checkArgument(dir != BOTH, "use both() for bidirectional transforms"); + StringBuilder cldrPath = new StringBuilder("//supplementalData/transforms/transform"); + appendAttribute(cldrPath, "source", src); + appendAttribute(cldrPath, "target", dst); + appendAttribute(cldrPath, "direction", dir); + if (var != null) { + appendAttribute(cldrPath, "variant", var); + } + appendAttribute(cldrPath, "visibility", vis); + appendAttribute(cldrPath, dir == FORWARD ? "alias" : "backwardAlias", alias); + cldrPath.append("/tRule#").append(idx); + + String arrow = dir == FORWARD ? 
"-->" : "<--"; + return CldrValue.parseValue( + cldrPath.toString(), + String.format("%s %s %s [%s]:\n%s", src, arrow, dst, vis, alias)); + } + + private static CldrValue both( + String src, String dst, String var, Visibility vis, String alias, String backAlias, int idx) { + + StringBuilder cldrPath = new StringBuilder("//supplementalData/transforms/transform"); + appendAttribute(cldrPath, "source", src); + appendAttribute(cldrPath, "target", dst); + appendAttribute(cldrPath, "direction", BOTH); + if (var != null) { + appendAttribute(cldrPath, "variant", var); + } + appendAttribute(cldrPath, "visibility", vis); + appendAttribute(cldrPath, "alias", alias); + appendAttribute(cldrPath, "backwardAlias", backAlias); + cldrPath.append("/tRule#").append(idx); + + return CldrValue.parseValue( + cldrPath.toString(), + String.format("%s <-> %s [%s]:\n%s\n%s", src, dst, vis, alias, backAlias)); + } + + private static void appendAttribute(StringBuilder out, String k, Object v) { + out.append(String.format("[@%s=\"%s\"]", k, v)); + } + + private static Function wrap(Map data) { + return path -> { + Writer writer = new Writer() { + StringWriter buffer = new StringWriter(); + @Override public void write(char[] chars, int offset, int length) { + buffer.write(chars, offset, length); + } + + @Override public void flush() { + buffer.flush(); + } + + @Override public void close() throws IOException { + buffer.close(); + data.put(path.toString(), buffer.toString()); + } + }; + return new PrintWriter(writer); + }; + } +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformerTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformerTest.java index 1a66a2a4bf9..2a36508069f 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformerTest.java +++ 
b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/regex/RegexTransformerTest.java @@ -9,8 +9,6 @@ import static org.unicode.icu.tool.cldrtoicu.testing.ResultSubjectFactory.assert import java.util.List; -import javax.annotation.concurrent.Immutable; - import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -21,7 +19,6 @@ import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; import org.unicode.icu.tool.cldrtoicu.RbPath; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; /** * Tests for the regex transformer class. Note that in most cases, the rules used here are taken diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java new file mode 100644 index 00000000000..287ff3590c3 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeDataSupplier.java @@ -0,0 +1,70 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import static com.google.common.base.Preconditions.checkArgument; + +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +import org.unicode.cldr.api.CldrData; +import org.unicode.cldr.api.CldrDataSupplier; +import org.unicode.cldr.api.CldrDataType; +import org.unicode.cldr.api.CldrDraftStatus; +import org.unicode.cldr.api.CldrPath; +import org.unicode.cldr.api.CldrValue; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Table; +import com.google.common.collect.TreeBasedTable; + +/** + * Fake data supplier for testing (especially the path value mappers). 
+ */ +public final class FakeDataSupplier extends CldrDataSupplier { + private final Map nonLocaleData = new LinkedHashMap<>(); + private final Table unresolvedData = TreeBasedTable.create(); + private final Table resolvedData = TreeBasedTable.create(); + + public FakeDataSupplier addLocaleData(String localeId, CldrValue... values) { + Arrays.stream(values).forEach(v -> { + unresolvedData.put(localeId, v.getPath(), v); + resolvedData.put(localeId, v.getPath(), v); + }); + return this; + } + + public FakeDataSupplier addInheritedData(String localeId, CldrValue... values) { + Arrays.stream(values) + .forEach(v -> checkArgument(resolvedData.put(localeId, v.getPath(), v) == null, + "path already present in unresolved CLDR data: %s", v.getPath())); + return this; + } + + public FakeDataSupplier addSupplementalData(CldrValue... values) { + Arrays.stream(values).forEach(v -> nonLocaleData.put(v.getPath(), v)); + return this; + } + + @Override public CldrData getDataForLocale(String localeId, CldrResolution resolution) { + Table data = + resolution == CldrResolution.UNRESOLVED ? 
unresolvedData : resolvedData; + return CldrDataSupplier.forValues(data.row(localeId).values()); + } + + @Override public CldrData getDataForType(CldrDataType type) { + return CldrDataSupplier.forValues( + Iterables.filter(nonLocaleData.values(), v -> v.getPath().getDataType() == type)); + } + + @Override public Set getAvailableLocaleIds() { + return Collections.unmodifiableSet(resolvedData.rowKeySet()); + } + + @Override public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus cldrDraftStatus) { + throw new UnsupportedOperationException("not supported in fake data supplier"); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeResult.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeResult.java new file mode 100644 index 00000000000..0c2a2853193 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeResult.java @@ -0,0 +1,106 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import static com.google.common.base.Preconditions.checkState; + +import java.util.Comparator; +import java.util.Objects; + +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; +import org.unicode.icu.tool.cldrtoicu.RbPath; + +import com.google.common.collect.ImmutableList; + +/** + * A fake result, primarily for testing mappers. This implementation does not "play well" with + * other result implementations. + */ +public final class FakeResult extends Result { + private static final Comparator ORDERING = + Comparator.comparing(FakeResult::getKey) + .thenComparing(r -> r.index) + .thenComparing(r -> r.isFallback); + + /** + * Returns a primary result. 
Care must be taken to ensure that multiple "equal()" results are + * not used in the same test (results are equal if their path and index are equal, and they + * share the same fallback state). + * + * @param path the path of the result + * @param index the sort index of the result (to distinguish results with the same path). + * @param isGrouped whether values in the result should be grouped into a separate sub-array. + * @param values the result values. + */ + public static Result of(String path, int index, boolean isGrouped, String... values) { + return new FakeResult( + RbPath.parse(path), ImmutableList.copyOf(values), isGrouped, index, false); + } + + /** + * Returns a fallback result. Note that currently fallbacks are never "grouped". + * + * @param path the path of the result + * @param index the sort index of the result (to distinguish results with the same path). + * @param values the result values. + */ + public static Result fallback(String path, int index, String... values) { + return new FakeResult(RbPath.parse(path), ImmutableList.copyOf(values), false, index, true); + } + + private final boolean grouped; + private final ImmutableList values; + private final boolean isFallback; + private final int index; + + private FakeResult( + RbPath path, ImmutableList values, boolean grouped, int index, boolean isFallback) { + super(path); + this.grouped = grouped; + this.values = values; + this.isFallback = isFallback; + this.index = index; + } + + boolean isFallback() { + return isFallback; + } + + @Override public boolean isGrouped() { + return grouped; + } + + @Override public ImmutableList getValues() { + return values; + } + + @Override public boolean isFallbackFor(Result r) { + FakeResult other = (FakeResult) r; + return isFallback && !other.isFallback + && getKey().equals(r.getKey()) + && index == (other).index; + } + + @Override public int compareTo(Result other) { + int signum = ORDERING.compare(this, (FakeResult) other); + checkState(signum != 0 || this == 
other, + "equivalent but non-identical results found in test data: %s / %s", this, other); + return signum; + } + + // We really don't want to pretend to support mixing implementations of Result in tests. + @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") + @Override public boolean equals(Object obj) { + FakeResult other = (FakeResult) obj; + boolean isEqual = getKey().equals(other.getKey()) + && index == other.index + && isFallback == other.isFallback; + checkState(!isEqual || this == other, + "equivalent but non-identical results found in test data: %s / %s", this, other); + return isEqual; + } + + @Override public int hashCode() { + return Objects.hash(getKey(), index, isFallback); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeTransformer.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeTransformer.java new file mode 100644 index 00000000000..862522cabc2 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/FakeTransformer.java @@ -0,0 +1,37 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import java.util.HashMap; +import java.util.Map; + +import org.unicode.cldr.api.CldrValue; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer; +import org.unicode.icu.tool.cldrtoicu.RbPath; + +import com.google.common.collect.ImmutableList; + +public final class FakeTransformer extends PathValueTransformer { + private final Map> resultMap = new HashMap<>(); + private final Map> fallbackMap = new HashMap<>(); + + public void addResults(CldrValue value, Result... results) { + resultMap.put(value, ImmutableList.copyOf(results)); + } + + public void addFallbacks(String path, Result... 
results) { + fallbackMap.put(RbPath.parse(path), ImmutableList.copyOf(results)); + } + + @Override public ImmutableList transform(CldrValue value) { + return resultMap.getOrDefault(value, ImmutableList.of()); + } + + @Override public ImmutableList transform(CldrValue value, DynamicVars ignored) { + return resultMap.getOrDefault(value, ImmutableList.of()); + } + + @Override public ImmutableList getFallbackResultsFor(RbPath key, DynamicVars ignored) { + return fallbackMap.getOrDefault(key, ImmutableList.of()); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/IcuDataSubject.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/IcuDataSubject.java new file mode 100644 index 00000000000..cc418122e1c --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/IcuDataSubject.java @@ -0,0 +1,56 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import java.util.List; + +import org.unicode.icu.tool.cldrtoicu.IcuData; +import org.unicode.icu.tool.cldrtoicu.RbPath; +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.truth.FailureMetadata; +import com.google.common.truth.IterableSubject; +import com.google.common.truth.Subject; + +public final class IcuDataSubject extends Subject { + private final IcuData actual; + + protected IcuDataSubject(FailureMetadata metadata, IcuData actual) { + super(metadata, actual); + this.actual = actual; + } + + public void hasName(String name) { + check("getName()").that(actual.getName()).isEqualTo(name); + } + + public void hasFallback(boolean fallback) { + check("hasFallback()").that(actual.hasFallback()).isEqualTo(fallback); + } + + public IterableSubject getPaths() { + return check("getPaths()").that(actual.getPaths()); + } + + public void hasEmptyValue(String rbPath) { + hasValuesFor(rbPath, 
RbValue.of("")); + } + + public void hasValuesFor(String rbPath, String... values) { + hasValuesFor(rbPath, RbValue.of(values)); + } + + public void hasValuesFor(String rbPath, RbValue... values) { + hasValuesFor(RbPath.parse(rbPath), values); + } + + public void hasValuesFor(RbPath p, String... values) { + hasValuesFor(p, RbValue.of(values)); + } + + public void hasValuesFor(RbPath p, RbValue... values) { + List rbValues = actual.get(p); + check("get('%s')", p).that(rbValues).isNotNull(); + check("get('%s')", p).that(rbValues).containsExactlyElementsIn(values).inOrder(); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/IcuDataSubjectFactory.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/IcuDataSubjectFactory.java new file mode 100644 index 00000000000..a1e924377c9 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/IcuDataSubjectFactory.java @@ -0,0 +1,23 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import org.unicode.icu.tool.cldrtoicu.IcuData; + +import com.google.common.truth.FailureMetadata; +import com.google.common.truth.Subject; +import com.google.common.truth.Truth; + +/** Truth subject for asserting about ICU data instances (makes tests much more readable). 
*/ +public final class IcuDataSubjectFactory implements Subject.Factory { + public static IcuDataSubject assertThat(IcuData result) { + return Truth.assertAbout(new IcuDataSubjectFactory()).that(result); + } + + @Override + public IcuDataSubject createSubject(FailureMetadata failureMetadata, IcuData that) { + return new IcuDataSubject(failureMetadata, that); + } + + IcuDataSubjectFactory() {} +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubject.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubject.java index ed123420cb8..471939d2c55 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubject.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubject.java @@ -4,9 +4,11 @@ package org.unicode.icu.tool.cldrtoicu.testing; import static com.google.common.base.Preconditions.checkArgument; +import org.unicode.icu.tool.cldrtoicu.RbPath; + +import com.google.common.truth.BooleanSubject; import com.google.common.truth.FailureMetadata; import com.google.common.truth.Subject; -import org.unicode.icu.tool.cldrtoicu.RbPath; public final class RbPathSubject extends Subject { // For use when chaining from other subjects. 
@@ -30,4 +32,16 @@ public final class RbPathSubject extends Subject { checkArgument(n >= 0, "invalid path length: %s", n); check("length()").that(actual.length()).isEqualTo(n); } + + public final BooleanSubject startsWith(RbPath path) { + return check("startsWith('%s')", path).that(actual.startsWith(path)); + } + + public final BooleanSubject endsWith(RbPath path) { + return check("endsWith('%s')", path).that(actual.endsWith(path)); + } + + public final BooleanSubject contains(RbPath path) { + return check("contains('%s')", path).that(actual.contains(path)); + } } diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubjectFactory.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubjectFactory.java index 537b4bbfc84..823285040c7 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubjectFactory.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbPathSubjectFactory.java @@ -2,10 +2,11 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu.testing; +import org.unicode.icu.tool.cldrtoicu.RbPath; + import com.google.common.truth.FailureMetadata; import com.google.common.truth.Subject; import com.google.common.truth.Truth; -import org.unicode.icu.tool.cldrtoicu.RbPath; /** Truth subject for asserting about resource bundle paths (makes tests much more readable). */ public final class RbPathSubjectFactory implements Subject.Factory { diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbValueSubject.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbValueSubject.java new file mode 100644 index 00000000000..d20c0b50fe6 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbValueSubject.java @@ -0,0 +1,39 @@ +// © 2019 and later: Unicode, Inc. 
and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import static com.google.common.base.Preconditions.checkArgument; + +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.truth.FailureMetadata; +import com.google.common.truth.Subject; + +public final class RbValueSubject extends Subject { + // For use when chaining from other subjects. + public static Factory rbValues() { + return RbValueSubject::new; + } + + private final RbValue actual; + + protected RbValueSubject(FailureMetadata metadata, RbValue actual) { + super(metadata, actual); + this.actual = actual; + } + + /** Asserts that the elements of the value are exactly the single given element. */ + public final void hasValue(String value) { + check("getElements()").that(actual.getElements()).containsExactly(value); + } + + /** Asserts that the elements of the value are exactly the given elements, in order. */ + public final void hasValues(String... values) { + check("getElements()").that(actual.getElements()).containsExactlyElementsIn(values).inOrder(); + } + + public final void hasSize(int n) { + checkArgument(n > 0, "invalid element count: %s", n); + check("getElements().size()").that(actual.getElements().size()).isEqualTo(n); + } +} diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbValueSubjectFactory.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbValueSubjectFactory.java new file mode 100644 index 00000000000..f7e8d66a60e --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/RbValueSubjectFactory.java @@ -0,0 +1,23 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.testing; + +import org.unicode.icu.tool.cldrtoicu.RbValue; + +import com.google.common.truth.FailureMetadata; +import com.google.common.truth.Subject; +import com.google.common.truth.Truth; + +/** Truth subject for asserting about resource bundle values (makes tests much more readable). */ +public final class RbValueSubjectFactory implements Subject.Factory { + public static RbValueSubject assertThat(RbValue result) { + return Truth.assertAbout(new RbValueSubjectFactory()).that(result); + } + + @Override + public RbValueSubject createSubject(FailureMetadata failureMetadata, RbValue that) { + return new RbValueSubject(failureMetadata, that); + } + + RbValueSubjectFactory() {} +} \ No newline at end of file diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/ResultSubjectFactory.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/ResultSubjectFactory.java index a8a2f8ff1a9..b50613fb171 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/ResultSubjectFactory.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/testing/ResultSubjectFactory.java @@ -2,10 +2,11 @@ // License & terms of use: http://www.unicode.org/copyright.html package org.unicode.icu.tool.cldrtoicu.testing; +import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; + import com.google.common.truth.FailureMetadata; import com.google.common.truth.Subject; import com.google.common.truth.Truth; -import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result; /** Truth subject for asserting about transformation results (makes tests much more readable). */ public class ResultSubjectFactory implements Subject.Factory {