ICU-21084 Migrating ICU tools to use PathMatcher

This commit is contained in:
David Beaumont 2020-05-04 10:13:49 +00:00
parent 4231ca5be0
commit 566e0f8686
18 changed files with 1247 additions and 1100 deletions

View file

@ -0,0 +1,457 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkNotNull;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PathOrder;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrData.PrefixVisitor.Context;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.cldr.api.PathMatcher;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
* An immutable processor which can be configured to process CLDR data according to a series of
* mappings from CLDR paths to "actions".
*
* <p>In typical use a processor is created statically to bind paths and handler functions
* (actions) together, and {@link CldrDataProcessor#process(CldrData, Object, PathOrder)} is
* then called once for each {@link CldrData} instance.
*
* <p>A processor is built by adding a mixture of "actions" to a builder. An action either defines
* how to handle a single value (see {@link SubProcessor#addValueAction addValueAction()}) or how
* to start a new sub-processor at a specific point in the data hierarchy (see {@link
* SubProcessor#addAction addAction()} or {@link SubProcessor#addSubprocessor addSubprocessor()}).
*
* @param <T> the main "state" type used by the processor for the top-level processing.
*/
public class CldrDataProcessor<T> {
/**
* Returns a new, empty root builder for a processor which operates on a "state" of type
* {@code <T>}. Actions are registered on the returned builder, and {@link Builder#build()}
* then produces the processor itself.
*/
public static <T> Builder<T> builder() {
return new Builder<>();
}
/**
* A builder for processing a CLDR data sub-hierarchy.
*
* @param <T> the "state" type used by the processor.
*/
public static abstract class SubProcessor<T> {
final List<PrefixBuilder<?, T>> prefixActions = new ArrayList<>();
final List<ValueAction<T>> valueActions = new ArrayList<>();
private SubProcessor() { }
/**
 * Registers a sub-hierarchy action for a {@link PathMatcher} prefix pattern and returns the
 * builder used to configure that sub-hierarchy.
 *
 * <p>Use this overload when the subtype state can be created without the parent state or the
 * path prefix, but needs some post-processing. For example, the subtype state might just be a
 * {@code List} whose elements must be combined with the parent state once processing of the
 * sub-hierarchy is complete.
 *
 * <pre>{@code
 * processor
 *     .addAction("//parent/path", ArrayList::new, ParentState::addValues)
 *     .addValueAction("value/suffix", List::add);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @param newStateFn a supplier of subtype state instances for each sub-processing step.
 * @param doneFn called after each sub-processing step.
 * @param <S> the subtype state type.
 * @return the builder for the new sub-hierarchy.
 */
public <S> SubProcessor<S> addAction(
        String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn) {
    // Adapt the supplier to the general form; parent state and path prefix are ignored.
    BiFunction<T, CldrPath, S> stateFromSupplier = (parent, prefix) -> newStateFn.get();
    return addAction(pattern, stateFromSupplier, doneFn);
}
/**
 * Registers a sub-hierarchy action for a {@link PathMatcher} prefix pattern and returns the
 * builder used to configure that sub-hierarchy.
 *
 * <p>This overload resembles {@link #addAction(String, Supplier, BiConsumer)}, but is meant
 * for subtype state which is derived from the path prefix at which the sub-hierarchy starts.
 *
 * <pre>{@code
 * processor
 *     .addAction("//parent/path[@type=*]", SubState::fromType, ParentState::addSubState)
 *     .addValueAction("value/suffix", SubState::collectValue);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @param newStateFn creates a subtype state instance from the matched path prefix.
 * @param doneFn called after each sub-processing step.
 * @param <S> the subtype state type.
 * @return the builder for the new sub-hierarchy.
 */
public <S> SubProcessor<S> addAction(
        String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn) {
    // Adapt to the general form; only the path prefix is used to create the state.
    BiFunction<T, CldrPath, S> stateFromPrefix = (parent, prefix) -> newStateFn.apply(prefix);
    return addAction(pattern, stateFromPrefix, doneFn);
}
/**
 * Registers a sub-hierarchy action for a {@link PathMatcher} prefix pattern and returns the
 * builder used to configure that sub-hierarchy.
 *
 * <p>Use this overload when the subtype state is obtained from the parent state (e.g. it is
 * an inner class of it) and is independent of the path prefix at which the sub-hierarchy is
 * rooted.
 *
 * <pre>{@code
 * processor
 *     .addAction("//parent/path", ParentState::newValueCollector)
 *     .addValueAction("value/suffix", ValueCollector::addValue);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @param newStateFn creates a subtype state instance from the parent state.
 * @param <S> the subtype state type.
 * @return the builder for the new sub-hierarchy.
 */
public <S> SubProcessor<S> addAction(String pattern, Function<T, S> newStateFn) {
    // Adapt to the two-argument form; the path prefix is ignored.
    BiFunction<T, CldrPath, S> stateFromParent = (parent, prefix) -> newStateFn.apply(parent);
    return addAction(pattern, stateFromParent);
}
/**
 * Registers a sub-hierarchy action for a {@link PathMatcher} prefix pattern and returns the
 * builder used to configure that sub-hierarchy.
 *
 * <p>Use this overload when the subtype state is obtained from both the parent state (e.g.
 * it is an inner class of it) and the path prefix at which the sub-hierarchy is rooted, and
 * no post-processing is needed.
 *
 * <pre>{@code
 * processor
 *     .addAction("//parent/path[@type=*]", ParentState::newCollectorOfType)
 *     .addValueAction("value/suffix", ValueCollector::addValue);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @param newStateFn creates a subtype state instance from the parent state and path prefix.
 * @param <S> the subtype state type.
 * @return the builder for the new sub-hierarchy.
 */
public <S> SubProcessor<S> addAction(
        String pattern, BiFunction<T, CldrPath, S> newStateFn) {
    // No post-processing is wanted, so the "done" callback does nothing.
    BiConsumer<T, S> noPostProcessing = (parent, child) -> { };
    return addAction(pattern, newStateFn, noPostProcessing);
}
/**
 * Registers a sub-hierarchy action for a {@link PathMatcher} prefix pattern and returns the
 * builder used to configure that sub-hierarchy.
 *
 * <p>This is the most general way to add a sub-hierarchy action, for cases where the subtype
 * state depends on both parent state and path prefix, and post-processing is required. Every
 * other {@code addAction} overload ends up delegating here.
 *
 * <pre>{@code
 * processor
 *     .addAction("//parent/path[@type=*]", ParentState::newCollector, ParentState::done)
 *     .addValueAction("value/suffix", ValueCollector::addValue);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @param newStateFn creates a subtype state instance from the parent state and path prefix.
 * @param doneFn called after each sub-processing step.
 * @param <S> the subtype state type.
 * @return the builder for the new sub-hierarchy.
 */
public <S> SubProcessor<S> addAction(
        String pattern,
        BiFunction<T, CldrPath, S> newStateFn,
        BiConsumer<T, ? super S> doneFn) {
    // The pattern is resolved by this builder, so nested builders can extend their own prefix.
    PathMatcher prefixMatcher = getMatcher(pattern);
    PrefixBuilder<S, T> subBuilder = new PrefixBuilder<>(prefixMatcher, newStateFn, doneFn);
    prefixActions.add(subBuilder);
    return subBuilder;
}
/**
 * Adds a sub-processor for the sub-hierarchy rooted at the given {@link PathMatcher} prefix
 * pattern. The returned builder has the same state type as this one, and the sub-processor is
 * handed the parent's own state instance.
 *
 * <p>Use this when several actions are needed below a certain point in the hierarchy and
 * they should all operate on the same state instance.
 *
 * <pre>{@code
 * SubProcessor<MyCollector> subprocessor = processor.addSubprocessor("//parent/path");
 * subprocessor.addValueAction("value/suffix", MyCollector::addValue);
 * subprocessor.addValueAction("other/suffix", MyCollector::addOtherValue);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @return the builder for the new sub-hierarchy.
 */
public SubProcessor<T> addSubprocessor(String pattern) {
    // The "new" state is simply the parent state itself.
    BiFunction<T, CldrPath, T> reuseParentState = (state, prefix) -> state;
    return addAction(pattern, reuseParentState);
}
/**
 * Adds a sub-processor for the sub-hierarchy rooted at the given {@link PathMatcher} prefix
 * pattern. The returned builder has the same state type as this one, and the sub-processor is
 * handed the parent's own state instance.
 *
 * <p>Use this when some setup is required before a sub-hierarchy is processed, but the
 * sub-processor state is the same as the parent state.
 *
 * <pre>{@code
 * SubProcessor<MyCollector> subprocessor =
 *     processor.addSubprocessor("//parent/path", MyCollector::startFn);
 * subprocessor.addValueAction("value/suffix", MyCollector::addValue);
 * }</pre>
 *
 * @param pattern the path pattern for the prefix where sub-processing starts.
 * @param startFn a handler called with the state and the matched path prefix when
 *     sub-processing begins.
 * @return the builder for the new sub-hierarchy.
 */
public SubProcessor<T> addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn) {
    // Run the start handler, then carry on with the parent's own state instance.
    BiFunction<T, CldrPath, T> startThenReuseState = (state, prefix) -> {
        startFn.accept(state, prefix);
        return state;
    };
    return addAction(pattern, startThenReuseState);
}
/**
 * Registers an action for the {@link CldrValue}s visited in the current sub-hierarchy whose
 * paths match the given {@link PathMatcher} leaf-path pattern.
 *
 * <p>This method is expected to be called at least once for each sub-hierarchy processor,
 * since it is what handles the actual CLDR values. The pattern should match leaf-paths in
 * the CLDR data hierarchy, rather than path prefixes.
 *
 * <p>Several value actions can be added to one sub-hierarchy processor; they are tried in
 * the order in which they were added. Sub-hierarchy actions and value actions can be mixed on
 * the same processor, but sub-hierarchy processors take precedence, so the same value cannot
 * be matched by both a sub-hierarchy processor and a value action.
 *
 * <p>For example:
 * <pre>{@code
 * processor
 *     .addAction("//parent/path", ...)
 *     .addValueAction("value/suffix", ...);
 * // This will never match any values since the sub-hierarchy processor takes precedence!
 * processor.addValueAction("//parent/path/value/suffix", ...);
 * }</pre>
 *
 * @param pattern the CLDR path suffix identifying the values to be processed.
 * @param doFn the action to be carried out for each value.
 */
public void addValueAction(String pattern, BiConsumer<T, CldrValue> doFn) {
    PathMatcher valueMatcher = getMatcher(pattern);
    ValueAction<T> valueAction = new ValueAction<>(valueMatcher, doFn);
    valueActions.add(valueAction);
}
abstract PathMatcher getMatcher(String pattern);
}
/**
 * The root builder from which a CLDR data processor is created.
 *
 * @param <T> the processor state type.
 */
public static final class Builder<T> extends SubProcessor<T> {
    private Builder() { }

    /** Builds the immutable CLDR data processor from the actions added so far. */
    public CldrDataProcessor<T> build() {
        // Build every nested prefix builder, keeping the order in which they were added.
        List<PrefixAction<?, T>> builtPrefixActions = new ArrayList<>();
        for (PrefixBuilder<?, T> prefixBuilder : prefixActions) {
            builtPrefixActions.add(prefixBuilder.build());
        }
        return new CldrDataProcessor<>(builtPrefixActions, valueActions);
    }

    /** At the root, patterns are parsed as given (there is no prefix to extend). */
    @Override
    PathMatcher getMatcher(String pattern) {
        return PathMatcher.of(pattern);
    }
}
/**
 * Builder for a sub-hierarchy data processor rooted at some specified path prefix.
 *
 * @param <S> the subtype processor state.
 * @param <T> the parent processor state.
 */
private static class PrefixBuilder<S, T> extends SubProcessor<S> {
    private final PathMatcher matcher;
    private final BiFunction<T, CldrPath, S> newStateFn;
    private final BiConsumer<T, ? super S> doneFn;

    PrefixBuilder(
            PathMatcher matcher,
            BiFunction<T, CldrPath, S> newStateFn,
            BiConsumer<T, ? super S> doneFn) {
        this.matcher = checkNotNull(matcher);
        this.newStateFn = checkNotNull(newStateFn);
        this.doneFn = checkNotNull(doneFn);
    }

    /** Builds the action for this prefix, including the actions of all nested builders. */
    PrefixAction<S, T> build() {
        List<PrefixAction<?, S>> childActions = new ArrayList<>();
        for (PrefixBuilder<?, S> childBuilder : prefixActions) {
            childActions.add(childBuilder.build());
        }
        return new PrefixAction<>(childActions, valueActions, matcher, newStateFn, doneFn);
    }

    /** Patterns added to this builder are treated as suffixes of this builder's prefix. */
    @Override
    PathMatcher getMatcher(String pattern) {
        return matcher.withSuffix(pattern);
    }
}
private final ImmutableList<PrefixAction<?, T>> prefixActions;
private final ImmutableList<ValueAction<T>> valueActions;
// Stores immutable copies of the given action lists, so this processor does not share the
// (mutable) lists held by the builder which created it.
private CldrDataProcessor(
List<PrefixAction<?, T>> prefixActions,
List<ValueAction<T>> valueActions) {
this.prefixActions = ImmutableList.copyOf(prefixActions);
this.valueActions = ImmutableList.copyOf(valueActions);
}
/**
* Processes a CLDR data instance according to the actions registered for this processor in DTD
* order. This method is preferred over {@link #process(CldrData, Object, PathOrder)} and
* eventually the ability to even specify a path order for processing will be removed.
*
* <p>This is the main method used to drive the processing of some CLDR data and is typically
* used like:
*
* <pre>{@code
* MyResult result = CLDR_PROCESSOR.process(data, new MyResult());
* }</pre>
* <p>or:
* <pre>{@code
* MyResult result = CLDR_PROCESSOR.process(data, MyResult.newBuilder()).build();
* }</pre>
*
* @param data the CLDR data to be processed.
* @param state an instance of the "primary" state.
* @return the given primary state (after modification).
*/
public T process(CldrData data, T state) {
return process(data, state, PathOrder.DTD);
}
/**
 * Processes a CLDR data instance according to the actions registered for this processor,
 * visiting paths in the given order. Prefer {@link #process(CldrData, Object)} whenever
 * possible, and avoid relying on path ordering for processing.
 *
 * @param data the CLDR data to be processed.
 * @param state an instance of the "primary" state.
 * @param pathOrder the order in which CLDR paths should be visited.
 * @return the given primary state (after modification).
 */
public T process(CldrData data, T state, PathOrder pathOrder) {
    // The root visitor has nothing to do on completion; the caller gets the state back.
    DispatchingVisitor<T> rootVisitor = new DispatchingVisitor<>(this, state, unused -> { });
    data.accept(pathOrder, rootVisitor);
    return state;
}
// Offers the prefix to each prefix action in registration order, stopping at the first action
// which reports a match.
private void dispatchPrefixActions(T state, CldrPath prefix, Context context) {
    boolean handled = false;
    for (int i = 0; !handled && i < prefixActions.size(); i++) {
        handled = prefixActions.get(i).matches(state, prefix, context);
    }
}
// Offers the value to each value action in registration order, stopping at the first action
// which reports a match.
private void dispatchValueActions(T state, CldrValue value) {
    boolean handled = false;
    for (int i = 0; !handled && i < valueActions.size(); i++) {
        handled = valueActions.get(i).matches(state, value);
    }
}
/*
 * Implementation notes:
 *
 * "PrefixAction" is a critical part of the design of the path visitor. It is the bridge between
 * the parent visitation (state type 'T') and the child visitation (state type 'S').
 *
 * It is the only class which needs to know about both types. Both types are known when the
 * CldrDataProcessor is made, but during visitation the caller of the "matches" method does not
 * need to know the child type, which is why the parent can simply hold a list of
 * "PrefixAction<?, T>" and does not need any magical recasting.
 *
 * It might only be a few lines of code, but it can only exist in a class which knows about
 * both parent and child types (obtaining a new child state is a function of the parent state).
 */
static final class PrefixAction<S, T> extends CldrDataProcessor<S> {
    private final PathMatcher matcher;
    private final BiFunction<T, CldrPath, S> newStateFn;
    private final BiConsumer<T, ? super S> doneFn;

    PrefixAction(
            List<PrefixAction<?, S>> prefixActions,
            List<ValueAction<S>> valueActions,
            PathMatcher matcher,
            BiFunction<T, CldrPath, S> newStateFn,
            BiConsumer<T, ? super S> doneFn) {
        super(prefixActions, valueActions);
        this.matcher = checkNotNull(matcher);
        this.newStateFn = checkNotNull(newStateFn);
        this.doneFn = checkNotNull(doneFn);
    }

    /**
     * If the prefix matches, installs a child visitor (with freshly created child state) into
     * the given context and returns true; otherwise does nothing and returns false.
     */
    public boolean matches(T state, CldrPath prefix, Context context) {
        if (!matcher.locallyMatches(prefix)) {
            return false;
        }
        // Bind the parent state now, so the child visitor only has to supply its own state.
        Consumer<S> onChildDone = childState -> doneFn.accept(state, childState);
        S childState = newStateFn.apply(state, prefix);
        context.install(
                new DispatchingVisitor<>(this, childState, onChildDone),
                DispatchingVisitor::done);
        return true;
    }
}
/**
 * Binds a path matcher to the handler which is invoked for each value whose path matches.
 *
 * <p>Instances are immutable: both fields are assigned exactly once, in the constructor.
 *
 * @param <T> the state type passed to the handler.
 */
private static final class ValueAction<T> {
    private final PathMatcher matcher;
    // Final like the matcher: the handler is never reassigned, and these actions are held by
    // a processor which is documented as immutable.
    private final BiConsumer<T, CldrValue> doFn;

    ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn) {
        this.matcher = checkNotNull(matcher);
        this.doFn = checkNotNull(doFn);
    }

    /**
     * Invokes the handler with the given state and value if the value's path matches.
     *
     * @param state the state instance passed to the handler.
     * @param value the CLDR value being visited.
     * @return true if the path matched (and the handler was invoked), false otherwise.
     */
    boolean matches(T state, CldrValue value) {
        if (matcher.locallyMatches(value.getPath())) {
            doFn.accept(state, value);
            return true;
        }
        return false;
    }
}
/**
 * The visitor which forwards prefix and value visitation events to a processor's registered
 * actions, together with the state instance for the current (sub-)hierarchy.
 *
 * @param <T> the state type of the processor being driven.
 */
private static final class DispatchingVisitor<T> implements PrefixVisitor {
    // Private and final like the other fields: the processor is assigned once in the
    // constructor and only ever read by this class.
    private final CldrDataProcessor<T> processor;
    private final T state;
    private final Consumer<T> doneFn;

    DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn) {
        this.processor = checkNotNull(processor);
        this.state = checkNotNull(state);
        this.doneFn = checkNotNull(doneFn);
    }

    /** Gives the processor's prefix actions the chance to install a sub-visitor. */
    @Override
    public void visitPrefixStart(CldrPath prefix, Context context) {
        processor.dispatchPrefixActions(state, prefix, context);
    }

    /** Passes the value to the processor's value actions. */
    @Override
    public void visitValue(CldrValue value) {
        processor.dispatchValueActions(state, value);
    }

    // Important: This is NOT visitPrefixEnd() since that happens multiple times and isn't
    // going to be called for the prefix at which this visitor was started.
    void done() {
        doneFn.accept(state);
    }
}
}

View file

@ -37,12 +37,15 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
@ -83,15 +86,15 @@ import com.google.common.io.CharStreams;
*/
public final class LdmlConverter {
// TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).
private static final PathMatcher GENDER_LIST_PATHS =
private static final Predicate<CldrPath> GENDER_LIST_PATHS =
supplementalMatcher("gender");
private static final PathMatcher LIKELY_SUBTAGS_PATHS =
private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS =
supplementalMatcher("likelySubtags");
private static final PathMatcher METAZONE_PATHS =
private static final Predicate<CldrPath> METAZONE_PATHS =
supplementalMatcher("metaZones", "primaryZones");
private static final PathMatcher METADATA_PATHS =
private static final Predicate<CldrPath> METADATA_PATHS =
supplementalMatcher("metadata");
private static final PathMatcher SUPPLEMENTAL_DATA_PATHS =
private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
supplementalMatcher(
"calendarData",
"calendarPreferenceData",
@ -109,22 +112,23 @@ public final class LdmlConverter {
"unitPreferenceData",
"weekData",
"weekOfPreference");
private static final PathMatcher CURRENCY_DATA_PATHS =
private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
supplementalMatcher("currencyData");
private static final PathMatcher NUMBERING_SYSTEMS_PATHS =
private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS =
supplementalMatcher("numberingSystems");
private static final PathMatcher WINDOWS_ZONES_PATHS =
private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS =
supplementalMatcher("windowsZones");
private static PathMatcher supplementalMatcher(String... spec) {
private static Predicate<CldrPath> supplementalMatcher(String... spec) {
checkArgument(spec.length > 0, "must supply at least one matcher spec");
if (spec.length == 1) {
return PathMatcher.of("supplementalData/" + spec[0]);
return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf;
}
return PathMatcher.anyOf(
return
Arrays.stream(spec)
.map(s -> PathMatcher.of("supplementalData/" + s))
.toArray(PathMatcher[]::new));
.map(s -> PathMatcher.of("//supplementalData/" + s))
.map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf))
.reduce(p -> false, Predicate::or);
}
private static RbPath RB_PARENT = RbPath.of("%%Parent");
@ -514,7 +518,7 @@ public final class LdmlConverter {
private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
private void processSupplemental(
String label, PathMatcher paths, String dir, boolean addCldrVersion) {
String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) {
IcuData icuData =
SupplementalMapper.process(src, supplementalTransformer, label, paths);
// A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the

View file

@ -1,260 +0,0 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndex;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrPath;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
/**
* An immutable matcher for {@link CldrPath} instances. A path matcher specification looks like
* {@code "foo/*[@x="z"]/bar[@y=*]"}, where element names and attribute values can be wildcards.
*
* <p>Note that the path fragment represented by the specification does not include either leading
* or trailing {@code '/'}. This is because matching can occur at any point in a {@link CldrPath}.
* The choice of where to match in the path is governed by the match method used (e.g.
* {@link PathMatcher#matchesSuffixOf(CldrPath)}).
*/
public abstract class PathMatcher {
/**
* Parses the path specification into a matcher.
*
* @param pathSpec a path fragment specification, such as {@code "a"}, {@code "a/b"} or
*     {@code "a/b[@x=*]"}.
* @return a matcher for the parsed specification.
* @throws IllegalArgumentException if an element or attribute in the specification cannot be
*     parsed.
*/
public static PathMatcher of(String pathSpec) {
// Supported so far: "a", "a/b", "a/b[@x=*]"
return new BasicMatcher(parse(pathSpec));
}
/**
 * Returns a composite matcher which tests the given matchers in order and matches if any one
 * of them does. A single matcher is returned as-is rather than being wrapped.
 *
 * @param matchers the matchers to combine (at least one is required).
 * @return the combined matcher.
 */
public static PathMatcher anyOf(PathMatcher... matchers) {
    checkArgument(matchers.length > 0, "must supply at least one matcher");
    if (matchers.length > 1) {
        return new CompositeMatcher(ImmutableList.copyOf(matchers));
    }
    // Exactly one matcher: no need for a composite.
    return checkNotNull(matchers[0]);
}
/** Attempts a full match against a given path. */
public abstract boolean matches(CldrPath path);
/** Attempts a suffix match against a given path. */
public abstract boolean matchesSuffixOf(CldrPath path);
/** Attempts a prefix match against a given path. */
public abstract boolean matchesPrefixOf(CldrPath path);
// A matcher which combines a sequence of other matchers, matching if any one of them matches.
private static final class CompositeMatcher extends PathMatcher {
    private final ImmutableList<PathMatcher> matchers;

    private CompositeMatcher(ImmutableList<PathMatcher> matchers) {
        checkArgument(matchers.size() > 1);
        this.matchers = checkNotNull(matchers);
    }

    @Override
    public boolean matches(CldrPath path) {
        return matchers.stream().anyMatch(m -> m.matches(path));
    }

    @Override
    public boolean matchesSuffixOf(CldrPath path) {
        return matchers.stream().anyMatch(m -> m.matchesSuffixOf(path));
    }

    @Override
    public boolean matchesPrefixOf(CldrPath path) {
        return matchers.stream().anyMatch(m -> m.matchesPrefixOf(path));
    }
}
// A matcher for a single parsed specification, holding one predicate per path element.
private static final class BasicMatcher extends PathMatcher {
    private final ImmutableList<Predicate<CldrPath>> elementMatchers;

    private BasicMatcher(List<Predicate<CldrPath>> elementMatchers) {
        this.elementMatchers = ImmutableList.copyOf(elementMatchers);
    }

    @Override
    public boolean matches(CldrPath path) {
        // A full match needs exactly one matcher per path element.
        if (elementMatchers.size() != path.getLength()) {
            return false;
        }
        return matchRegion(path, 0);
    }

    @Override
    public boolean matchesSuffixOf(CldrPath path) {
        // The matched region is aligned with the end of the path.
        int regionStart = path.getLength() - elementMatchers.size();
        if (regionStart < 0) {
            return false;
        }
        return matchRegion(path, regionStart);
    }

    @Override
    public boolean matchesPrefixOf(CldrPath path) {
        if (path.getLength() < elementMatchers.size()) {
            return false;
        }
        return matchRegion(path, 0);
    }

    private boolean matchRegion(CldrPath path, int offset) {
        // "offset" is the index of the path element corresponding to the "top most" element
        // matcher; it must be in the range 0 ... (path.length() - elementMatchers.size()).
        checkPositionIndex(offset, path.getLength() - elementMatchers.size());
        // Walk up through the parents until the path ends where the last matcher applies.
        int regionEndLength = offset + elementMatchers.size();
        CldrPath regionEnd = path;
        while (regionEnd.getLength() > regionEndLength) {
            regionEnd = regionEnd.getParent();
        }
        return matchForward(regionEnd, elementMatchers.size() - 1);
    }

    // Recurses up to the first matched element, then tests the elements in path order (parent
    // before child), stopping at the first element which fails to match.
    private boolean matchForward(CldrPath path, int matcherIndex) {
        if (matcherIndex < 0) {
            return true;
        }
        if (!matchForward(path.getParent(), matcherIndex - 1)) {
            return false;
        }
        return elementMatchers.get(matcherIndex).test(path);
    }
}
// Make a new, non-interned, unique instance here which we can test by reference to
// determine if the argument is to be captured (needed as ImmutableMap prohibits null).
// DO NOT change this code to assign "*" as the value directly, it MUST be a new instance.
@SuppressWarnings("StringOperationCanBeSimplified")
private static final String WILDCARD = new String("*");
private static final Pattern ELEMENT_START_REGEX =
Pattern.compile("(\\*|[-:\\w]+)(?:/|\\[|$)");
private static final Pattern ATTRIBUTE_REGEX =
Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")]");
// Grammar of a path specification:
//   element  := foo, foo[@bar="baz"], foo[@bar=*]
//   pathspec := element{/element}*
//
// Returns one predicate per element, in the order the elements appear in the specification.
private static List<Predicate<CldrPath>> parse(String pathSpec) {
    List<Predicate<CldrPath>> elementSpecs = new ArrayList<>();
    // Each step parses one element and yields the start of the next one (or -1 when done).
    int nextStart = 0;
    while (nextStart >= 0) {
        nextStart = parse(pathSpec, nextStart, elementSpecs);
    }
    return elementSpecs;
}
/**
 * Parses a single path element (its name and any attributes) of a path specification, and
 * appends the matcher for that element to the given list.
 *
 * @param pathSpec the complete path specification being parsed.
 * @param pos the index in {@code pathSpec} at which the element to be parsed starts.
 * @param specs the list to which the matcher for the parsed element is added.
 * @return the start index of the next element, or -1 if the parsed element was the last one.
 * @throws IllegalArgumentException if the specification is malformed at or after {@code pos}.
 */
private static int parse(String pathSpec, int pos, List<Predicate<CldrPath>> specs) {
    Matcher m = ELEMENT_START_REGEX.matcher(pathSpec).region(pos, pathSpec.length());
    checkArgument(m.lookingAt(), "invalid path specification (index=%s): %s", pos, pathSpec);
    String name = m.group(1);
    Map<String, String> attributes = ImmutableMap.of();
    // Continue from the end of the element name (the regex also looked at the next character).
    pos = m.end(1);
    if (pos < pathSpec.length() && pathSpec.charAt(pos) == '[') {
        // We have attributes to add.
        attributes = new LinkedHashMap<>();
        do {
            m = ATTRIBUTE_REGEX.matcher(pathSpec).region(pos, pathSpec.length());
            checkArgument(m.lookingAt(),
                    "invalid path specification (index=%s): %s", pos, pathSpec);
            // Null if we matched the '*' wildcard.
            String value = m.group(2);
            attributes.put(m.group(1), value != null ? value : WILDCARD);
            pos = m.end();
        } while (pos < pathSpec.length() && pathSpec.charAt(pos) == '[');
    }
    // Wildcard matching is less efficient because attribute keys cannot be made in advance, so
    // since it's also very rare, we special case it.
    Predicate<CldrPath> matcher = name.equals(WILDCARD)
            ? new WildcardElementMatcher(attributes)::match
            : new ElementMatcher(name, attributes)::match;
    specs.add(matcher);
    if (pos == pathSpec.length()) {
        return -1;
    }
    // Anything other than '/' here (e.g. "foo[@a=*]bar") means the caller supplied a malformed
    // specification. That is a bad argument rather than bad state, so it is reported in the
    // same way as the other syntax errors above.
    checkArgument(pathSpec.charAt(pos) == '/',
            "invalid path specification (index=%s): %s", pos, pathSpec);
    return pos + 1;
}
// Matcher for path elements like "foo[@bar=*]" where the name is known in advance.
private static final class ElementMatcher {
private final String name;
private final ImmutableMap<AttributeKey, String> attributes;
private ElementMatcher(String name, Map<String, String> attributes) {
this.name = checkNotNull(name);
this.attributes = attributes.entrySet().stream()
.collect(toImmutableMap(e -> keyOf(name, e.getKey()), Entry::getValue));
}
boolean match(CldrPath path) {
if (!path.getName().equals(name)) {
return false;
}
for (Entry<AttributeKey, String> e : attributes.entrySet()) {
String actual = path.get(e.getKey());
if (actual == null) {
return false;
}
String expected = e.getValue();
// DO NOT change this to use expected.equals(WILDCARD).
if (expected != WILDCARD && !expected.equals(actual)) {
return false;
}
}
return true;
}
}
// Matches path elements such as "*[@bar=*]", where the element name is only known at match
// time.
private static final class WildcardElementMatcher {
    private final ImmutableMap<String, String> attributes;

    private WildcardElementMatcher(Map<String, String> attributes) {
        this.attributes = ImmutableMap.copyOf(attributes);
    }

    private boolean match(CldrPath path) {
        // The element name can never cause a mismatch here, but since it is dynamic a new
        // attribute key has to be created for every attribute each time matching occurs. This
        // is rare, which is why it is kept as a separate case.
        for (Entry<String, String> attribute : attributes.entrySet()) {
            String actual = path.get(keyOf(path.getName(), attribute.getKey()));
            String expected = attribute.getValue();
            // The attribute must be present, and must either be wildcarded or have the expected
            // value. DO NOT change this to use expected.equals(WILDCARD): the wildcard is
            // detected by reference.
            boolean accepted =
                    actual != null && (expected == WILDCARD || expected.equals(actual));
            if (!accepted) {
                return false;
            }
        }
        return true;
    }
}
}

View file

@ -4,6 +4,7 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
import static java.util.function.Function.identity;
@ -26,8 +27,11 @@ import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.cldr.api.FilteredData;
import org.unicode.cldr.api.PathMatcher;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
@ -147,43 +151,52 @@ public final class PseudoLocales {
}
private static final class PseudoLocaleData extends FilteredData {
private static final PathMatcher LDML = PathMatcher.of("//ldml");
private static final PathMatcher AUX_EXEMPLARS =
PathMatcher.of("ldml/characters/exemplarCharacters[@type=\"auxiliary\"]");
ldml("characters/exemplarCharacters[@type=\"auxiliary\"]");
private static final PathMatcher NUMBERING_SYSTEM =
PathMatcher.of("ldml/numbers/defaultNumberingSystem");
ldml("numbers/defaultNumberingSystem");
// These paths were mostly derived from looking at the previous implementation's behaviour
// and can be modified as needed. Notably there are no "units" here (but they were also
// excluded in the original code).
private static final PathMatcher PSEUDO_PATHS = PathMatcher.anyOf(
ldml("localeDisplayNames"),
ldml("delimiters"),
ldml("dates/calendars/calendar"),
ldml("dates/fields"),
ldml("dates/timeZoneNames"),
ldml("listPatterns"),
ldml("posix/messages"),
ldml("characterLabels"),
ldml("typographicNames"));
// Paths which contain non-localizable data. It is important that these paths catch all the
// non-localizable sub-paths of the list above. This list must be accurate.
private static final PathMatcher EXCLUDE_PATHS = PathMatcher.anyOf(
ldml("localeDisplayNames/localeDisplayPattern"),
ldml("dates/timeZoneNames/fallbackFormat"));
private static final Predicate<CldrPath> IS_PSEUDO_PATH =
matchAnyLdmlPrefix(
"localeDisplayNames",
"delimiters",
"dates/calendars/calendar",
"dates/fields",
"dates/timeZoneNames",
"listPatterns",
"posix/messages",
"characterLabels",
"typographicNames")
.and(matchAnyLdmlPrefix(
"localeDisplayNames/localeDisplayPattern",
"dates/timeZoneNames/fallbackFormat")
.negate());
// The expectation is that all non-alias paths with values under these roots are "date/time
// pattern like" (such as "E h:mm:ss B") in which care must be taken to not pseudo localize
// the patterns in such as way as to break them. This list must be accurate.
private static final PathMatcher PATTERN_PATHS = PathMatcher.anyOf(
ldml("dates/calendars/calendar/timeFormats"),
ldml("dates/calendars/calendar/dateFormats"),
ldml("dates/calendars/calendar/dateTimeFormats"),
ldml("dates/timeZoneNames/hourFormat"));
private static final Predicate<CldrPath> IS_PATTERN_PATH = matchAnyLdmlPrefix(
"dates/calendars/calendar/timeFormats",
"dates/calendars/calendar/dateFormats",
"dates/calendars/calendar/dateTimeFormats",
"dates/timeZoneNames/hourFormat");
private static PathMatcher ldml(String matcherSuffix) {
return PathMatcher.of("ldml/" + matcherSuffix);
private static PathMatcher ldml(String paths) {
return LDML.withSuffix(paths);
}
// Builds a predicate that accepts a path when at least one of the given suffixes, once turned
// into a matcher by ldml(), reports matchesPrefixOf() as true for that path.
private static Predicate<CldrPath> matchAnyLdmlPrefix(String... paths) {
    // The matchers are created once here; only the tests below run per path.
    ImmutableList<Predicate<CldrPath>> prefixTests =
        Arrays.stream(paths)
            .map(suffix -> (Predicate<CldrPath>) ldml(suffix)::matchesPrefixOf)
            .collect(toImmutableList());
    return path -> {
        for (Predicate<CldrPath> prefixTest : prefixTests) {
            if (prefixTest.test(path)) {
                return true;
            }
        }
        return false;
    };
}
// Look for any attribute in the path with "narrow" in its value. Since "narrow" values
@ -223,7 +236,7 @@ public final class PseudoLocales {
CldrValue defaultReturnValue = isResolved ? value : null;
// This makes it look like we have explicit values only for the included paths.
if (!PSEUDO_PATHS.matchesPrefixOf(path) || EXCLUDE_PATHS.matchesPrefixOf(path)) {
if (!IS_PSEUDO_PATH.test(path)) {
return defaultReturnValue;
}
String fullPath = value.getFullPath();
@ -232,7 +245,7 @@ public final class PseudoLocales {
if (IS_NARROW.test(fullPath)) {
return defaultReturnValue;
}
String text = createMessage(value.getValue(), PATTERN_PATHS.matchesPrefixOf(path));
String text = createMessage(value.getValue(), IS_PATTERN_PATH.test(path));
return CldrValue.parseValue(fullPath, text);
}
@ -357,7 +370,7 @@ public final class PseudoLocales {
public void addFragment(String text, boolean isLocalizable) {
if (isLocalizable) {
boolean wrapping = false;
for (int index = 0; index < text.length();) {
for (int index = 0; index < text.length(); ) {
int codePoint = text.codePointAt(index);
index += Character.charCount(codePoint);
byte directionality = Character.getDirectionality(codePoint);
@ -383,5 +396,6 @@ public final class PseudoLocales {
};
}
private PseudoLocales() {}
private PseudoLocales() {
}
}

View file

@ -26,6 +26,7 @@ import java.util.stream.Stream;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.PathMatcher;
import com.google.common.base.Ascii;
import com.google.common.base.Splitter;
@ -57,22 +58,22 @@ public final class SupplementalData {
private static final Pattern SCRIPT_SUBTAG = Pattern.compile("[A-Z][a-z]{3}");
private static final PathMatcher ALIAS =
PathMatcher.of("supplementalData/metadata/alias/*[@type=*]");
PathMatcher.of("//supplementalData/metadata/alias/*[@type=*]");
private static final PathMatcher PARENT_LOCALE =
PathMatcher.of("supplementalData/parentLocales/parentLocale[@parent=*]");
PathMatcher.of("//supplementalData/parentLocales/parentLocale[@parent=*]");
private static final AttributeKey PARENT = keyOf("parentLocale", "parent");
private static final AttributeKey LOCALES = keyOf("parentLocale", "locales");
private static final PathMatcher CALENDER_PREFERENCE =
PathMatcher.of("supplementalData/calendarPreferenceData/calendarPreference[@territories=*]");
PathMatcher.of("//supplementalData/calendarPreferenceData/calendarPreference[@territories=*]");
private static final AttributeKey CALENDER_TERRITORIES =
keyOf("calendarPreference", "territories");
private static final AttributeKey CALENDER_ORDERING =
keyOf("calendarPreference", "ordering");
private static final PathMatcher LIKELY_SUBTAGS =
PathMatcher.of("supplementalData/likelySubtags/likelySubtag[@from=*]");
PathMatcher.of("//supplementalData/likelySubtags/likelySubtag[@from=*]");
private static final AttributeKey SUBTAG_FROM = keyOf("likelySubtag", "from");
private static final AttributeKey SUBTAG_TO = keyOf("likelySubtag", "to");

View file

@ -3,8 +3,6 @@
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Ascii.toLowerCase;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
@ -17,19 +15,15 @@ import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import javax.annotation.Nullable;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Ascii;
@ -46,12 +40,10 @@ import com.google.common.collect.Sets;
*/
public final class Bcp47Mapper {
// Other attributes (e.g. "alias") are value attributes and don't need to be matched here.
private static final PathMatcher KEY = PathMatcher.of("ldmlBCP47/keyword/key[@name=*]");
private static final AttributeKey KEY_NAME = keyOf("key", "name");
private static final AttributeKey KEY_ALIAS = keyOf("key", "alias");
private static final AttributeKey KEY_VALUE_TYPE = keyOf("key", "valueType");
private static final PathMatcher TYPE = PathMatcher.of("type[@name=*]");
private static final AttributeKey TYPE_NAME = keyOf("type", "name");
private static final AttributeKey TYPE_ALIASES = keyOf("type", "alias");
private static final AttributeKey PREFERRED_TYPE_NAME = keyOf("type", "preferred");
@ -75,6 +67,15 @@ public final class Bcp47Mapper {
private static final RbPath RB_MAP_ALIAS = RbPath.of("typeMap", "timezone:alias");
private static final RbPath RB_BCP_ALIAS = RbPath.of("bcpTypeAlias", "tz:alias");
// Statically configured processor binding CLDR path patterns to the handler code in this class.
private static final CldrDataProcessor<Bcp47Mapper> BCP47_PROCESSOR;
static {
CldrDataProcessor.Builder<Bcp47Mapper> processor = CldrDataProcessor.builder();
// For a matched "key" path, create a ValueCollector (an inner-class instance bound to the
// mapper "m") from that path, and pass each matching "type" value to ValueCollector.collect().
// NOTE(review): the "type[@name=*]" pattern is presumably matched relative to the "key" path
// registered just before it - confirm against CldrDataProcessor.
processor
.addAction("//ldmlBCP47/keyword/key[@name=*]", (m, p) -> m.new ValueCollector(p))
.addValueAction("type[@name=*]", ValueCollector::collect);
BCP47_PROCESSOR = processor.build();
}
/**
* Processes data from the given supplier to generate Timezone and BCP-47 ICU data.
*
@ -87,169 +88,146 @@ public final class Bcp47Mapper {
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static ImmutableList<IcuData> process(CldrData cldrData) {
Bcp47Visitor visitor = new Bcp47Visitor();
cldrData.accept(DTD, visitor);
visitor.addKeyMapValues();
return ImmutableList.of(visitor.keyTypeData.icuData, visitor.tzData.icuData);
Bcp47Mapper mapper = BCP47_PROCESSOR.process(cldrData, new Bcp47Mapper(), DTD);
mapper.addKeyMapValues();
return ImmutableList.of(mapper.keyTypeData, mapper.tzData);
}
// Outer visitor which handles "key" paths by installing sub-visitor methods to process
// each child "type" element. Depending on the key name, values are stored in different
// IcuData instances.
private static final class Bcp47Visitor implements PrefixVisitor {
private final ValueCollector tzData =
new ValueCollector(new IcuData("timezoneTypes", false));
private final ValueCollector keyTypeData =
new ValueCollector(new IcuData("keyTypeData", false));
private final IcuData tzData = new IcuData("timezoneTypes", false);
private final IcuData keyTypeData = new IcuData("keyTypeData", false);
// A map collecting each key and values as they are visited.
// TODO: Convert this to a Map<RbPath, String> which involves removing the '@' prefix hack.
private Map<String, String> keyMap = new LinkedHashMap<>();
// The current key name from the parent path element (set when a prefix is matched).
@Nullable private String keyName = null;
// A map collecting each key and values as they are visited.
// TODO: Convert this to a Map<RbPath, String> which involves removing the '@' prefix hack.
private Map<String, String> keyMap = new LinkedHashMap<>();
private Bcp47Mapper() { }
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (KEY.matches(prefix)) {
// Don't inline this since it also sets the field!!
keyName = Ascii.toLowerCase(KEY_NAME.valueFrom(prefix));
// How the data is visited is the same for both timezone and other BCP-47 data,
// it's just split into different data files, so we just install a different
// instance of the visitor class according to where the data in this sub-hierarchy
// should end up.
ctx.install(keyName.equals("tz") ? tzData : keyTypeData);
// Post-processing step: copies everything collected in keyMap into the "keyTypeData" ICU data
// and then appends the fixed alias entries which point at the timezone data.
private void addKeyMapValues() {
    IcuData target = keyTypeData;
    for (Entry<String, String> mapping : keyMap.entrySet()) {
        String mapKey = mapping.getKey();
        String mapValue = mapping.getValue();
        if (mapKey.startsWith("@")) {
            // Entries written by addInfoAttributes() carry a leading '@' marker and hold a
            // full path, hence "parse()" (after dropping the marker) rather than "of()".
            target.add(RbPath.parse(mapKey.substring(1)), mapValue);
        } else {
            // An empty value indicates that the BCP47 key is the same as the legacy key.
            String bcpKey = mapKey.equals(mapValue) ? "" : mapKey;
            target.add(RB_KEYMAP.extendBy(mapValue), bcpKey);
        }
    }
    // Aliases for the timezone data.
    target.add(RB_TYPE_ALIAS, "/ICUDATA/timezoneTypes/typeAlias/timezone");
    target.add(RB_MAP_ALIAS, "/ICUDATA/timezoneTypes/typeMap/timezone");
    target.add(RB_BCP_ALIAS, "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
}
private final class ValueCollector {
private final String keyName;
// Mutable data to be written into (differs depending on the key name).
private final IcuData icuData;
ValueCollector(CldrPath prefix) {
this.keyName = Ascii.toLowerCase(KEY_NAME.valueFrom(prefix));
this.icuData = keyName.equals("tz") ? tzData : keyTypeData;
}
// Post processing to add additional captured attribute values and some special cases.
private void addKeyMapValues() {
IcuData keyData = keyTypeData.icuData;
// Add all the keyMap values into the IcuData file.
for (Entry<String, String> kmData : keyMap.entrySet()) {
String bcpKey = kmData.getKey();
String key = kmData.getValue();
if (bcpKey.startsWith("@")) {
// Undoing the weird hack in addInfoAttributes(). This can be done better.
// We use "parse()" because these are full paths, and not single elements.
keyData.add(RbPath.parse(bcpKey.substring(1)), key);
// Handles one "type" value: writes either a "bcpTypeAlias" entry (when a preferred type is
// given) or the type map / type alias entries, and records the key alias in keyMap.
private void collect(CldrValue value) {
    String typeName = TYPE_NAME.valueFrom(value);

    // A value with a "preferred" type is only ever emitted as an alias, and is expected to
    // be explicitly deprecated (at either the key or the type level).
    Optional<String> preferred = PREFERRED_TYPE_NAME.optionalValueFrom(value);
    if (preferred.isPresent()) {
        boolean isDeprecated = KEY_DEPRECATED.booleanValueFrom(value, false)
            || TYPE_DEPRECATED.booleanValueFrom(value, false);
        checkState(isDeprecated,
            "unexpected 'preferred' attribute for non-deprecated value: %s", value);
        icuData.add(RbPath.of("bcpTypeAlias", keyName, typeName), preferred.get());
        return;
    }
    // Deprecated values without a preferred replacement fall through to here, since the
    // fact that they are deprecated still has to be emitted.

    // Not every key element has an alias (e.g. in calendar.xml:
    //   <key name="fw" description="First day of week" since="28">
    // ), in which case the key is recorded as an alias of itself; that is later turned
    // into a path with an empty value.
    String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
    keyMap.put(keyName, keyAlias);

    RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
    List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
    if (!typeAliases.isEmpty()) {
        String mainAlias = typeAliases.get(0);
        icuData.add(typeMapPrefix.extendBy(quoteAlias(mainAlias)), typeName);
        // Every alias after the first becomes a secondary alias referencing the main one.
        RbPath typeAliasPrefix = RbPath.of("typeAlias", keyAlias);
        for (String secondary : typeAliases.subList(1, typeAliases.size())) {
            icuData.add(typeAliasPrefix.extendBy(quoteAlias(secondary)), mainAlias);
        }
    } else {
        // An empty value indicates that the same type name is used for both BCP47 and
        // the legacy type.
        icuData.add(typeMapPrefix.extendBy(typeName), "");
    }
    addInfoAttributes(keyName, typeName, value.getValueAttributes());
}
// Add any additional attributes present to the attribute map. Note that this code was
// copied from largely undocumented code, and the precise reasoning for why this is
// needed or why it's done this way is not completely clear. It is very likely that it
// can be simplified.
//
// The '@' symbol added here is just a magic token that gets stripped off again in the
// addKeyMapValues() method, it appears to just be a way to distinguish keys added via
// this method vs during the collect method. A better approach might just be to have two
// maps.
// TODO: Remove the use of '@' and simplify the logic for "info" attributes (infoMap?).
private void addInfoAttributes(
String keyName, String typeName, ImmutableMap<AttributeKey, String> attributes) {
// Only emit deprecation for the "key" level, even if all types below that are also
// marked as deprecated. Only do this for a subset of attributes (INFO_ATTRIBUTES).
Set<AttributeKey> keys =
Sets.intersection(attributes.keySet(), INFO_ATTRIBUTES.keySet());
for (AttributeKey a : keys) {
String value = attributes.get(a);
// Skip empty or default values in attributes.
if (value.isEmpty() || INFO_ATTRIBUTES.get(a).equals(value)) {
continue;
}
if (bcpKey.equals(key)) {
// An empty value indicates that the BCP47 key is same as the legacy key.
bcpKey = "";
}
keyData.add(RB_KEYMAP.extendBy(key), bcpKey);
// The ID for the xxxInfo paths in ICU is the path fragment at which the
// attribute exists. Since we only process complete paths here, we must do a
// bit of reconstruction based on the element name of the attribute we are
// processing. This relies on explicit knowledge that the paths are "<key>" or
// "<key>/<type>". This all gets less messy if we switch to RbPath.
String id =
a.getElementName().equals("key") ? keyName : keyName + "/" + typeName;
keyMap.put(
"@" + a.getElementName() + "Info/" + a.getAttributeName() + "/" + id,
value);
}
// Add aliases for timezone data.
keyData.add(RB_TYPE_ALIAS, "/ICUDATA/timezoneTypes/typeAlias/timezone");
keyData.add(RB_MAP_ALIAS, "/ICUDATA/timezoneTypes/typeMap/timezone");
keyData.add(RB_BCP_ALIAS, "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
}
// Value visitor which writes "type" values into the IcuData instance it was constructed with.
// This is a non-static inner class: "keyName" and "keyMap" are not declared here and are read
// from (and, for keyMap, written to) the enclosing instance.
private final class ValueCollector implements ValueVisitor {
// Mutable ICU data collected into during visitation.
private final IcuData icuData;
ValueCollector(IcuData data) {
this.icuData = checkNotNull(data);
}
// Processes a single value; the value's path must end in an element matched by TYPE,
// otherwise checkArgument() rejects it.
@Override
public void visit(CldrValue value) {
checkArgument(TYPE.matchesSuffixOf(value.getPath()),
"unexpected child element: %s", value.getPath());
String typeName = TYPE_NAME.valueFrom(value);
// Note that if a "preferred" type exists, we treat the value specially and add
// it only as an alias. We expected values with a preferred replacement to
// always be explicitly deprecated.
Optional<String> prefName = PREFERRED_TYPE_NAME.optionalValueFrom(value);
if (prefName.isPresent()) {
checkState(KEY_DEPRECATED.booleanValueFrom(value, false)
|| TYPE_DEPRECATED.booleanValueFrom(value, false),
"unexpected 'preferred' attribute for non-deprecated value: %s", value);
icuData.add(RbPath.of("bcpTypeAlias", keyName, typeName), prefName.get());
return;
}
// Note: There are some deprecated values which don't have a preferred
// replacement and these will be processed below (in particular we need to emit
// the fact that they are deprecated).
// Not all key elements have an alias. E.g. in calendar.xml:
// <key name="fw" description="First day of week" since="28">
// But we still add it as an alias to itself (which is later turned into a path with
// an empty value).
String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
keyMap.put(keyName, keyAlias);
RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
if (typeAliases.isEmpty()) {
// Generate type map entry using empty value (an empty value indicates same
// type name is used for both BCP47 and legacy type).
icuData.add(typeMapPrefix.extendBy(typeName), "");
} else {
// The first alias is the main one; it is the path element and maps to the type name.
String mainAlias = typeAliases.get(0);
icuData.add(typeMapPrefix.extendBy(quoteAlias(mainAlias)), typeName);
// Put additional aliases as secondary aliases referencing the main alias.
RbPath typeAliasPrefix = RbPath.of("typeAlias", keyAlias);
typeAliases.stream()
.skip(1)
.map(Bcp47Visitor::quoteAlias)
.forEach(a -> icuData.add(typeAliasPrefix.extendBy(a), mainAlias));
}
addInfoAttributes(keyName, typeName, value.getValueAttributes());
}
// Add any additional attributes present to the attribute map. Note that this code was
// copied from largely undocumented code, and the precise reasoning for why this is
// needed or why it's done this way is not completely clear. It is very likely that it
// can be simplified.
//
// The '@' symbol added here is just a magic token that gets stripped off again in the
// addKeyMapValues() method, it appears to just be a way to distinguish keys added via
// this method vs during the visit method. A better approach might just be to have two
// maps.
// TODO: Remove the use of '@' and simplify the logic for "info" attributes (infoMap?).
private void addInfoAttributes(
String keyName, String typeName, ImmutableMap<AttributeKey, String> attributes) {
// Only emit deprecation for the "key" level, even if all types below that are also
// marked as deprecated. Only do this for a subset of attributes (INFO_ATTRIBUTES).
Set<AttributeKey> keys =
Sets.intersection(attributes.keySet(), INFO_ATTRIBUTES.keySet());
for (AttributeKey a : keys) {
String value = attributes.get(a);
// Skip empty or default values in attributes.
if (value.isEmpty() || INFO_ATTRIBUTES.get(a).equals(value)) {
continue;
}
// The ID for the xxxInfo paths in ICU is the path fragment at which the
// attribute exists. Since we only process complete paths here, we must do a
// bit of reconstruction based on the element name of the attribute we are
// processing. This relies on explicit knowledge that the paths are "<key>" or
// "<key>/<type>". This all gets less messy if we switch to RbPath.
String id =
a.getElementName().equals("key") ? keyName : keyName + "/" + typeName;
// Stored key has the form "@<element>Info/<attribute>/<id>".
keyMap.put(
"@" + a.getElementName() + "Info/" + a.getAttributeName() + "/" + id,
value);
}
}
}
/**
 * Makes an alias value usable as an element of a resource bundle path. A value containing
 * '/' has every '/' replaced by ':' and is wrapped in double quotes (e.g. foo/bar ->
 * "foo:bar"); any other value is returned unchanged.
 *
 * <p>This is needed for timezone "metazone" ID strings which are of the form 'Foo/Bar'
 * in the CLDR data.
 */
// TODO: Switch to RbPath and do quoting automatically when ICU data is written out.
private static String quoteAlias(String str) {
    if (str.indexOf('/') < 0) {
        return str;
    }
    return "\"" + str.replace('/', ':') + "\"";
}
}
private Bcp47Mapper() {}
/**
 * Escapes an alias value so that it can appear in a resource bundle path. If the value
 * contains '/', each '/' becomes ':' and the result is enclosed in double quotes (e.g.
 * foo/bar -> "foo:bar"); otherwise the value is returned as given.
 *
 * <p>This is needed for timezone "metazone" ID strings which are of the form 'Foo/Bar'
 * in the CLDR data.
 */
// TODO: Switch to RbPath and do quoting automatically when ICU data is written out.
private static String quoteAlias(String str) {
    boolean hasSeparator = str.indexOf('/') != -1;
    if (!hasSeparator) {
        return str;
    }
    StringBuilder quoted = new StringBuilder(str.length() + 2);
    quoted.append('"').append(str.replace('/', ':')).append('"');
    return quoted.toString();
}
}

View file

@ -4,18 +4,17 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import java.util.Optional;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor.SubProcessor;
import com.google.common.escape.UnicodeEscaper;
@ -29,20 +28,25 @@ import com.google.common.escape.UnicodeEscaper;
*/
// TODO: This class can almost certainly be replaced with a small RegexTransformer config.
public final class BreakIteratorMapper {
// The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And
// its default (and only) value is "standard".
// TODO: Understand and document why this is the case.
private static final PathMatcher SUPPRESSION = PathMatcher.of(
"ldml/segmentations/segmentation[@type=*]/suppressions/suppression");
// Statically configured processor which routes suppression, boundary and dictionary values to
// the corresponding add...() methods of this class.
private static final CldrDataProcessor<BreakIteratorMapper> CLDR_PROCESSOR;
static {
CldrDataProcessor.Builder<BreakIteratorMapper> processor = CldrDataProcessor.builder();
// The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And
// its default (and only) value is "standard".
// TODO: Understand and document why this is the case.
processor.addValueAction(
"//ldml/segmentations/segmentation[@type=*]/suppressions/suppression",
BreakIteratorMapper::addSuppression);
// Both "special" value actions are registered on one sub-processor created for the
// "icu:breakIteratorData" element.
// NOTE(review): their patterns are presumably matched relative to that element - confirm
// against CldrDataProcessor.
SubProcessor<BreakIteratorMapper> specials =
processor.addSubprocessor("//ldml/special/icu:breakIteratorData");
specials.addValueAction("icu:boundaries/*", BreakIteratorMapper::addBoundary);
specials.addValueAction(
"icu:dictionaries/icu:dictionary", BreakIteratorMapper::addDictionary);
CLDR_PROCESSOR = processor.build();
}
private static final AttributeKey SEGMENTATION_TYPE = keyOf("segmentation", "type");
// Note: This could be done with an intermediate matcher for
// "ldml/special/icu:breakIteratorData" but there are so few "special" values it's not worth it
private static final PathMatcher BOUNDARIES =
PathMatcher.of("ldml/special/icu:breakIteratorData/icu:boundaries/*");
private static final PathMatcher DICTIONARY =
PathMatcher.of("ldml/special/icu:breakIteratorData/icu:dictionaries/icu:dictionary");
private static final AttributeKey DICTIONARY_DEP = keyOf("icu:dictionary", "icu:dependency");
private static final AttributeKey DICTIONARY_TYPE = keyOf("icu:dictionary", "type");
@ -59,8 +63,8 @@ public final class BreakIteratorMapper {
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
BreakIteratorMapper mapper = new BreakIteratorMapper(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, mapper::addSpecials));
cldrData.accept(DTD, mapper::addSuppression);
icuSpecialData.ifPresent(d -> CLDR_PROCESSOR.process(d, mapper));
CLDR_PROCESSOR.process(cldrData, mapper);
return mapper.icuData;
}
@ -72,28 +76,21 @@ public final class BreakIteratorMapper {
}
private void addSuppression(CldrValue v) {
if (SUPPRESSION.matches(v.getPath())) {
String type = SEGMENTATION_TYPE.valueFrom(v);
// TODO: Understand and document why we escape values here, but not for collation data.
icuData.add(
RbPath.of("exceptions", type + ":array"),
ESCAPE_NON_ASCII.escape(v.getValue()));
}
String type = SEGMENTATION_TYPE.valueFrom(v);
// TODO: Understand and document why we escape values here, but not for collation data.
icuData.add(
RbPath.of("exceptions", type + ":array"), ESCAPE_NON_ASCII.escape(v.getValue()));
}
private void addSpecials(CldrValue v) {
CldrPath p = v.getPath();
if (BOUNDARIES.matches(p)) {
addDependency(
getDependencyName(v),
getBoundaryType(v),
getBoundaryDependency(v));
} else if (DICTIONARY.matches(p)) {
addDependency(
getDependencyName(v),
DICTIONARY_TYPE.valueFrom(v),
DICTIONARY_DEP.optionalValueFrom(v));
}
// Adds the dependency described by a single boundary value, using the boundary-specific
// helpers to obtain its type and (optional) dependency.
private void addBoundary(CldrValue v) {
    String name = getDependencyName(v);
    String type = getBoundaryType(v);
    Optional<String> dependency = getBoundaryDependency(v);
    addDependency(name, type, dependency);
}
// Adds the dependency described by a single dictionary value; the type and the (optional)
// dependency are read from the "icu:dictionary" attributes.
private void addDictionary(CldrValue v) {
    String name = getDependencyName(v);
    String type = DICTIONARY_TYPE.valueFrom(v);
    Optional<String> dependency = DICTIONARY_DEP.optionalValueFrom(v);
    addDependency(name, type, dependency);
}
private void addDependency(String name, String type, Optional<String> dependency) {

View file

@ -11,14 +11,13 @@ import java.util.Optional;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor.SubProcessor;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
@ -32,18 +31,22 @@ import com.google.common.base.Splitter;
* }</pre>
*/
public final class CollationMapper {
private static final PathMatcher COLLATIONS = PathMatcher.of("ldml/collations");
// Note that the 'type' attribute is optional, so cannot be in the path matcher.
// However since the CLDR data never actually omits the value, it would be easy to change the
// attribute metadata to stop it being an implicit attribute and then it could appear.
private static final PathMatcher COLLATION_RULE = PathMatcher.of("collation/cr");
// Statically configured processor which routes collation rules, the default collation and ICU
// "special" values to the corresponding methods of this class.
private static final CldrDataProcessor<CollationMapper> CLDR_PROCESSOR;
static {
CldrDataProcessor.Builder<CollationMapper> processor = CldrDataProcessor.builder();
// Both collation value actions are registered on one sub-processor for "//ldml/collations".
// NOTE(review): their patterns are presumably matched relative to that element - confirm
// against CldrDataProcessor.
SubProcessor<CollationMapper> collations = processor.addSubprocessor("//ldml/collations");
collations.addValueAction("collation/cr", CollationMapper::collectRule);
collations.addValueAction("defaultCollation", CollationMapper::collectDefault);
// This could be a separate processor, since the specials data only contains these paths,
// but it's not clear if in future it could also contain any collation rules.
processor.addValueAction("//ldml/special/*", CollationMapper::maybeAddSpecial);
CLDR_PROCESSOR = processor.build();
}
private static final AttributeKey COLLATION_TYPE = keyOf("collation", "type");
private static final AttributeKey COLLATION_RULE_ALT = keyOf("cr", "alt");
private static final PathMatcher DEFAULT_COLLATION = PathMatcher.of("defaultCollation");
private static final PathMatcher SPECIAL = PathMatcher.of("ldml/special");
private static final AttributeKey SPECIAL_RULES = keyOf("icu:UCARules", "icu:uca_rules");
private static final AttributeKey SPECIAL_DEP = keyOf("icu:depends", "icu:dependency");
@ -68,88 +71,76 @@ public final class CollationMapper {
public static IcuData process(
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData, String cldrVersion) {
CollationVisitor visitor = new CollationVisitor(icuData, cldrVersion);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
CollationMapper mapper = new CollationMapper(icuData, cldrVersion);
icuSpecialData.ifPresent(specialData -> CLDR_PROCESSOR.process(specialData, mapper, DTD));
CLDR_PROCESSOR.process(cldrData, mapper, DTD);
return icuData;
}
final static class CollationVisitor implements PrefixVisitor {
private final IcuData icuData;
private final String cldrVersion;
private final IcuData icuData;
private final String cldrVersion;
CollationVisitor(IcuData icuData, String cldrVersion) {
this.icuData = checkNotNull(icuData);
this.cldrVersion = checkNotNull(cldrVersion);
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty <collation> element that's a non-leaf element and thus not
// visited, but we should add an empty sequence to the output data.
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
if (icuData.getName().equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
icuData.replace(RB_STANDARD_VERSION, cldrVersion);
}
private CollationMapper(IcuData icuData, String cldrVersion) {
this.icuData = checkNotNull(icuData);
this.cldrVersion = checkNotNull(cldrVersion);
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty <collation> element that's a non-leaf element and thus not
// visited, but we should add an empty sequence to the output data.
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
if (icuData.getName().equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
icuData.replace(RB_STANDARD_VERSION, cldrVersion);
}
}
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (COLLATIONS.matchesPrefixOf(prefix)) {
ctx.install(this::collectRules);
} else if (SPECIAL.matchesPrefixOf(prefix)) {
ctx.install(this::maybeAddSpecial);
}
// Writes the rules of one collation rule value to "collations/<type>/Sequence" (with the
// value split into lines, comments removed and empty lines dropped) and sets the matching
// "collations/<type>/Version" entry to the CLDR version.
private void collectRule(CldrValue v) {
    String type = COLLATION_TYPE.valueFrom(v);
    RbPath sequencePath = RbPath.of("collations", type, "Sequence");

    // WARNING: This is almost certainly a bug. "isShort" is true whenever the "alt"
    // attribute of <cr> is present at all, although that attribute can have values other
    // than "short". This code was copied from CollationMapper which has the line;
    //   isShort = attr.getValue("alt") != null;
    // TODO: Raise a ticket to examine this.
    boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent();

    // Note that it's not clear why an existing path is left untouched here. The code from
    // which this was derived is largely undocumented and this check could have been overly
    // defensive (perhaps a duplicate key should be an error?).
    if (!isShort && icuData.getPaths().contains(sequencePath)) {
        return;
    }
    RbValue rules = RbValue.of(
        LINE_SPLITTER.splitToList(v.getValue()).stream()
            .map(CollationMapper::removeComment)
            .filter(line -> !line.isEmpty())::iterator);
    icuData.replace(sequencePath, rules);
    icuData.replace(RbPath.of("collations", type, "Version"), cldrVersion);
}
private void collectRules(CldrValue v) {
CldrPath p = v.getPath();
if (COLLATION_RULE.matchesSuffixOf(p)) {
String type = COLLATION_TYPE.valueFrom(v);
RbPath rbPath = RbPath.of("collations", type, "Sequence");
// Adds the text of a default collation value under the RB_COLLATIONS_DEFAULT path.
private void collectDefault(CldrValue v) {
    String defaultCollation = v.getValue();
    icuData.add(RB_COLLATIONS_DEFAULT, defaultCollation);
}
// WARNING: This is almost certainly a bug, since while @type can have the value
// "short" it can also have other values. This code was copied from CollationMapper
// which has the line;
// isShort = attr.getValue("alt") != null;
// TODO: Raise a ticket to examine this.
boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent();
// Note that it's not clear why there's a check for "contains()" here. The code
// from which this was derived is largely undocumented and this check could have
// been overly defensive (perhaps a duplicate key should be an error?).
if (isShort || !icuData.getPaths().contains(rbPath)) {
RbValue rules = RbValue.of(
LINE_SPLITTER.splitToList(v.getValue()).stream()
.map(CollationMapper::removeComment)
.filter(s -> !s.isEmpty())::iterator);
icuData.replace(rbPath, rules);
icuData.replace(RbPath.of("collations", type, "Version"), cldrVersion);
}
} else if (DEFAULT_COLLATION.matchesSuffixOf(p)) {
icuData.add(RB_COLLATIONS_DEFAULT, v.getValue());
}
}
// This is a bit special since the attribute we want to add depends on the element we are
// visiting (which is somewhat unusual in the transformation classes).
private void maybeAddSpecial(CldrValue value) {
AttributeKey key;
switch (value.getPath().getName()) {
case "icu:UCARules":
key = SPECIAL_RULES;
break;
case "icu:depends":
key = SPECIAL_DEP;
break;
default:
return;
}
// substring(4) just removes the "icu:" prefix (which we know is present in the key).
RbPath rbPath = RbPath.of(
String.format("%s:process(%s)",
key.getElementName().substring(4), key.getAttributeName().substring(4)));
icuData.add(rbPath, key.valueFrom(value));
// This is a bit special since the attribute which gets added depends on the element being
// visited (which is somewhat unusual in the transformation classes). Values for any element
// other than "icu:UCARules" or "icu:depends" are ignored.
private void maybeAddSpecial(CldrValue value) {
    String elementName = value.getPath().getName();
    AttributeKey key;
    if (elementName.equals("icu:UCARules")) {
        key = SPECIAL_RULES;
    } else if (elementName.equals("icu:depends")) {
        key = SPECIAL_DEP;
    } else {
        return;
    }
    // substring(4) just removes the "icu:" prefix (which we know is present in the key).
    String element = key.getElementName().substring(4);
    String attribute = key.getAttributeName().substring(4);
    RbPath rbPath = RbPath.of(String.format("%s:process(%s)", element, attribute));
    icuData.add(rbPath, key.valueFrom(value));
}
// Collation data can contain # to mark an end-of-line comment, but it can also contain data
@ -195,6 +186,4 @@ public final class CollationMapper {
checkArgument(!quoted, "mismatched quotes in: %s", s);
return -1;
}
private CollationMapper() {}
}

View file

@ -6,18 +6,15 @@ import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
import java.util.Optional;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import com.google.common.annotations.VisibleForTesting;
@ -29,14 +26,18 @@ import com.google.common.annotations.VisibleForTesting;
* }</pre>
*/
public final class DayPeriodsMapper {
private static final PathMatcher RULESET =
PathMatcher.of("supplementalData/dayPeriodRuleSet");
private static final CldrDataProcessor<DayPeriodsMapper> CLDR_PROCESSOR;
static {
CldrDataProcessor.Builder<DayPeriodsMapper> processor = CldrDataProcessor.builder();
processor.addAction("//supplementalData/dayPeriodRuleSet", (m, p) -> m.new Ruleset(p))
.addSubprocessor("dayPeriodRules[@locales=*]", Ruleset::prefixStart)
.addValueAction("dayPeriodRule[@type=*]", Ruleset::visitRule);
CLDR_PROCESSOR = processor.build();
}
private static final AttributeKey RULESET_TYPE = keyOf("dayPeriodRuleSet", "type");
private static final PathMatcher RULES = PathMatcher.of("dayPeriodRules[@locales=*]");
private static final AttributeKey RULES_LOCALES = keyOf("dayPeriodRules", "locales");
private static final PathMatcher RULE = PathMatcher.of("dayPeriodRule[@type=*]");
private static final AttributeKey RULE_TYPE = keyOf("dayPeriodRule", "type");
private static final RbPath RB_LOCALES = RbPath.of("locales");
@ -53,51 +54,33 @@ public final class DayPeriodsMapper {
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData data) {
RuleSetVisitor mapper = new RuleSetVisitor();
data.accept(NESTED_GROUPING, mapper);
return mapper.icuData;
return CLDR_PROCESSOR.process(data, new DayPeriodsMapper(), NESTED_GROUPING).icuData;
}
private static final class RuleSetVisitor implements PrefixVisitor {
// Mutable ICU data collected into during visitation.
private final IcuData icuData = new IcuData("dayPeriods", false);
private int setNum = 0;
// Mutable ICU data collected into during visitation.
private final IcuData icuData = new IcuData("dayPeriods", false);
private int setNum = 0;
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (RULESET.matches(prefix)) {
ctx.install(new RuleVisitor(RULESET_TYPE.optionalValueFrom(prefix)));
}
private final class Ruleset {
private RbPath localePrefix;
Ruleset(CldrPath prefix) {
this.localePrefix = RULESET_TYPE.optionalValueFrom(prefix)
.map(t -> RbPath.of("locales_" + t))
.orElse(RB_LOCALES);
}
private final class RuleVisitor implements PrefixVisitor {
private final RbPath localePrefix;
/**
 * Starts a new rule set: bumps the shared set counter and maps every locale listed on the
 * given prefix to the new set's name.
 */
private void prefixStart(CldrPath prefix) {
    // Sets are arbitrarily identified by the string "setNN" (numbering starts from 1 because
    // the counter is incremented before it is used).
    setNum++;
    String setId = "set" + setNum;
    for (String locale : RULES_LOCALES.listOfValuesFrom(prefix)) {
        icuData.add(localePrefix.extendBy(locale), setId);
    }
}
private RuleVisitor(Optional<String> type) {
// If there's a given type, add it to the prefix path.
this.localePrefix = type.map(t -> RbPath.of("locales_" + t)).orElse(RB_LOCALES);
}
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (RULES.matchesSuffixOf(prefix)) {
// Sets are arbitrarily identified by the string "setNN".
String setName = "set" + (++setNum);
RULES_LOCALES.listOfValuesFrom(prefix)
.forEach(locale -> icuData.add(localePrefix.extendBy(locale), setName));
ctx.install(this::visitRule);
}
}
private void visitRule(CldrValue value) {
if (RULE.matchesSuffixOf(value.getPath())) {
RbPath prefix = RbPath.of("rules", "set" + setNum, RULE_TYPE.valueFrom(value));
value.getValueAttributes()
.forEach((k, v) -> icuData.add(prefix.extendBy(k.getAttributeName()), v));
}
}
/**
 * Adds one ICU entry per value attribute of the given rule, under the path
 * {@code rules/set<current set number>/<rule type>/<attribute name>}.
 */
private void visitRule(CldrValue value) {
    String ruleType = RULE_TYPE.valueFrom(value);
    RbPath rulePath = RbPath.of("rules", "set" + setNum, ruleType);
    value.getValueAttributes().forEach(
        (attribute, attributeValue) ->
            icuData.add(rulePath.extendBy(attribute.getAttributeName()), attributeValue));
}
}
private DayPeriodsMapper() {}
}

View file

@ -2,22 +2,20 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkState;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import com.google.common.annotations.VisibleForTesting;
@ -29,15 +27,18 @@ import com.google.common.annotations.VisibleForTesting;
* }</pre>
*/
public final class PluralRangesMapper {
// Note that this mapper only matches when there's no "type" specified on the "plurals" element.
// This is a bit weird, since the PluralsMapper expects a type (e.g. cardinal or ordinal) to
// be present. Really this just illustrates that the plural ranges just should not be under the
// same parent element as plurals.
private static final PathMatcher RANGES =
PathMatcher.of("supplementalData/plurals/pluralRanges[@locales=*]");
private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales");
private static final PathMatcher RANGE = PathMatcher.of("pluralRange[@start=*][@end=*]");
private static final CldrDataProcessor<PluralRangesMapper> CLDR_PROCESSOR;
static {
CldrDataProcessor.Builder<PluralRangesMapper> processor = CldrDataProcessor.builder();
processor
.addAction(
"//supplementalData/plurals/pluralRanges[@locales=*]", (m, p) -> m.new Ranges(p))
.addValueAction("pluralRange[@start=*][@end=*]", Ranges::visitRange);
CLDR_PROCESSOR = processor.build();
}
private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales");
private static final AttributeKey RANGE_START = keyOf("pluralRange", "start");
private static final AttributeKey RANGE_END = keyOf("pluralRange", "end");
private static final AttributeKey RANGE_RESULT = keyOf("pluralRange", "result");
@ -52,46 +53,37 @@ public final class PluralRangesMapper {
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src) {
CldrData data = src.getDataForType(SUPPLEMENTAL);
return process(data);
return process(src.getDataForType(SUPPLEMENTAL));
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData data) {
PluralRangesVisitor visitor = new PluralRangesVisitor();
data.accept(NESTED_GROUPING, visitor);
return visitor.icuData;
return CLDR_PROCESSOR.process(data, new PluralRangesMapper(), NESTED_GROUPING).icuData;
}
private static final class PluralRangesVisitor implements PrefixVisitor {
private final IcuData icuData = new IcuData("pluralRanges", false);
private final IcuData icuData = new IcuData("pluralRanges", false);
private int setIndex = 0;
private int setIndex = 0;
private String ruleLabel = null;
private PluralRangesMapper() { }
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (RANGES.matches(prefix)) {
ruleLabel = String.format("set%02d", setIndex++);
RANGES_LOCALES.listOfValuesFrom(prefix)
.forEach(l -> icuData.add(RB_LOCALES.extendBy(l), ruleLabel));
ctx.install(this::visitRange);
}
private final class Ranges {
private final String label;
/**
 * Allocates the next set label and maps every locale listed on the given prefix to it.
 */
Ranges(CldrPath prefix) {
    // Labels are zero padded to at least two digits ("set00", "set01", ...), and the shared
    // index is advanced for the next group of ranges.
    this.label = String.format("set%02d", setIndex++);
    for (String locale : RANGES_LOCALES.listOfValuesFrom(prefix)) {
        icuData.add(RB_LOCALES.extendBy(locale), this.label);
    }
}
private void visitRange(CldrValue value) {
checkState(RANGE.matchesSuffixOf(value.getPath()),
"unexpected path: %s", value.getPath());
// Note: "range:start" and "range:end" are optional attributes, but the CLDR DTD
// specifies a default via comments. They should probably be changed to just have a
// default in the DTD (and possibly converted to use an enum here).
icuData.add(RB_RULES.extendBy(ruleLabel),
icuData.add(RB_RULES.extendBy(label),
RbValue.of(
RANGE_START.valueFrom(value, "all"),
RANGE_END.valueFrom(value, "all"),
RANGE_RESULT.valueFrom(value)));
}
}
private PluralRangesMapper() {}
}
}

View file

@ -2,30 +2,29 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.cldr.api.FilteredData;
import org.unicode.cldr.api.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
/**
* A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
@ -35,15 +34,21 @@ import com.google.common.collect.Iterables;
* }</pre>
*/
public final class PluralsMapper {
private static final PathMatcher PLURALS = PathMatcher.of("supplementalData/plurals[@type=*]");
private static final AttributeKey PLURALS_TYPE = keyOf("plurals", "type");
private static final PathMatcher RULES = PathMatcher.of("pluralRules[@locales=*]");
private static final AttributeKey RULES_LOCALES = keyOf("pluralRules", "locales");
private static final PathMatcher RULE = PathMatcher.of("pluralRule[@count=*]");
private static final AttributeKey RULE_COUNT = keyOf("pluralRule", "count");
private static final CldrDataProcessor<PluralsMapper> CLDR_PROCESSOR;
static {
CldrDataProcessor.Builder<PluralsMapper> processor = CldrDataProcessor.builder();
processor
.addAction("//supplementalData/plurals[@type=*]", (m, p) -> m.new Plurals(p))
.addAction("pluralRules[@locales=*]", Rules::new, Plurals::addRules)
.addValueAction("pluralRule[@count=*]", Rules::addRule);
CLDR_PROCESSOR = processor.build();
}
private static final ImmutableMap<String, RbPath> ICU_PREFIX_MAP =
ImmutableMap.of("cardinal", RbPath.of("locales"), "ordinal", RbPath.of("locales_ordinals"));
@ -54,103 +59,94 @@ public final class PluralsMapper {
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src) {
CldrData data = src.getDataForType(SUPPLEMENTAL);
return process(data);
return process(src.getDataForType(SUPPLEMENTAL));
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData data) {
PluralsVisitor visitor = new PluralsVisitor();
// Note: We explicitly reset the type to mimic the order of the existing code, since this
PluralsMapper mapper = new PluralsMapper();
// Note: We explicitly filter by type to mimic the order of the existing code, since this
// affects the set indices we generate during processing. Ideally this would all be immune
// to ordering (or just enforce DTD ordering) but right now it's very dependent on
// mimicking the order of the existing code to get identical output.
data.accept(NESTED_GROUPING, visitor.setType("cardinal"));
data.accept(NESTED_GROUPING, visitor.setType("ordinal"));
return visitor.icuData;
// mimicking the order of the existing code to get identical output. Once DTD order is
// everywhere, this can just be a single pass over the original data.
CLDR_PROCESSOR.process(filterByType(data, "cardinal"), mapper, NESTED_GROUPING);
CLDR_PROCESSOR.process(filterByType(data, "ordinal"), mapper, NESTED_GROUPING);
return mapper.icuData;
}
private static final class PluralsVisitor implements PrefixVisitor {
// Mutable ICU data collected into during visitation.
// In a post XML-aware API, is recording the XML file names really a good idea?
private final IcuData icuData = new IcuData("plurals", false);
// Filter for the type we are processing now (this could be removed if we don't mind which
// order the types are processed, and switching to DTD ordering would make it stable).
private String type = null;
private final List<ImmutableMap<String, String>> previousRules = new ArrayList<>();
// Mutable ICU data collected into during visitation.
// In a post XML-aware API, is recording the XML file names really a good idea?
private final IcuData icuData = new IcuData("plurals", false);
private final List<ImmutableMap<String, String>> previousRules = new ArrayList<>();
// Hack method to allow a single type to be processed at a time (the visitor would otherwise
// happily handle both types in a single pass). We can't do this as two different visitors
// (one for each type) because the current behaviour relies on carrying over the calculated
// set numbers from one pass to the next. Once migration is complete we should revisit this
// and allow this visitor to work in a single pass (probably with DTD order for stability).
PluralsVisitor setType(String type) {
this.type = checkNotNull(type);
return this;
private class Plurals {
private final RbPath icuPrefix;
Plurals(CldrPath prefix) {
// Note: "plurals:type" is an optional attribute but the CLDR DTD specifies a
// default via comments. It should probably be changed to just have a default in
// the DTD.
this.icuPrefix = ICU_PREFIX_MAP.get(PLURALS_TYPE.valueFrom(prefix, "cardinal"));
}
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (PLURALS.matches(prefix)) {
// Note: "plurals:type" is an optional attribute but the CLDR DTD specifies a
// default via comments. It should probably be changed to just have a default in
// the DTD.
if (PLURALS_TYPE.valueFrom(prefix, "cardinal").equals(type)) {
ctx.install(new RulesVisitor(ICU_PREFIX_MAP.get(type)));
}
}
}
private final class RulesVisitor implements PrefixVisitor {
private final RbPath icuPrefix;
private final List<String> locales = new ArrayList<>();
private final Map<String, String> rules = new LinkedHashMap<>();
RulesVisitor(RbPath icuPrefix) {
this.icuPrefix = checkNotNull(icuPrefix);
}
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
if (RULES.matchesSuffixOf(prefix)) {
Iterables.addAll(locales, RULES_LOCALES.listOfValuesFrom(prefix));
ctx.install(value -> {
if (RULE.matchesSuffixOf(value.getPath())) {
rules.put(RULE_COUNT.valueFrom(value), value.getValue());
}
});
}
}
@Override
public void visitPrefixEnd(CldrPath prefix) {
checkState(!locales.isEmpty(), "missing locale data for plurals: %s", prefix);
// Note: The original mapper code "sort of" coped with empty rules, but it's not
// completely well behaved (or documented), so since this doesn't happen in the
// current CLDR data, I decided to just prohibit it in the new code. Support can
// easily be added in once the expected semantics are clear.
checkState(!rules.isEmpty(), "missing rule data for plurals: %s", prefix);
// Have we seen this set of rules before? If so, reuse the existing index. Note
// that an IDE might report this call as suspicious because the key is not yet an
// immutable map (saves creating immutable maps just to check for inclusion) but
// this is fine because collection equality is based only on contents, not
// collection type.
int idx = previousRules.indexOf(rules);
if (idx == -1) {
int newIdx = previousRules.size();
rules.forEach((k, v) -> icuData.add(RbPath.of("rules", "set" + newIdx, k), v));
// Since "rules" is mutable and reused, we must take an immutable copy here.
previousRules.add(ImmutableMap.copyOf(rules));
idx = newIdx;
}
String setName = "set" + idx;
locales.forEach(locale -> icuData.add(icuPrefix.extendBy(locale), setName));
rules.clear();
locales.clear();
// Records the completed rules for a group of locales: emits the rules as a new "setN" entry
// if this exact mapping has not been seen before (otherwise the existing set index is reused)
// and then maps each of the group's locales to that set name.
private void addRules(Rules r) {
ImmutableMap<String, String> rules = r.getRules();
// Note: The original mapper code "sort of" coped with empty rules, but it's not
// completely well behaved (or documented), so since this doesn't happen in the
// current CLDR data, I decided to just prohibit it in the new code. Support can
// easily be added in once the expected semantics are clear.
checkState(!rules.isEmpty(), "missing rule data for plurals");
// Have we seen this set of rules before? If so, reuse the existing index. The lookup
// relies on map equality, so two rule sets are treated as the same when they contain
// the same count-to-rule entries.
int idx = previousRules.indexOf(rules);
if (idx == -1) {
int newIdx = previousRules.size();
rules.forEach((k, v) -> icuData.add(RbPath.of("rules", "set" + newIdx, k), v));
// "rules" is already an immutable map, so it can be stored directly without copying.
previousRules.add(rules);
idx = newIdx;
}
String setName = "set" + idx;
r.getLocales().forEach(locale -> icuData.add(icuPrefix.extendBy(locale), setName));
}
}
private PluralsMapper() {}
/**
 * Collects the plural rules for one group of locales: the locale list is captured when the
 * instance is created and the count-to-rule mappings are accumulated as values are visited.
 */
private static class Rules {
    private final ImmutableList<String> locales;
    private final ImmutableMap.Builder<String, String> ruleBuilder = ImmutableMap.builder();

    /** Captures the locales from the prefix path, rejecting an empty locale list. */
    private Rules(CldrPath prefix) {
        ImmutableList<String> localeIds =
            ImmutableList.copyOf(RULES_LOCALES.listOfValuesFrom(prefix));
        checkState(!localeIds.isEmpty(), "missing locale data for plurals: %s", prefix);
        this.locales = localeIds;
    }

    /** Records the rule text of the given value against its plural count. */
    private void addRule(CldrValue value) {
        String count = RULE_COUNT.valueFrom(value);
        ruleBuilder.put(count, value.getValue());
    }

    /** Returns the locales to which these rules apply. */
    private ImmutableList<String> getLocales() {
        return locales;
    }

    /** Returns an immutable map of the rules added so far (built afresh on each call). */
    private ImmutableMap<String, String> getRules() {
        return ruleBuilder.build();
    }
}
/**
 * Returns a view of the given data restricted to values under the {@code plurals} element
 * with the given type.
 *
 * <p>This is a hack to allow "cardinal" data to be processed before "ordinal" data (even
 * though DTD order is the other way round). Once DTD order is the only ordering used, this
 * can be removed.
 */
private static CldrData filterByType(CldrData data, String pluralType) {
    String typedPlurals = "//supplementalData/plurals[@type=\"" + pluralType + "\"]";
    PathMatcher typeMatcher = PathMatcher.of(typedPlurals);
    return new FilteredData(data) {
        @Override
        protected CldrValue filter(CldrValue value) {
            if (typeMatcher.matchesPrefixOf(value.getPath())) {
                return value;
            }
            // NOTE(review): null presumably tells FilteredData to drop the value; confirm
            // against the FilteredData contract.
            return null;
        }
    };
}
}

View file

@ -7,16 +7,15 @@ import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import com.google.common.escape.UnicodeEscaper;
@ -26,21 +25,26 @@ import com.google.common.escape.UnicodeEscaper;
* //ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]
* }</pre>
*/
// TODO: This class can almost certainly be written using RegexTransformer and a small config.
public final class RbnfMapper {
private static final PathMatcher RULE_SET =
PathMatcher.of("ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]");
private static final AttributeKey GROUPING_TYPE = keyOf("rulesetGrouping", "type");
private static final AttributeKey RULESET_TYPE = keyOf("ruleset", "type");
private static final PathMatcher RBNF_RULE = PathMatcher.of("rbnfrule");
private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
private static final AttributeKey RBNF_VALUE = keyOf("rbnfrule", "value");
private static final AttributeKey RBNF_RADIX = keyOf("rbnfrule", "radix");
private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
// This is the ICU path prefix, below which everything generated by this visitor will go.
private static final RbPath RB_ROOT = RbPath.of("RBNFRules");
private static final CldrDataProcessor<RbnfMapper> RBNF_PROCESSOR;
static {
CldrDataProcessor.Builder<RbnfMapper> processor = CldrDataProcessor.builder();
processor
.addAction(
"//ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]", (m, p) -> m.new Ruleset(p))
.addValueAction("rbnfrule", Ruleset::addRule);
RBNF_PROCESSOR = processor.build();
}
/**
* Processes data from the given supplier to generate RBNF data for a set of locale IDs.
*
@ -55,89 +59,78 @@ public final class RbnfMapper {
// Using DTD order is essential here because the RBNF paths contain ordered elements,
// so we must ensure that they appear in sorted order (otherwise we'd have to do more
// work at this end to re-sort the results).
RulesetVisitor visitor = new RulesetVisitor(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
RbnfMapper mapper = new RbnfMapper(icuData);
icuSpecialData.ifPresent(s -> RBNF_PROCESSOR.process(s, mapper, DTD));
RBNF_PROCESSOR.process(cldrData, mapper, DTD);
return mapper.icuData;
}
static final class RulesetVisitor implements PrefixVisitor {
private final IcuData icuData;
private final IcuData icuData;
private RbnfMapper(IcuData icuData) {
this.icuData = checkNotNull(icuData);
}
private RulesetVisitor(IcuData icuData) {
this.icuData = checkNotNull(icuData);
private class Ruleset {
private final RbPath rbPath;
private final String rulesetType;
private final boolean isStrict;
private boolean hasHeader = false;
Ruleset(CldrPath prefix) {
this.rbPath = RB_ROOT.extendBy(GROUPING_TYPE.valueFrom(prefix));
this.rulesetType = RULESET_TYPE.valueFrom(prefix);
this.isStrict = !"lenient-parse".equals(rulesetType);
}
@Override public void visitPrefixStart(CldrPath prefix, Context context) {
if (RULE_SET.matchesPrefixOf(prefix)) {
RbPath rbPath = RB_ROOT.extendBy(GROUPING_TYPE.valueFrom(prefix));
String rulesetType = RULESET_TYPE.valueFrom(prefix);
boolean isStrict = !"lenient-parse".equals(rulesetType);
// This is rather hacky because the access attribute lives on the parent path
// element, but we cannot use it until we visit the child values (because it's a
// value attribute and will not be in the prefix path). So we need to add the
// header only once, just before we start adding the values relating to the child
// elements, so we need a flag.
//
// This cannot be a boolean field since it must be "effectively final".
AtomicBoolean hasHeader = new AtomicBoolean(false);
context.install(
value -> {
if (RBNF_RULE.matchesSuffixOf(value.getPath())) {
if (!hasHeader.get()) {
boolean isPrivate =
RULESET_ACCESS.valueFrom(value, "public").equals("private");
icuData.add(rbPath, (isPrivate ? "%%" : "%") + rulesetType + ":");
hasHeader.set(true);
}
String rulePrefix = "";
if (isStrict) {
String basePrefix = RBNF_VALUE.valueFrom(value);
rulePrefix = RBNF_RADIX.optionalValueFrom(value)
.map(r -> basePrefix + "/" + r)
.orElse(basePrefix);
rulePrefix += ": ";
}
icuData.add(
rbPath,
rulePrefix + ESCAPE_RBNF_DATA.escape(value.getValue()));
}
});
/**
 * Adds a single RBNF rule to the ICU data, preceded by the ruleset header if this is the
 * first rule seen for this ruleset.
 */
void addRule(CldrValue value) {
    // This is a bit hacky: the access attribute lives on the parent path element, but it is
    // a value attribute (so it is not in the prefix path) and can only be read once a child
    // value is visited. Hence the header is emitted lazily, exactly once, from here.
    if (!hasHeader) {
        String access = RULESET_ACCESS.valueFrom(value, "public");
        String marker = access.equals("private") ? "%%" : "%";
        icuData.add(rbPath, marker + rulesetType + ":");
        hasHeader = true;
    }
    // Strict rules are prefixed by "@value: " or "@value/@radix: "; non strict rules have no
    // prefix at all.
    StringBuilder rule = new StringBuilder();
    if (isStrict) {
        rule.append(RBNF_VALUE.valueFrom(value));
        RBNF_RADIX.optionalValueFrom(value).ifPresent(radix -> rule.append("/").append(radix));
        rule.append(": ");
    }
    rule.append(ESCAPE_RBNF_DATA.escape(value.getValue()));
    icuData.add(rbPath, rule.toString());
}
}
/*
* Convert characters outside the range U+0020 to U+007F to Unicode escapes, and convert
* backslash to a double backslash. This class is super slow for non-ASCII escaping due to
* using "String.format()", however there's < 100 values that need any escaping, so it's
* fine.
*/
private static final UnicodeEscaper ESCAPE_RBNF_DATA = new UnicodeEscaper() {
private final char[] DOUBLE_BACKSLASH = "\\\\".toCharArray();
private final char[] LEFT_ANGLE = "<".toCharArray();
private final char[] RIGHT_ANGLE = ">".toCharArray();
/*
* Convert characters outside the range U+0020 to U+007F to Unicode escapes, and convert
* backslash to a double backslash. This class is super slow for non-ASCII escaping due to
* using "String.format()", however there's < 100 values that need any escaping, so it's
* fine.
*/
private static final UnicodeEscaper ESCAPE_RBNF_DATA = new UnicodeEscaper() {
private final char[] DOUBLE_BACKSLASH = "\\\\".toCharArray();
private final char[] LEFT_ANGLE = "<".toCharArray();
private final char[] RIGHT_ANGLE = ">".toCharArray();
@Override
protected char[] escape(int cp) {
// Returning null means "do not escape".
switch (cp) {
case '\\':
return DOUBLE_BACKSLASH;
case '←':
return LEFT_ANGLE;
case '→':
return RIGHT_ANGLE;
default:
if (0x0020 <= cp && cp <= 0x007F) {
return null;
} else if (cp <= 0xFFFF) {
return String.format("\\u%04X", cp).toCharArray();
}
return String.format("\\U%08X", cp).toCharArray();
@Override
protected char[] escape(int cp) {
// Returning null means "do not escape".
switch (cp) {
case '\\':
return DOUBLE_BACKSLASH;
case '←':
return LEFT_ANGLE;
case '→':
return RIGHT_ANGLE;
default:
if (0x0020 <= cp && cp <= 0x007F) {
return null;
} else if (cp <= 0xFFFF) {
return String.format("\\u%04X", cp).toCharArray();
}
return String.format("\\U%08X", cp).toCharArray();
}
};
}
}
};
}

View file

@ -5,11 +5,13 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
import java.util.function.Predicate;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
@ -36,18 +38,21 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
*/
// TODO: Improve external data splitting and remove need for a PathMatcher here.
public static IcuData process(
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
CldrDataSupplier src,
PathValueTransformer transformer,
String icuName,
Predicate<CldrPath> paths) {
IcuData icuData = new IcuData(icuName, false);
new SupplementalMapper(src, transformer, paths).addIcuData(icuData);
return icuData;
}
private final PathMatcher paths;
private final Predicate<CldrPath> paths;
private int fifoCounter = 0;
private SupplementalMapper(
CldrDataSupplier src, PathValueTransformer transformer, PathMatcher pathFilter) {
CldrDataSupplier src, PathValueTransformer transformer, Predicate<CldrPath> pathFilter) {
super(src.getDataForType(CldrDataType.SUPPLEMENTAL), transformer);
this.paths = checkNotNull(pathFilter);
@ -63,7 +68,7 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
}
private void visit(CldrValue value) {
if (paths.matchesPrefixOf(value.getPath())) {
if (paths.test(value.getPath())) {
transformValue(value).forEach(this::collectResult);
fifoCounter++;
}

View file

@ -6,7 +6,6 @@ import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.nio.file.StandardOpenOption.CREATE_NEW;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
import java.io.IOException;
@ -20,14 +19,13 @@ import java.util.function.Function;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
@ -43,8 +41,15 @@ import com.ibm.icu.text.Transliterator;
* <p>This mapper also writes out the transform rule files into a specified directory.
*/
public final class TransformsMapper {
private static final PathMatcher TRULE =
PathMatcher.of("supplementalData/transforms/transform/tRule");
private static final CldrDataProcessor<TransformsMapper> CLDR_PROCESSOR;
static {
CldrDataProcessor.Builder<TransformsMapper> processor = CldrDataProcessor.builder();
processor.addValueAction(
"//supplementalData/transforms/transform/tRule", TransformsMapper::processRule);
CLDR_PROCESSOR = processor.build();
}
private static final AttributeKey TRANSFORM_SOURCE = keyOf("transform", "source");
private static final AttributeKey TRANSFORM_TARGET = keyOf("transform", "target");
private static final AttributeKey TRANSFORM_DIRECTION = keyOf("transform", "direction");
@ -99,74 +104,68 @@ public final class TransformsMapper {
static IcuData process(
CldrData cldrData, Function<Path, PrintWriter> fileWriterFn, List<String> header) {
RuleVisitor visitor = new RuleVisitor(fileWriterFn, header);
cldrData.accept(DTD, visitor);
addSpecialCaseValues(visitor.icuData);
return visitor.icuData;
TransformsMapper mapper = new TransformsMapper(fileWriterFn, header);
CLDR_PROCESSOR.process(cldrData, mapper);
addSpecialCaseValues(mapper.icuData);
return mapper.icuData;
}
private static class RuleVisitor implements ValueVisitor {
private final IcuData icuData = new IcuData("root", false);
private final Function<Path, PrintWriter> outFn;
private final ImmutableList<String> header;
private final IcuData icuData = new IcuData("root", false);
private final Function<Path, PrintWriter> outFn;
private final ImmutableList<String> header;
RuleVisitor(Function<Path, PrintWriter> outFn, List<String> header) {
this.outFn = checkNotNull(outFn);
this.header = ImmutableList.copyOf(header);
icuData.setFileComment("File: root.txt");
private TransformsMapper(Function<Path, PrintWriter> outFn, List<String> header) {
this.outFn = checkNotNull(outFn);
this.header = ImmutableList.copyOf(header);
icuData.setFileComment("File: root.txt");
}
/**
 * Handles one {@code tRule} value: works out the name of the rule file from the transform's
 * source, target and (optional) variant, then adds the index entry and writes the rule file.
 */
private void processRule(CldrValue value) {
    String source = getExpectedOptionalAttribute(value, TRANSFORM_SOURCE);
    String target = getExpectedOptionalAttribute(value, TRANSFORM_TARGET);
    Optional<String> variant = TRANSFORM_VARIANT.optionalValueFrom(value);

    // The filename has the form "<source>_<target>[_<variant>].txt".
    String variantSuffix = variant.map(v -> "_" + v).orElse("");
    String filename = source + "_" + target + variantSuffix + ".txt";

    writeRootIndexEntry(value, source, target, variant, filename);
    writeDataFile(filename, value);
}
/**
 * Writes the rule file for a single transform.
 *
 * <p>The writer is obtained from {@code outFn} for the given filename and is closed when this
 * method returns. The output consists of U+FEFF, each header line as a {@code #} comment, a
 * short "generated" preamble naming the file, and finally the rule text itself.
 *
 * @param filename the name of the rule file (also echoed in the file's preamble).
 * @param value the CLDR value whose text is written out as the rule data.
 */
private void writeDataFile(String filename, CldrValue value) {
try (PrintWriter out = outFn.apply(Paths.get(filename))) {
// U+FEFF (the byte order mark character) is written first, before any header text.
out.print("\uFEFF");
header.forEach(s -> out.println("# " + s));
out.println("#");
out.println("# File: " + filename);
out.println("# Generated from CLDR");
out.println("#");
out.println();
// The value text is trimmed and then passed through FIXUP before being written.
// NOTE(review): whitespace() is presumably Guava's CharMatcher.whitespace() and FIXUP a
// Transliterator defined elsewhere in this class - confirm against the full file.
out.println(FIXUP.transliterate(whitespace().trimFrom(value.getValue())));
out.println();
}
}
@Override public void visit(CldrValue value) {
// The other possible element is "comment" but we currently ignore those.
if (TRULE.matches(value.getPath())) {
String source = getExpectedOptionalAttribute(value, TRANSFORM_SOURCE);
String target = getExpectedOptionalAttribute(value, TRANSFORM_TARGET);
Optional<String> variant = TRANSFORM_VARIANT.optionalValueFrom(value);
String baseFilename = source + "_" + target;
String filename =
variant.map(v -> baseFilename + "_" + v).orElse(baseFilename) + ".txt";
writeRootIndexEntry(value, source, target, variant, filename);
writeDataFile(filename, value);
}
private void writeRootIndexEntry(
CldrValue value, String source, String target, Optional<String> variant, String filename) {
Visibility visibility = TRANSFORM_VISIBILITY.valueFrom(value, Visibility.class);
String status = visibility == Visibility.internal ? "internal" : "file";
Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class);
// TODO: Consider checks for unused data (e.g. forward aliases in a backward rule).
if (dir != Direction.backward) {
String id = getId(source, target, variant);
TRANSFORM_ALIAS.listOfValuesFrom(value)
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
icuData.add(rbPrefix.extendBy("direction"), "FORWARD");
}
/**
 * Writes the rule file for a single transform using a writer obtained from {@code outFn}.
 *
 * @param filename the name of the rule file (also echoed in the file's preamble).
 * @param value the CLDR value whose text is written out as the rule data.
 */
private void writeDataFile(String filename, CldrValue value) {
try (PrintWriter out = outFn.apply(Paths.get(filename))) {
// U+FEFF (the byte order mark character) is written first, before any header text.
out.print("\uFEFF");
header.forEach(s -> out.println("# " + s));
out.println("#");
out.println("# File: " + filename);
out.println("# Generated from CLDR");
out.println("#");
out.println();
// The value text is trimmed and then passed through FIXUP before being written.
out.println(FIXUP.transliterate(whitespace().trimFrom(value.getValue())));
out.println();
}
}
}
private void writeRootIndexEntry(
CldrValue value, String source, String target, Optional<String> variant, String filename) {
Visibility visibility = TRANSFORM_VISIBILITY.valueFrom(value, Visibility.class);
String status = visibility == Visibility.internal ? "internal" : "file";
Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class);
// TODO: Consider checks for unused data (e.g. forward aliases in a backward rule).
if (dir != Direction.backward) {
String id = getId(source, target, variant);
TRANSFORM_ALIAS.listOfValuesFrom(value)
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
icuData.add(rbPrefix.extendBy("direction"), "FORWARD");
}
if (dir != Direction.forward) {
String id = getId(target, source, variant);
TRANSFORM_BACKALIAS.listOfValuesFrom(value)
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
icuData.add(rbPrefix.extendBy("direction"), "REVERSE");
}
if (dir != Direction.forward) {
String id = getId(target, source, variant);
TRANSFORM_BACKALIAS.listOfValuesFrom(value)
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
icuData.add(rbPrefix.extendBy("direction"), "REVERSE");
}
}

View file

@ -46,14 +46,14 @@ public class AlternateLocaleDataTest {
FakeDataSupplier src = new FakeDataSupplier()
.addLocaleData("xx", target, source, other)
.addInheritedData("xx", inherited);
CldrDataSupplier transformed = AlternateLocaleData.transform(
src,
ImmutableMap.of(target.getPath(), source.getPath()),
ImmutableTable.of());
CldrDataSupplier transformed =
AlternateLocaleData.transform(
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
// Note that the source is always removed (unless it's also a target).
assertValuesUnordered(unresolved, altValue, other);
assertValuesUnordered(resolved, altValue, other, inherited);
}
@ -67,10 +67,9 @@ public class AlternateLocaleDataTest {
ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target);
CldrDataSupplier transformed = AlternateLocaleData.transform(
src,
ImmutableMap.of(target.getPath(), source.getPath()),
ImmutableTable.of());
CldrDataSupplier transformed =
AlternateLocaleData.transform(
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
@ -87,19 +86,21 @@ public class AlternateLocaleDataTest {
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name");
CldrValue source =
ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
CldrValue other =
ldml("numbers/currencies/currency[@type=\"EUR\"]/displayName", "Euro");
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source);
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source, other);
CldrDataSupplier transformed =
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()),
ImmutableTable.of());
AlternateLocaleData.transform(
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
// If there's no target the alt-path mapping is incomplete and we do nothing (this matches
// the old CLDR tool behaviour and is reasonable, but can hide inconsistencies in CLDR data).
assertValuesUnordered(unresolved, source);
assertValuesUnordered(resolved, source);
// Even though the missing target is not matched (so no change there) the source is always
// removed from the transformed data.
assertValuesUnordered(unresolved, other);
assertValuesUnordered(resolved, other);
}
@Test

View file

@ -0,0 +1,154 @@
// © 2020 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableMap;
@RunWith(JUnit4.class)
public class CldrDataProcessorTest {
private static final AttributeKey TERRITORY_TYPE = AttributeKey.keyOf("territory", "type");
private static final AttributeKey CURRENCY_TYPE = AttributeKey.keyOf("currency", "type");
/**
 * An overly simplistic value type for currency data, for testing purposes only.
 *
 * <p>In real code you would probably want an immutable type and a separate builder, or a
 * mutable type used purely to collect values (which would not need equals/hashCode). This
 * class serves both purposes in the test: instances are mutated while values are processed
 * and are also compared against expected instances in assertions.
 */
private static final class CurrencyData {
    final String key;
    String name = "";
    String symbol = "";

    /** Creates data for the given key with an empty name and symbol. */
    CurrencyData(String key) {
        this.key = key;
    }

    /** Creates fully populated data. */
    CurrencyData(String key, String name, String symbol) {
        this.key = key;
        this.name = name;
        this.symbol = symbol;
    }

    /** Two instances are equal only if their key, name and symbol are all equal. */
    @Override public boolean equals(Object o) {
        if (o instanceof CurrencyData) {
            CurrencyData that = (CurrencyData) o;
            return key.equals(that.key) && name.equals(that.name) && symbol.equals(that.symbol);
        }
        return false;
    }

    @Override public int hashCode() {
        return Objects.hash(key, name, symbol);
    }

    @Override public String toString() {
        // Show every field which participates in equals(), so that the message of a failed
        // assertion reveals exactly how two instances differ (including by key alone).
        return String.format("CurrencyData{key='%s', name='%s', symbol='%s'}", key, name, symbol);
    }
}
/** Mutable holder for the values collected while processing (both maps start out empty). */
private static final class State {
    ImmutableMap<String, String> names = ImmutableMap.of();
    ImmutableMap<String, CurrencyData> currencies = ImmutableMap.of();

    /** Stores an immutable copy of the given territory names. */
    void setNames(Map<String, String> collected) {
        this.names = ImmutableMap.copyOf(collected);
    }

    /** Stores an immutable copy of the given currency data. */
    void setCurrencies(Map<String, CurrencyData> collected) {
        this.currencies = ImmutableMap.copyOf(collected);
    }
}
/** The processor under test, built once and shared by all the tests in this class. */
private static final CldrDataProcessor<State> VISITOR = createTestVisitor();
/**
 * Builds the processor used by the tests. It has two independent branches: territory names
 * (an action with a nested value action) and currencies (an action, a nested action and then
 * value actions on that), each of which collects into a map that is handed to {@link State}.
 */
private static CldrDataProcessor<State> createTestVisitor() {
// Note that this is deliberately doing things the "messy" way by creating and then copying
// a map. This is to show an extra level of processing in tests. You could just have a
// value action which adds the territory to a map in the State object.
CldrDataProcessor.Builder<State> builder = CldrDataProcessor.builder();
// Territories: a new map is supplied for the "territories" element and passed to
// State::setNames; each territory value is put into that map keyed by its "type" attribute.
// NOTE(review): State::setNames copies the map, and the tests expect the copied entries, so
// it is presumably invoked only after the nested value actions have run - confirm.
builder
.addAction(
"//ldml/localeDisplayNames/territories",
() -> new LinkedHashMap<String, String>(),
State::setNames)
.addValueAction(
"territory[@type=*]",
(map, value) -> map.put(value.getPath().get(TERRITORY_TYPE), value.getValue()));
// Another convoluted example for testing. This has the same additional level for a map
// just so we can show a 3-level processor. In real code this wouldn't look so messy.
CldrDataProcessor.SubProcessor<CurrencyData> currencyProcessor = builder
.addAction(
"//ldml/numbers/currencies",
() -> new LinkedHashMap<String, CurrencyData>(),
State::setCurrencies)
.addAction(
"currency[@type=*]",
(map, path) -> new CurrencyData(path.get(CURRENCY_TYPE)),
(map, data) -> map.put(data.key, data));
// Third level: the child values of a currency element fill in the CurrencyData instance
// which was created for that element above.
currencyProcessor.addValueAction(
"displayName",
(data, value) -> data.name = value.getValue());
currencyProcessor.addValueAction(
"symbol",
(data, value) -> data.symbol = value.getValue());
return builder.build();
}
/** Territory names are collected by a value action nested below a single action. */
@Test
public void testTwoLevelProcessing() {
    CldrData territories = CldrDataSupplier.forValues(Arrays.asList(
        ldml("localeDisplayNames/territories/territory[@type=\"BE\"]", "Belgium"),
        ldml("localeDisplayNames/territories/territory[@type=\"CH\"]", "Switzerland"),
        ldml("localeDisplayNames/territories/territory[@type=\"IN\"]", "India")));

    State result = VISITOR.process(territories, new State(), CldrData.PathOrder.DTD);

    // Each territory's "type" attribute maps to its value, in this exact order.
    assertThat(result.names)
        .containsExactly(
            "BE", "Belgium",
            "CH", "Switzerland",
            "IN", "India")
        .inOrder();
}
/** Currency data is collected via an action, a nested action and then value actions. */
@Test
public void testThreeLevelProcessing() {
    CldrData currencies = CldrDataSupplier.forValues(Arrays.asList(
        ldml("numbers/currencies/currency[@type=\"EUR\"]/displayName", "euro"),
        ldml("numbers/currencies/currency[@type=\"EUR\"]/symbol", ""),
        ldml("numbers/currencies/currency[@type=\"CHF\"]/displayName", "Swiss franc"),
        ldml("numbers/currencies/currency[@type=\"CHF\"]/symbol", "Fr."),
        ldml("numbers/currencies/currency[@type=\"INR\"]/displayName", "Indian rupee"),
        ldml("numbers/currencies/currency[@type=\"INR\"]/symbol", "")));

    State result = VISITOR.process(currencies, new State(), CldrData.PathOrder.DTD);

    // The values above are listed EUR, CHF, INR but the expected (ordered) result is
    // CHF, EUR, INR, so the result order is not simply the input order; presumably it
    // reflects the DTD path order requested in the call above.
    assertThat(result.currencies)
        .containsExactly(
            "CHF", new CurrencyData("CHF", "Swiss franc", "Fr."),
            "EUR", new CurrencyData("EUR", "euro", ""),
            "INR", new CurrencyData("INR", "Indian rupee", ""))
        .inOrder();
}
/** Parses a value whose path is the given suffix appended to the {@code //ldml/} root. */
private static CldrValue ldml(String path, String value) {
    String fullPath = "//ldml/" + path;
    return CldrValue.parseValue(fullPath, value);
}
}

View file

@ -1,158 +0,0 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrPath;
@RunWith(JUnit4.class)
public class PathMatcherTest {
/** Exercises full, prefix and suffix matching against a fixed set of calendar paths. */
@Test
public void testMatcher() {
    CldrPath buddhistEra = parseDistinguishingPath(
        "//ldml/dates/calendars/calendar[@type=\"buddhist\"]/eras/eraAbbr/era[@type=\"0\"]");
    CldrPath chineseFirst = monthInfo("chinese", "format", "abbreviated", 1);
    CldrPath chineseSecond = monthInfo("chinese", "format", "abbreviated", 2);
    CldrPath genericFirst = monthInfo("generic", "stand-alone", "narrow", 1);
    CldrPath genericSecond = monthInfo("generic", "stand-alone", "narrow", 2);
    List<CldrPath> paths =
        Arrays.asList(buddhistEra, chineseFirst, chineseSecond, genericFirst, genericSecond);

    // A bare calendar prefix matches the start of every path, but never a whole path or the
    // end of one.
    PathMatcher anyCalendar = PathMatcher.of("ldml/dates/calendars/calendar");
    assertThat(paths.stream().allMatch(anyCalendar::matchesPrefixOf)).isTrue();
    assertThat(paths.stream().noneMatch(anyCalendar::matches)).isTrue();
    assertThat(paths.stream().noneMatch(anyCalendar::matchesSuffixOf)).isTrue();

    // Restricting the calendar type narrows the prefix match.
    PathMatcher chineseOnly =
        PathMatcher.of("ldml/dates/calendars/calendar[@type=\"chinese\"]");
    assertThat(paths.stream().filter(chineseOnly::matchesPrefixOf))
        .containsExactly(chineseFirst, chineseSecond);

    // Suffix matching with wildcard attribute values.
    PathMatcher monthSuffix = PathMatcher.of("monthWidth[@type=*]/month[@type=*]");
    assertThat(paths.stream().filter(monthSuffix::matchesSuffixOf))
        .containsExactly(chineseFirst, chineseSecond, genericFirst, genericSecond);

    // Suffix matching with a mix of explicit and wildcard attribute values; a suffix
    // specification never matches a complete path.
    PathMatcher narrowMonthSuffix =
        PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
    assertThat(paths.stream().filter(narrowMonthSuffix::matchesSuffixOf))
        .containsExactly(genericFirst, genericSecond);
    assertThat(paths.stream().filter(narrowMonthSuffix::matches)).isEmpty();

    // A single element suffix.
    PathMatcher firstMonthSuffix = PathMatcher.of("month[@type=\"1\"]");
    assertThat(paths.stream().filter(firstMonthSuffix::matchesSuffixOf))
        .containsExactly(chineseFirst, genericFirst);

    // A complete path specification matches exactly one of the paths.
    PathMatcher exact = PathMatcher.of("ldml/dates"
        + "/calendars/calendar[@type=\"generic\"]"
        + "/months/monthContext[@type=\"stand-alone\"]"
        + "/monthWidth[@type=\"narrow\"]"
        + "/month[@type=\"2\"]");
    assertThat(paths.stream().filter(exact::matches)).containsExactly(genericSecond);
}
/** A {@code *} may stand in for an element name as well as for an attribute value. */
@Test
public void testWildcardSegment() {
    PathMatcher matcher = PathMatcher.of("ldml/dates"
        + "/calendars/calendar[@type=\"generic\"]"
        + "/*/*[@type=\"format\"]/*[@type=\"narrow\"]/*[@type=*]");

    // Generic calendar, "format" context and "narrow" width: months and days both match.
    assertThat(matcher.matches(monthInfo("generic", "format", "narrow", 1))).isTrue();
    assertThat(matcher.matches(monthInfo("generic", "format", "narrow", 9))).isTrue();
    assertThat(matcher.matches(dayInfo("generic", "format", "narrow", "sun"))).isTrue();

    // Changing any one of the explicit attribute values prevents the match.
    assertThat(matcher.matches(monthInfo("chinese", "format", "narrow", 1))).isFalse();
    assertThat(matcher.matches(monthInfo("generic", "stand-alone", "narrow", 1))).isFalse();
    assertThat(matcher.matches(dayInfo("generic", "format", "wide", "mon"))).isFalse();
}
/** A combined matcher matches a complete path if either of its component matchers does. */
@Test
public void testAnyOf_match() {
    PathMatcher monthMatcher =
        PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/months"
            + "/monthContext[@type=\"format\"]/monthWidth[@type=\"narrow\"]/month[@type=*]");
    PathMatcher dayMatcher =
        PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/days"
            + "/dayContext[@type=\"format\"]/dayWidth[@type=\"narrow\"]/day[@type=*]");
    PathMatcher either = PathMatcher.anyOf(monthMatcher, dayMatcher);

    assertThat(either.matches(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
    assertThat(either.matches(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();

    // Paths with a "wide" width match neither component.
    assertThat(either.matches(monthInfo("hindu", "format", "wide", 1))).isFalse();
    assertThat(either.matches(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
}
/** A combined matcher matches a path suffix if either of its component matchers does. */
@Test
public void testAnyOf_suffix() {
    PathMatcher narrowMonths = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
    PathMatcher narrowDays = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]");
    PathMatcher either = PathMatcher.anyOf(narrowMonths, narrowDays);

    assertThat(either.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue();
    assertThat(either.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue();

    // Paths with a "wide" width match neither component.
    assertThat(either.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse();
    assertThat(either.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse();
}
/** A combined matcher matches a path prefix if either of its component matchers does. */
@Test
public void testAnyOf_prefix() {
    PathMatcher gregorianMonths =
        PathMatcher.of("ldml/dates/calendars/calendar[@type=\"gregorian\"]/months");
    PathMatcher buddhistDays =
        PathMatcher.of("ldml/dates/calendars/calendar[@type=\"buddhist\"]/days");
    PathMatcher either = PathMatcher.anyOf(gregorianMonths, buddhistDays);

    assertThat(either.matchesPrefixOf(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
    assertThat(either.matchesPrefixOf(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();

    // Paths for a different calendar type match neither component.
    assertThat(either.matchesPrefixOf(monthInfo("hindu", "format", "wide", 1))).isFalse();
    assertThat(either.matchesPrefixOf(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
}
/** Malformed path specifications are rejected when the matcher is created. */
@Test
public void testBadSpecifiers() {
    String[] invalidSpecs = {
        "",
        // Leading and trailing '/' are not permitted (they imply empty segments).
        "/foo/",
        "foo//bar",
        "foo/bad segment name",
        "foo/bar[type=*]",
        "foo/bar[@type=**]",
        "foo/bar[@type='double-quotes-only']",
    };
    for (String spec : invalidSpecs) {
        assertInvalidPathSpecification(spec);
    }
}
/**
 * Asserts that creating a matcher from {@code spec} fails with an
 * {@link IllegalArgumentException} whose message starts with the expected text and also
 * contains the specification itself.
 */
private void assertInvalidPathSpecification(String spec) {
    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> PathMatcher.of(spec));
    assertThat(thrown).hasMessageThat().startsWith("invalid path specification");
    assertThat(thrown).hasMessageThat().contains(spec);
}
/**
 * Returns the distinguishing path of a calendar month.
 *
 * @param type the calendar type (e.g. "generic").
 * @param context the month context (e.g. "format" or "stand-alone").
 * @param width the month width (e.g. "narrow").
 * @param number the month number, used as the month's "type" attribute.
 */
private static CldrPath monthInfo(String type, String context, String width, int number) {
    // The month number is formatted with "%s" rather than "%d" because "%d" localizes digits
    // according to the default locale, which could yield a "type" value that differs from the
    // ASCII digits used in the path specifications these tests match against.
    return CldrPath.parseDistinguishingPath(String.format(
        "//ldml/dates/calendars/calendar[@type=\"%s\"]"
            + "/months/monthContext[@type=\"%s\"]"
            + "/monthWidth[@type=\"%s\"]"
            + "/month[@type=\"%s\"]",
        type, context, width, number));
}
/**
 * Returns the distinguishing path of a calendar day.
 *
 * @param type the calendar type (e.g. "generic").
 * @param context the day context (e.g. "format").
 * @param width the day width (e.g. "narrow").
 * @param id the day identifier (e.g. "sun"), used as the day's "type" attribute.
 */
private static CldrPath dayInfo(String type, String context, String width, String id) {
    String path =
        "//ldml/dates/calendars/calendar[@type=\"" + type + "\"]"
            + "/days/dayContext[@type=\"" + context + "\"]"
            + "/dayWidth[@type=\"" + width + "\"]"
            + "/day[@type=\"" + id + "\"]";
    return CldrPath.parseDistinguishingPath(path);
}
}

View file

@ -5,12 +5,15 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.cldr.api.CldrValue.parseValue;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.function.Predicate;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.cldr.api.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
@ -29,8 +32,7 @@ public class SupplementalMapperTest {
supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]"),
simpleResult("/Foo", "Bar"));
PathMatcher allPaths = PathMatcher.of("supplementalData");
IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths);
IcuData icuData = SupplementalMapper.process(src, transformer, "name", p -> true);
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/Foo", "Bar");
@ -55,8 +57,7 @@ public class SupplementalMapperTest {
supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USD\"]"),
simpleResult("/CurrencyMap/US/<FIFO>/id", "USD"));
PathMatcher allPaths = PathMatcher.of("supplementalData");
IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths);
IcuData icuData = SupplementalMapper.process(src, transformer, "name", p -> true);
assertThat(icuData).getPaths().hasSize(3);
assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0000>/id", "USD");
@ -73,7 +74,8 @@ public class SupplementalMapperTest {
supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USN\"]"),
simpleResult("/CurrencyMap/US/<FIFO>/id", "USN"));
PathMatcher filter = PathMatcher.of("supplementalData/likelySubtags");
Predicate<CldrPath> filter =
PathMatcher.of("//supplementalData/likelySubtags")::matchesPrefixOf;
IcuData icuData = SupplementalMapper.process(src, transformer, "name", filter);
assertThat(icuData).getPaths().hasSize(1);