mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-21084 Migrating ICU tools to use PathMatcher
This commit is contained in:
parent
4231ca5be0
commit
566e0f8686
18 changed files with 1247 additions and 1100 deletions
|
@ -0,0 +1,457 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PathOrder;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor.Context;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.cldr.api.PathMatcher;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* An immutable processor which can be configured to process CLDR data according to a series of
|
||||
* mappings from CLDR paths to "actions".
|
||||
*
|
||||
* <p>In typical use a processor would be statically created to bind paths and handler functions
|
||||
* (actions) together, and calling {@link CldrDataProcessor#process(CldrData, Object, PathOrder)}
|
||||
* once for each {@link CldrData} instance.
|
||||
*
|
||||
* <p>A processor is built by adding a mixture of "actions" to a builder. An action either defines
|
||||
* how to handle a single value (see {@link SubProcessor#addValueAction addValueAction()}) or how
|
||||
* to start a new sub-processor at a specific point in the data hierarchy (see {@link
|
||||
* SubProcessor#addAction addAction()} or {@link SubProcessor#addSubprocessor addSubprocessor()}).
|
||||
*
|
||||
* @param <T> the main "state" type used by the processor for the top-level processing.
|
||||
*/
|
||||
public class CldrDataProcessor<T> {
|
||||
/** Returns a processor builder which operates on a "state" of type {@code <T>}. */
|
||||
public static <T> Builder<T> builder() {
|
||||
return new Builder<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* A builder for processing a CLDR data sub-hierarchy.
|
||||
*
|
||||
* @param <T> the "state" type used by the processor.
|
||||
*/
|
||||
public static abstract class SubProcessor<T> {
|
||||
// Sub-hierarchy builders registered through the addAction() overloads, in registration order.
final List<PrefixBuilder<?, T>> prefixActions = new ArrayList<>();
|
||||
// Value handlers registered through addValueAction(), in registration order.
final List<ValueAction<T>> valueActions = new ArrayList<>();
|
||||
|
||||
// Private so that only classes nested in this file (Builder and PrefixBuilder) can extend it.
private SubProcessor() { }
|
||||
|
||||
/**
|
||||
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
|
||||
* for the sub-hierarchy.
|
||||
*
|
||||
* <p>This method is intended for cases where the subtype state does not depend on the
|
||||
* parent state or the path prefix, but needs some post-processing. For example, the
|
||||
* subtype state might just be a {@code List} and the elements added to it must be
|
||||
* combined with the parent state after sub-hierarchy processing is complete.
|
||||
*
|
||||
* <pre>{@code
|
||||
* processor
|
||||
* .addAction("//parent/path", ArrayList::new, ParentState::addValues)
|
||||
* .addValueAction("value/suffix", List::add);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
|
||||
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
|
||||
* @param doneFn called after each sub-processing step.
|
||||
*/
|
||||
public <S> SubProcessor<S> addAction(
|
||||
String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn) {
|
||||
return addAction(pattern, (t, p) -> newStateFn.get(), doneFn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
|
||||
* for the sub-hierarchy.
|
||||
*
|
||||
* <p>This method is similar to {@link #addAction(String, Supplier, BiConsumer)} but is
|
||||
* intended for cases where the subtype state depends on the parent path prefix.
|
||||
*
|
||||
* <pre>{@code
|
||||
* processor
|
||||
* .addAction("//parent/path[@type=*]", SubState::fromType, ParentState::addSubState)
|
||||
* .addValueAction("value/suffix", SubState::collectValue);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
|
||||
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
|
||||
* @param doneFn called after each sub-processing step.
|
||||
*/
|
||||
public <S> SubProcessor<S> addAction(
|
||||
String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn) {
|
||||
return addAction(pattern, (t, p) -> newStateFn.apply(p), doneFn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
|
||||
* for the sub-hierarchy.
|
||||
*
|
||||
* <p>This method is intended for the case where the subtype state is derived from the
|
||||
* parent state (e.g. an inner class) but does not depend on the path prefix at which the
|
||||
* sub-hierarchy is rooted.
|
||||
*
|
||||
* <pre>{@code
|
||||
* processor
|
||||
* .addAction("//parent/path", ParentState::newValueCollector)
|
||||
* .addValueAction("value/suffix", ValueCollector::addValue);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
|
||||
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
|
||||
*/
|
||||
public <S> SubProcessor<S> addAction(String pattern, Function<T, S> newStateFn) {
|
||||
return addAction(pattern, (t, p) -> newStateFn.apply(t));
|
||||
}
|
||||
|
||||
/**
|
||||
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
|
||||
* for the sub-hierarchy.
|
||||
*
|
||||
* <p>This method is intended for the case where the subtype state is derived from the
|
||||
* parent state (e.g. an inner class) and the path prefix at which the sub-hierarchy is
|
||||
* rooted.
|
||||
*
|
||||
* <pre>{@code
|
||||
* processor
|
||||
* .addAction("//parent/path[@type=*]", ParentState::newCollectorOfType)
|
||||
* .addValueAction("value/suffix", ValueCollector::addValue);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
|
||||
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
|
||||
*/
|
||||
public <S> SubProcessor<S> addAction(
|
||||
String pattern, BiFunction<T, CldrPath, S> newStateFn) {
|
||||
return addAction(pattern, newStateFn, (t, y) -> {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
|
||||
* for the sub-hierarchy.
|
||||
*
|
||||
* <p>This method is the most general purpose way to add a sub-hierarchy action and is
|
||||
* intended for the most complex cases, where subtype state depends on parent state and
|
||||
* path prefix, and post processing is required. All other implementations of {@code
|
||||
* addAction} simply delegate to this one in one way or another.
|
||||
*
|
||||
* <pre>{@code
|
||||
* processor
|
||||
* .addAction("//parent/path[@type=*]", ParentState::newCollector, ParentState::done)
|
||||
* .addValueAction("value/suffix", ValueCollector::addValue);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
|
||||
* @param newStateFn a supplier of subtype state instances for each sub-processing step.
|
||||
* @param doneFn called after each sub-processing step.
|
||||
*/
|
||||
public <S> SubProcessor<S> addAction(
|
||||
String pattern,
|
||||
BiFunction<T, CldrPath, S> newStateFn,
|
||||
BiConsumer<T, ? super S> doneFn) {
|
||||
|
||||
PrefixBuilder<S, T> action =
|
||||
new PrefixBuilder<>(getMatcher(pattern), newStateFn, doneFn);
|
||||
prefixActions.add(action);
|
||||
return action;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new sub-processor for the specified sub-hierarchy rooted at the given
|
||||
* {@link PathMatcher} prefix pattern. The new processor builder has the same state type as
|
||||
* the parent.
|
||||
*
|
||||
* <p>This method is intended for the case where multiple sub-processors are needed below
|
||||
* a certain point in the hierarchy, but they all operate on the same state instance.
|
||||
*
|
||||
* <pre>{@code
|
||||
* SubProcessor<MyCollector> subprocessor = processor.addSubprocessor("//parent/path");
|
||||
* subprocessor.addValueAction("value/suffix", MyCollector::addValue);
|
||||
* subprocessor.addValueAction("other/suffix", MyCollector::addOtherValue);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
|
||||
*/
|
||||
public SubProcessor<T> addSubprocessor(String pattern) {
|
||||
return addAction(pattern, (t, p) -> t);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new sub-processor for the specified sub-hierarchy rooted at the given
|
||||
* {@link PathMatcher} prefix pattern. The new processor builder has the same state type as
|
||||
* the parent.
|
||||
*
|
||||
* <p>This method is intended for the case where some setup is required before a
|
||||
* sub-hierarchy is processed, but the sub-processor state is the same.
|
||||
*
|
||||
* <pre>{@code
|
||||
* SubProcessor<MyCollector> subprocessor =
*     processor.addSubprocessor("//parent/path", MyCollector::startFn);
* subprocessor.addValueAction("value/suffix", MyCollector::addValue);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the path pattern for the prefix where sub-processing starts.
* @param startFn a handler called when sub-processing begins.
|
||||
*/
|
||||
public SubProcessor<T> addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn) {
|
||||
return addAction(pattern, (t, p) -> {
|
||||
startFn.accept(t, p);
|
||||
return t;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an action to handle {@link CldrValue}s found in the current sub-hierarchy
|
||||
* visitation which match the given {@link PathMatcher} leaf-path pattern.
|
||||
*
|
||||
* <p>This method is expected to be called at least once for each sub-hierarchy processor
|
||||
* in order to handle the actual CLDR values being processed, and the path pattern should
|
||||
* match leaf-paths in the CLDR data hierarchy, rather than path prefixes.
|
||||
*
|
||||
* <p>Multiple value actions can be added to a sub-hierarchy processor, and paths are
|
||||
* matched in the order the actions are added. It is also possible to mix sub-hierarchy
|
||||
* actions and value actions on the same processor, but note that sub-hierarchy processors
|
||||
* will take precedence, so you cannot try to match the same value in both a sub-hierarchy
|
||||
* processor and a value action.
|
||||
*
|
||||
* For example:
|
||||
* <pre>{@code
|
||||
* processor
|
||||
* .addAction("//parent/path", ...)
|
||||
* .addValueAction("value/suffix", ...);
|
||||
* // This will never match any values since the sub-hierarchy processor takes precedence!
|
||||
* processor.addValueAction("//parent/path/value/suffix", ...);
|
||||
* }</pre>
|
||||
*
|
||||
* @param pattern the CLDR path suffix identifying the values to be processed.
|
||||
* @param doFn the action to be carried out for each value.
|
||||
*/
|
||||
public void addValueAction(String pattern, BiConsumer<T, CldrValue> doFn) {
|
||||
valueActions.add(new ValueAction<>(getMatcher(pattern), doFn));
|
||||
}
|
||||
|
||||
// Turns a pattern string into the matcher used by actions registered on this builder.
// Subclasses decide how the pattern relates to any prefix they are rooted at.
abstract PathMatcher getMatcher(String pattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* A root builder of a CLDR data processor.
|
||||
*
|
||||
* @param <T> the processor state type.
|
||||
*/
|
||||
public static final class Builder<T> extends SubProcessor<T> {
|
||||
private Builder() { }
|
||||
|
||||
/** Returns the immutable CLDR data processor. */
|
||||
public CldrDataProcessor<T> build() {
|
||||
return new CldrDataProcessor<>(
|
||||
Lists.transform(prefixActions, PrefixBuilder::build), valueActions);
|
||||
}
|
||||
|
||||
@Override
|
||||
PathMatcher getMatcher(String pattern) {
|
||||
return PathMatcher.of(pattern);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A sub-hierarchy data processor rooted at some specified path prefix.
|
||||
*
|
||||
* @param <S> the subtype processor state.
|
||||
* @param <T> the parent processor state.
|
||||
*/
|
||||
private static class PrefixBuilder<S, T> extends SubProcessor<S> {
|
||||
private final PathMatcher matcher;
|
||||
private final BiFunction<T, CldrPath, S> newStateFn;
|
||||
private final BiConsumer<T, ? super S> doneFn;
|
||||
|
||||
PrefixBuilder(
|
||||
PathMatcher matcher,
|
||||
BiFunction<T, CldrPath, S> newStateFn,
|
||||
BiConsumer<T, ? super S> doneFn) {
|
||||
this.matcher = checkNotNull(matcher);
|
||||
this.newStateFn = checkNotNull(newStateFn);
|
||||
this.doneFn = checkNotNull(doneFn);
|
||||
}
|
||||
|
||||
PrefixAction<S, T> build() {
|
||||
List<PrefixAction<?, S>> actions = Lists.transform(prefixActions, PrefixBuilder::build);
|
||||
return new PrefixAction<>(actions, valueActions, matcher, newStateFn, doneFn);
|
||||
}
|
||||
|
||||
@Override PathMatcher getMatcher(String pattern) {
|
||||
return matcher.withSuffix(pattern);
|
||||
}
|
||||
}
|
||||
|
||||
private final ImmutableList<PrefixAction<?, T>> prefixActions;
|
||||
private final ImmutableList<ValueAction<T>> valueActions;
|
||||
|
||||
private CldrDataProcessor(
|
||||
List<PrefixAction<?, T>> prefixActions,
|
||||
List<ValueAction<T>> valueActions) {
|
||||
this.prefixActions = ImmutableList.copyOf(prefixActions);
|
||||
this.valueActions = ImmutableList.copyOf(valueActions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a CLDR data instance according to the actions registered for this processor in DTD
|
||||
* order. This method is preferred over {@link #process(CldrData, Object, PathOrder)} and
|
||||
* eventually the ability to even specify a path order for processing will be removed.
|
||||
*
|
||||
* <p>This is the main method used to drive the processing of some CLDR data and is typically
|
||||
* used like:
|
||||
*
|
||||
* <pre>{@code
|
||||
* MyResult result = CLDR_PROCESSOR.process(data, new MyResult());
|
||||
* }</pre>
|
||||
* <p>or:
|
||||
* <pre>{@code
|
||||
* MyResult result = CLDR_PROCESSOR.process(data, MyResult.newBuilder()).build();
|
||||
* }</pre>
|
||||
*
|
||||
* @param data the CLDR data to be processed.
|
||||
* @param state an instance of the "primary" state.
|
||||
* @return the given primary state (after modification).
|
||||
*/
|
||||
public T process(CldrData data, T state) {
|
||||
return process(data, state, PathOrder.DTD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a CLDR data instance according to the actions registered for this processor.
|
||||
* Callers should prefer using {@link #process(CldrData, Object)} whenever possible and avoid
|
||||
* relying on path ordering for processing.
|
||||
*
|
||||
* @param data the CLDR data to be processed.
|
||||
* @param state an instance of the "primary" state.
|
||||
* @param pathOrder the order in which CLDR paths should be visited.
|
||||
* @return the given primary state (after modification).
|
||||
*/
|
||||
public T process(CldrData data, T state, PathOrder pathOrder) {
|
||||
data.accept(pathOrder, new DispatchingVisitor<>(this, state, s -> {}));
|
||||
return state;
|
||||
}
|
||||
|
||||
private void dispatchPrefixActions(T state, CldrPath prefix, Context context) {
|
||||
for (PrefixAction<?, T> a : prefixActions) {
|
||||
if (a.matches(state, prefix, context)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void dispatchValueActions(T state, CldrValue value) {
|
||||
for (ValueAction<T> a : valueActions) {
|
||||
if (a.matches(state, value)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Implementation notes:
|
||||
*
|
||||
* "PrefixAction" is a critical part of the design of the path visitor. It acts as a bridge
|
||||
* between the parent visitation (with state type 'T') and child visitation (state type 'S').
|
||||
*
|
||||
* It is the only class to need to know about both types. Both types are known when the
|
||||
* CldrDataProcessor is made, but during visitation the caller of the "matches" method doesn't
|
||||
* need to know about the child type, which is why the parent can just have a list of
|
||||
* "PrefixAction<?, T>" and doesn't need any magical recasting.
|
||||
*
|
||||
* It might only be a few lines of code, but it can only exist in a class which knows about
|
||||
* both parent and child types (obtaining a new child state is a function of the parent state).
|
||||
*/
|
||||
static final class PrefixAction<S, T> extends CldrDataProcessor<S> {
|
||||
private final PathMatcher matcher;
|
||||
private final BiFunction<T, CldrPath, S> newStateFn;
|
||||
private final BiConsumer<T, ? super S> doneFn;
|
||||
|
||||
PrefixAction(
|
||||
List<PrefixAction<?, S>> prefixActions,
|
||||
List<ValueAction<S>> valueActions,
|
||||
PathMatcher matcher,
|
||||
BiFunction<T, CldrPath, S> newStateFn,
|
||||
BiConsumer<T, ? super S> doneFn) {
|
||||
super(prefixActions, valueActions);
|
||||
this.matcher = checkNotNull(matcher);
|
||||
this.newStateFn = checkNotNull(newStateFn);
|
||||
this.doneFn = checkNotNull(doneFn);
|
||||
}
|
||||
|
||||
public boolean matches(T state, CldrPath prefix, Context context) {
|
||||
if (matcher.locallyMatches(prefix)) {
|
||||
Consumer<S> doneFn = childState -> this.doneFn.accept(state, childState);
|
||||
context.install(
|
||||
new DispatchingVisitor<>(this, newStateFn.apply(state, prefix), doneFn),
|
||||
DispatchingVisitor::done);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class ValueAction<T> {
|
||||
private final PathMatcher matcher;
|
||||
private BiConsumer<T, CldrValue> doFn;
|
||||
|
||||
ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn) {
|
||||
this.matcher = checkNotNull(matcher);
|
||||
this.doFn = checkNotNull(doFn);
|
||||
}
|
||||
|
||||
boolean matches(T state, CldrValue value) {
|
||||
if (matcher.locallyMatches(value.getPath())) {
|
||||
doFn.accept(state, value);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class DispatchingVisitor<T> implements PrefixVisitor {
|
||||
CldrDataProcessor<T> processor;
|
||||
private final T state;
|
||||
private final Consumer<T> doneFn;
|
||||
|
||||
DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn) {
|
||||
this.processor = checkNotNull(processor);
|
||||
this.state = checkNotNull(state);
|
||||
this.doneFn = checkNotNull(doneFn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context context) {
|
||||
processor.dispatchPrefixActions(state, prefix, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitValue(CldrValue value) {
|
||||
processor.dispatchValueActions(state, value);
|
||||
}
|
||||
|
||||
// Important: This is NOT visitPrefixEnd() since that happens multiple times and isn't
|
||||
// going to be called for the prefix at which this visitor was started.
|
||||
void done() {
|
||||
doneFn.accept(state);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -37,12 +37,15 @@ import java.util.Map;
|
|||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
|
||||
import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
|
||||
import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
|
||||
|
@ -83,15 +86,15 @@ import com.google.common.io.CharStreams;
|
|||
*/
|
||||
public final class LdmlConverter {
|
||||
// TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).
|
||||
private static final PathMatcher GENDER_LIST_PATHS =
|
||||
private static final Predicate<CldrPath> GENDER_LIST_PATHS =
|
||||
supplementalMatcher("gender");
|
||||
private static final PathMatcher LIKELY_SUBTAGS_PATHS =
|
||||
private static final Predicate<CldrPath> LIKELY_SUBTAGS_PATHS =
|
||||
supplementalMatcher("likelySubtags");
|
||||
private static final PathMatcher METAZONE_PATHS =
|
||||
private static final Predicate<CldrPath> METAZONE_PATHS =
|
||||
supplementalMatcher("metaZones", "primaryZones");
|
||||
private static final PathMatcher METADATA_PATHS =
|
||||
private static final Predicate<CldrPath> METADATA_PATHS =
|
||||
supplementalMatcher("metadata");
|
||||
private static final PathMatcher SUPPLEMENTAL_DATA_PATHS =
|
||||
private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
|
||||
supplementalMatcher(
|
||||
"calendarData",
|
||||
"calendarPreferenceData",
|
||||
|
@ -109,22 +112,23 @@ public final class LdmlConverter {
|
|||
"unitPreferenceData",
|
||||
"weekData",
|
||||
"weekOfPreference");
|
||||
private static final PathMatcher CURRENCY_DATA_PATHS =
|
||||
private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
|
||||
supplementalMatcher("currencyData");
|
||||
private static final PathMatcher NUMBERING_SYSTEMS_PATHS =
|
||||
private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS =
|
||||
supplementalMatcher("numberingSystems");
|
||||
private static final PathMatcher WINDOWS_ZONES_PATHS =
|
||||
private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS =
|
||||
supplementalMatcher("windowsZones");
|
||||
|
||||
private static PathMatcher supplementalMatcher(String... spec) {
|
||||
private static Predicate<CldrPath> supplementalMatcher(String... spec) {
|
||||
checkArgument(spec.length > 0, "must supply at least one matcher spec");
|
||||
if (spec.length == 1) {
|
||||
return PathMatcher.of("supplementalData/" + spec[0]);
|
||||
return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf;
|
||||
}
|
||||
return PathMatcher.anyOf(
|
||||
return
|
||||
Arrays.stream(spec)
|
||||
.map(s -> PathMatcher.of("supplementalData/" + s))
|
||||
.toArray(PathMatcher[]::new));
|
||||
.map(s -> PathMatcher.of("//supplementalData/" + s))
|
||||
.map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf))
|
||||
.reduce(p -> false, Predicate::or);
|
||||
}
|
||||
|
||||
// Declared final for consistency with the other RbPath constants in this class.
private static final RbPath RB_PARENT = RbPath.of("%%Parent");
|
||||
|
@ -514,7 +518,7 @@ public final class LdmlConverter {
|
|||
private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
|
||||
|
||||
private void processSupplemental(
|
||||
String label, PathMatcher paths, String dir, boolean addCldrVersion) {
|
||||
String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) {
|
||||
IcuData icuData =
|
||||
SupplementalMapper.process(src, supplementalTransformer, label, paths);
|
||||
// A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
|
||||
|
|
|
@ -1,260 +0,0 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkPositionIndex;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static com.google.common.collect.ImmutableMap.toImmutableMap;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
|
||||
/**
|
||||
* An immutable matcher for {@link CldrPath} instances. A path matcher specification looks like
|
||||
* {@code "foo/*[@x="z"]/bar[@y=*]"}, where element names and attribute values can be wildcards.
|
||||
*
|
||||
* <p>Note that the path fragment represented by the specification does not include either leading
|
||||
* or trailing {@code '/'}. This is because matching can occur at any point in a {@link CldrPath}.
|
||||
* The choice of where to match in the path is governed by the match method used (e.g.
|
||||
* {@link PathMatcher#matchesSuffixOf(CldrPath)}).
|
||||
*/
|
||||
public abstract class PathMatcher {
|
||||
/** Parses the path specification into a matcher. */
|
||||
public static PathMatcher of(String pathSpec) {
|
||||
// Supported so far: "a", "a/b", "a/b[@x=*]"
|
||||
return new BasicMatcher(parse(pathSpec));
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines the given matchers into a single composite matcher which tests all the given
|
||||
* matchers in order.
|
||||
*/
|
||||
public static PathMatcher anyOf(PathMatcher... matchers) {
|
||||
checkArgument(matchers.length > 0, "must supply at least one matcher");
|
||||
if (matchers.length == 1) {
|
||||
return checkNotNull(matchers[0]);
|
||||
}
|
||||
return new CompositeMatcher(ImmutableList.copyOf(matchers));
|
||||
}
|
||||
|
||||
/** Attempts a full match against a given path. */
|
||||
public abstract boolean matches(CldrPath path);
|
||||
|
||||
/** Attempts a suffix match against a given path. */
|
||||
public abstract boolean matchesSuffixOf(CldrPath path);
|
||||
|
||||
/** Attempts a prefix match against a given path. */
|
||||
public abstract boolean matchesPrefixOf(CldrPath path);
|
||||
|
||||
// A matcher that simply combines a sequence of other matchers in order.
|
||||
private static final class CompositeMatcher extends PathMatcher {
|
||||
private final ImmutableList<PathMatcher> matchers;
|
||||
|
||||
private CompositeMatcher(ImmutableList<PathMatcher> matchers) {
|
||||
checkArgument(matchers.size() > 1);
|
||||
this.matchers = checkNotNull(matchers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches(CldrPath path) {
|
||||
for (PathMatcher m : matchers) {
|
||||
if (m.matches(path)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesSuffixOf(CldrPath path) {
|
||||
for (PathMatcher m : matchers) {
|
||||
if (m.matchesSuffixOf(path)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesPrefixOf(CldrPath path) {
|
||||
for (PathMatcher m : matchers) {
|
||||
if (m.matchesPrefixOf(path)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class BasicMatcher extends PathMatcher {
|
||||
private final ImmutableList<Predicate<CldrPath>> elementMatchers;
|
||||
|
||||
private BasicMatcher(List<Predicate<CldrPath>> elementMatchers) {
|
||||
this.elementMatchers = ImmutableList.copyOf(elementMatchers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches(CldrPath path) {
|
||||
return elementMatchers.size() == path.getLength() && matchRegion(path, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesSuffixOf(CldrPath path) {
|
||||
int start = path.getLength() - elementMatchers.size();
|
||||
return start >= 0 && matchRegion(path, start);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesPrefixOf(CldrPath path) {
|
||||
return path.getLength() >= elementMatchers.size() && matchRegion(path, 0);
|
||||
}
|
||||
|
||||
private boolean matchRegion(CldrPath path, int offset) {
|
||||
// offset is the path element corresponding the the "top most" element matcher, it
|
||||
// must be in the range 0 ... (path.length() - elementMatchers.size()).
|
||||
checkPositionIndex(offset, path.getLength() - elementMatchers.size());
|
||||
// First jump over the path parents until we find the last matcher.
|
||||
int matchPathLength = offset + elementMatchers.size();
|
||||
while (path.getLength() > matchPathLength) {
|
||||
path = path.getParent();
|
||||
}
|
||||
return matchForward(path, elementMatchers.size() - 1);
|
||||
}
|
||||
|
||||
private boolean matchForward(CldrPath path, int matcherIndex) {
|
||||
if (matcherIndex < 0) {
|
||||
return true;
|
||||
}
|
||||
return matchForward(path.getParent(), matcherIndex - 1)
|
||||
&& elementMatchers.get(matcherIndex).test(path);
|
||||
}
|
||||
}
|
||||
|
||||
// Make a new, non-interned, unique instance here which we can test by reference to
|
||||
// determine if the argument is to be captured (needed as ImmutableMap prohibits null).
|
||||
// DO NOT change this code to assign "*" as the value directly, it MUST be a new instance.
|
||||
@SuppressWarnings("StringOperationCanBeSimplified")
|
||||
private static final String WILDCARD = new String("*");
|
||||
|
||||
private static final Pattern ELEMENT_START_REGEX =
|
||||
Pattern.compile("(\\*|[-:\\w]+)(?:/|\\[|$)");
|
||||
private static final Pattern ATTRIBUTE_REGEX =
|
||||
Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")]");
|
||||
|
||||
// element := foo, foo[@bar="baz"], foo[@bar=*]
|
||||
// pathspec := element{/element}*
|
||||
private static List<Predicate<CldrPath>> parse(String pathSpec) {
|
||||
List<Predicate<CldrPath>> specs = new ArrayList<>();
|
||||
int pos = 0;
|
||||
do {
|
||||
pos = parse(pathSpec, pos, specs);
|
||||
} while (pos >= 0);
|
||||
return specs;
|
||||
}
|
||||
|
||||
// Return next start index or -1.
|
||||
private static int parse(String pathSpec, int pos, List<Predicate<CldrPath>> specs) {
|
||||
Matcher m = ELEMENT_START_REGEX.matcher(pathSpec).region(pos, pathSpec.length());
|
||||
checkArgument(m.lookingAt(), "invalid path specification (index=%s): %s", pos, pathSpec);
|
||||
String name = m.group(1);
|
||||
Map<String, String> attributes = ImmutableMap.of();
|
||||
pos = m.end(1);
|
||||
if (pos < pathSpec.length() && pathSpec.charAt(pos) == '[') {
|
||||
// We have attributes to add.
|
||||
attributes = new LinkedHashMap<>();
|
||||
do {
|
||||
m = ATTRIBUTE_REGEX.matcher(pathSpec).region(pos, pathSpec.length());
|
||||
checkArgument(m.lookingAt(),
|
||||
"invalid path specification (index=%s): %s", pos, pathSpec);
|
||||
// Null if we matched the '*' wildcard.
|
||||
String value = m.group(2);
|
||||
attributes.put(m.group(1), value != null ? value : WILDCARD);
|
||||
pos = m.end();
|
||||
} while (pos < pathSpec.length() && pathSpec.charAt(pos) == '[');
|
||||
}
|
||||
// Wildcard matching is less efficient because attribute keys cannot be made in advance, so
|
||||
// since it's also very rare, we special case it.
|
||||
Predicate<CldrPath> matcher = name.equals(WILDCARD)
|
||||
? new WildcardElementMatcher(attributes)::match
|
||||
: new ElementMatcher(name, attributes)::match;
|
||||
specs.add(matcher);
|
||||
if (pos == pathSpec.length()) {
|
||||
return -1;
|
||||
}
|
||||
checkState(pathSpec.charAt(pos) == '/',
|
||||
"invalid path specification (index=%s): %s", pos, pathSpec);
|
||||
return pos + 1;
|
||||
}
|
||||
|
||||
// Matcher for path elements like "foo[@bar=*]" where the name is known in advance.
|
||||
private static final class ElementMatcher {
|
||||
private final String name;
|
||||
private final ImmutableMap<AttributeKey, String> attributes;
|
||||
|
||||
private ElementMatcher(String name, Map<String, String> attributes) {
|
||||
this.name = checkNotNull(name);
|
||||
this.attributes = attributes.entrySet().stream()
|
||||
.collect(toImmutableMap(e -> keyOf(name, e.getKey()), Entry::getValue));
|
||||
}
|
||||
|
||||
boolean match(CldrPath path) {
|
||||
if (!path.getName().equals(name)) {
|
||||
return false;
|
||||
}
|
||||
for (Entry<AttributeKey, String> e : attributes.entrySet()) {
|
||||
String actual = path.get(e.getKey());
|
||||
if (actual == null) {
|
||||
return false;
|
||||
}
|
||||
String expected = e.getValue();
|
||||
// DO NOT change this to use expected.equals(WILDCARD).
|
||||
if (expected != WILDCARD && !expected.equals(actual)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Matcher for path elements like "*[@bar=*]", where the name isn't known until match time.
|
||||
private static final class WildcardElementMatcher {
|
||||
private final ImmutableMap<String, String> attributes;
|
||||
|
||||
private WildcardElementMatcher(Map<String, String> attributes) {
|
||||
this.attributes = ImmutableMap.copyOf(attributes);
|
||||
}
|
||||
|
||||
private boolean match(CldrPath path) {
|
||||
// The wildcard matcher never fails due to the element name but must create new key
|
||||
// instances every time matching occurs (because the key name is dynamic). Since this
|
||||
// is rare, it's worth making into a separate case.
|
||||
for (Entry<String, String> attribute : attributes.entrySet()) {
|
||||
String actual = path.get(keyOf(path.getName(), attribute.getKey()));
|
||||
if (actual == null) {
|
||||
return false;
|
||||
}
|
||||
String expected = attribute.getValue();
|
||||
// DO NOT change this to use expected.equals(WILDCARD).
|
||||
if (expected != WILDCARD && !expected.equals(actual)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -4,6 +4,7 @@ package org.unicode.icu.tool.cldrtoicu;
|
|||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.collect.ImmutableList.toImmutableList;
|
||||
import static com.google.common.collect.ImmutableMap.toImmutableMap;
|
||||
import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
|
||||
import static java.util.function.Function.identity;
|
||||
|
@ -26,8 +27,11 @@ import org.unicode.cldr.api.CldrDataType;
|
|||
import org.unicode.cldr.api.CldrDraftStatus;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.cldr.api.FilteredData;
|
||||
import org.unicode.cldr.api.PathMatcher;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Sets;
|
||||
|
@ -147,43 +151,52 @@ public final class PseudoLocales {
|
|||
}
|
||||
|
||||
private static final class PseudoLocaleData extends FilteredData {
|
||||
private static final PathMatcher LDML = PathMatcher.of("//ldml");
|
||||
|
||||
private static final PathMatcher AUX_EXEMPLARS =
|
||||
PathMatcher.of("ldml/characters/exemplarCharacters[@type=\"auxiliary\"]");
|
||||
ldml("characters/exemplarCharacters[@type=\"auxiliary\"]");
|
||||
|
||||
private static final PathMatcher NUMBERING_SYSTEM =
|
||||
PathMatcher.of("ldml/numbers/defaultNumberingSystem");
|
||||
ldml("numbers/defaultNumberingSystem");
|
||||
|
||||
// These paths were mostly derived from looking at the previous implementation's behaviour
|
||||
// and can be modified as needed. Notably there are no "units" here (but they were also
|
||||
// excluded in the original code).
|
||||
private static final PathMatcher PSEUDO_PATHS = PathMatcher.anyOf(
|
||||
ldml("localeDisplayNames"),
|
||||
ldml("delimiters"),
|
||||
ldml("dates/calendars/calendar"),
|
||||
ldml("dates/fields"),
|
||||
ldml("dates/timeZoneNames"),
|
||||
ldml("listPatterns"),
|
||||
ldml("posix/messages"),
|
||||
ldml("characterLabels"),
|
||||
ldml("typographicNames"));
|
||||
|
||||
// Paths which contain non-localizable data. It is important that these paths catch all the
|
||||
// non-localizable sub-paths of the list above. This list must be accurate.
|
||||
private static final PathMatcher EXCLUDE_PATHS = PathMatcher.anyOf(
|
||||
ldml("localeDisplayNames/localeDisplayPattern"),
|
||||
ldml("dates/timeZoneNames/fallbackFormat"));
|
||||
private static final Predicate<CldrPath> IS_PSEUDO_PATH =
|
||||
matchAnyLdmlPrefix(
|
||||
"localeDisplayNames",
|
||||
"delimiters",
|
||||
"dates/calendars/calendar",
|
||||
"dates/fields",
|
||||
"dates/timeZoneNames",
|
||||
"listPatterns",
|
||||
"posix/messages",
|
||||
"characterLabels",
|
||||
"typographicNames")
|
||||
.and(matchAnyLdmlPrefix(
|
||||
"localeDisplayNames/localeDisplayPattern",
|
||||
"dates/timeZoneNames/fallbackFormat")
|
||||
.negate());
|
||||
|
||||
// The expectation is that all non-alias paths with values under these roots are "date/time
|
||||
// pattern like" (such as "E h:mm:ss B") in which care must be taken to not pseudo localize
|
||||
// the patterns in such a way as to break them. This list must be accurate.
|
||||
private static final PathMatcher PATTERN_PATHS = PathMatcher.anyOf(
|
||||
ldml("dates/calendars/calendar/timeFormats"),
|
||||
ldml("dates/calendars/calendar/dateFormats"),
|
||||
ldml("dates/calendars/calendar/dateTimeFormats"),
|
||||
ldml("dates/timeZoneNames/hourFormat"));
|
||||
private static final Predicate<CldrPath> IS_PATTERN_PATH = matchAnyLdmlPrefix(
|
||||
"dates/calendars/calendar/timeFormats",
|
||||
"dates/calendars/calendar/dateFormats",
|
||||
"dates/calendars/calendar/dateTimeFormats",
|
||||
"dates/timeZoneNames/hourFormat");
|
||||
|
||||
private static PathMatcher ldml(String matcherSuffix) {
|
||||
return PathMatcher.of("ldml/" + matcherSuffix);
|
||||
private static PathMatcher ldml(String paths) {
|
||||
return LDML.withSuffix(paths);
|
||||
}
|
||||
|
||||
private static Predicate<CldrPath> matchAnyLdmlPrefix(String... paths) {
|
||||
ImmutableList<Predicate<CldrPath>> collect =
|
||||
Arrays.stream(paths)
|
||||
.map(s -> (Predicate<CldrPath>) ldml(s)::matchesPrefixOf)
|
||||
.collect(toImmutableList());
|
||||
return p -> collect.stream().anyMatch(e -> e.test(p));
|
||||
}
|
||||
|
||||
// Look for any attribute in the path with "narrow" in its value. Since "narrow" values
|
||||
|
@ -223,7 +236,7 @@ public final class PseudoLocales {
|
|||
|
||||
CldrValue defaultReturnValue = isResolved ? value : null;
|
||||
// This makes it look like we have explicit values only for the included paths.
|
||||
if (!PSEUDO_PATHS.matchesPrefixOf(path) || EXCLUDE_PATHS.matchesPrefixOf(path)) {
|
||||
if (!IS_PSEUDO_PATH.test(path)) {
|
||||
return defaultReturnValue;
|
||||
}
|
||||
String fullPath = value.getFullPath();
|
||||
|
@ -232,7 +245,7 @@ public final class PseudoLocales {
|
|||
if (IS_NARROW.test(fullPath)) {
|
||||
return defaultReturnValue;
|
||||
}
|
||||
String text = createMessage(value.getValue(), PATTERN_PATHS.matchesPrefixOf(path));
|
||||
String text = createMessage(value.getValue(), IS_PATTERN_PATH.test(path));
|
||||
return CldrValue.parseValue(fullPath, text);
|
||||
}
|
||||
|
||||
|
@ -357,7 +370,7 @@ public final class PseudoLocales {
|
|||
public void addFragment(String text, boolean isLocalizable) {
|
||||
if (isLocalizable) {
|
||||
boolean wrapping = false;
|
||||
for (int index = 0; index < text.length();) {
|
||||
for (int index = 0; index < text.length(); ) {
|
||||
int codePoint = text.codePointAt(index);
|
||||
index += Character.charCount(codePoint);
|
||||
byte directionality = Character.getDirectionality(codePoint);
|
||||
|
@ -383,5 +396,6 @@ public final class PseudoLocales {
|
|||
};
|
||||
}
|
||||
|
||||
private PseudoLocales() {}
|
||||
private PseudoLocales() {
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.stream.Stream;
|
|||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.PathMatcher;
|
||||
|
||||
import com.google.common.base.Ascii;
|
||||
import com.google.common.base.Splitter;
|
||||
|
@ -57,22 +58,22 @@ public final class SupplementalData {
|
|||
private static final Pattern SCRIPT_SUBTAG = Pattern.compile("[A-Z][a-z]{3}");
|
||||
|
||||
private static final PathMatcher ALIAS =
|
||||
PathMatcher.of("supplementalData/metadata/alias/*[@type=*]");
|
||||
PathMatcher.of("//supplementalData/metadata/alias/*[@type=*]");
|
||||
|
||||
private static final PathMatcher PARENT_LOCALE =
|
||||
PathMatcher.of("supplementalData/parentLocales/parentLocale[@parent=*]");
|
||||
PathMatcher.of("//supplementalData/parentLocales/parentLocale[@parent=*]");
|
||||
private static final AttributeKey PARENT = keyOf("parentLocale", "parent");
|
||||
private static final AttributeKey LOCALES = keyOf("parentLocale", "locales");
|
||||
|
||||
private static final PathMatcher CALENDER_PREFERENCE =
|
||||
PathMatcher.of("supplementalData/calendarPreferenceData/calendarPreference[@territories=*]");
|
||||
PathMatcher.of("//supplementalData/calendarPreferenceData/calendarPreference[@territories=*]");
|
||||
private static final AttributeKey CALENDER_TERRITORIES =
|
||||
keyOf("calendarPreference", "territories");
|
||||
private static final AttributeKey CALENDER_ORDERING =
|
||||
keyOf("calendarPreference", "ordering");
|
||||
|
||||
private static final PathMatcher LIKELY_SUBTAGS =
|
||||
PathMatcher.of("supplementalData/likelySubtags/likelySubtag[@from=*]");
|
||||
PathMatcher.of("//supplementalData/likelySubtags/likelySubtag[@from=*]");
|
||||
private static final AttributeKey SUBTAG_FROM = keyOf("likelySubtag", "from");
|
||||
private static final AttributeKey SUBTAG_TO = keyOf("likelySubtag", "to");
|
||||
|
||||
|
|
|
@ -3,8 +3,6 @@
|
|||
package org.unicode.icu.tool.cldrtoicu.mapper;
|
||||
|
||||
import static com.google.common.base.Ascii.toLowerCase;
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
|
||||
|
@ -17,19 +15,15 @@ import java.util.Map.Entry;
|
|||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrData.ValueVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Ascii;
|
||||
|
@ -46,12 +40,10 @@ import com.google.common.collect.Sets;
|
|||
*/
|
||||
public final class Bcp47Mapper {
|
||||
// Other attributes (e.g. "alias") are value attributes and don't need to be matched here.
|
||||
private static final PathMatcher KEY = PathMatcher.of("ldmlBCP47/keyword/key[@name=*]");
|
||||
private static final AttributeKey KEY_NAME = keyOf("key", "name");
|
||||
private static final AttributeKey KEY_ALIAS = keyOf("key", "alias");
|
||||
private static final AttributeKey KEY_VALUE_TYPE = keyOf("key", "valueType");
|
||||
|
||||
private static final PathMatcher TYPE = PathMatcher.of("type[@name=*]");
|
||||
private static final AttributeKey TYPE_NAME = keyOf("type", "name");
|
||||
private static final AttributeKey TYPE_ALIASES = keyOf("type", "alias");
|
||||
private static final AttributeKey PREFERRED_TYPE_NAME = keyOf("type", "preferred");
|
||||
|
@ -75,6 +67,15 @@ public final class Bcp47Mapper {
|
|||
private static final RbPath RB_MAP_ALIAS = RbPath.of("typeMap", "timezone:alias");
|
||||
private static final RbPath RB_BCP_ALIAS = RbPath.of("bcpTypeAlias", "tz:alias");
|
||||
|
||||
private static final CldrDataProcessor<Bcp47Mapper> BCP47_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<Bcp47Mapper> processor = CldrDataProcessor.builder();
|
||||
processor
|
||||
.addAction("//ldmlBCP47/keyword/key[@name=*]", (m, p) -> m.new ValueCollector(p))
|
||||
.addValueAction("type[@name=*]", ValueCollector::collect);
|
||||
BCP47_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes data from the given supplier to generate Timezone and BCP-47 ICU data.
|
||||
*
|
||||
|
@ -87,169 +88,146 @@ public final class Bcp47Mapper {
|
|||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static ImmutableList<IcuData> process(CldrData cldrData) {
|
||||
Bcp47Visitor visitor = new Bcp47Visitor();
|
||||
cldrData.accept(DTD, visitor);
|
||||
visitor.addKeyMapValues();
|
||||
return ImmutableList.of(visitor.keyTypeData.icuData, visitor.tzData.icuData);
|
||||
Bcp47Mapper mapper = BCP47_PROCESSOR.process(cldrData, new Bcp47Mapper(), DTD);
|
||||
mapper.addKeyMapValues();
|
||||
return ImmutableList.of(mapper.keyTypeData, mapper.tzData);
|
||||
}
|
||||
|
||||
// Outer visitor which handles "key" paths by installing sub-visitor methods to process
|
||||
// each child "type" element. Depending on the key name, values are stored in different
|
||||
// IcuData instances.
|
||||
private static final class Bcp47Visitor implements PrefixVisitor {
|
||||
private final ValueCollector tzData =
|
||||
new ValueCollector(new IcuData("timezoneTypes", false));
|
||||
private final ValueCollector keyTypeData =
|
||||
new ValueCollector(new IcuData("keyTypeData", false));
|
||||
private final IcuData tzData = new IcuData("timezoneTypes", false);
|
||||
private final IcuData keyTypeData = new IcuData("keyTypeData", false);
|
||||
// A map collecting each key and values as they are visited.
|
||||
// TODO: Convert this to a Map<RbPath, String> which involves removing the '@' prefix hack.
|
||||
private Map<String, String> keyMap = new LinkedHashMap<>();
|
||||
|
||||
// The current key name from the parent path element (set when a prefix is matched).
|
||||
@Nullable private String keyName = null;
|
||||
// A map collecting each key and values as they are visited.
|
||||
// TODO: Convert this to a Map<RbPath, String> which involves removing the '@' prefix hack.
|
||||
private Map<String, String> keyMap = new LinkedHashMap<>();
|
||||
private Bcp47Mapper() { }
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (KEY.matches(prefix)) {
|
||||
// Don't inline this since it also sets the field!!
|
||||
keyName = Ascii.toLowerCase(KEY_NAME.valueFrom(prefix));
|
||||
|
||||
// How the data is visited is the same for both timezone and other BCP-47 data,
|
||||
// it's just split into different data files, so we just install a different
|
||||
// instance of the visitor class according to where the data in this sub-hierarchy
|
||||
// should end up.
|
||||
ctx.install(keyName.equals("tz") ? tzData : keyTypeData);
|
||||
// Post processing to add additional captured attribute values and some special cases.
|
||||
private void addKeyMapValues() {
|
||||
IcuData keyData = keyTypeData;
|
||||
// Add all the keyMap values into the IcuData file.
|
||||
for (Entry<String, String> kmData : keyMap.entrySet()) {
|
||||
String bcpKey = kmData.getKey();
|
||||
String key = kmData.getValue();
|
||||
if (bcpKey.startsWith("@")) {
|
||||
// Undoing the weird hack in addInfoAttributes(). This can be done better.
|
||||
// We use "parse()" because these are full paths, and not single elements.
|
||||
keyData.add(RbPath.parse(bcpKey.substring(1)), key);
|
||||
continue;
|
||||
}
|
||||
if (bcpKey.equals(key)) {
|
||||
// An empty value indicates that the BCP47 key is same as the legacy key.
|
||||
bcpKey = "";
|
||||
}
|
||||
keyData.add(RB_KEYMAP.extendBy(key), bcpKey);
|
||||
}
|
||||
// Add aliases for timezone data.
|
||||
keyData.add(RB_TYPE_ALIAS, "/ICUDATA/timezoneTypes/typeAlias/timezone");
|
||||
keyData.add(RB_MAP_ALIAS, "/ICUDATA/timezoneTypes/typeMap/timezone");
|
||||
keyData.add(RB_BCP_ALIAS, "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
|
||||
}
|
||||
|
||||
private final class ValueCollector {
|
||||
private final String keyName;
|
||||
// Mutable data to be written into (differs depending on the key name).
|
||||
private final IcuData icuData;
|
||||
|
||||
/**
 * Creates a collector for the values below the given "key" path prefix.
 *
 * @param prefix the path prefix from which the (lower-cased) key name is taken.
 */
ValueCollector(CldrPath prefix) {
    String lowerCaseKeyName = Ascii.toLowerCase(KEY_NAME.valueFrom(prefix));
    this.keyName = lowerCaseKeyName;
    // Values below the "tz" key are written to the timezone data, everything else goes into
    // the general key type data.
    if (lowerCaseKeyName.equals("tz")) {
        this.icuData = tzData;
    } else {
        this.icuData = keyTypeData;
    }
}
|
||||
|
||||
// Post processing to add additional captured attribute values and some special cases.
|
||||
private void addKeyMapValues() {
|
||||
IcuData keyData = keyTypeData.icuData;
|
||||
// Add all the keyMap values into the IcuData file.
|
||||
for (Entry<String, String> kmData : keyMap.entrySet()) {
|
||||
String bcpKey = kmData.getKey();
|
||||
String key = kmData.getValue();
|
||||
if (bcpKey.startsWith("@")) {
|
||||
// Undoing the weird hack in addInfoAttributes(). This can be done better.
|
||||
// We use "parse()" because these are full paths, and not single elements.
|
||||
keyData.add(RbPath.parse(bcpKey.substring(1)), key);
|
||||
private void collect(CldrValue value) {
|
||||
String typeName = TYPE_NAME.valueFrom(value);
|
||||
// Note that if a "preferred" type exists, we treat the value specially and add
|
||||
// it only as an alias. We expected values with a preferred replacement to
|
||||
// always be explicitly deprecated.
|
||||
Optional<String> prefName = PREFERRED_TYPE_NAME.optionalValueFrom(value);
|
||||
if (prefName.isPresent()) {
|
||||
checkState(KEY_DEPRECATED.booleanValueFrom(value, false)
|
||||
|| TYPE_DEPRECATED.booleanValueFrom(value, false),
|
||||
"unexpected 'preferred' attribute for non-deprecated value: %s", value);
|
||||
icuData.add(RbPath.of("bcpTypeAlias", keyName, typeName), prefName.get());
|
||||
return;
|
||||
}
|
||||
// Note: There are some deprecated values which don't have a preferred
|
||||
// replacement and these will be processed below (in particular we need to emit
|
||||
// the fact that they are deprecated).
|
||||
|
||||
// Not all key elements have an alias. E.g. in calendar.xml:
|
||||
// <key name="fw" description="First day of week" since="28">
|
||||
// But we still add it as a alias to itself (which is later turned into a path with
|
||||
// an empty value).
|
||||
String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
|
||||
|
||||
keyMap.put(keyName, keyAlias);
|
||||
RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
|
||||
|
||||
List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
|
||||
if (typeAliases.isEmpty()) {
|
||||
// Generate type map entry using empty value (an empty value indicates same
|
||||
// type name is used for both BCP47 and legacy type).
|
||||
icuData.add(typeMapPrefix.extendBy(typeName), "");
|
||||
} else {
|
||||
String mainAlias = typeAliases.get(0);
|
||||
icuData.add(typeMapPrefix.extendBy(quoteAlias(mainAlias)), typeName);
|
||||
// Put additional aliases as secondary aliases referencing the main alias.
|
||||
RbPath typeAliasPrefix = RbPath.of("typeAlias", keyAlias);
|
||||
typeAliases.stream()
|
||||
.skip(1)
|
||||
.map(Bcp47Mapper::quoteAlias)
|
||||
.forEach(a -> icuData.add(typeAliasPrefix.extendBy(a), mainAlias));
|
||||
}
|
||||
addInfoAttributes(keyName, typeName, value.getValueAttributes());
|
||||
}
|
||||
|
||||
// Add any additional attributes present to the attribute map. Note that this code was
|
||||
// copied from largely undocumented code, and the precise reasoning for why this is
|
||||
// needed or why it's done this way is not completely clear. It is very likely that it
|
||||
// can be simplified.
|
||||
//
|
||||
// The '@' symbol added here is just a magic token that gets stripped off again in the
|
||||
// addKeyMapValues() method, it appears to just be a way to distinguish keys added via
|
||||
// this method vs during the collect method. A better approach might just be to have two
|
||||
// maps.
|
||||
// TODO: Remove the use of '@' and simplify the logic for "info" attributes (infoMap?).
|
||||
private void addInfoAttributes(
|
||||
String keyName, String typeName, ImmutableMap<AttributeKey, String> attributes) {
|
||||
// Only emit deprecation for the "key" level, even if all types below that are also
|
||||
// marked as deprecated. Only do this for a subset of attributes (INFO_ATTRIBUTES).
|
||||
Set<AttributeKey> keys =
|
||||
Sets.intersection(attributes.keySet(), INFO_ATTRIBUTES.keySet());
|
||||
for (AttributeKey a : keys) {
|
||||
String value = attributes.get(a);
|
||||
// Skip empty or default values in attributes.
|
||||
if (value.isEmpty() || INFO_ATTRIBUTES.get(a).equals(value)) {
|
||||
continue;
|
||||
}
|
||||
if (bcpKey.equals(key)) {
|
||||
// An empty value indicates that the BCP47 key is same as the legacy key.
|
||||
bcpKey = "";
|
||||
}
|
||||
keyData.add(RB_KEYMAP.extendBy(key), bcpKey);
|
||||
// The ID for the xxxInfo paths in ICU is the path fragment at which the
|
||||
// attribute exists. Since we only process complete paths here, we must do a
|
||||
// bit of reconstruction based on the element name of the attribute we are
|
||||
// processing. This relies on explicit knowledge that the paths are "<key>" or
|
||||
// "<key>/<type>". This all gets less messy if we switch to RbPath.
|
||||
String id =
|
||||
a.getElementName().equals("key") ? keyName : keyName + "/" + typeName;
|
||||
keyMap.put(
|
||||
"@" + a.getElementName() + "Info/" + a.getAttributeName() + "/" + id,
|
||||
value);
|
||||
}
|
||||
// Add aliases for timezone data.
|
||||
keyData.add(RB_TYPE_ALIAS, "/ICUDATA/timezoneTypes/typeAlias/timezone");
|
||||
keyData.add(RB_MAP_ALIAS, "/ICUDATA/timezoneTypes/typeMap/timezone");
|
||||
keyData.add(RB_BCP_ALIAS, "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
|
||||
}
|
||||
|
||||
private final class ValueCollector implements ValueVisitor {
|
||||
// Mutable ICU data collected into during visitation.
|
||||
private final IcuData icuData;
|
||||
|
||||
ValueCollector(IcuData data) {
|
||||
this.icuData = checkNotNull(data);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(CldrValue value) {
|
||||
checkArgument(TYPE.matchesSuffixOf(value.getPath()),
|
||||
"unexpected child element: %s", value.getPath());
|
||||
String typeName = TYPE_NAME.valueFrom(value);
|
||||
// Note that if a "preferred" type exists, we treat the value specially and add
|
||||
// it only as an alias. We expected values with a preferred replacement to
|
||||
// always be explicitly deprecated.
|
||||
Optional<String> prefName = PREFERRED_TYPE_NAME.optionalValueFrom(value);
|
||||
if (prefName.isPresent()) {
|
||||
checkState(KEY_DEPRECATED.booleanValueFrom(value, false)
|
||||
|| TYPE_DEPRECATED.booleanValueFrom(value, false),
|
||||
"unexpected 'preferred' attribute for non-deprecated value: %s", value);
|
||||
icuData.add(RbPath.of("bcpTypeAlias", keyName, typeName), prefName.get());
|
||||
return;
|
||||
}
|
||||
// Note: There are some deprecated values which don't have a preferred
|
||||
// replacement and these will be processed below (in particular we need to emit
|
||||
// the fact that they are deprecated).
|
||||
|
||||
// Not all key elements have an alias. E.g. in calendar.xml:
|
||||
// <key name="fw" description="First day of week" since="28">
|
||||
// But we still add it as a alias to itself (which is later turned into a path with
|
||||
// an empty value).
|
||||
String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
|
||||
|
||||
keyMap.put(keyName, keyAlias);
|
||||
RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
|
||||
|
||||
List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
|
||||
if (typeAliases.isEmpty()) {
|
||||
// Generate type map entry using empty value (an empty value indicates same
|
||||
// type name is used for both BCP47 and legacy type).
|
||||
icuData.add(typeMapPrefix.extendBy(typeName), "");
|
||||
} else {
|
||||
String mainAlias = typeAliases.get(0);
|
||||
icuData.add(typeMapPrefix.extendBy(quoteAlias(mainAlias)), typeName);
|
||||
// Put additional aliases as secondary aliases referencing the main alias.
|
||||
RbPath typeAliasPrefix = RbPath.of("typeAlias", keyAlias);
|
||||
typeAliases.stream()
|
||||
.skip(1)
|
||||
.map(Bcp47Visitor::quoteAlias)
|
||||
.forEach(a -> icuData.add(typeAliasPrefix.extendBy(a), mainAlias));
|
||||
}
|
||||
addInfoAttributes(keyName, typeName, value.getValueAttributes());
|
||||
}
|
||||
|
||||
// Add any additional attributes present to the attribute map. Note that this code was
|
||||
// copied from largely undocumented code, and the precise reasoning for why this is
|
||||
// needed or why it's done this way is not completely clear. It is very likely that it
|
||||
// can be simplified.
|
||||
//
|
||||
// The '@' symbol added here is just a magic token that gets stripped off again in the
|
||||
// addKeyMapValues() method, it appears to just be a way to distinguish keys added via
|
||||
// this method vs during the visit method. A better approach might just be to have two
|
||||
// maps.
|
||||
// TODO: Remove the use of '@' and simplify the logic for "info" attributes (infoMap?).
|
||||
private void addInfoAttributes(
|
||||
String keyName, String typeName, ImmutableMap<AttributeKey, String> attributes) {
|
||||
// Only emit deprecation for the "key" level, even if all types below that are also
|
||||
// marked as deprecated. Only do this for a subset of attributes (INFO_ATTRIBUTES).
|
||||
Set<AttributeKey> keys =
|
||||
Sets.intersection(attributes.keySet(), INFO_ATTRIBUTES.keySet());
|
||||
for (AttributeKey a : keys) {
|
||||
String value = attributes.get(a);
|
||||
// Skip empty or default values in attributes.
|
||||
if (value.isEmpty() || INFO_ATTRIBUTES.get(a).equals(value)) {
|
||||
continue;
|
||||
}
|
||||
// The ID for the xxxInfo paths in ICU is the path fragment at which the
|
||||
// attribute exists. Since we only process complete paths here, we must do a
|
||||
// bit of reconstruction based on the element name of the attribute we are
|
||||
// processing. This relies on explicit knowledge that the paths are "<key>" or
|
||||
// "<key>/<type>". This all gets less messy if we switch to RbPath.
|
||||
String id =
|
||||
a.getElementName().equals("key") ? keyName : keyName + "/" + typeName;
|
||||
keyMap.put(
|
||||
"@" + a.getElementName() + "Info/" + a.getAttributeName() + "/" + id,
|
||||
value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes alias values containing '/' so they can appear in resource bundle paths. This
|
||||
* function replaces '/' with ':' and quotes the result (e.g. foo/bar -> "foo:bar").
|
||||
*
|
||||
* <p>This is needed for timezone "metazone" ID strings which are of the form 'Foo/Bar'
|
||||
* in the CLDR data.
|
||||
*/
|
||||
// TODO: Switch to RbPath and do quoting automatically when ICU data is written out.
|
||||
private static String quoteAlias(String str) {
|
||||
return str.indexOf('/') == -1 ? str : '"' + str.replace('/', ':') + '"';
|
||||
}
|
||||
}
|
||||
|
||||
private Bcp47Mapper() {}
|
||||
/**
|
||||
* Escapes alias values containing '/' so they can appear in resource bundle paths. This
|
||||
* function replaces '/' with ':' and quotes the result (e.g. foo/bar -> "foo:bar").
|
||||
*
|
||||
* <p>This is needed for timezone "metazone" ID strings which are of the form 'Foo/Bar'
|
||||
* in the CLDR data.
|
||||
*/
|
||||
// TODO: Switch to RbPath and do quoting automatically when ICU data is written out.
|
||||
private static String quoteAlias(String str) {
|
||||
return str.indexOf('/') == -1 ? str : '"' + str.replace('/', ':') + '"';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,18 +4,17 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
|
|||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor.SubProcessor;
|
||||
|
||||
import com.google.common.escape.UnicodeEscaper;
|
||||
|
||||
|
@ -29,20 +28,25 @@ import com.google.common.escape.UnicodeEscaper;
|
|||
*/
|
||||
// TODO: This class can almost certainly be replaced with a small RegexTransformer config.
|
||||
public final class BreakIteratorMapper {
|
||||
// The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And
|
||||
// its default (and only) value is "standard".
|
||||
// TODO: Understand and document why this is the case.
|
||||
private static final PathMatcher SUPPRESSION = PathMatcher.of(
|
||||
"ldml/segmentations/segmentation[@type=*]/suppressions/suppression");
|
||||
|
||||
private static final CldrDataProcessor<BreakIteratorMapper> CLDR_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<BreakIteratorMapper> processor = CldrDataProcessor.builder();
|
||||
// The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And
|
||||
// its default (and only) value is "standard".
|
||||
// TODO: Understand and document why this is the case.
|
||||
processor.addValueAction(
|
||||
"//ldml/segmentations/segmentation[@type=*]/suppressions/suppression",
|
||||
BreakIteratorMapper::addSuppression);
|
||||
SubProcessor<BreakIteratorMapper> specials =
|
||||
processor.addSubprocessor("//ldml/special/icu:breakIteratorData");
|
||||
specials.addValueAction("icu:boundaries/*", BreakIteratorMapper::addBoundary);
|
||||
specials.addValueAction(
|
||||
"icu:dictionaries/icu:dictionary", BreakIteratorMapper::addDictionary);
|
||||
CLDR_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
private static final AttributeKey SEGMENTATION_TYPE = keyOf("segmentation", "type");
|
||||
|
||||
// Note: This could be done with an intermediate matcher for
|
||||
// "ldml/special/icu:breakIteratorData" but there are so few "special" values it's not worth it
|
||||
private static final PathMatcher BOUNDARIES =
|
||||
PathMatcher.of("ldml/special/icu:breakIteratorData/icu:boundaries/*");
|
||||
private static final PathMatcher DICTIONARY =
|
||||
PathMatcher.of("ldml/special/icu:breakIteratorData/icu:dictionaries/icu:dictionary");
|
||||
|
||||
private static final AttributeKey DICTIONARY_DEP = keyOf("icu:dictionary", "icu:dependency");
|
||||
private static final AttributeKey DICTIONARY_TYPE = keyOf("icu:dictionary", "type");
|
||||
|
||||
|
@ -59,8 +63,8 @@ public final class BreakIteratorMapper {
|
|||
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
|
||||
|
||||
BreakIteratorMapper mapper = new BreakIteratorMapper(icuData);
|
||||
icuSpecialData.ifPresent(s -> s.accept(DTD, mapper::addSpecials));
|
||||
cldrData.accept(DTD, mapper::addSuppression);
|
||||
icuSpecialData.ifPresent(d -> CLDR_PROCESSOR.process(d, mapper));
|
||||
CLDR_PROCESSOR.process(cldrData, mapper);
|
||||
return mapper.icuData;
|
||||
}
|
||||
|
||||
|
@ -72,28 +76,21 @@ public final class BreakIteratorMapper {
|
|||
}
|
||||
|
||||
private void addSuppression(CldrValue v) {
|
||||
if (SUPPRESSION.matches(v.getPath())) {
|
||||
String type = SEGMENTATION_TYPE.valueFrom(v);
|
||||
// TODO: Understand and document why we escape values here, but not for collation data.
|
||||
icuData.add(
|
||||
RbPath.of("exceptions", type + ":array"),
|
||||
ESCAPE_NON_ASCII.escape(v.getValue()));
|
||||
}
|
||||
String type = SEGMENTATION_TYPE.valueFrom(v);
|
||||
// TODO: Understand and document why we escape values here, but not for collation data.
|
||||
icuData.add(
|
||||
RbPath.of("exceptions", type + ":array"), ESCAPE_NON_ASCII.escape(v.getValue()));
|
||||
}
|
||||
|
||||
private void addSpecials(CldrValue v) {
|
||||
CldrPath p = v.getPath();
|
||||
if (BOUNDARIES.matches(p)) {
|
||||
addDependency(
|
||||
getDependencyName(v),
|
||||
getBoundaryType(v),
|
||||
getBoundaryDependency(v));
|
||||
} else if (DICTIONARY.matches(p)) {
|
||||
addDependency(
|
||||
getDependencyName(v),
|
||||
DICTIONARY_TYPE.valueFrom(v),
|
||||
DICTIONARY_DEP.optionalValueFrom(v));
|
||||
}
|
||||
private void addBoundary(CldrValue v) {
|
||||
addDependency(getDependencyName(v), getBoundaryType(v), getBoundaryDependency(v));
|
||||
}
|
||||
|
||||
private void addDictionary(CldrValue v) {
|
||||
addDependency(
|
||||
getDependencyName(v),
|
||||
DICTIONARY_TYPE.valueFrom(v),
|
||||
DICTIONARY_DEP.optionalValueFrom(v));
|
||||
}
|
||||
|
||||
private void addDependency(String name, String type, Optional<String> dependency) {
|
||||
|
|
|
@ -11,14 +11,13 @@ import java.util.Optional;
|
|||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor.SubProcessor;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
import com.google.common.base.Splitter;
|
||||
|
@ -32,18 +31,22 @@ import com.google.common.base.Splitter;
|
|||
* }</pre>
|
||||
*/
|
||||
public final class CollationMapper {
|
||||
private static final PathMatcher COLLATIONS = PathMatcher.of("ldml/collations");
|
||||
|
||||
// Note that the 'type' attribute is optional, so cannot be in the path matcher.
|
||||
// However since the CLDR data never actually omits the value, it would be easy to change the
|
||||
// attribute metadata to stop it being an implicit attribute and then it could appear.
|
||||
private static final PathMatcher COLLATION_RULE = PathMatcher.of("collation/cr");
|
||||
private static final CldrDataProcessor<CollationMapper> CLDR_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<CollationMapper> processor = CldrDataProcessor.builder();
|
||||
SubProcessor<CollationMapper> collations = processor.addSubprocessor("//ldml/collations");
|
||||
collations.addValueAction("collation/cr", CollationMapper::collectRule);
|
||||
collations.addValueAction("defaultCollation", CollationMapper::collectDefault);
|
||||
// This could be a separate processor, since the specials data only contains these paths,
|
||||
// but it's not clear if in future it could also contain any collation rules.
|
||||
processor.addValueAction("//ldml/special/*", CollationMapper::maybeAddSpecial);
|
||||
CLDR_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
private static final AttributeKey COLLATION_TYPE = keyOf("collation", "type");
|
||||
private static final AttributeKey COLLATION_RULE_ALT = keyOf("cr", "alt");
|
||||
|
||||
private static final PathMatcher DEFAULT_COLLATION = PathMatcher.of("defaultCollation");
|
||||
|
||||
private static final PathMatcher SPECIAL = PathMatcher.of("ldml/special");
|
||||
private static final AttributeKey SPECIAL_RULES = keyOf("icu:UCARules", "icu:uca_rules");
|
||||
private static final AttributeKey SPECIAL_DEP = keyOf("icu:depends", "icu:dependency");
|
||||
|
||||
|
@ -68,88 +71,76 @@ public final class CollationMapper {
|
|||
public static IcuData process(
|
||||
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData, String cldrVersion) {
|
||||
|
||||
CollationVisitor visitor = new CollationVisitor(icuData, cldrVersion);
|
||||
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
|
||||
cldrData.accept(DTD, visitor);
|
||||
return visitor.icuData;
|
||||
CollationMapper mapper = new CollationMapper(icuData, cldrVersion);
|
||||
icuSpecialData.ifPresent(specialData -> CLDR_PROCESSOR.process(specialData, mapper, DTD));
|
||||
CLDR_PROCESSOR.process(cldrData, mapper, DTD);
|
||||
return icuData;
|
||||
}
|
||||
|
||||
final static class CollationVisitor implements PrefixVisitor {
|
||||
private final IcuData icuData;
|
||||
private final String cldrVersion;
|
||||
private final IcuData icuData;
|
||||
private final String cldrVersion;
|
||||
|
||||
CollationVisitor(IcuData icuData, String cldrVersion) {
|
||||
this.icuData = checkNotNull(icuData);
|
||||
this.cldrVersion = checkNotNull(cldrVersion);
|
||||
// Super special hack case because the XML data is a bit broken for the root collation
|
||||
// data (there's an empty <collation> element that's a non-leaf element and thus not
|
||||
// visited, but we should add an empty sequence to the output data.
|
||||
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
|
||||
if (icuData.getName().equals("root")) {
|
||||
icuData.replace(RB_STANDARD_SEQUENCE, "");
|
||||
// TODO: Collation versioning probably needs to be improved.
|
||||
icuData.replace(RB_STANDARD_VERSION, cldrVersion);
|
||||
}
|
||||
private CollationMapper(IcuData icuData, String cldrVersion) {
|
||||
this.icuData = checkNotNull(icuData);
|
||||
this.cldrVersion = checkNotNull(cldrVersion);
|
||||
// Super special hack case because the XML data is a bit broken for the root collation
|
||||
// data (there's an empty <collation> element that's a non-leaf element and thus not
|
||||
// visited), but we should add an empty sequence to the output data.
|
||||
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
|
||||
if (icuData.getName().equals("root")) {
|
||||
icuData.replace(RB_STANDARD_SEQUENCE, "");
|
||||
// TODO: Collation versioning probably needs to be improved.
|
||||
icuData.replace(RB_STANDARD_VERSION, cldrVersion);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (COLLATIONS.matchesPrefixOf(prefix)) {
|
||||
ctx.install(this::collectRules);
|
||||
} else if (SPECIAL.matchesPrefixOf(prefix)) {
|
||||
ctx.install(this::maybeAddSpecial);
|
||||
}
|
||||
private void collectRule(CldrValue v) {
|
||||
String type = COLLATION_TYPE.valueFrom(v);
|
||||
RbPath rbPath = RbPath.of("collations", type, "Sequence");
|
||||
|
||||
// WARNING: This is almost certainly a bug, since while @alt can have the value
|
||||
// "short" it can also have other values. This code was copied from CollationMapper
|
||||
// which has the line;
|
||||
// isShort = attr.getValue("alt") != null;
|
||||
// TODO: Raise a ticket to examine this.
|
||||
boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent();
|
||||
|
||||
// Note that it's not clear why there's a check for "contains()" here. The code
|
||||
// from which this was derived is largely undocumented and this check could have
|
||||
// been overly defensive (perhaps a duplicate key should be an error?).
|
||||
if (isShort || !icuData.getPaths().contains(rbPath)) {
|
||||
RbValue rules = RbValue.of(
|
||||
LINE_SPLITTER.splitToList(v.getValue()).stream()
|
||||
.map(CollationMapper::removeComment)
|
||||
.filter(s -> !s.isEmpty())::iterator);
|
||||
icuData.replace(rbPath, rules);
|
||||
icuData.replace(RbPath.of("collations", type, "Version"), cldrVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void collectRules(CldrValue v) {
|
||||
CldrPath p = v.getPath();
|
||||
if (COLLATION_RULE.matchesSuffixOf(p)) {
|
||||
String type = COLLATION_TYPE.valueFrom(v);
|
||||
RbPath rbPath = RbPath.of("collations", type, "Sequence");
|
||||
private void collectDefault(CldrValue v) {
|
||||
icuData.add(RB_COLLATIONS_DEFAULT, v.getValue());
|
||||
}
|
||||
|
||||
// WARNING: This is almost certainly a bug, since while @type can have the value
|
||||
// "short" it can also have other values. This code was copied from CollationMapper
|
||||
// which has the line;
|
||||
// isShort = attr.getValue("alt") != null;
|
||||
// TODO: Raise a ticket to examine this.
|
||||
boolean isShort = COLLATION_RULE_ALT.optionalValueFrom(v).isPresent();
|
||||
|
||||
// Note that it's not clear why there's a check for "contains()" here. The code
|
||||
// from which this was derived is largely undocumented and this check could have
|
||||
// been overly defensive (perhaps a duplicate key should be an error?).
|
||||
if (isShort || !icuData.getPaths().contains(rbPath)) {
|
||||
RbValue rules = RbValue.of(
|
||||
LINE_SPLITTER.splitToList(v.getValue()).stream()
|
||||
.map(CollationMapper::removeComment)
|
||||
.filter(s -> !s.isEmpty())::iterator);
|
||||
icuData.replace(rbPath, rules);
|
||||
icuData.replace(RbPath.of("collations", type, "Version"), cldrVersion);
|
||||
}
|
||||
} else if (DEFAULT_COLLATION.matchesSuffixOf(p)) {
|
||||
icuData.add(RB_COLLATIONS_DEFAULT, v.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
// This is a bit special since the attribute we want to add depends on the element we are
|
||||
// visiting (which is somewhat unusual in the transformation classes).
|
||||
private void maybeAddSpecial(CldrValue value) {
|
||||
AttributeKey key;
|
||||
switch (value.getPath().getName()) {
|
||||
case "icu:UCARules":
|
||||
key = SPECIAL_RULES;
|
||||
break;
|
||||
case "icu:depends":
|
||||
key = SPECIAL_DEP;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
// substring(4) just removes the "icu:" prefix (which we know is present in the key).
|
||||
RbPath rbPath = RbPath.of(
|
||||
String.format("%s:process(%s)",
|
||||
key.getElementName().substring(4), key.getAttributeName().substring(4)));
|
||||
icuData.add(rbPath, key.valueFrom(value));
|
||||
// This is a bit special since the attribute we want to add depends on the element we are
|
||||
// visiting (which is somewhat unusual in the transformation classes).
|
||||
private void maybeAddSpecial(CldrValue value) {
|
||||
AttributeKey key;
|
||||
switch (value.getPath().getName()) {
|
||||
case "icu:UCARules":
|
||||
key = SPECIAL_RULES;
|
||||
break;
|
||||
case "icu:depends":
|
||||
key = SPECIAL_DEP;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
// substring(4) just removes the "icu:" prefix (which we know is present in the key).
|
||||
RbPath rbPath = RbPath.of(
|
||||
String.format("%s:process(%s)",
|
||||
key.getElementName().substring(4), key.getAttributeName().substring(4)));
|
||||
icuData.add(rbPath, key.valueFrom(value));
|
||||
}
|
||||
|
||||
// Collation data can contain # to mark an end-of-line comment, but it can also contain data
|
||||
|
@ -195,6 +186,4 @@ public final class CollationMapper {
|
|||
checkArgument(!quoted, "mismatched quotes in: %s", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
private CollationMapper() {}
|
||||
}
|
||||
|
|
|
@ -6,18 +6,15 @@ import static org.unicode.cldr.api.AttributeKey.keyOf;
|
|||
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
|
||||
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
|
@ -29,14 +26,18 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
* }</pre>
|
||||
*/
|
||||
public final class DayPeriodsMapper {
|
||||
private static final PathMatcher RULESET =
|
||||
PathMatcher.of("supplementalData/dayPeriodRuleSet");
|
||||
|
||||
private static final CldrDataProcessor<DayPeriodsMapper> CLDR_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<DayPeriodsMapper> processor = CldrDataProcessor.builder();
|
||||
processor.addAction("//supplementalData/dayPeriodRuleSet", (m, p) -> m.new Ruleset(p))
|
||||
.addSubprocessor("dayPeriodRules[@locales=*]", Ruleset::prefixStart)
|
||||
.addValueAction("dayPeriodRule[@type=*]", Ruleset::visitRule);
|
||||
CLDR_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
private static final AttributeKey RULESET_TYPE = keyOf("dayPeriodRuleSet", "type");
|
||||
|
||||
private static final PathMatcher RULES = PathMatcher.of("dayPeriodRules[@locales=*]");
|
||||
private static final AttributeKey RULES_LOCALES = keyOf("dayPeriodRules", "locales");
|
||||
|
||||
private static final PathMatcher RULE = PathMatcher.of("dayPeriodRule[@type=*]");
|
||||
private static final AttributeKey RULE_TYPE = keyOf("dayPeriodRule", "type");
|
||||
|
||||
private static final RbPath RB_LOCALES = RbPath.of("locales");
|
||||
|
@ -53,51 +54,33 @@ public final class DayPeriodsMapper {
|
|||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData data) {
|
||||
RuleSetVisitor mapper = new RuleSetVisitor();
|
||||
data.accept(NESTED_GROUPING, mapper);
|
||||
return mapper.icuData;
|
||||
return CLDR_PROCESSOR.process(data, new DayPeriodsMapper(), NESTED_GROUPING).icuData;
|
||||
}
|
||||
|
||||
private static final class RuleSetVisitor implements PrefixVisitor {
|
||||
// Mutable ICU data collected into during visitation.
|
||||
private final IcuData icuData = new IcuData("dayPeriods", false);
|
||||
private int setNum = 0;
|
||||
// Mutable ICU data into which values are collected during visitation.
|
||||
private final IcuData icuData = new IcuData("dayPeriods", false);
|
||||
private int setNum = 0;
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (RULESET.matches(prefix)) {
|
||||
ctx.install(new RuleVisitor(RULESET_TYPE.optionalValueFrom(prefix)));
|
||||
}
|
||||
private final class Ruleset {
|
||||
private RbPath localePrefix;
|
||||
|
||||
Ruleset(CldrPath prefix) {
|
||||
this.localePrefix = RULESET_TYPE.optionalValueFrom(prefix)
|
||||
.map(t -> RbPath.of("locales_" + t))
|
||||
.orElse(RB_LOCALES);
|
||||
}
|
||||
|
||||
private final class RuleVisitor implements PrefixVisitor {
|
||||
private final RbPath localePrefix;
|
||||
private void prefixStart(CldrPath prefix) {
|
||||
// Sets are arbitrarily identified by the string "setNN".
|
||||
String setName = "set" + (++setNum);
|
||||
RULES_LOCALES.listOfValuesFrom(prefix)
|
||||
.forEach(locale -> icuData.add(localePrefix.extendBy(locale), setName));
|
||||
}
|
||||
|
||||
private RuleVisitor(Optional<String> type) {
|
||||
// If there's a given type, add it to the prefix path.
|
||||
this.localePrefix = type.map(t -> RbPath.of("locales_" + t)).orElse(RB_LOCALES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (RULES.matchesSuffixOf(prefix)) {
|
||||
// Sets are arbitrarily identified by the string "setNN".
|
||||
String setName = "set" + (++setNum);
|
||||
RULES_LOCALES.listOfValuesFrom(prefix)
|
||||
.forEach(locale -> icuData.add(localePrefix.extendBy(locale), setName));
|
||||
ctx.install(this::visitRule);
|
||||
}
|
||||
}
|
||||
|
||||
private void visitRule(CldrValue value) {
|
||||
if (RULE.matchesSuffixOf(value.getPath())) {
|
||||
RbPath prefix = RbPath.of("rules", "set" + setNum, RULE_TYPE.valueFrom(value));
|
||||
value.getValueAttributes()
|
||||
.forEach((k, v) -> icuData.add(prefix.extendBy(k.getAttributeName()), v));
|
||||
}
|
||||
}
|
||||
private void visitRule(CldrValue value) {
|
||||
RbPath prefix = RbPath.of("rules", "set" + setNum, RULE_TYPE.valueFrom(value));
|
||||
value.getValueAttributes()
|
||||
.forEach((k, v) -> icuData.add(prefix.extendBy(k.getAttributeName()), v));
|
||||
}
|
||||
}
|
||||
|
||||
private DayPeriodsMapper() {}
|
||||
}
|
||||
|
|
|
@ -2,22 +2,20 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu.mapper;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
|
||||
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
|
@ -29,15 +27,18 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
* }</pre>
|
||||
*/
|
||||
public final class PluralRangesMapper {
|
||||
// Note that this mapper only matches when there's no "type" specified on the "plurals" element.
|
||||
// This is a bit weird, since the PluralsMapper expects a type (e.g. cardinal or ordinal) to
|
||||
// be present. Really this just illustrates that the plural ranges just should not be under the
|
||||
// same parent element as plurals.
|
||||
private static final PathMatcher RANGES =
|
||||
PathMatcher.of("supplementalData/plurals/pluralRanges[@locales=*]");
|
||||
private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales");
|
||||
|
||||
private static final PathMatcher RANGE = PathMatcher.of("pluralRange[@start=*][@end=*]");
|
||||
private static final CldrDataProcessor<PluralRangesMapper> CLDR_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<PluralRangesMapper> processor = CldrDataProcessor.builder();
|
||||
processor
|
||||
.addAction(
|
||||
"//supplementalData/plurals/pluralRanges[@locales=*]", (m, p) -> m.new Ranges(p))
|
||||
.addValueAction("pluralRange[@start=*][@end=*]", Ranges::visitRange);
|
||||
CLDR_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales");
|
||||
private static final AttributeKey RANGE_START = keyOf("pluralRange", "start");
|
||||
private static final AttributeKey RANGE_END = keyOf("pluralRange", "end");
|
||||
private static final AttributeKey RANGE_RESULT = keyOf("pluralRange", "result");
|
||||
|
@ -52,46 +53,37 @@ public final class PluralRangesMapper {
|
|||
* @return the IcuData instance to be written to a file.
|
||||
*/
|
||||
public static IcuData process(CldrDataSupplier src) {
|
||||
CldrData data = src.getDataForType(SUPPLEMENTAL);
|
||||
return process(data);
|
||||
return process(src.getDataForType(SUPPLEMENTAL));
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData data) {
|
||||
PluralRangesVisitor visitor = new PluralRangesVisitor();
|
||||
data.accept(NESTED_GROUPING, visitor);
|
||||
return visitor.icuData;
|
||||
return CLDR_PROCESSOR.process(data, new PluralRangesMapper(), NESTED_GROUPING).icuData;
|
||||
}
|
||||
|
||||
private static final class PluralRangesVisitor implements PrefixVisitor {
|
||||
private final IcuData icuData = new IcuData("pluralRanges", false);
|
||||
private final IcuData icuData = new IcuData("pluralRanges", false);
|
||||
private int setIndex = 0;
|
||||
|
||||
private int setIndex = 0;
|
||||
private String ruleLabel = null;
|
||||
private PluralRangesMapper() { }
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (RANGES.matches(prefix)) {
|
||||
ruleLabel = String.format("set%02d", setIndex++);
|
||||
RANGES_LOCALES.listOfValuesFrom(prefix)
|
||||
.forEach(l -> icuData.add(RB_LOCALES.extendBy(l), ruleLabel));
|
||||
ctx.install(this::visitRange);
|
||||
}
|
||||
private final class Ranges {
|
||||
private final String label;
|
||||
|
||||
Ranges(CldrPath prefix) {
|
||||
this.label = String.format("set%02d", setIndex++);
|
||||
RANGES_LOCALES.listOfValuesFrom(prefix)
|
||||
.forEach(l -> icuData.add(RB_LOCALES.extendBy(l), label));
|
||||
}
|
||||
|
||||
private void visitRange(CldrValue value) {
|
||||
checkState(RANGE.matchesSuffixOf(value.getPath()),
|
||||
"unexpected path: %s", value.getPath());
|
||||
// Note: "pluralRange:start" and "pluralRange:end" are optional attributes, but the CLDR DTD
|
||||
// specifies a default via comments. They should probably be changed to just have a
|
||||
// default in the DTD (and possibly converted to use an enum here).
|
||||
icuData.add(RB_RULES.extendBy(ruleLabel),
|
||||
icuData.add(RB_RULES.extendBy(label),
|
||||
RbValue.of(
|
||||
RANGE_START.valueFrom(value, "all"),
|
||||
RANGE_END.valueFrom(value, "all"),
|
||||
RANGE_RESULT.valueFrom(value)));
|
||||
}
|
||||
}
|
||||
|
||||
private PluralRangesMapper() {}
|
||||
}
|
||||
}
|
|
@ -2,30 +2,29 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu.mapper;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
|
||||
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.cldr.api.FilteredData;
|
||||
import org.unicode.cldr.api.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
|
||||
/**
|
||||
* A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
|
||||
|
@ -35,15 +34,21 @@ import com.google.common.collect.Iterables;
|
|||
* }</pre>
|
||||
*/
|
||||
public final class PluralsMapper {
|
||||
private static final PathMatcher PLURALS = PathMatcher.of("supplementalData/plurals[@type=*]");
|
||||
|
||||
private static final AttributeKey PLURALS_TYPE = keyOf("plurals", "type");
|
||||
|
||||
private static final PathMatcher RULES = PathMatcher.of("pluralRules[@locales=*]");
|
||||
private static final AttributeKey RULES_LOCALES = keyOf("pluralRules", "locales");
|
||||
|
||||
private static final PathMatcher RULE = PathMatcher.of("pluralRule[@count=*]");
|
||||
private static final AttributeKey RULE_COUNT = keyOf("pluralRule", "count");
|
||||
|
||||
private static final CldrDataProcessor<PluralsMapper> CLDR_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<PluralsMapper> processor = CldrDataProcessor.builder();
|
||||
processor
|
||||
.addAction("//supplementalData/plurals[@type=*]", (m, p) -> m.new Plurals(p))
|
||||
.addAction("pluralRules[@locales=*]", Rules::new, Plurals::addRules)
|
||||
.addValueAction("pluralRule[@count=*]", Rules::addRule);
|
||||
CLDR_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
private static final ImmutableMap<String, RbPath> ICU_PREFIX_MAP =
|
||||
ImmutableMap.of("cardinal", RbPath.of("locales"), "ordinal", RbPath.of("locales_ordinals"));
|
||||
|
||||
|
@ -54,103 +59,94 @@ public final class PluralsMapper {
|
|||
* @return the IcuData instance to be written to a file.
|
||||
*/
|
||||
public static IcuData process(CldrDataSupplier src) {
|
||||
CldrData data = src.getDataForType(SUPPLEMENTAL);
|
||||
return process(data);
|
||||
return process(src.getDataForType(SUPPLEMENTAL));
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData data) {
|
||||
PluralsVisitor visitor = new PluralsVisitor();
|
||||
// Note: We explicitly reset the type to mimic the order of the existing code, since this
|
||||
PluralsMapper mapper = new PluralsMapper();
|
||||
// Note: We explicitly filter by type to mimic the order of the existing code, since this
|
||||
// affects the set indices we generate during processing. Ideally this would all be immune
|
||||
// to ordering (or just enforce DTD ordering) but right now it's very dependent on
|
||||
// mimicking the order of the existing code to get identical output.
|
||||
data.accept(NESTED_GROUPING, visitor.setType("cardinal"));
|
||||
data.accept(NESTED_GROUPING, visitor.setType("ordinal"));
|
||||
return visitor.icuData;
|
||||
// mimicking the order of the existing code to get identical output. Once DTD order is
|
||||
// used everywhere, this can just be a single pass over the original data.
|
||||
CLDR_PROCESSOR.process(filterByType(data, "cardinal"), mapper, NESTED_GROUPING);
|
||||
CLDR_PROCESSOR.process(filterByType(data, "ordinal"), mapper, NESTED_GROUPING);
|
||||
return mapper.icuData;
|
||||
}
|
||||
|
||||
private static final class PluralsVisitor implements PrefixVisitor {
|
||||
// Mutable ICU data collected into during visitation.
|
||||
// In a post XML-aware API, is recording the XML file names really a good idea?
|
||||
private final IcuData icuData = new IcuData("plurals", false);
|
||||
// Filter for the type we are processing now (this could be removed if we don't mind which
|
||||
// order the types are processed, and switching to DTD ordering would make it stable).
|
||||
private String type = null;
|
||||
private final List<ImmutableMap<String, String>> previousRules = new ArrayList<>();
|
||||
// Mutable ICU data into which values are collected during visitation.
|
||||
// In a post XML-aware API, is recording the XML file names really a good idea?
|
||||
private final IcuData icuData = new IcuData("plurals", false);
|
||||
private final List<ImmutableMap<String, String>> previousRules = new ArrayList<>();
|
||||
|
||||
// Hack method to allow a single type to be processed at a time (the visitor would otherwise
|
||||
// happily handle both types in a single pass). We can't do this as two different visitors
|
||||
// (one for each type) because the current behaviour relies on carrying over the calculated
|
||||
// set numbers from one pass to the next. Once migration is complete we should revisit this
|
||||
// and allow this visitor to work in a single pass (probably with DTD order for stability).
|
||||
PluralsVisitor setType(String type) {
|
||||
this.type = checkNotNull(type);
|
||||
return this;
|
||||
private class Plurals {
|
||||
private final RbPath icuPrefix;
|
||||
|
||||
Plurals(CldrPath prefix) {
|
||||
// Note: "plurals:type" is an optional attribute but the CLDR DTD specifies a
|
||||
// default via comments. It should probably be changed to just have a default in
|
||||
// the DTD.
|
||||
this.icuPrefix = ICU_PREFIX_MAP.get(PLURALS_TYPE.valueFrom(prefix, "cardinal"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (PLURALS.matches(prefix)) {
|
||||
// Note: "plurals:type" is an optional attribute but the CLDR DTD specifies a
|
||||
// default via comments. It should probably be changed to just have a default in
|
||||
// the DTD.
|
||||
if (PLURALS_TYPE.valueFrom(prefix, "cardinal").equals(type)) {
|
||||
ctx.install(new RulesVisitor(ICU_PREFIX_MAP.get(type)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final class RulesVisitor implements PrefixVisitor {
|
||||
private final RbPath icuPrefix;
|
||||
private final List<String> locales = new ArrayList<>();
|
||||
private final Map<String, String> rules = new LinkedHashMap<>();
|
||||
|
||||
RulesVisitor(RbPath icuPrefix) {
|
||||
this.icuPrefix = checkNotNull(icuPrefix);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
if (RULES.matchesSuffixOf(prefix)) {
|
||||
Iterables.addAll(locales, RULES_LOCALES.listOfValuesFrom(prefix));
|
||||
ctx.install(value -> {
|
||||
if (RULE.matchesSuffixOf(value.getPath())) {
|
||||
rules.put(RULE_COUNT.valueFrom(value), value.getValue());
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitPrefixEnd(CldrPath prefix) {
|
||||
checkState(!locales.isEmpty(), "missing locale data for plurals: %s", prefix);
|
||||
// Note: The original mapper code "sort of" coped with empty rules, but it's not
|
||||
// completely well behaved (or documented), so since this doesn't happen in the
|
||||
// current CLDR data, I decided to just prohibit it in the new code. Support can
|
||||
// easily be added in once the expected semantics are clear.
|
||||
checkState(!rules.isEmpty(), "missing rule data for plurals: %s", prefix);
|
||||
|
||||
// Have we seen this set of rules before? If so, reuse the existing index. Note
|
||||
// that an IDE might report this call as suspicious because the key is not yet an
|
||||
// immutable map (saves creating immutable maps just to check for inclusion) but
|
||||
// this is fine because collection equality is based only on contents, not
|
||||
// collection type.
|
||||
int idx = previousRules.indexOf(rules);
|
||||
if (idx == -1) {
|
||||
int newIdx = previousRules.size();
|
||||
rules.forEach((k, v) -> icuData.add(RbPath.of("rules", "set" + newIdx, k), v));
|
||||
// Since "rules" is mutable and reused, we must take an immutable copy here.
|
||||
previousRules.add(ImmutableMap.copyOf(rules));
|
||||
idx = newIdx;
|
||||
}
|
||||
String setName = "set" + idx;
|
||||
locales.forEach(locale -> icuData.add(icuPrefix.extendBy(locale), setName));
|
||||
rules.clear();
|
||||
locales.clear();
|
||||
/**
 * Writes the plural rules collected in {@code r} into {@code icuData}.
 *
 * <p>A rule set whose contents have not been seen before is added under the path
 * {@code rules/setN/key} (where N is its index in {@code previousRules} and "key" is the
 * map key of each rule); a rule set with the same contents as an earlier one reuses that
 * earlier index. Each locale of {@code r} is then mapped, below {@code icuPrefix}, to the
 * name of its rule set.
 *
 * <p>Fails the {@code checkState()} precondition if {@code r} has no rules.
 *
 * @param r the rules to add, together with the locales they apply to.
 */
private void addRules(Rules r) {
|
||||
ImmutableMap<String, String> rules = r.getRules();
|
||||
// Note: The original mapper code "sort of" coped with empty rules, but it's not
|
||||
// completely well behaved (or documented), so since this doesn't happen in the
|
||||
// current CLDR data, I decided to just prohibit it in the new code. Support can
|
||||
// easily be added in once the expected semantics are clear.
|
||||
checkState(!rules.isEmpty(), "missing rule data for plurals");
|
||||
|
||||
// Have we seen this set of rules before? If so, reuse the existing index. Note
|
||||
// that "rules" is already an ImmutableMap here (it comes from r.getRules()), and
|
||||
// that map equality is based only on contents, so indexOf() finds any previously
|
||||
// added rule set with the same mappings, regardless of which Rules instance
|
||||
// produced it.
|
||||
int idx = previousRules.indexOf(rules);
|
||||
if (idx == -1) {
|
||||
int newIdx = previousRules.size();
|
||||
rules.forEach((k, v) -> icuData.add(RbPath.of("rules", "set" + newIdx, k), v));
|
||||
// "rules" is already immutable (an ImmutableMap), so it can be stored directly.
|
||||
previousRules.add(rules);
|
||||
idx = newIdx;
|
||||
}
|
||||
String setName = "set" + idx;
|
||||
r.getLocales().forEach(locale -> icuData.add(icuPrefix.extendBy(locale), setName));
|
||||
}
|
||||
}
|
||||
|
||||
private PluralsMapper() {}
|
||||
/**
 * Collects the locales and the rules found below a single path prefix.
 *
 * <p>The locale list is fixed at construction time, while rules are accumulated one at a
 * time via {@code addRule()} and turned into an immutable map by {@code getRules()}.
 */
private static class Rules {
|
||||
private final ImmutableList<String> locales;
|
||||
private final ImmutableMap.Builder<String, String> map = ImmutableMap.builder();
|
||||
|
||||
// Reads the list of locales from the prefix path via RULES_LOCALES and fails the
// checkState() precondition if that list is empty.
private Rules(CldrPath prefix) {
|
||||
this.locales = ImmutableList.copyOf(RULES_LOCALES.listOfValuesFrom(prefix));
|
||||
checkState(!locales.isEmpty(), "missing locale data for plurals: %s", prefix);
|
||||
}
|
||||
|
||||
// Records one rule, keyed by RULE_COUNT.valueFrom(value), with the CLDR value's
// string as the map value.
private void addRule(CldrValue value) {
|
||||
map.put(RULE_COUNT.valueFrom(value), value.getValue());
|
||||
}
|
||||
|
||||
// Returns the (immutable) locale list captured in the constructor.
private ImmutableList<String> getLocales() {
|
||||
return locales;
|
||||
}
|
||||
|
||||
// Builds an immutable map from the rules added so far.
// NOTE(review): ImmutableMap.Builder rejects duplicate keys when build() is called, so
// two rules with the same count under one prefix would presumably fail here rather
// than overwrite each other - confirm this is the intended behaviour.
private ImmutableMap<String, String> getRules() {
|
||||
return map.build();
|
||||
}
|
||||
}
|
||||
|
||||
// A hack to allow us to process "cardinal" data before "ordinal" data (even though DTD order
|
||||
// is the other way round). Once DTD order is the only ordering used, this can be removed.
|
||||
/**
 * Wraps {@code data} in a {@code FilteredData} whose filter keeps only values with a path
 * that the matcher for {@code //supplementalData/plurals[@type="..."]} (using the given
 * plural type) accepts as a prefix match.
 *
 * @param data the CLDR data to wrap.
 * @param pluralType the value of the "type" attribute on the "plurals" element to keep.
 * @return the filtering wrapper around {@code data}.
 */
private static CldrData filterByType(CldrData data, String pluralType) {
|
||||
PathMatcher matcher =
|
||||
PathMatcher.of("//supplementalData/plurals[@type=\"" + pluralType + "\"]");
|
||||
return new FilteredData(data) {
|
||||
// NOTE(review): presumably a null return makes FilteredData omit the value - confirm
// against the FilteredData contract.
@Override protected CldrValue filter(CldrValue value) {
|
||||
return matcher.matchesPrefixOf(value.getPath()) ? value : null;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,16 +7,15 @@ import static org.unicode.cldr.api.AttributeKey.keyOf;
|
|||
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
|
||||
import com.google.common.escape.UnicodeEscaper;
|
||||
|
||||
|
@ -26,21 +25,26 @@ import com.google.common.escape.UnicodeEscaper;
|
|||
* //ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]
|
||||
* }</pre>
|
||||
*/
|
||||
// TODO: This class can almost certainly be written using RegexTransformer and a small config.
|
||||
public final class RbnfMapper {
|
||||
private static final PathMatcher RULE_SET =
|
||||
PathMatcher.of("ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]");
|
||||
private static final AttributeKey GROUPING_TYPE = keyOf("rulesetGrouping", "type");
|
||||
private static final AttributeKey RULESET_TYPE = keyOf("ruleset", "type");
|
||||
|
||||
private static final PathMatcher RBNF_RULE = PathMatcher.of("rbnfrule");
|
||||
private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
|
||||
private static final AttributeKey RBNF_VALUE = keyOf("rbnfrule", "value");
|
||||
private static final AttributeKey RBNF_RADIX = keyOf("rbnfrule", "radix");
|
||||
private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
|
||||
|
||||
// This is the ICU path prefix, below which everything generated by this visitor will go.
|
||||
private static final RbPath RB_ROOT = RbPath.of("RBNFRules");
|
||||
|
||||
private static final CldrDataProcessor<RbnfMapper> RBNF_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<RbnfMapper> processor = CldrDataProcessor.builder();
|
||||
processor
|
||||
.addAction(
|
||||
"//ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]", (m, p) -> m.new Ruleset(p))
|
||||
.addValueAction("rbnfrule", Ruleset::addRule);
|
||||
RBNF_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes data from the given supplier to generate RBNF data for a set of locale IDs.
|
||||
*
|
||||
|
@ -55,89 +59,78 @@ public final class RbnfMapper {
|
|||
// Using DTD order is essential here because the RBNF paths contain ordered elements,
|
||||
// so we must ensure that they appear in sorted order (otherwise we'd have to do more
|
||||
// work at this end to re-sort the results).
|
||||
RulesetVisitor visitor = new RulesetVisitor(icuData);
|
||||
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
|
||||
cldrData.accept(DTD, visitor);
|
||||
return visitor.icuData;
|
||||
RbnfMapper mapper = new RbnfMapper(icuData);
|
||||
icuSpecialData.ifPresent(s -> RBNF_PROCESSOR.process(s, mapper, DTD));
|
||||
RBNF_PROCESSOR.process(cldrData, mapper, DTD);
|
||||
return mapper.icuData;
|
||||
}
|
||||
|
||||
static final class RulesetVisitor implements PrefixVisitor {
|
||||
private final IcuData icuData;
|
||||
|
||||
private final IcuData icuData;
|
||||
private RbnfMapper(IcuData icuData) {
|
||||
this.icuData = checkNotNull(icuData);
|
||||
}
|
||||
|
||||
private RulesetVisitor(IcuData icuData) {
|
||||
this.icuData = checkNotNull(icuData);
|
||||
private class Ruleset {
|
||||
private final RbPath rbPath;
|
||||
private final String rulesetType;
|
||||
private final boolean isStrict;
|
||||
private boolean hasHeader = false;
|
||||
|
||||
/**
 * Captures the per-ruleset state that can be derived from the ruleset's path prefix.
 *
 * @param prefix the path prefix from which the grouping type and ruleset type are read.
 */
Ruleset(CldrPath prefix) {
|
||||
// The ICU path is RB_ROOT extended by the value of the rulesetGrouping "type" attribute.
this.rbPath = RB_ROOT.extendBy(GROUPING_TYPE.valueFrom(prefix));
|
||||
this.rulesetType = RULESET_TYPE.valueFrom(prefix);
|
||||
// Every ruleset type other than "lenient-parse" is treated as strict.
this.isStrict = !"lenient-parse".equals(rulesetType);
|
||||
}
|
||||
|
||||
@Override public void visitPrefixStart(CldrPath prefix, Context context) {
|
||||
if (RULE_SET.matchesPrefixOf(prefix)) {
|
||||
RbPath rbPath = RB_ROOT.extendBy(GROUPING_TYPE.valueFrom(prefix));
|
||||
String rulesetType = RULESET_TYPE.valueFrom(prefix);
|
||||
boolean isStrict = !"lenient-parse".equals(rulesetType);
|
||||
|
||||
// This is rather hacky because the access attribute lives on the parent path
|
||||
// element, but we cannot use it until we visit the child values (because it's a
|
||||
// value attribute and will not be in the prefix path). So we need to add the
|
||||
// header only once, just before we start adding the values relating to the child
|
||||
// elements, so we need a flag.
|
||||
//
|
||||
// This cannot be a boolean field since it must be "effectively final".
|
||||
AtomicBoolean hasHeader = new AtomicBoolean(false);
|
||||
context.install(
|
||||
value -> {
|
||||
if (RBNF_RULE.matchesSuffixOf(value.getPath())) {
|
||||
if (!hasHeader.get()) {
|
||||
boolean isPrivate =
|
||||
RULESET_ACCESS.valueFrom(value, "public").equals("private");
|
||||
icuData.add(rbPath, (isPrivate ? "%%" : "%") + rulesetType + ":");
|
||||
hasHeader.set(true);
|
||||
}
|
||||
String rulePrefix = "";
|
||||
if (isStrict) {
|
||||
String basePrefix = RBNF_VALUE.valueFrom(value);
|
||||
rulePrefix = RBNF_RADIX.optionalValueFrom(value)
|
||||
.map(r -> basePrefix + "/" + r)
|
||||
.orElse(basePrefix);
|
||||
rulePrefix += ": ";
|
||||
}
|
||||
icuData.add(
|
||||
rbPath,
|
||||
rulePrefix + ESCAPE_RBNF_DATA.escape(value.getValue()));
|
||||
}
|
||||
});
|
||||
/**
 * Adds a single rule value to {@code icuData} under this ruleset's path.
 *
 * <p>The first call also adds the ruleset header: the ruleset type followed by ":" and
 * prefixed with "%%" when the ruleset access attribute is "private", or with "%"
 * otherwise. The rule text itself is passed through {@code ESCAPE_RBNF_DATA} and, for
 * strict rulesets, prefixed as described in the comment below.
 *
 * @param value the rule value to add.
 */
void addRule(CldrValue value) {
|
||||
// This is a bit hacky because the access attribute lives on the parent path element,
|
||||
// but we cannot use it until we visit the child values (because it's a value attribute
|
||||
// and will not be in the prefix path) so we need to add the header only once here.
|
||||
if (!hasHeader) {
|
||||
// NOTE(review): "public" is presumably the fallback used when the attribute is absent.
boolean isPrivate = RULESET_ACCESS.valueFrom(value, "public").equals("private");
|
||||
icuData.add(rbPath, (isPrivate ? "%%" : "%") + rulesetType + ":");
|
||||
hasHeader = true;
|
||||
}
|
||||
// Prefix is: "@value: ", "@value/@radix: " or empty (for non strict rules).
|
||||
String rulePrefix = isStrict
|
||||
? RBNF_VALUE.valueFrom(value)
|
||||
+ RBNF_RADIX.optionalValueFrom(value).map(r -> "/" + r).orElse("")
|
||||
+ ": "
|
||||
: "";
|
||||
icuData.add(rbPath, rulePrefix + ESCAPE_RBNF_DATA.escape(value.getValue()));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert characters outside the range U+0020 to U+007F to Unicode escapes, and convert
|
||||
* backslash to a double backslash. This class is super slow for non-ASCII escaping due to
|
||||
* using "String.format()", however there's < 100 values that need any escaping, so it's
|
||||
* fine.
|
||||
*/
|
||||
private static final UnicodeEscaper ESCAPE_RBNF_DATA = new UnicodeEscaper() {
|
||||
private final char[] DOUBLE_BACKSLASH = "\\\\".toCharArray();
|
||||
private final char[] LEFT_ANGLE = "<".toCharArray();
|
||||
private final char[] RIGHT_ANGLE = ">".toCharArray();
|
||||
/*
|
||||
* Convert characters outside the range U+0020 to U+007F to Unicode escapes, and convert
|
||||
* backslash to a double backslash. This class is super slow for non-ASCII escaping due to
|
||||
* using "String.format()", however there's < 100 values that need any escaping, so it's
|
||||
* fine.
|
||||
*/
|
||||
private static final UnicodeEscaper ESCAPE_RBNF_DATA = new UnicodeEscaper() {
|
||||
private final char[] DOUBLE_BACKSLASH = "\\\\".toCharArray();
|
||||
private final char[] LEFT_ANGLE = "<".toCharArray();
|
||||
private final char[] RIGHT_ANGLE = ">".toCharArray();
|
||||
|
||||
@Override
|
||||
protected char[] escape(int cp) {
|
||||
// Returning null means "do not escape".
|
||||
switch (cp) {
|
||||
case '\\':
|
||||
return DOUBLE_BACKSLASH;
|
||||
case '←':
|
||||
return LEFT_ANGLE;
|
||||
case '→':
|
||||
return RIGHT_ANGLE;
|
||||
default:
|
||||
if (0x0020 <= cp && cp <= 0x007F) {
|
||||
return null;
|
||||
} else if (cp <= 0xFFFF) {
|
||||
return String.format("\\u%04X", cp).toCharArray();
|
||||
}
|
||||
return String.format("\\U%08X", cp).toCharArray();
|
||||
@Override
|
||||
protected char[] escape(int cp) {
|
||||
// Returning null means "do not escape".
|
||||
switch (cp) {
|
||||
case '\\':
|
||||
return DOUBLE_BACKSLASH;
|
||||
case '←':
|
||||
return LEFT_ANGLE;
|
||||
case '→':
|
||||
return RIGHT_ANGLE;
|
||||
default:
|
||||
if (0x0020 <= cp && cp <= 0x007F) {
|
||||
return null;
|
||||
} else if (cp <= 0xFFFF) {
|
||||
return String.format("\\u%04X", cp).toCharArray();
|
||||
}
|
||||
return String.format("\\U%08X", cp).toCharArray();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -5,11 +5,13 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
|
|||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.NESTED_GROUPING;
|
||||
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
@ -36,18 +38,21 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
|
|||
*/
|
||||
// TODO: Improve external data splitting and remove need for a PathMatcher here.
|
||||
public static IcuData process(
|
||||
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
|
||||
CldrDataSupplier src,
|
||||
PathValueTransformer transformer,
|
||||
String icuName,
|
||||
Predicate<CldrPath> paths) {
|
||||
|
||||
IcuData icuData = new IcuData(icuName, false);
|
||||
new SupplementalMapper(src, transformer, paths).addIcuData(icuData);
|
||||
return icuData;
|
||||
}
|
||||
|
||||
private final PathMatcher paths;
|
||||
private final Predicate<CldrPath> paths;
|
||||
private int fifoCounter = 0;
|
||||
|
||||
private SupplementalMapper(
|
||||
CldrDataSupplier src, PathValueTransformer transformer, PathMatcher pathFilter) {
|
||||
CldrDataSupplier src, PathValueTransformer transformer, Predicate<CldrPath> pathFilter) {
|
||||
|
||||
super(src.getDataForType(CldrDataType.SUPPLEMENTAL), transformer);
|
||||
this.paths = checkNotNull(pathFilter);
|
||||
|
@ -63,7 +68,7 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
|
|||
}
|
||||
|
||||
private void visit(CldrValue value) {
|
||||
if (paths.matchesPrefixOf(value.getPath())) {
|
||||
if (paths.test(value.getPath())) {
|
||||
transformValue(value).forEach(this::collectResult);
|
||||
fifoCounter++;
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import static com.google.common.base.CharMatcher.whitespace;
|
|||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static java.nio.file.StandardOpenOption.CREATE_NEW;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
|
||||
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -20,14 +19,13 @@ import java.util.function.Function;
|
|||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.ValueVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
@ -43,8 +41,15 @@ import com.ibm.icu.text.Transliterator;
|
|||
* <p>This mapper also writes out the transform rule files into a specified directory.
|
||||
*/
|
||||
public final class TransformsMapper {
|
||||
private static final PathMatcher TRULE =
|
||||
PathMatcher.of("supplementalData/transforms/transform/tRule");
|
||||
|
||||
private static final CldrDataProcessor<TransformsMapper> CLDR_PROCESSOR;
|
||||
static {
|
||||
CldrDataProcessor.Builder<TransformsMapper> processor = CldrDataProcessor.builder();
|
||||
processor.addValueAction(
|
||||
"//supplementalData/transforms/transform/tRule", TransformsMapper::processRule);
|
||||
CLDR_PROCESSOR = processor.build();
|
||||
}
|
||||
|
||||
private static final AttributeKey TRANSFORM_SOURCE = keyOf("transform", "source");
|
||||
private static final AttributeKey TRANSFORM_TARGET = keyOf("transform", "target");
|
||||
private static final AttributeKey TRANSFORM_DIRECTION = keyOf("transform", "direction");
|
||||
|
@ -99,74 +104,68 @@ public final class TransformsMapper {
|
|||
static IcuData process(
|
||||
CldrData cldrData, Function<Path, PrintWriter> fileWriterFn, List<String> header) {
|
||||
|
||||
RuleVisitor visitor = new RuleVisitor(fileWriterFn, header);
|
||||
cldrData.accept(DTD, visitor);
|
||||
addSpecialCaseValues(visitor.icuData);
|
||||
return visitor.icuData;
|
||||
TransformsMapper mapper = new TransformsMapper(fileWriterFn, header);
|
||||
CLDR_PROCESSOR.process(cldrData, mapper);
|
||||
addSpecialCaseValues(mapper.icuData);
|
||||
return mapper.icuData;
|
||||
}
|
||||
|
||||
private static class RuleVisitor implements ValueVisitor {
|
||||
private final IcuData icuData = new IcuData("root", false);
|
||||
private final Function<Path, PrintWriter> outFn;
|
||||
private final ImmutableList<String> header;
|
||||
private final IcuData icuData = new IcuData("root", false);
|
||||
private final Function<Path, PrintWriter> outFn;
|
||||
private final ImmutableList<String> header;
|
||||
|
||||
RuleVisitor(Function<Path, PrintWriter> outFn, List<String> header) {
|
||||
this.outFn = checkNotNull(outFn);
|
||||
this.header = ImmutableList.copyOf(header);
|
||||
icuData.setFileComment("File: root.txt");
|
||||
/**
 * Creates a mapper instance holding the state used while processing transform rules.
 *
 * @param outFn function giving the writer to use for a given output file path; must not
 *     be null.
 * @param header lines written (as "# " comments) at the top of every data file; copied
 *     into an immutable list.
 */
private TransformsMapper(Function<Path, PrintWriter> outFn, List<String> header) {
|
||||
this.outFn = checkNotNull(outFn);
|
||||
this.header = ImmutableList.copyOf(header);
|
||||
icuData.setFileComment("File: root.txt");
|
||||
}
|
||||
|
||||
/**
 * Processes a single transform rule value.
 *
 * <p>The output filename is "source_target.txt", or "source_target_variant.txt" when the
 * variant attribute is present. The rule is then registered via
 * {@code writeRootIndexEntry()} and its data file is written via {@code writeDataFile()}.
 *
 * @param value the transform rule value to process.
 */
private void processRule(CldrValue value) {
|
||||
String source = getExpectedOptionalAttribute(value, TRANSFORM_SOURCE);
|
||||
String target = getExpectedOptionalAttribute(value, TRANSFORM_TARGET);
|
||||
Optional<String> variant = TRANSFORM_VARIANT.optionalValueFrom(value);
|
||||
String baseFilename = source + "_" + target;
|
||||
String filename = variant.map(v -> baseFilename + "_" + v).orElse(baseFilename) + ".txt";
|
||||
writeRootIndexEntry(value, source, target, variant, filename);
|
||||
writeDataFile(filename, value);
|
||||
}
|
||||
|
||||
/**
 * Writes the data file for one transform rule using the writer supplied by {@code outFn}.
 *
 * <p>The output is: the character U+FEFF, the header lines (each prefixed with "# "), a
 * short comment block naming the file, and finally the rule text, which is trimmed of
 * leading and trailing whitespace and passed through {@code FIXUP}. The writer is closed
 * when the method returns.
 *
 * @param filename the name of the file to write (also recorded in the file's comments).
 * @param value the transform rule value whose text is written.
 */
private void writeDataFile(String filename, CldrValue value) {
|
||||
try (PrintWriter out = outFn.apply(Paths.get(filename))) {
|
||||
// U+FEFF written first, i.e. a byte order mark at the start of the file.
out.print("\uFEFF");
|
||||
header.forEach(s -> out.println("# " + s));
|
||||
out.println("#");
|
||||
out.println("# File: " + filename);
|
||||
out.println("# Generated from CLDR");
|
||||
out.println("#");
|
||||
out.println();
|
||||
out.println(FIXUP.transliterate(whitespace().trimFrom(value.getValue())));
|
||||
out.println();
|
||||
}
|
||||
}
|
||||
|
||||
@Override public void visit(CldrValue value) {
|
||||
// The other possible element is "comment" but we currently ignore those.
|
||||
if (TRULE.matches(value.getPath())) {
|
||||
String source = getExpectedOptionalAttribute(value, TRANSFORM_SOURCE);
|
||||
String target = getExpectedOptionalAttribute(value, TRANSFORM_TARGET);
|
||||
Optional<String> variant = TRANSFORM_VARIANT.optionalValueFrom(value);
|
||||
String baseFilename = source + "_" + target;
|
||||
String filename =
|
||||
variant.map(v -> baseFilename + "_" + v).orElse(baseFilename) + ".txt";
|
||||
writeRootIndexEntry(value, source, target, variant, filename);
|
||||
writeDataFile(filename, value);
|
||||
}
|
||||
private void writeRootIndexEntry(
|
||||
CldrValue value, String source, String target, Optional<String> variant, String filename) {
|
||||
Visibility visibility = TRANSFORM_VISIBILITY.valueFrom(value, Visibility.class);
|
||||
String status = visibility == Visibility.internal ? "internal" : "file";
|
||||
|
||||
Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class);
|
||||
// TODO: Consider checks for unused data (e.g. forward aliases in a backward rule).
|
||||
if (dir != Direction.backward) {
|
||||
String id = getId(source, target, variant);
|
||||
TRANSFORM_ALIAS.listOfValuesFrom(value)
|
||||
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
|
||||
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
|
||||
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
|
||||
icuData.add(rbPrefix.extendBy("direction"), "FORWARD");
|
||||
}
|
||||
|
||||
private void writeDataFile(String filename, CldrValue value) {
|
||||
try (PrintWriter out = outFn.apply(Paths.get(filename))) {
|
||||
out.print("\uFEFF");
|
||||
header.forEach(s -> out.println("# " + s));
|
||||
out.println("#");
|
||||
out.println("# File: " + filename);
|
||||
out.println("# Generated from CLDR");
|
||||
out.println("#");
|
||||
out.println();
|
||||
out.println(FIXUP.transliterate(whitespace().trimFrom(value.getValue())));
|
||||
out.println();
|
||||
}
|
||||
}
|
||||
|
||||
private void writeRootIndexEntry(
|
||||
CldrValue value, String source, String target, Optional<String> variant, String filename) {
|
||||
Visibility visibility = TRANSFORM_VISIBILITY.valueFrom(value, Visibility.class);
|
||||
String status = visibility == Visibility.internal ? "internal" : "file";
|
||||
|
||||
Direction dir = TRANSFORM_DIRECTION.valueFrom(value, Direction.class);
|
||||
// TODO: Consider checks for unused data (e.g. forward aliases in a backward rule).
|
||||
if (dir != Direction.backward) {
|
||||
String id = getId(source, target, variant);
|
||||
TRANSFORM_ALIAS.listOfValuesFrom(value)
|
||||
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
|
||||
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
|
||||
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
|
||||
icuData.add(rbPrefix.extendBy("direction"), "FORWARD");
|
||||
}
|
||||
if (dir != Direction.forward) {
|
||||
String id = getId(target, source, variant);
|
||||
TRANSFORM_BACKALIAS.listOfValuesFrom(value)
|
||||
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
|
||||
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
|
||||
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
|
||||
icuData.add(rbPrefix.extendBy("direction"), "REVERSE");
|
||||
}
|
||||
if (dir != Direction.forward) {
|
||||
String id = getId(target, source, variant);
|
||||
TRANSFORM_BACKALIAS.listOfValuesFrom(value)
|
||||
.forEach(a -> icuData.add(RB_TRANSLITERATOR_IDS.extendBy(a, "alias"), id));
|
||||
RbPath rbPrefix = RB_TRANSLITERATOR_IDS.extendBy(id, status);
|
||||
icuData.add(rbPrefix.extendBy("resource:process(transliterator)"), filename);
|
||||
icuData.add(rbPrefix.extendBy("direction"), "REVERSE");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,14 +46,14 @@ public class AlternateLocaleDataTest {
|
|||
FakeDataSupplier src = new FakeDataSupplier()
|
||||
.addLocaleData("xx", target, source, other)
|
||||
.addInheritedData("xx", inherited);
|
||||
CldrDataSupplier transformed = AlternateLocaleData.transform(
|
||||
src,
|
||||
ImmutableMap.of(target.getPath(), source.getPath()),
|
||||
ImmutableTable.of());
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(
|
||||
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
|
||||
|
||||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
||||
// Note that the source is always removed (unless it's also a target).
|
||||
assertValuesUnordered(unresolved, altValue, other);
|
||||
assertValuesUnordered(resolved, altValue, other, inherited);
|
||||
}
|
||||
|
@ -67,10 +67,9 @@ public class AlternateLocaleDataTest {
|
|||
ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
|
||||
|
||||
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target);
|
||||
CldrDataSupplier transformed = AlternateLocaleData.transform(
|
||||
src,
|
||||
ImmutableMap.of(target.getPath(), source.getPath()),
|
||||
ImmutableTable.of());
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(
|
||||
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
|
||||
|
||||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
@ -87,19 +86,21 @@ public class AlternateLocaleDataTest {
|
|||
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "Full Display Name");
|
||||
CldrValue source =
|
||||
ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
|
||||
CldrValue other =
|
||||
ldml("numbers/currencies/currency[@type=\"EUR\"]/displayName", "Euro");
|
||||
|
||||
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source);
|
||||
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source, other);
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()),
|
||||
ImmutableTable.of());
|
||||
AlternateLocaleData.transform(
|
||||
src, ImmutableMap.of(target.getPath(), source.getPath()), ImmutableTable.of());
|
||||
|
||||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
||||
// If there's no target the alt-path mapping is incomplete and we do nothing (this matches
|
||||
// the old CLDR tool behaviour and reasonable but can hide inconsistencies in CLDR data).
|
||||
assertValuesUnordered(unresolved, source);
|
||||
assertValuesUnordered(resolved, source);
|
||||
// Even though the missing target is not matched (so no change there) the source is always
|
||||
// removed from the transformed data.
|
||||
assertValuesUnordered(unresolved, other);
|
||||
assertValuesUnordered(resolved, other);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
// © 2020 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
public class CldrDataProcessorTest {
|
||||
|
||||
private static final AttributeKey TERRITORY_TYPE = AttributeKey.keyOf("territory", "type");
|
||||
private static final AttributeKey CURRENCY_TYPE = AttributeKey.keyOf("currency", "type");
|
||||
|
||||
// An overly simplistic value type for currency for testing purposes. In real code you would
|
||||
// probably want an immutable type and a separate builder, or a mutable type just to collect
|
||||
// values that doesn't need equals/hashcode (this class serves 2 purposes in the test).
|
||||
private static final class CurrencyData {
|
||||
// The key is fixed at construction; name and symbol start empty and are mutable.
final String key;
|
||||
String name = "";
|
||||
String symbol = "";
|
||||
|
||||
// Creates an entry with an empty name and symbol (to be filled in afterwards).
CurrencyData(String key) {
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
// Creates a fully populated entry (used for the expected values in assertions).
CurrencyData(String key, String name, String symbol) {
|
||||
this.key = key;
|
||||
this.name = name;
|
||||
this.symbol = symbol;
|
||||
}
|
||||
|
||||
// Two instances are equal when key, name and symbol are all equal.
@Override public boolean equals(Object o) {
|
||||
if (o instanceof CurrencyData) {
|
||||
CurrencyData that = (CurrencyData) o;
|
||||
return key.equals(that.key) && name.equals(that.name) && symbol.equals(that.symbol);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Hashes the same three fields that equals() compares.
@Override public int hashCode() {
|
||||
return Objects.hash(key, name, symbol);
|
||||
}
|
||||
|
||||
// NOTE(review): the key is not included in this output although it takes part in
// equals() and hashCode(), so a failure message cannot show a key mismatch.
@Override public String toString() {
|
||||
return String.format("CurrencyData{name=%s, symbol='%s'}", name, symbol);
|
||||
}
|
||||
}
|
||||
|
||||
// For collecting processed values.
|
||||
private static final class State {
|
||||
// Both maps start empty and are replaced wholesale by the setters below.
ImmutableMap<String, String> names = ImmutableMap.of();
|
||||
ImmutableMap<String, CurrencyData> currencies = ImmutableMap.of();
|
||||
|
||||
// Stores an immutable copy of the given map as the territory names.
void setNames(Map<String, String> map) {
|
||||
names = ImmutableMap.copyOf(map);
|
||||
}
|
||||
|
||||
// Stores an immutable copy of the given map as the currency data.
void setCurrencies(Map<String, CurrencyData> map) {
|
||||
currencies = ImmutableMap.copyOf(map);
|
||||
}
|
||||
}
|
||||
|
||||
private static final CldrDataProcessor<State> VISITOR = createTestVisitor();
|
||||
|
||||
/**
 * Builds the processor shared by the tests in this class.
 *
 * <p>It registers two independent actions on the builder: one collecting territory names
 * into a map handed to {@code State::setNames}, and one collecting {@code CurrencyData}
 * instances (filled in by nested value actions) into a map handed to
 * {@code State::setCurrencies}.
 */
private static CldrDataProcessor<State> createTestVisitor() {
|
||||
// Note that this is deliberately doing things the "messy" way by creating and then copying
|
||||
// a map. This is to show an extra level of processing in tests. You could just have a
|
||||
// value action which adds the territory to a map in the State object.
|
||||
CldrDataProcessor.Builder<State> builder = CldrDataProcessor.builder();
|
||||
builder
|
||||
.addAction(
|
||||
"//ldml/localeDisplayNames/territories",
|
||||
() -> new LinkedHashMap<String, String>(),
|
||||
State::setNames)
|
||||
.addValueAction(
|
||||
"territory[@type=*]",
|
||||
(map, value) -> map.put(value.getPath().get(TERRITORY_TYPE), value.getValue()));
|
||||
|
||||
// Another convoluted example for testing. This has the same additional level for a map
|
||||
// just so we can show a 3-level processor. In real code this wouldn't look so messy.
|
||||
CldrDataProcessor.SubProcessor<CurrencyData> currencyProcessor = builder
|
||||
.addAction(
|
||||
"//ldml/numbers/currencies",
|
||||
() -> new LinkedHashMap<String, CurrencyData>(),
|
||||
State::setCurrencies)
|
||||
.addAction(
|
||||
"currency[@type=*]",
|
||||
(map, path) -> new CurrencyData(path.get(CURRENCY_TYPE)),
|
||||
(map, data) -> map.put(data.key, data));
|
||||
// The two value actions below fill in the mutable fields of the current CurrencyData.
currencyProcessor.addValueAction(
|
||||
"displayName",
|
||||
(data, value) -> data.name = value.getValue());
|
||||
currencyProcessor.addValueAction(
|
||||
"symbol",
|
||||
(data, value) -> data.symbol = value.getValue());
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
// Checks the territories action: each territory value ends up in State.names, keyed by
// its "type" attribute, in the asserted order.
@Test
|
||||
public void testTwoLevelProcessing() {
|
||||
CldrData data = CldrDataSupplier.forValues(Arrays.asList(
|
||||
ldml("localeDisplayNames/territories/territory[@type=\"BE\"]", "Belgium"),
|
||||
ldml("localeDisplayNames/territories/territory[@type=\"CH\"]", "Switzerland"),
|
||||
ldml("localeDisplayNames/territories/territory[@type=\"IN\"]", "India")));
|
||||
|
||||
State state = VISITOR.process(data, new State(), CldrData.PathOrder.DTD);
|
||||
|
||||
assertThat(state.names)
|
||||
.containsExactly(
|
||||
"BE", "Belgium",
|
||||
"CH", "Switzerland",
|
||||
"IN", "India")
|
||||
.inOrder();
|
||||
}
|
||||
|
||||
// Checks the currencies action: the displayName and symbol values of each currency are
// combined into one CurrencyData per currency "type".
@Test
|
||||
public void testThreeLevelProcessing() {
|
||||
// Note that EUR is deliberately(?) listed first here while the expected result below
// starts with CHF, so the assertion pins the order produced by processing with
// PathOrder.DTD rather than the order in which the values were supplied.
CldrData data = CldrDataSupplier.forValues(Arrays.asList(
|
||||
ldml("numbers/currencies/currency[@type=\"EUR\"]/displayName", "euro"),
|
||||
ldml("numbers/currencies/currency[@type=\"EUR\"]/symbol", "€"),
|
||||
ldml("numbers/currencies/currency[@type=\"CHF\"]/displayName", "Swiss franc"),
|
||||
ldml("numbers/currencies/currency[@type=\"CHF\"]/symbol", "Fr."),
|
||||
ldml("numbers/currencies/currency[@type=\"INR\"]/displayName", "Indian rupee"),
|
||||
ldml("numbers/currencies/currency[@type=\"INR\"]/symbol", "₹")));
|
||||
|
||||
State state = VISITOR.process(data, new State(), CldrData.PathOrder.DTD);
|
||||
|
||||
assertThat(state.currencies)
|
||||
.containsExactly(
|
||||
"CHF", new CurrencyData("CHF", "Swiss franc", "Fr."),
|
||||
"EUR", new CurrencyData("EUR", "euro", "€"),
|
||||
"INR", new CurrencyData("INR", "Indian rupee", "₹"))
|
||||
.inOrder();
|
||||
}
|
||||
|
||||
// Test helper: parses a CLDR value whose path is the given suffix appended to "//ldml/".
private static CldrValue ldml(String path, String value) {
|
||||
return CldrValue.parseValue("//ldml/" + path, value);
|
||||
}
|
||||
}
|
|
@ -1,158 +0,0 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static com.google.common.truth.Truth8.assertThat;
|
||||
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
|
||||
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
public class PathMatcherTest {
|
||||
@Test
|
||||
public void testMatcher() {
|
||||
CldrPath calEra = parseDistinguishingPath(
|
||||
"//ldml/dates/calendars/calendar[@type=\"buddhist\"]/eras/eraAbbr/era[@type=\"0\"]");
|
||||
CldrPath chineseMon1 = monthInfo("chinese", "format", "abbreviated", 1);
|
||||
CldrPath chineseMon2 = monthInfo("chinese", "format", "abbreviated", 2);
|
||||
CldrPath genericMon1 = monthInfo("generic", "stand-alone", "narrow", 1);
|
||||
CldrPath genericMon2 = monthInfo("generic", "stand-alone", "narrow", 2);
|
||||
List<CldrPath> calPaths =
|
||||
Arrays.asList(calEra, chineseMon1, chineseMon2, genericMon1, genericMon2);
|
||||
|
||||
PathMatcher anyCalendarPaths = PathMatcher.of("ldml/dates/calendars/calendar");
|
||||
assertThat(calPaths.stream().allMatch(anyCalendarPaths::matchesPrefixOf)).isTrue();
|
||||
assertThat(calPaths.stream().noneMatch(anyCalendarPaths::matches)).isTrue();
|
||||
assertThat(calPaths.stream().noneMatch(anyCalendarPaths::matchesSuffixOf)).isTrue();
|
||||
|
||||
PathMatcher chineseCalendars =
|
||||
PathMatcher.of("ldml/dates/calendars/calendar[@type=\"chinese\"]");
|
||||
assertThat(calPaths.stream().filter(chineseCalendars::matchesPrefixOf))
|
||||
.containsExactly(chineseMon1, chineseMon2);
|
||||
|
||||
PathMatcher anyMonth = PathMatcher.of("monthWidth[@type=*]/month[@type=*]");
|
||||
assertThat(calPaths.stream().filter(anyMonth::matchesSuffixOf))
|
||||
.containsExactly(chineseMon1, chineseMon2, genericMon1, genericMon2);
|
||||
|
||||
PathMatcher narrowMonth = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
|
||||
assertThat(calPaths.stream().filter(narrowMonth::matchesSuffixOf))
|
||||
.containsExactly(genericMon1, genericMon2);
|
||||
assertThat(calPaths.stream().filter(narrowMonth::matches)).isEmpty();
|
||||
|
||||
PathMatcher firstMonth = PathMatcher.of("month[@type=\"1\"]");
|
||||
assertThat(calPaths.stream().filter(firstMonth::matchesSuffixOf))
|
||||
.containsExactly(chineseMon1, genericMon1);
|
||||
|
||||
PathMatcher fullMatch = PathMatcher.of("ldml/dates"
|
||||
+ "/calendars/calendar[@type=\"generic\"]"
|
||||
+ "/months/monthContext[@type=\"stand-alone\"]"
|
||||
+ "/monthWidth[@type=\"narrow\"]"
|
||||
+ "/month[@type=\"2\"]");
|
||||
assertThat(calPaths.stream().filter(fullMatch::matches)).containsExactly(genericMon2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWildcardSegment() {
|
||||
PathMatcher wildcard = PathMatcher.of("ldml/dates"
|
||||
+ "/calendars/calendar[@type=\"generic\"]"
|
||||
+ "/*/*[@type=\"format\"]/*[@type=\"narrow\"]/*[@type=*]");
|
||||
|
||||
assertThat(wildcard.matches(monthInfo("generic", "format", "narrow", 1))).isTrue();
|
||||
assertThat(wildcard.matches(monthInfo("generic", "format", "narrow", 9))).isTrue();
|
||||
assertThat(wildcard.matches(dayInfo("generic", "format", "narrow", "sun"))).isTrue();
|
||||
|
||||
assertThat(wildcard.matches(monthInfo("chinese", "format", "narrow", 1))).isFalse();
|
||||
assertThat(wildcard.matches(monthInfo("generic", "stand-alone", "narrow", 1))).isFalse();
|
||||
assertThat(wildcard.matches(dayInfo("generic", "format", "wide", "mon"))).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAnyOf_match() {
|
||||
PathMatcher narrowMonth =
|
||||
PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/months"
|
||||
+ "/monthContext[@type=\"format\"]/monthWidth[@type=\"narrow\"]/month[@type=*]");
|
||||
PathMatcher narrowDay =
|
||||
PathMatcher.of("ldml/dates/calendars/calendar[@type=*]/days"
|
||||
+ "/dayContext[@type=\"format\"]/dayWidth[@type=\"narrow\"]/day[@type=*]");
|
||||
PathMatcher prefix = PathMatcher.anyOf(narrowMonth, narrowDay);
|
||||
|
||||
assertThat(prefix.matches(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
|
||||
assertThat(prefix.matches(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();
|
||||
|
||||
assertThat(prefix.matches(monthInfo("hindu", "format", "wide", 1))).isFalse();
|
||||
assertThat(prefix.matches(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAnyOf_suffix() {
|
||||
PathMatcher monthSuffix = PathMatcher.of("monthWidth[@type=\"narrow\"]/month[@type=*]");
|
||||
PathMatcher daySuffix = PathMatcher.of("dayWidth[@type=\"narrow\"]/day[@type=*]");
|
||||
PathMatcher suffix = PathMatcher.anyOf(monthSuffix, daySuffix);
|
||||
|
||||
assertThat(suffix.matchesSuffixOf(monthInfo("generic", "format", "narrow", 1))).isTrue();
|
||||
assertThat(suffix.matchesSuffixOf(dayInfo("generic", "format", "narrow", "sun"))).isTrue();
|
||||
|
||||
assertThat(suffix.matchesSuffixOf(monthInfo("generic", "format", "wide", 1))).isFalse();
|
||||
assertThat(suffix.matchesSuffixOf(dayInfo("generic", "format", "wide", "mon"))).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAnyOf_prefix() {
|
||||
PathMatcher monthPrefix =
|
||||
PathMatcher.of("ldml/dates/calendars/calendar[@type=\"gregorian\"]/months");
|
||||
PathMatcher dayPrefix =
|
||||
PathMatcher.of("ldml/dates/calendars/calendar[@type=\"buddhist\"]/days");
|
||||
PathMatcher prefix = PathMatcher.anyOf(monthPrefix, dayPrefix);
|
||||
|
||||
assertThat(prefix.matchesPrefixOf(monthInfo("gregorian", "format", "narrow", 1))).isTrue();
|
||||
assertThat(prefix.matchesPrefixOf(dayInfo("buddhist", "format", "narrow", "sun"))).isTrue();
|
||||
|
||||
assertThat(prefix.matchesPrefixOf(monthInfo("hindu", "format", "wide", 1))).isFalse();
|
||||
assertThat(prefix.matchesPrefixOf(dayInfo("hindu", "format", "wide", "mon"))).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadSpecifiers() {
|
||||
assertInvalidPathSpecification("");
|
||||
// Leading and trailing '/' are not permitted (they imply empty segments.
|
||||
assertInvalidPathSpecification("/foo/");
|
||||
assertInvalidPathSpecification("foo//bar");
|
||||
assertInvalidPathSpecification("foo/bad segment name");
|
||||
assertInvalidPathSpecification("foo/bar[type=*]");
|
||||
assertInvalidPathSpecification("foo/bar[@type=**]");
|
||||
assertInvalidPathSpecification("foo/bar[@type='double-quotes-only']");
|
||||
}
|
||||
|
||||
private void assertInvalidPathSpecification(String spec) {
|
||||
IllegalArgumentException e =
|
||||
assertThrows(IllegalArgumentException.class, () -> PathMatcher.of(spec));
|
||||
assertThat(e).hasMessageThat().startsWith("invalid path specification");
|
||||
assertThat(e).hasMessageThat().contains(spec);
|
||||
}
|
||||
|
||||
private static CldrPath monthInfo(String type, String context, String width, int number) {
|
||||
return CldrPath.parseDistinguishingPath(String.format(
|
||||
"//ldml/dates/calendars/calendar[@type=\"%s\"]"
|
||||
+ "/months/monthContext[@type=\"%s\"]"
|
||||
+ "/monthWidth[@type=\"%s\"]"
|
||||
+ "/month[@type=\"%d\"]",
|
||||
type, context, width, number));
|
||||
}
|
||||
|
||||
private static CldrPath dayInfo(String type, String context, String width, String id) {
|
||||
return CldrPath.parseDistinguishingPath(String.format(
|
||||
"//ldml/dates/calendars/calendar[@type=\"%s\"]"
|
||||
+ "/days/dayContext[@type=\"%s\"]"
|
||||
+ "/dayWidth[@type=\"%s\"]"
|
||||
+ "/day[@type=\"%s\"]",
|
||||
type, context, width, id));
|
||||
}
|
||||
}
|
|
@ -5,12 +5,15 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
|
|||
import static org.unicode.cldr.api.CldrValue.parseValue;
|
||||
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
|
||||
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.cldr.api.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
|
||||
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
|
||||
|
@ -29,8 +32,7 @@ public class SupplementalMapperTest {
|
|||
supplementalData("likelySubtags/likelySubtag[@from=\"Foo\"][@to=\"Bar\"]"),
|
||||
simpleResult("/Foo", "Bar"));
|
||||
|
||||
PathMatcher allPaths = PathMatcher.of("supplementalData");
|
||||
IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths);
|
||||
IcuData icuData = SupplementalMapper.process(src, transformer, "name", p -> true);
|
||||
|
||||
assertThat(icuData).getPaths().hasSize(1);
|
||||
assertThat(icuData).hasValuesFor("/Foo", "Bar");
|
||||
|
@ -55,8 +57,7 @@ public class SupplementalMapperTest {
|
|||
supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USD\"]"),
|
||||
simpleResult("/CurrencyMap/US/<FIFO>/id", "USD"));
|
||||
|
||||
PathMatcher allPaths = PathMatcher.of("supplementalData");
|
||||
IcuData icuData = SupplementalMapper.process(src, transformer, "name", allPaths);
|
||||
IcuData icuData = SupplementalMapper.process(src, transformer, "name", p -> true);
|
||||
|
||||
assertThat(icuData).getPaths().hasSize(3);
|
||||
assertThat(icuData).hasValuesFor("/CurrencyMap/US/<0000>/id", "USD");
|
||||
|
@ -73,7 +74,8 @@ public class SupplementalMapperTest {
|
|||
supplementalData("currencyData/region[@iso3166=\"US\"]/currency[@iso4217=\"USN\"]"),
|
||||
simpleResult("/CurrencyMap/US/<FIFO>/id", "USN"));
|
||||
|
||||
PathMatcher filter = PathMatcher.of("supplementalData/likelySubtags");
|
||||
Predicate<CldrPath> filter =
|
||||
PathMatcher.of("//supplementalData/likelySubtags")::matchesPrefixOf;
|
||||
IcuData icuData = SupplementalMapper.process(src, transformer, "name", filter);
|
||||
|
||||
assertThat(icuData).getPaths().hasSize(1);
|
||||
|
|
Loading…
Add table
Reference in a new issue