ICU-20693 Removing unused code, fixing IDE warnings and preparing for unit tests

See #773
This commit is contained in:
David Beaumont 2019-08-30 14:21:32 +00:00 committed by pedberg-icu
parent f46b567ba3
commit e984474ee5
19 changed files with 132 additions and 107 deletions

View file

@ -4,6 +4,8 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -22,7 +24,7 @@ import com.google.common.collect.ListMultimap;
*/
public final class IcuData {
private static final RbPath RB_VERSION = RbPath.of("Version");
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)\\])?$");
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
private final String name;
private final boolean hasFallback;
@ -162,4 +164,18 @@ public final class IcuData {
public boolean isEmpty() {
return paths.isEmpty();
}
@Override public String toString() {
StringWriter out = new StringWriter();
PrintWriter w = new PrintWriter(out);
w.format("IcuData{ name=%s, fallback=%s\n", name, hasFallback);
commentLines.forEach(c -> w.format(" # %s\n", c));
paths.forEach(p -> {
w.format(" %s:\n", p);
rbPathToValues.get(p).forEach(v -> w.format(" %s\n", v));
});
w.format("}\n");
w.flush();
return out.toString();
}
}

View file

@ -3,7 +3,6 @@
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.lang.Integer.parseInt;
import java.time.LocalDate;
@ -13,10 +12,11 @@ import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableMap;
import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
/**
* The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}.
@ -46,7 +46,7 @@ final class IcuFunctions {
return hiBits + " " + loBits;
});
// TODO(dbeaumont): Improve this documentation (e.g. why is this being done, give examples?).
// TODO: Improve this documentation (e.g. why is this being done, give examples?).
/**
* Inserts '%' into numberingSystems descriptions.
*

View file

@ -27,7 +27,7 @@ final class IcuTextWriter {
// List of characters to escape in UnicodeSets
// ('\' followed by any of '\', '[', ']', '{', '}', '-', '&', ':', '^', '=').
private static final Pattern UNICODESET_ESCAPE =
Pattern.compile("\\\\[\\\\\\[\\]\\{\\}\\-&:^=]");
Pattern.compile("\\\\[\\\\\\[\\]{}\\-&:^=]");
// Only escape \ and " from other strings.
private static final Pattern STRING_ESCAPE = Pattern.compile("(?!')\\\\\\\\(?!')");
private static final Pattern QUOTE_ESCAPE = Pattern.compile("\\\\?\"");
@ -54,7 +54,7 @@ final class IcuTextWriter {
}
// TODO: Write a UTF-8 header (see https://unicode-org.atlassian.net/browse/ICU-10197).
private void writeTo(PrintWriter out, List<String> header) throws IOException {
private void writeTo(PrintWriter out, List<String> header) {
out.write('\uFEFF');
writeHeaderAndComments(out, header, icuData.getFileComment());
@ -83,20 +83,20 @@ final class IcuTextWriter {
// account for the implicit root segment.
int commonDepth = RbPath.getCommonPrefixLength(lastPath, path) + 1;
// Before closing, the "cursor" is at the end of the last value written.
closeLastPath(lastPath, commonDepth, out);
closeLastPath(commonDepth, out);
// After opening the value will be ready for the next value to be written.
openNextPath(path, out);
valueWasInline = appendValues(icuData.getName(), path, icuData.get(path), out);
lastPath = path;
}
closeLastPath(lastPath, 0, out);
closeLastPath(0, out);
out.println();
out.close();
}
// Before: Cursor is at the end of the previous line.
// After: Cursor is positioned immediately after the last closed '}'
private void closeLastPath(RbPath lastPath, int minDepth, PrintWriter out) {
private void closeLastPath(int minDepth, PrintWriter out) {
if (valueWasInline) {
depth--;
out.print('}');
@ -164,7 +164,7 @@ final class IcuTextWriter {
boolean isSequence = rbPath.endsWith(RB_SEQUENCE);
if (values.size() == 1 && !mustBeArray(true, name, rbPath)) {
onlyValue = values.get(0);
if (onlyValue.size() == 1 && !mustBeArray(false, name, rbPath)) {
if (onlyValue.isSingleton() && !mustBeArray(false, name, rbPath)) {
// Value has a single element and is not being forced to be an array.
String onlyElement = onlyValue.getElement(0);
if (quote) {
@ -195,7 +195,7 @@ final class IcuTextWriter {
}
} else {
for (RbValue value : values) {
if (value.size() == 1) {
if (value.isSingleton()) {
// Single-value array: print normally.
printArray(value, quote, isSequence, out);
} else {
@ -238,9 +238,9 @@ final class IcuTextWriter {
}
private void printArray(RbValue rbValue, boolean quote, boolean isSequence, PrintWriter out) {
for (int n = 0; n < rbValue.size(); n++) {
for (String v : rbValue.getElements()) {
newLineAndIndent(out);
printValue(out, quoteInside(rbValue.getElement(n)), quote);
printValue(out, quoteInside(v), quote);
if (!isSequence) {
out.print(",");
}

View file

@ -148,12 +148,13 @@ public abstract class PathMatcher {
// Make a new, non-interned, unique instance here which we can test by reference to
// determine if the argument is to be captured (needed as ImmutableMap prohibits null).
// DO NOT change this code to assign "*" as the value directly, it MUST be a new instance.
@SuppressWarnings("StringOperationCanBeSimplified")
private static final String WILDCARD = new String("*");
private static final Pattern ELEMENT_START_REGEX =
Pattern.compile("(\\*|[-:\\w]+)(?:/|\\[|$)");
private static final Pattern ATTRIBUTE_REGEX =
Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")\\]");
Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")]");
// element := foo, foo[@bar="baz"], foo[@bar=*]
// pathspec := element{/element}*

View file

@ -54,10 +54,6 @@ public final class RbPath implements Comparable<RbPath> {
private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
QUOTED_SEGMENT_CHARS.and(whitespace().negate());
// Characters allowed in path segments which separate the "base name" from any suffix (e.g.
// the base name of "Foo:intvector" is "Foo").
private static final CharMatcher SEGMENT_SEPARATORS = CharMatcher.anyOf("%:");
private static final RbPath EMPTY = new RbPath(ImmutableList.of());
public static RbPath empty() {
@ -97,12 +93,10 @@ public final class RbPath implements Comparable<RbPath> {
this.segments = ImmutableList.copyOf(segments);
this.hashCode = Objects.hash(this.segments);
for (String segment : this.segments) {
checkArgument(!segment.isEmpty(),
"empty path segments not permitted: %s", this.segments);
checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
// Either the label is quoted (e.g. "foo") or it is bar (e.g. foo) but it can only
// contain double quotes at either end, or not at all. If the string is quoted, only
// validate the content, and not the quotes themselves.
String toValidate;
switch (segment.charAt(0)) {
case '<':
// Allow anything in hidden labels, since they will be removed later and never
@ -157,30 +151,6 @@ public final class RbPath implements Comparable<RbPath> {
return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
}
/**
* Returns whether the first element of this path is prefix by the given "base name".
*
* <p>Resource bundle paths relating to semantically similar data are typically grouped by the
* same first path element. This is not as simple as just comparing the first element, as in
* {@code path.startsWith(prefix)} however, since path elements can have suffixes, such as
* {@code "Foo:alias"} or {@code "Foo%subtype"}.
*
* @param baseName the base name to test for.
* @return true is the "base name" of the first path element is the given prefix.
*/
public boolean hasPrefix(String baseName) {
checkArgument(!baseName.isEmpty() && SEGMENT_SEPARATORS.matchesNoneOf(baseName));
if (length() == 0) {
return false;
}
String firstElement = getSegment(0);
// Slightly subtle (but safe) access to the separator character, since:
// (!a.equals(b) && a.startsWith(b)) ==> a.length() > b.length().
return firstElement.equals(baseName)
|| (firstElement.startsWith(baseName)
&& SEGMENT_SEPARATORS.matches(firstElement.charAt(baseName.length())));
}
public boolean startsWith(RbPath prefix) {
return prefix.length() <= length() && matchesSublist(prefix, 0);
}

View file

@ -6,7 +6,6 @@ import static com.google.common.base.Preconditions.checkArgument;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.Function;
import com.google.common.collect.ImmutableList;
@ -34,9 +33,14 @@ public final class RbValue {
checkArgument(!this.elements.isEmpty(), "Resource bundle values cannot be empty");
}
/** Returns the (non zero) number of elements in this value. */
public int size() {
return elements.size();
/** Returns the non-empty list of value elements. */
public ImmutableList<String> getElements() {
return elements;
}
/** Returns whether this is a single element value. */
public boolean isSingleton() {
return elements.size() == 1;
}
/** Returns the Nth element of this value. */

View file

@ -20,20 +20,22 @@ import java.util.Set;
import javax.annotation.Nullable;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
/**
* A mapper to collect BCP-47 data from {@link CldrDataType#BCP47 BCP47} data under paths
@ -80,8 +82,13 @@ public final class Bcp47Mapper {
* @return A list of IcuData instances containing BCP-47 data to be written to files.
*/
public static ImmutableList<IcuData> process(CldrDataSupplier src) {
return process(src.getDataForType(BCP47));
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static ImmutableList<IcuData> process(CldrData cldrData) {
Bcp47Visitor visitor = new Bcp47Visitor();
src.getDataForType(BCP47).accept(ARBITRARY, visitor);
cldrData.accept(ARBITRARY, visitor);
visitor.addKeyMapValues();
return ImmutableList.of(visitor.keyTypeData.icuData, visitor.tzData.icuData);
}
@ -168,13 +175,15 @@ public final class Bcp47Mapper {
// replacement and these will be processed below (in particular we need to emit
// the fact that they are deprecated).
// According to the old mapper code, it's an error not to have an alias, but
// it's emitted via debug logging and not actually enforced.
// TODO: Consider making this an error if possible.
// Not all key elements have an alias. E.g. in calendar.xml:
// <key name="fw" description="First day of week" since="28">
// But we still add it as a alias to itself (which is later turned into a path with
// an empty value).
String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
keyMap.put(keyName, keyAlias);
RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
if (typeAliases.isEmpty()) {
// Generate type map entry using empty value (an empty value indicates same

View file

@ -15,12 +15,13 @@ import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.escape.UnicodeEscaper;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.escape.UnicodeEscaper;
/**
* A mapper to collect break-iterator data from {@link CldrDataType#LDML LDML} data under
* paths matching:
@ -31,9 +32,11 @@ import org.unicode.icu.tool.cldrtoicu.RbPath;
*/
// TODO: This class can almost certainly be replace with a small RegexTransformer config.
public final class BreakIteratorMapper {
// The "type" attribute is not required here, so cannot appear in the matcher.
private static final PathMatcher SUPPRESSION =
PathMatcher.of("ldml/segmentations/segmentation/suppressions/suppression");
// The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And
// its default (and only) value is "standard".
// TODO: Understand and document why this is the case.
private static final PathMatcher SUPPRESSION = PathMatcher.of(
"ldml/segmentations/segmentation[@type=*]/suppressions/suppression");
private static final AttributeKey SEGMENTATION_TYPE = keyOf("segmentation", "type");
// Note: This could be done with an intermediate matcher for
@ -58,9 +61,15 @@ public final class BreakIteratorMapper {
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
CldrData cldrData = src.getDataForLocale(localeId, UNRESOLVED);
return process(localeId, cldrData, icuSpecialData);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
BreakIteratorMapper mapper = new BreakIteratorMapper(localeId);
icuSpecialData.ifPresent(s -> s.accept(ARBITRARY, mapper::addSpecials));
src.getDataForLocale(localeId, UNRESOLVED).accept(DTD, mapper::addSuppression);
cldrData.accept(DTD, mapper::addSuppression);
return mapper.icuData;
}

View file

@ -16,14 +16,14 @@ import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
/**
* A mapper to collect collation data from {@link CldrDataType#LDML LDML} data via the paths:
* <pre>{@code

View file

@ -15,11 +15,12 @@ import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
/**
* A mapper to collect day-period data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL}
* data via the paths:
@ -47,8 +48,12 @@ public final class DayPeriodsMapper {
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src) {
return process(src.getDataForType(SUPPLEMENTAL));
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData data) {
RuleSetVisitor mapper = new RuleSetVisitor();
CldrData data = src.getDataForType(SUPPLEMENTAL);
data.accept(ARBITRARY, mapper);
return mapper.icuData;
}

View file

@ -6,7 +6,6 @@ import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Ordering.natural;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
@ -16,18 +15,12 @@ import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.SetMultimap;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
@ -35,6 +28,10 @@ import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.SetMultimap;
/**
* Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
* {@link PathValueTransformer}.
@ -43,11 +40,6 @@ import org.unicode.icu.tool.cldrtoicu.SupplementalData;
* {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
*/
public final class LocaleMapper {
// Match territory paths so we can skip processing deprecated territories.
private static final PathMatcher TERRITORY = PathMatcher.of(
"ldml/localeDisplayNames/territories/territory[@type=*]");
private static final AttributeKey TERRITORY_TYPE = keyOf("territory", "type");
// The default calendar (only set is different from inherited parent value).
private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");

View file

@ -14,12 +14,13 @@ import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.annotations.VisibleForTesting;
/**
* A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
* the paths:
@ -28,6 +29,10 @@ import org.unicode.icu.tool.cldrtoicu.RbValue;
* }</pre>
*/
public final class PluralRangesMapper {
// Note that this mapper only matches when there's no "type" specified on the "plurals" element.
// This is a bit weird, since the PluralsMapper expects a type (e.g. cardinal or ordinal) to
// be present. Really this just illustrates that the plural ranges just should not be under the
// same parent element as plurals.
private static final PathMatcher RANGES =
PathMatcher.of("supplementalData/plurals/pluralRanges[@locales=*]");
private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales");
@ -47,8 +52,13 @@ public final class PluralRangesMapper {
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src) {
PluralRangesVisitor visitor = new PluralRangesVisitor();
CldrData data = src.getDataForType(SUPPLEMENTAL);
return process(data);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData data) {
PluralRangesVisitor visitor = new PluralRangesVisitor();
data.accept(ARBITRARY, visitor);
return visitor.icuData;
}
@ -61,7 +71,6 @@ public final class PluralRangesMapper {
@Override
public void visitPrefixStart(CldrPath prefix, Context ctx) {
// Captured type is either "cardinal" or "ordinal" (and will cause exception otherwise).
if (RANGES.matches(prefix)) {
ruleLabel = String.format("set%02d", setIndex++);
RANGES_LOCALES.listOfValuesFrom(prefix)

View file

@ -19,13 +19,14 @@ import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
/**
* A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
* the paths:
@ -53,8 +54,13 @@ public final class PluralsMapper {
* @return the IcuData instance to be written to a file.
*/
public static IcuData process(CldrDataSupplier src) {
PluralsVisitor visitor = new PluralsVisitor();
CldrData data = src.getDataForType(SUPPLEMENTAL);
return process(data);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(CldrData data) {
PluralsVisitor visitor = new PluralsVisitor();
// Note: We explicitly reset the type to mimic the order of the existing code, since this
// affects the set indices we generate during processing. Ideally this would all be immune
// to ordering (or just enforce DTD ordering) but right now it's very dependent on

View file

@ -15,12 +15,13 @@ import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import com.google.common.escape.UnicodeEscaper;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.escape.UnicodeEscaper;
/**
* A mapper to collect plural data from {@link CldrDataType#LDML LDML} data via the paths:
* <pre>{@code
@ -39,7 +40,6 @@ public final class RbnfMapper {
private static final AttributeKey RBNF_RADIX = keyOf("rbnfrule", "radix");
private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
private static final RbPath RB_PARENT = RbPath.of("%%Parent");
// This is the ICU path prefix, below which everything generated by this visitor will go.
private static final RbPath RB_ROOT = RbPath.of("RBNFRules");
@ -54,12 +54,17 @@ public final class RbnfMapper {
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
// Using DTD order is essential here because the RBNF paths contain ordered elements,
// so we must ensure that they appear in sorted order (otherwise we'd have to do more
// work at this end to re-sort the results).
RulesetVisitor visitor = new RulesetVisitor(localeId);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
src.getDataForLocale(localeId, UNRESOLVED).accept(DTD, visitor);
cldrData.accept(DTD, visitor);
return visitor.icuData;
}

View file

@ -12,17 +12,16 @@ import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.SetMultimap;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.SetMultimap;
/**
* Generate supplemental {@link IcuData} by transforming {@link CldrDataType#SUPPLEMENTAL
* SUPPLEMENTAL} data using a {@link PathValueTransformer}.

View file

@ -23,11 +23,11 @@ import org.unicode.cldr.api.CldrData.ValueVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.ibm.icu.text.Transliterator;
/**
@ -99,6 +99,7 @@ public final class TransformsMapper {
// I have _no_ idea what any of this is about, I'm just trying to mimic the original
// (complex and undocumented) code in "ConvertTransforms.java".
// TODO: Understand and document each of the cases below.
icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}");
// Note that this quoting of path segments is almost certainly unnecessary. It matches
// the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so

View file

@ -23,12 +23,12 @@ import java.util.regex.Pattern;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableSetMultimap;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.RbPath;
/**
* Path/value transformer configured by {@code ldml2icu_xxx.txt} mapping and configuration files.

View file

@ -26,14 +26,14 @@ import java.util.stream.Stream;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
/**
* A specification for building a result from the arguments in a matched xpath. Results always
@ -74,7 +74,7 @@ final class ResultSpec {
private static final Splitter VALUE_SPLITTER = Splitter.on(whitespace()).omitEmptyStrings();
// Matcher for "&foo_bar(a,b,c)" which captures function name and complete argument list.
private static final Pattern FUNCTION = Pattern.compile("\\&(\\w++)\\(([^\\)]++)\\)");
private static final Pattern FUNCTION = Pattern.compile("&(\\w++)\\(([^)]++)\\)");
// Resource bundle path specification with placeholders (e.g. "/foo/$1/bar") exactly as it
// appears in the configuration file.
@ -168,7 +168,7 @@ final class ResultSpec {
CldrValue value, List<String> args, DynamicVars varLookupFn) {
return new MatchedResult(
getRbPath(args),
getValues(value.getValue(), args, varLookupFn),
getValues(value.getValue(), args),
getResultPath(value.getPath(), args, varLookupFn));
}
@ -197,8 +197,7 @@ final class ResultSpec {
// Create an array of output values according to the CLDR value (if present) and the
// "values" instruction in the result specification (if present). Any functions present in
// the "values" instruction are invoked here.
private ImmutableList<String> getValues(
String value, List<String> args, DynamicVars varLookupFn) {
private ImmutableList<String> getValues(String value, List<String> args) {
VarString valuesSpec = instructions.get(Instruction.VALUES);
if (valuesSpec == null) {
// No "values" instruction, so just use the _unsplit_ CLDR value. To split a CLDR

View file

@ -15,12 +15,12 @@ import java.util.stream.Stream;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableList;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.collect.ImmutableList;
/*
* Each rule corresponds to a single target xpath specification in the configuration file
* (lines starting //) but may have more than one result specification. For example: