mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-20693 Removing unused code, fixing IDE warnings and preparing for unit tests
See #773
This commit is contained in:
parent
f46b567ba3
commit
e984474ee5
19 changed files with 132 additions and 107 deletions
|
@ -4,6 +4,8 @@ package org.unicode.icu.tool.cldrtoicu;
|
|||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -22,7 +24,7 @@ import com.google.common.collect.ListMultimap;
|
|||
*/
|
||||
public final class IcuData {
|
||||
private static final RbPath RB_VERSION = RbPath.of("Version");
|
||||
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)\\])?$");
|
||||
private static final Pattern ARRAY_INDEX = Pattern.compile("(/[^\\[]++)(?:\\[(\\d++)])?$");
|
||||
|
||||
private final String name;
|
||||
private final boolean hasFallback;
|
||||
|
@ -162,4 +164,18 @@ public final class IcuData {
|
|||
public boolean isEmpty() {
|
||||
return paths.isEmpty();
|
||||
}
|
||||
|
||||
@Override public String toString() {
|
||||
StringWriter out = new StringWriter();
|
||||
PrintWriter w = new PrintWriter(out);
|
||||
w.format("IcuData{ name=%s, fallback=%s\n", name, hasFallback);
|
||||
commentLines.forEach(c -> w.format(" # %s\n", c));
|
||||
paths.forEach(p -> {
|
||||
w.format(" %s:\n", p);
|
||||
rbPathToValues.get(p).forEach(v -> w.format(" %s\n", v));
|
||||
});
|
||||
w.format("}\n");
|
||||
w.flush();
|
||||
return out.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
package org.unicode.icu.tool.cldrtoicu;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static java.lang.Integer.parseInt;
|
||||
|
||||
import java.time.LocalDate;
|
||||
|
@ -13,10 +12,11 @@ import java.util.function.Function;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
|
||||
|
||||
import com.google.common.base.Ascii;
|
||||
import com.google.common.base.CharMatcher;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
|
||||
|
||||
/**
|
||||
* The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}.
|
||||
|
@ -46,7 +46,7 @@ final class IcuFunctions {
|
|||
return hiBits + " " + loBits;
|
||||
});
|
||||
|
||||
// TODO(dbeaumont): Improve this documentation (e.g. why is this being done, give examples?).
|
||||
// TODO: Improve this documentation (e.g. why is this being done, give examples?).
|
||||
/**
|
||||
* Inserts '%' into numberingSystems descriptions.
|
||||
*
|
||||
|
|
|
@ -27,7 +27,7 @@ final class IcuTextWriter {
|
|||
// List of characters to escape in UnicodeSets
|
||||
// ('\' followed by any of '\', '[', ']', '{', '}', '-', '&', ':', '^', '=').
|
||||
private static final Pattern UNICODESET_ESCAPE =
|
||||
Pattern.compile("\\\\[\\\\\\[\\]\\{\\}\\-&:^=]");
|
||||
Pattern.compile("\\\\[\\\\\\[\\]{}\\-&:^=]");
|
||||
// Only escape \ and " from other strings.
|
||||
private static final Pattern STRING_ESCAPE = Pattern.compile("(?!')\\\\\\\\(?!')");
|
||||
private static final Pattern QUOTE_ESCAPE = Pattern.compile("\\\\?\"");
|
||||
|
@ -54,7 +54,7 @@ final class IcuTextWriter {
|
|||
}
|
||||
|
||||
// TODO: Write a UTF-8 header (see https://unicode-org.atlassian.net/browse/ICU-10197).
|
||||
private void writeTo(PrintWriter out, List<String> header) throws IOException {
|
||||
private void writeTo(PrintWriter out, List<String> header) {
|
||||
out.write('\uFEFF');
|
||||
writeHeaderAndComments(out, header, icuData.getFileComment());
|
||||
|
||||
|
@ -83,20 +83,20 @@ final class IcuTextWriter {
|
|||
// account for the implicit root segment.
|
||||
int commonDepth = RbPath.getCommonPrefixLength(lastPath, path) + 1;
|
||||
// Before closing, the "cursor" is at the end of the last value written.
|
||||
closeLastPath(lastPath, commonDepth, out);
|
||||
closeLastPath(commonDepth, out);
|
||||
// After opening the value will be ready for the next value to be written.
|
||||
openNextPath(path, out);
|
||||
valueWasInline = appendValues(icuData.getName(), path, icuData.get(path), out);
|
||||
lastPath = path;
|
||||
}
|
||||
closeLastPath(lastPath, 0, out);
|
||||
closeLastPath(0, out);
|
||||
out.println();
|
||||
out.close();
|
||||
}
|
||||
|
||||
// Before: Cursor is at the end of the previous line.
|
||||
// After: Cursor is positioned immediately after the last closed '}'
|
||||
private void closeLastPath(RbPath lastPath, int minDepth, PrintWriter out) {
|
||||
private void closeLastPath(int minDepth, PrintWriter out) {
|
||||
if (valueWasInline) {
|
||||
depth--;
|
||||
out.print('}');
|
||||
|
@ -164,7 +164,7 @@ final class IcuTextWriter {
|
|||
boolean isSequence = rbPath.endsWith(RB_SEQUENCE);
|
||||
if (values.size() == 1 && !mustBeArray(true, name, rbPath)) {
|
||||
onlyValue = values.get(0);
|
||||
if (onlyValue.size() == 1 && !mustBeArray(false, name, rbPath)) {
|
||||
if (onlyValue.isSingleton() && !mustBeArray(false, name, rbPath)) {
|
||||
// Value has a single element and is not being forced to be an array.
|
||||
String onlyElement = onlyValue.getElement(0);
|
||||
if (quote) {
|
||||
|
@ -195,7 +195,7 @@ final class IcuTextWriter {
|
|||
}
|
||||
} else {
|
||||
for (RbValue value : values) {
|
||||
if (value.size() == 1) {
|
||||
if (value.isSingleton()) {
|
||||
// Single-value array: print normally.
|
||||
printArray(value, quote, isSequence, out);
|
||||
} else {
|
||||
|
@ -238,9 +238,9 @@ final class IcuTextWriter {
|
|||
}
|
||||
|
||||
private void printArray(RbValue rbValue, boolean quote, boolean isSequence, PrintWriter out) {
|
||||
for (int n = 0; n < rbValue.size(); n++) {
|
||||
for (String v : rbValue.getElements()) {
|
||||
newLineAndIndent(out);
|
||||
printValue(out, quoteInside(rbValue.getElement(n)), quote);
|
||||
printValue(out, quoteInside(v), quote);
|
||||
if (!isSequence) {
|
||||
out.print(",");
|
||||
}
|
||||
|
|
|
@ -148,12 +148,13 @@ public abstract class PathMatcher {
|
|||
// Make a new, non-interned, unique instance here which we can test by reference to
|
||||
// determine if the argument is to be captured (needed as ImmutableMap prohibits null).
|
||||
// DO NOT change this code to assign "*" as the value directly, it MUST be a new instance.
|
||||
@SuppressWarnings("StringOperationCanBeSimplified")
|
||||
private static final String WILDCARD = new String("*");
|
||||
|
||||
private static final Pattern ELEMENT_START_REGEX =
|
||||
Pattern.compile("(\\*|[-:\\w]+)(?:/|\\[|$)");
|
||||
private static final Pattern ATTRIBUTE_REGEX =
|
||||
Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")\\]");
|
||||
Pattern.compile("\\[@([-:\\w]+)=(?:\\*|\"([^\"]*)\")]");
|
||||
|
||||
// element := foo, foo[@bar="baz"], foo[@bar=*]
|
||||
// pathspec := element{/element}*
|
||||
|
|
|
@ -54,10 +54,6 @@ public final class RbPath implements Comparable<RbPath> {
|
|||
private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
|
||||
QUOTED_SEGMENT_CHARS.and(whitespace().negate());
|
||||
|
||||
// Characters allowed in path segments which separate the "base name" from any suffix (e.g.
|
||||
// the base name of "Foo:intvector" is "Foo").
|
||||
private static final CharMatcher SEGMENT_SEPARATORS = CharMatcher.anyOf("%:");
|
||||
|
||||
private static final RbPath EMPTY = new RbPath(ImmutableList.of());
|
||||
|
||||
public static RbPath empty() {
|
||||
|
@ -97,12 +93,10 @@ public final class RbPath implements Comparable<RbPath> {
|
|||
this.segments = ImmutableList.copyOf(segments);
|
||||
this.hashCode = Objects.hash(this.segments);
|
||||
for (String segment : this.segments) {
|
||||
checkArgument(!segment.isEmpty(),
|
||||
"empty path segments not permitted: %s", this.segments);
|
||||
checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
|
||||
// Either the label is quoted (e.g. "foo") or it is bar (e.g. foo) but it can only
|
||||
// contain double quotes at either end, or not at all. If the string is quoted, only
|
||||
// validate the content, and not the quotes themselves.
|
||||
String toValidate;
|
||||
switch (segment.charAt(0)) {
|
||||
case '<':
|
||||
// Allow anything in hidden labels, since they will be removed later and never
|
||||
|
@ -157,30 +151,6 @@ public final class RbPath implements Comparable<RbPath> {
|
|||
return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the first element of this path is prefix by the given "base name".
|
||||
*
|
||||
* <p>Resource bundle paths relating to semantically similar data are typically grouped by the
|
||||
* same first path element. This is not as simple as just comparing the first element, as in
|
||||
* {@code path.startsWith(prefix)} however, since path elements can have suffixes, such as
|
||||
* {@code "Foo:alias"} or {@code "Foo%subtype"}.
|
||||
*
|
||||
* @param baseName the base name to test for.
|
||||
* @return true is the "base name" of the first path element is the given prefix.
|
||||
*/
|
||||
public boolean hasPrefix(String baseName) {
|
||||
checkArgument(!baseName.isEmpty() && SEGMENT_SEPARATORS.matchesNoneOf(baseName));
|
||||
if (length() == 0) {
|
||||
return false;
|
||||
}
|
||||
String firstElement = getSegment(0);
|
||||
// Slightly subtle (but safe) access to the separator character, since:
|
||||
// (!a.equals(b) && a.startsWith(b)) ==> a.length() > b.length().
|
||||
return firstElement.equals(baseName)
|
||||
|| (firstElement.startsWith(baseName)
|
||||
&& SEGMENT_SEPARATORS.matches(firstElement.charAt(baseName.length())));
|
||||
}
|
||||
|
||||
public boolean startsWith(RbPath prefix) {
|
||||
return prefix.length() <= length() && matchesSublist(prefix, 0);
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import static com.google.common.base.Preconditions.checkArgument;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
|
@ -34,9 +33,14 @@ public final class RbValue {
|
|||
checkArgument(!this.elements.isEmpty(), "Resource bundle values cannot be empty");
|
||||
}
|
||||
|
||||
/** Returns the (non zero) number of elements in this value. */
|
||||
public int size() {
|
||||
return elements.size();
|
||||
/** Returns the non-empty list of value elements. */
|
||||
public ImmutableList<String> getElements() {
|
||||
return elements;
|
||||
}
|
||||
|
||||
/** Returns whether this is a single element value. */
|
||||
public boolean isSingleton() {
|
||||
return elements.size() == 1;
|
||||
}
|
||||
|
||||
/** Returns the Nth element of this value. */
|
||||
|
|
|
@ -20,20 +20,22 @@ import java.util.Set;
|
|||
import javax.annotation.Nullable;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrData.ValueVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Ascii;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
/**
|
||||
* A mapper to collect BCP-47 data from {@link CldrDataType#BCP47 BCP47} data under paths
|
||||
|
@ -80,8 +82,13 @@ public final class Bcp47Mapper {
|
|||
* @return A list of IcuData instances containing BCP-47 data to be written to files.
|
||||
*/
|
||||
public static ImmutableList<IcuData> process(CldrDataSupplier src) {
|
||||
return process(src.getDataForType(BCP47));
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static ImmutableList<IcuData> process(CldrData cldrData) {
|
||||
Bcp47Visitor visitor = new Bcp47Visitor();
|
||||
src.getDataForType(BCP47).accept(ARBITRARY, visitor);
|
||||
cldrData.accept(ARBITRARY, visitor);
|
||||
visitor.addKeyMapValues();
|
||||
return ImmutableList.of(visitor.keyTypeData.icuData, visitor.tzData.icuData);
|
||||
}
|
||||
|
@ -168,13 +175,15 @@ public final class Bcp47Mapper {
|
|||
// replacement and these will be processed below (in particular we need to emit
|
||||
// the fact that they are deprecated).
|
||||
|
||||
// According to the old mapper code, it's an error not to have an alias, but
|
||||
// it's emitted via debug logging and not actually enforced.
|
||||
// TODO: Consider making this an error if possible.
|
||||
// Not all key elements have an alias. E.g. in calendar.xml:
|
||||
// <key name="fw" description="First day of week" since="28">
|
||||
// But we still add it as a alias to itself (which is later turned into a path with
|
||||
// an empty value).
|
||||
String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
|
||||
|
||||
keyMap.put(keyName, keyAlias);
|
||||
RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
|
||||
|
||||
List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
|
||||
if (typeAliases.isEmpty()) {
|
||||
// Generate type map entry using empty value (an empty value indicates same
|
||||
|
|
|
@ -15,12 +15,13 @@ import org.unicode.cldr.api.CldrDataSupplier;
|
|||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.escape.UnicodeEscaper;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.escape.UnicodeEscaper;
|
||||
|
||||
/**
|
||||
* A mapper to collect break-iterator data from {@link CldrDataType#LDML LDML} data under
|
||||
* paths matching:
|
||||
|
@ -31,9 +32,11 @@ import org.unicode.icu.tool.cldrtoicu.RbPath;
|
|||
*/
|
||||
// TODO: This class can almost certainly be replace with a small RegexTransformer config.
|
||||
public final class BreakIteratorMapper {
|
||||
// The "type" attribute is not required here, so cannot appear in the matcher.
|
||||
private static final PathMatcher SUPPRESSION =
|
||||
PathMatcher.of("ldml/segmentations/segmentation/suppressions/suppression");
|
||||
// The "type" attribute in /suppressions/ is not required so cannot be in the matcher. And
|
||||
// its default (and only) value is "standard".
|
||||
// TODO: Understand and document why this is the case.
|
||||
private static final PathMatcher SUPPRESSION = PathMatcher.of(
|
||||
"ldml/segmentations/segmentation[@type=*]/suppressions/suppression");
|
||||
private static final AttributeKey SEGMENTATION_TYPE = keyOf("segmentation", "type");
|
||||
|
||||
// Note: This could be done with an intermediate matcher for
|
||||
|
@ -58,9 +61,15 @@ public final class BreakIteratorMapper {
|
|||
public static IcuData process(
|
||||
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
|
||||
|
||||
CldrData cldrData = src.getDataForLocale(localeId, UNRESOLVED);
|
||||
return process(localeId, cldrData, icuSpecialData);
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
|
||||
BreakIteratorMapper mapper = new BreakIteratorMapper(localeId);
|
||||
icuSpecialData.ifPresent(s -> s.accept(ARBITRARY, mapper::addSpecials));
|
||||
src.getDataForLocale(localeId, UNRESOLVED).accept(DTD, mapper::addSuppression);
|
||||
cldrData.accept(DTD, mapper::addSuppression);
|
||||
return mapper.icuData;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,14 +16,14 @@ import org.unicode.cldr.api.CldrDataSupplier;
|
|||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
import com.google.common.base.Splitter;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
import com.google.common.base.Splitter;
|
||||
|
||||
/**
|
||||
* A mapper to collect collation data from {@link CldrDataType#LDML LDML} data via the paths:
|
||||
* <pre>{@code
|
||||
|
|
|
@ -15,11 +15,12 @@ import org.unicode.cldr.api.CldrDataSupplier;
|
|||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
/**
|
||||
* A mapper to collect day-period data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL}
|
||||
* data via the paths:
|
||||
|
@ -47,8 +48,12 @@ public final class DayPeriodsMapper {
|
|||
* @return the IcuData instance to be written to a file.
|
||||
*/
|
||||
public static IcuData process(CldrDataSupplier src) {
|
||||
return process(src.getDataForType(SUPPLEMENTAL));
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData data) {
|
||||
RuleSetVisitor mapper = new RuleSetVisitor();
|
||||
CldrData data = src.getDataForType(SUPPLEMENTAL);
|
||||
data.accept(ARBITRARY, mapper);
|
||||
return mapper.icuData;
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import static com.google.common.base.Preconditions.checkArgument;
|
|||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static com.google.common.collect.Ordering.natural;
|
||||
import static org.unicode.cldr.api.AttributeKey.keyOf;
|
||||
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
|
||||
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
|
||||
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
|
||||
|
@ -16,18 +15,12 @@ import java.util.List;
|
|||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.ValueVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.collect.ImmutableListMultimap;
|
||||
import com.google.common.collect.LinkedHashMultimap;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
|
@ -35,6 +28,10 @@ import org.unicode.icu.tool.cldrtoicu.RbPath;
|
|||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
|
||||
|
||||
import com.google.common.collect.ImmutableListMultimap;
|
||||
import com.google.common.collect.LinkedHashMultimap;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
|
||||
/**
|
||||
* Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
|
||||
* {@link PathValueTransformer}.
|
||||
|
@ -43,11 +40,6 @@ import org.unicode.icu.tool.cldrtoicu.SupplementalData;
|
|||
* {@code RegexTransformer}, but could use any {@link PathValueTransformer} implementation.
|
||||
*/
|
||||
public final class LocaleMapper {
|
||||
// Match territory paths so we can skip processing deprecated territories.
|
||||
private static final PathMatcher TERRITORY = PathMatcher.of(
|
||||
"ldml/localeDisplayNames/territories/territory[@type=*]");
|
||||
private static final AttributeKey TERRITORY_TYPE = keyOf("territory", "type");
|
||||
|
||||
// The default calendar (only set is different from inherited parent value).
|
||||
private static final RbPath RB_CALENDAR = RbPath.of("calendar", "default");
|
||||
|
||||
|
|
|
@ -14,12 +14,13 @@ import org.unicode.cldr.api.CldrDataSupplier;
|
|||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
/**
|
||||
* A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
|
||||
* the paths:
|
||||
|
@ -28,6 +29,10 @@ import org.unicode.icu.tool.cldrtoicu.RbValue;
|
|||
* }</pre>
|
||||
*/
|
||||
public final class PluralRangesMapper {
|
||||
// Note that this mapper only matches when there's no "type" specified on the "plurals" element.
|
||||
// This is a bit weird, since the PluralsMapper expects a type (e.g. cardinal or ordinal) to
|
||||
// be present. Really this just illustrates that the plural ranges just should not be under the
|
||||
// same parent element as plurals.
|
||||
private static final PathMatcher RANGES =
|
||||
PathMatcher.of("supplementalData/plurals/pluralRanges[@locales=*]");
|
||||
private static final AttributeKey RANGES_LOCALES = keyOf("pluralRanges", "locales");
|
||||
|
@ -47,8 +52,13 @@ public final class PluralRangesMapper {
|
|||
* @return the IcuData instance to be written to a file.
|
||||
*/
|
||||
public static IcuData process(CldrDataSupplier src) {
|
||||
PluralRangesVisitor visitor = new PluralRangesVisitor();
|
||||
CldrData data = src.getDataForType(SUPPLEMENTAL);
|
||||
return process(data);
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData data) {
|
||||
PluralRangesVisitor visitor = new PluralRangesVisitor();
|
||||
data.accept(ARBITRARY, visitor);
|
||||
return visitor.icuData;
|
||||
}
|
||||
|
@ -61,7 +71,6 @@ public final class PluralRangesMapper {
|
|||
|
||||
@Override
|
||||
public void visitPrefixStart(CldrPath prefix, Context ctx) {
|
||||
// Captured type is either "cardinal" or "ordinal" (and will cause exception otherwise).
|
||||
if (RANGES.matches(prefix)) {
|
||||
ruleLabel = String.format("set%02d", setIndex++);
|
||||
RANGES_LOCALES.listOfValuesFrom(prefix)
|
||||
|
|
|
@ -19,13 +19,14 @@ import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
|||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
|
||||
/**
|
||||
* A mapper to collect plural data from {@link CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data via
|
||||
* the paths:
|
||||
|
@ -53,8 +54,13 @@ public final class PluralsMapper {
|
|||
* @return the IcuData instance to be written to a file.
|
||||
*/
|
||||
public static IcuData process(CldrDataSupplier src) {
|
||||
PluralsVisitor visitor = new PluralsVisitor();
|
||||
CldrData data = src.getDataForType(SUPPLEMENTAL);
|
||||
return process(data);
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData data) {
|
||||
PluralsVisitor visitor = new PluralsVisitor();
|
||||
// Note: We explicitly reset the type to mimic the order of the existing code, since this
|
||||
// affects the set indices we generate during processing. Ideally this would all be immune
|
||||
// to ordering (or just enforce DTD ordering) but right now it's very dependent on
|
||||
|
|
|
@ -15,12 +15,13 @@ import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
|||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
|
||||
import com.google.common.escape.UnicodeEscaper;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.escape.UnicodeEscaper;
|
||||
|
||||
/**
|
||||
* A mapper to collect plural data from {@link CldrDataType#LDML LDML} data via the paths:
|
||||
* <pre>{@code
|
||||
|
@ -39,7 +40,6 @@ public final class RbnfMapper {
|
|||
private static final AttributeKey RBNF_RADIX = keyOf("rbnfrule", "radix");
|
||||
private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
|
||||
|
||||
private static final RbPath RB_PARENT = RbPath.of("%%Parent");
|
||||
// This is the ICU path prefix, below which everything generated by this visitor will go.
|
||||
private static final RbPath RB_ROOT = RbPath.of("RBNFRules");
|
||||
|
||||
|
@ -54,12 +54,17 @@ public final class RbnfMapper {
|
|||
public static IcuData process(
|
||||
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
|
||||
|
||||
return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
|
||||
// Using DTD order is essential here because the RBNF paths contain ordered elements,
|
||||
// so we must ensure that they appear in sorted order (otherwise we'd have to do more
|
||||
// work at this end to re-sort the results).
|
||||
RulesetVisitor visitor = new RulesetVisitor(localeId);
|
||||
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
|
||||
src.getDataForLocale(localeId, UNRESOLVED).accept(DTD, visitor);
|
||||
cldrData.accept(DTD, visitor);
|
||||
return visitor.icuData;
|
||||
}
|
||||
|
||||
|
|
|
@ -12,17 +12,16 @@ import org.unicode.cldr.api.CldrData;
|
|||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.collect.ImmutableListMultimap;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.LinkedHashMultimap;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.collect.ImmutableListMultimap;
|
||||
import com.google.common.collect.LinkedHashMultimap;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
|
||||
/**
|
||||
* Generate supplemental {@link IcuData} by transforming {@link CldrDataType#SUPPLEMENTAL
|
||||
* SUPPLEMENTAL} data using a {@link PathValueTransformer}.
|
||||
|
|
|
@ -23,11 +23,11 @@ import org.unicode.cldr.api.CldrData.ValueVisitor;
|
|||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuData;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
||||
/**
|
||||
|
@ -99,6 +99,7 @@ public final class TransformsMapper {
|
|||
|
||||
// I have _no_ idea what any of this is about, I'm just trying to mimic the original
|
||||
// (complex and undocumented) code in "ConvertTransforms.java".
|
||||
// TODO: Understand and document each of the cases below.
|
||||
icuData.add(RbPath.of("TransliteratorNamePattern"), "{0,choice,0#|1#{1}|2#{1}-{2}}");
|
||||
// Note that this quoting of path segments is almost certainly unnecessary. It matches
|
||||
// the old "ConvertTransforms" behaviour, but '%' is used elsewhere without quoting, so
|
||||
|
|
|
@ -23,12 +23,12 @@ import java.util.regex.Pattern;
|
|||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableListMultimap;
|
||||
import com.google.common.collect.ImmutableSetMultimap;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
/**
|
||||
* Path/value transformer configured by {@code ldml2icu_xxx.txt} mapping and configuration files.
|
||||
|
|
|
@ -26,14 +26,14 @@ import java.util.stream.Stream;
|
|||
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
/**
|
||||
* A specification for building a result from the arguments in a matched xpath. Results always
|
||||
|
@ -74,7 +74,7 @@ final class ResultSpec {
|
|||
private static final Splitter VALUE_SPLITTER = Splitter.on(whitespace()).omitEmptyStrings();
|
||||
|
||||
// Matcher for "&foo_bar(a,b,c)" which captures function name and complete argument list.
|
||||
private static final Pattern FUNCTION = Pattern.compile("\\&(\\w++)\\(([^\\)]++)\\)");
|
||||
private static final Pattern FUNCTION = Pattern.compile("&(\\w++)\\(([^)]++)\\)");
|
||||
|
||||
// Resource bundle path specification with placeholders (e.g. "/foo/$1/bar") exactly as it
|
||||
// appears in the configuration file.
|
||||
|
@ -168,7 +168,7 @@ final class ResultSpec {
|
|||
CldrValue value, List<String> args, DynamicVars varLookupFn) {
|
||||
return new MatchedResult(
|
||||
getRbPath(args),
|
||||
getValues(value.getValue(), args, varLookupFn),
|
||||
getValues(value.getValue(), args),
|
||||
getResultPath(value.getPath(), args, varLookupFn));
|
||||
}
|
||||
|
||||
|
@ -197,8 +197,7 @@ final class ResultSpec {
|
|||
// Create an array of output values according to the CLDR value (if present) and the
|
||||
// "values" instruction in the result specification (if present). Any functions present in
|
||||
// the "values" instruction are invoked here.
|
||||
private ImmutableList<String> getValues(
|
||||
String value, List<String> args, DynamicVars varLookupFn) {
|
||||
private ImmutableList<String> getValues(String value, List<String> args) {
|
||||
VarString valuesSpec = instructions.get(Instruction.VALUES);
|
||||
if (valuesSpec == null) {
|
||||
// No "values" instruction, so just use the _unsplit_ CLDR value. To split a CLDR
|
||||
|
|
|
@ -15,12 +15,12 @@ import java.util.stream.Stream;
|
|||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
|
||||
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
|
||||
import org.unicode.icu.tool.cldrtoicu.RbPath;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
/*
|
||||
* Each rule corresponds to a single target xpath specification in the configuration file
|
||||
* (lines starting //) but may have more than one result specification. For example:
|
||||
|
|
Loading…
Add table
Reference in a new issue