ICU-20693 Pseudo-locale "alt path" filtering support. (#869)

* ICU-20693 Pseudo-locale "alt path" filtering support.
This commit is contained in:
David Beaumont 2019-10-23 12:34:36 +02:00 committed by GitHub
parent 03937347fb
commit ba7f1b61ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 1334 additions and 343 deletions

View file

@ -39,6 +39,15 @@
CldrDraftStatus for more details. -->
<property name="minDraftStatus" value="contributed"/>
<!-- A regular expression to match the locale IDs to be generated (useful for
debugging specific regions). This is applied after locale ID specifications
have been expanded into full locale IDs, so the value "en" will NOT match
"en_GB" or "en_001" etc. -->
<property name="localeIdFilter" value=""/>
<!-- Whether to synthetically generate "pseudo locale" data ("en_XA" and "ar_XB"). -->
<property name="includePseudoLocales" value="false"/>
<!-- Whether to emit a debug report containing some possibly useful information after
the conversion has finished. -->
<!-- TODO: Currently this isn't hugely useful, so find out what people want. -->
@ -78,7 +87,9 @@
</classpath>
</taskdef>
<convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}" emitReport="${emitReport}">
outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}"
localeIdFilter="${localeIdFilter}" includePseudoLocales="${includePseudoLocales}"
emitReport="${emitReport}">
<!-- The primary set of locale IDs to be generated by default. The IDs in this list are
automatically expanded to include default scripts and all available regions. The
@ -283,6 +294,19 @@
data than "yue", so this alias is not just rewriting the base language. -->
<!-- TODO: Find out and document this properly. -->
<forcedAlias dir="rbnf" source="zh_Hant_HK" target="yue"/>
<!-- The following elements configure alternate values for some special case paths.
The target path will only be replaced if both it, and the source path, exist in
the CLDR data (new paths will not be added if only the source path exists).
Since the paths must represent the same semantic type of data, they must be in the
same "namespace" (same element names) and must not contain value attributes. Thus
they can only differ by distinguishing attributes (either added or modified).
This feature is typically used to select alternate translations (e.g. short forms)
for certain paths. -->
<!-- <altPath target="//path/to/value[@attr='foo']" source="//path/to/value[@attr='bar']"/> -->
</convert>
</target>
</project>

View file

@ -0,0 +1,126 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.CldrDataType.LDML;
import java.util.Map;
import java.util.Set;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableMap;
/**
* A factory for data suppliers which can filter CLDR values by substituting values from one path
* to another. The replaced value must retain the original "target" path but will have the value
* and value attributes of the "source". A value will only be replaced if both the source and
* target paths have associated values. The replacement retains its original position in the value
* ordering.
*
* <p>This class DOES NOT transform supplemental or BCP-47 data, because the use of "alt" values
* is completely different for that data (it would require merging specific attributes together).
*
* <p>Note that this is not a general purpose transformation of CLDR data, since it is generally
* not possible to "move" values between arbitrary paths. Target and source paths must be in the
* same "namespace" (i.e. share the same element names) but attributes can differ.
*
* <p>Note also that the mapping is not recursive, so mapping {@code A -> B} and {@code B -> C}
* will NOT cause {@code A} to be mapped to {@code C}.
*
* <p>Typically this class is expected to be used for selecting alternate values of locale data
* based on the {@code "alt"} path attribute (e.g. selecting the short form of a region name).
*/
public final class AlternateLocaleData {
    /**
     * Wraps the given supplier so that locale data values are substituted according to the given
     * path mapping. Each key of {@code altPaths} is a "target" path whose value is to be replaced,
     * and the associated map value is the "source" path which provides the replacement value.
     * Both paths of every entry must be LDML paths sharing the same sequence of element names
     * (i.e. the same namespace), otherwise an {@link IllegalArgumentException} is thrown.
     */
    public static CldrDataSupplier transform(CldrDataSupplier src, Map<CldrPath, CldrPath> altPaths) {
        return new CldrDataFilter(src, altPaths);
    }

    /** A supplier which applies the alternate path mapping to all locale data it returns. */
    private static final class CldrDataFilter extends CldrDataSupplier {
        private final CldrDataSupplier src;
        // Keyed by target (destination) path, with the source path as the value. The map has to
        // be this way around because several targets may take their value from the same source.
        private final ImmutableMap<CldrPath, CldrPath> altPaths;

        CldrDataFilter(CldrDataSupplier src, Map<CldrPath, CldrPath> altPaths) {
            this.src = checkNotNull(src);
            this.altPaths = ImmutableMap.copyOf(altPaths);
            // Validate every (target, source) pair up front so bad mappings fail fast.
            for (Map.Entry<CldrPath, CldrPath> mapping : altPaths.entrySet()) {
                CldrPath target = checkLdml(mapping.getKey());
                CldrPath source = checkLdml(mapping.getValue());
                checkArgument(hasSameNamespace(target, source),
                    "alternate paths must have the same namespace: target=%s, source=%s",
                    target, source);
            }
        }

        @Override
        public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
            CldrDataSupplier restricted = src.withDraftStatusAtLeast(draftStatus);
            return new CldrDataFilter(restricted, altPaths);
        }

        @Override
        public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
            CldrData localeData = src.getDataForLocale(localeId, resolution);
            return new AltData(localeData);
        }

        @Override
        public Set<String> getAvailableLocaleIds() {
            return src.getAvailableLocaleIds();
        }

        // Non-locale data is passed through from the underlying supplier without transformation.
        @Override
        public CldrData getDataForType(CldrDataType type) {
            return src.getDataForType(type);
        }

        /** Locale data in which values at target paths are replaced by values at source paths. */
        private final class AltData extends FilteredData {
            AltData(CldrData srcData) {
                super(srcData);
            }

            @Override
            protected CldrValue filter(CldrValue value) {
                CldrPath targetPath = value.getPath();
                CldrPath sourcePath = altPaths.get(targetPath);
                if (sourcePath == null) {
                    // Not a target path, so there is nothing to substitute.
                    return value;
                }
                // The replacement is read from the unfiltered source data, which is why the
                // mapping is not applied recursively.
                CldrValue replacement = getSourceData().get(sourcePath);
                if (replacement == null) {
                    // Only replace a value when the source path also has a value.
                    return value;
                }
                return replacement.replacePath(targetPath);
            }
        }
    }

    // Returns whether two paths have the same length and the same element name at each level.
    private static boolean hasSameNamespace(CldrPath x, CldrPath y) {
        if (x.getLength() != y.getLength()) {
            return false;
        }
        // Walk both paths towards the root in lock-step (the lengths are known to be equal).
        for (CldrPath a = x, b = y; a != null; a = a.getParent(), b = b.getParent()) {
            if (!a.getName().equals(b.getName())) {
                return false;
            }
        }
        return true;
    }

    // Returns the given path after checking that it is a locale data (LDML) path.
    private static CldrPath checkLdml(CldrPath path) {
        checkArgument(path.getDataType() == LDML, "only locale data (LDML) is supported: %s", path);
        return path;
    }

    private AlternateLocaleData() {}
}

View file

@ -0,0 +1,67 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import javax.annotation.Nullable;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
/**
* A class which allows data from some underlying {@link CldrData} source to be filtered or
* removed (but not added).
*/
// TODO: Once DTD ordering is the only allowed order, this can be extended to allow adding paths.
abstract class FilteredData implements CldrData {
    private final CldrData src;

    public FilteredData(CldrData src) {
        this.src = checkNotNull(src);
    }

    /** Returns the underlying (unfiltered) source data, for use by sub-classes. */
    protected CldrData getSourceData() {
        return src;
    }

    /**
     * Filters a single CLDR value, either replacing it or removing it. A replacement value may
     * differ from the original only in its base value or value attributes, and must have the
     * same {@link CldrPath} as the original.
     *
     * @return the value to use in place of the given value, or {@code null} to remove the value.
     */
    @Nullable
    protected abstract CldrValue filter(CldrValue value);

    @Override
    public void accept(PathOrder order, ValueVisitor visitor) {
        src.accept(order, original -> {
            CldrValue filtered = checkFiltered(original);
            // Removed values (null) are simply never shown to the visitor.
            if (filtered != null) {
                visitor.visit(filtered);
            }
        });
    }

    @Override
    public CldrValue get(CldrPath path) {
        CldrValue original = src.get(path);
        if (original == null) {
            return null;
        }
        return checkFiltered(original);
    }

    // Applies the filter and verifies that any replacement value kept the original path.
    @Nullable
    private CldrValue checkFiltered(CldrValue value) {
        CldrValue filtered = filter(value);
        if (filtered != null) {
            checkArgument(filtered.getPath().equals(value.getPath()),
                "filtering is not permitted to modify distinguishing paths: source=%s, filtered=%s",
                value, filtered);
        }
        return filtered;
    }
}

View file

@ -4,7 +4,9 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.stream.Collectors.toList;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.cldr.api.CldrDataType.BCP47;
import static org.unicode.cldr.api.CldrDataType.LDML;
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
@ -32,8 +34,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -56,6 +56,7 @@ import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;
import com.google.common.base.CharMatcher;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.LinkedListMultimap;
@ -137,69 +138,30 @@ public final class LdmlConverter {
* hide what are essentially implementation specific data splits.
*/
public enum OutputType {
LOCALES(LDML, LdmlConverter::processLocales),
BRKITR(LDML, LdmlConverter::processBrkitr),
COLL(LDML, LdmlConverter::processCollation),
RBNF(LDML, LdmlConverter::processRbnf),
LOCALES(LDML),
BRKITR(LDML),
COLL(LDML),
RBNF(LDML),
DAY_PERIODS(SUPPLEMENTAL),
GENDER_LIST(SUPPLEMENTAL),
LIKELY_SUBTAGS(SUPPLEMENTAL),
SUPPLEMENTAL_DATA(SUPPLEMENTAL),
CURRENCY_DATA(SUPPLEMENTAL),
METADATA(SUPPLEMENTAL),
META_ZONES(SUPPLEMENTAL),
NUMBERING_SYSTEMS(SUPPLEMENTAL),
PLURALS(SUPPLEMENTAL),
PLURAL_RANGES(SUPPLEMENTAL),
WINDOWS_ZONES(SUPPLEMENTAL),
TRANSFORMS(SUPPLEMENTAL),
KEY_TYPE_DATA(BCP47);
DAY_PERIODS(
SUPPLEMENTAL,
LdmlConverter::processDayPeriods),
GENDER_LIST(
SUPPLEMENTAL,
c -> c.processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false)),
LIKELY_SUBTAGS(
SUPPLEMENTAL,
c -> c.processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false)),
SUPPLEMENTAL_DATA(
SUPPLEMENTAL,
c -> c.processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true)),
CURRENCY_DATA(
SUPPLEMENTAL,
c -> c.processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", true)),
METADATA(
SUPPLEMENTAL,
c -> c.processSupplemental("metadata", METADATA_PATHS, "misc", false)),
META_ZONES(
SUPPLEMENTAL,
c -> c.processSupplemental("metaZones", METAZONE_PATHS, "misc", false)),
NUMBERING_SYSTEMS(
SUPPLEMENTAL,
c -> c.processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false)),
PLURALS(
SUPPLEMENTAL,
LdmlConverter::processPlurals),
PLURAL_RANGES(
SUPPLEMENTAL,
LdmlConverter::processPluralRanges),
WINDOWS_ZONES(
SUPPLEMENTAL,
c -> c.processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false)),
TRANSFORMS(
SUPPLEMENTAL,
LdmlConverter::processTransforms),
KEY_TYPE_DATA(
BCP47,
LdmlConverter::processKeyTypeData),
// Batching by type.
DTD_LDML(LDML, c -> c.processAll(LDML)),
DTD_SUPPLEMENTAL(SUPPLEMENTAL, c -> c.processAll(SUPPLEMENTAL)),
DTD_BCP47(BCP47, c -> c.processAll(BCP47));
public static final ImmutableSet<OutputType> ALL =
ImmutableSet.of(DTD_BCP47, DTD_SUPPLEMENTAL, DTD_LDML);
public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values());
private final CldrDataType type;
private final Consumer<LdmlConverter> converterFn;
OutputType(CldrDataType type, Consumer<LdmlConverter> converterFn) {
OutputType(CldrDataType type) {
this.type = checkNotNull(type);
this.converterFn = checkNotNull(converterFn);
}
void convert(LdmlConverter converter) {
converterFn.accept(converter);
}
CldrDataType getCldrType() {
@ -207,6 +169,17 @@ public final class LdmlConverter {
}
}
// Map to convert the rather arbitrarily defined "output types" to the directories into which
// the data is written. This is only for "LDML" types since other mappers don't need to split
// data into multiple directories.
private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR =
ImmutableListMultimap.<OutputType, IcuLocaleDir>builder()
.putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE)
.putAll(OutputType.BRKITR, BRKITR)
.putAll(OutputType.COLL, COLL)
.putAll(OutputType.RBNF, RBNF)
.build();
/** Converts CLDR data according to the given configuration. */
public static void convert(
CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
@ -252,15 +225,8 @@ public final class LdmlConverter {
}
private void convertAll() {
ListMultimap<CldrDataType, OutputType> groupByType = LinkedListMultimap.create();
for (OutputType t : config.getOutputTypes()) {
groupByType.put(t.getCldrType(), t);
}
for (CldrDataType cldrType : groupByType.keySet()) {
for (OutputType t : groupByType.get(cldrType)) {
t.convert(this);
}
}
processLdml();
processSupplemental();
if (config.emitReport()) {
System.out.println("Supplemental Data Transformer=" + supplementalTransformer);
System.out.println("Locale Data Transformer=" + localeTransformer);
@ -275,24 +241,6 @@ public final class LdmlConverter {
}
}
private PathValueTransformer getLocaleTransformer() {
return localeTransformer;
}
private PathValueTransformer getSupplementalTransformer() {
return supplementalTransformer;
}
private void processAll(CldrDataType cldrType) {
List<OutputType> targets = Arrays.stream(OutputType.values())
.filter(t -> t.getCldrType().equals(cldrType))
.filter(t -> !t.name().startsWith("DTD_"))
.collect(toList());
for (OutputType t : targets) {
t.convert(this);
}
}
private Optional<CldrData> loadSpecialsData(String localeId) {
String expected = localeId + ".xml";
try (Stream<Path> files = Files.walk(config.getSpecialsDir())) {
@ -310,31 +258,12 @@ public final class LdmlConverter {
}
}
private void processLocales() {
// TODO: Pre-load specials files to avoid repeatedly re-loading them.
processAndSplitLocaleFiles(
id -> LocaleMapper.process(
id, src, loadSpecialsData(id), getLocaleTransformer(), supplementalData),
CURR, LANG, LOCALES, REGION, UNIT, ZONE);
}
private void processBrkitr() {
processAndSplitLocaleFiles(
id -> BreakIteratorMapper.process(id, src, loadSpecialsData(id)), BRKITR);
}
private void processCollation() {
processAndSplitLocaleFiles(
id -> CollationMapper.process(id, src, loadSpecialsData(id)), COLL);
}
private void processRbnf() {
processAndSplitLocaleFiles(
id -> RbnfMapper.process(id, src, loadSpecialsData(id)), RBNF);
}
private void processAndSplitLocaleFiles(
Function<String, IcuData> icuFn, IcuLocaleDir... splitDirs) {
private void processLdml() {
ImmutableList<IcuLocaleDir> splitDirs =
config.getOutputTypes().stream()
.filter(t -> t.getCldrType() == LDML)
.flatMap(t -> TYPE_TO_DIR.get(t).stream())
.collect(toImmutableList());
SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
Path baseDir = config.getOutputDir();
@ -344,7 +273,20 @@ public final class LdmlConverter {
if (!availableIds.contains(id)) {
continue;
}
IcuData icuData = icuFn.apply(id);
IcuData icuData = new IcuData(id, true);
Optional<CldrData> specials = loadSpecialsData(id);
CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
BreakIteratorMapper.process(icuData, unresolved, specials);
CollationMapper.process(icuData, unresolved, specials);
RbnfMapper.process(icuData, unresolved, specials);
CldrData resolved = src.getDataForLocale(id, RESOLVED);
Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id);
LocaleMapper.process(
icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar);
ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create();
for (RbPath p : icuData.getPaths()) {
@ -399,6 +341,15 @@ public final class LdmlConverter {
}
}
private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
// Resource bundle path elements can carry a variant (e.g. "Currencies%narrow") or a type
// annotation (e.g. "languages:intvector"). This returns the element name with any such
// modifier (everything from the first ':' or '%' onwards) stripped off.
private static String getBaseSegmentName(String segment) {
    int modifierStart = PATH_MODIFIER.indexIn(segment);
    if (modifierStart == -1) {
        // No variant or type annotation present.
        return segment;
    }
    return segment.substring(0, modifierStart);
}
private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) {
// There are four reasons for treating a locale ID as an alias.
// 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS").
@ -445,34 +396,69 @@ public final class LdmlConverter {
return aliasMap;
}
private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
private void processSupplemental() {
for (OutputType type : config.getOutputTypes()) {
if (type.getCldrType() == LDML) {
continue;
}
switch (type) {
case DAY_PERIODS:
write(DayPeriodsMapper.process(src), "misc");
break;
// Resource bundle path elements can have variants (e.g. "Currencies%narrow") or type
// annotations (e.g. "languages:intvector"). We strip these when considering the element name.
private static String getBaseSegmentName(String segment) {
int idx = PATH_MODIFIER.indexIn(segment);
return idx == -1 ? segment : segment.substring(0, idx);
}
case GENDER_LIST:
processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false);
break;
private void processDayPeriods() {
write(DayPeriodsMapper.process(src), "misc");
}
case LIKELY_SUBTAGS:
processSupplemental("likelySubtags", LIKELY_SUBTAGS_PATHS, "misc", false);
break;
private void processPlurals() {
write(PluralsMapper.process(src), "misc");
}
case SUPPLEMENTAL_DATA:
processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true);
break;
private void processPluralRanges() {
write(PluralRangesMapper.process(src), "misc");
}
case CURRENCY_DATA:
processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", true);
break;
private void processKeyTypeData() {
Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
}
case METADATA:
processSupplemental("metadata", METADATA_PATHS, "misc", false);
break;
private void processTransforms() {
Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
write(TransformsMapper.process(src, transformDir, fileHeader), transformDir);
case META_ZONES:
processSupplemental("metaZones", METAZONE_PATHS, "misc", false);
break;
case NUMBERING_SYSTEMS:
processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false);
break;
case PLURALS:
write(PluralsMapper.process(src), "misc");
break;
case PLURAL_RANGES:
write(PluralRangesMapper.process(src), "misc");
break;
case WINDOWS_ZONES:
processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false);
break;
case TRANSFORMS:
Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
write(TransformsMapper.process(src, transformDir, fileHeader), transformDir);
break;
case KEY_TYPE_DATA:
Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
break;
default:
throw new AssertionError("Unsupported supplemental type: " + type);
}
}
}
private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
@ -480,7 +466,7 @@ public final class LdmlConverter {
private void processSupplemental(
String label, PathMatcher paths, String dir, boolean addCldrVersion) {
IcuData icuData =
SupplementalMapper.process(src, getSupplementalTransformer(), label, paths);
SupplementalMapper.process(src, supplementalTransformer, label, paths);
// A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
// supplemental data XML files.
if (addCldrVersion) {

View file

@ -0,0 +1,387 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT;
import static java.util.function.Function.identity;
import static java.util.regex.Pattern.CASE_INSENSITIVE;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import java.util.Arrays;
import java.util.Set;
import java.util.function.Function;
import java.util.function.IntUnaryOperator;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataSupplier.CldrResolution;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
/**
* A factory for wrapping data suppliers to add synthetic locales for debugging. The currently
* supported synthetic locales are:
* <ul>
* <li>{@code en_XA}: A pseudo locale which generates expanded text with many non-Latin accents.
* <li>{@code ar_XB}: A pseudo locale which generates BiDi text for debugging.
* </ul>
*
* <p>Both pseudo locales are based on {@code "en"} data, and generate values which are readable
* by English speaking developers. For example, the CLDR value "Hello World" will be turned into
* something like:
* <ul>
* <li>{@code en_XA}: [Ĥéļļö Ŵöŕļð one two]
* <li>{@code ar_XB}: dlroW elloH
* </ul>
*
* <p>In the case of BiDi pseudo localization, bi-directional markers are also inserted into the
* text so that, if the system using the data is configured correctly, the results will look
* "normal" (i.e. Latin text will appear displayed left-to-right because of the BiDi markers).
*/
// TODO(CLDR-13381): Move this all into the CLDR API once the dust has settled.
public final class PseudoLocales {
/** The supported pseudo locale types, each with its locale ID, text generator and exemplars. */
private enum PseudoType {
    BIDI("ar_XB", PseudoLocales::bidi, "abcdefghijklmnopqrstuvwxyz"),
    EXPAND("en_XA", PseudoLocales::expanding,
        "a\u00e5b\u0180c\u00e7d\u00f0e\u00e9f\u0192g\u011dh\u0125i\u00eej\u0135k\u0137l\u013cm"
            + "\u0271n\u00f1o\u00f6p\u00feq\u01ebr\u0155s\u0161t\u0163u\u00fbv\u1e7dw\u0175"
            + "x\u1e8by\u00fdz\u017e");

    // Lookup from locale ID (e.g. "en_XA") to the corresponding pseudo type.
    private static final ImmutableMap<String, PseudoType> ID_MAP =
        Arrays.stream(values()).collect(toImmutableMap(PseudoType::getLocaleId, identity()));

    /**
     * Returns the pseudo type for the given locale ID.
     *
     * @throws NullPointerException if the given ID is not a known pseudo locale ID.
     */
    private static PseudoType fromId(String localeId) {
        return checkNotNull(ID_MAP.get(localeId), "unknown pseudo locale: %s", localeId);
    }

    /** Returns the IDs of all supported pseudo locales. */
    private static ImmutableSet<String> getLocaleIds() {
        return ID_MAP.keySet();
    }

    private final String localeId;
    // Creates a new text accumulator; the argument is the "isPattern" flag given to getText().
    private final Function<Boolean, PseudoText> textSupplier;
    // A string whose code points form the exemplar set for the pseudo locale.
    private final String exemplars;

    PseudoType(String localeId, Function<Boolean, PseudoText> textSupplier, String exemplars) {
        this.localeId = localeId;
        this.textSupplier = textSupplier;
        this.exemplars = exemplars;
    }

    String getLocaleId() {
        return localeId;
    }

    /** Returns a new text accumulator for pseudo localizing a single value. */
    PseudoText getText(boolean isPattern) {
        return textSupplier.apply(isPattern);
    }

    String getExemplars() {
        return exemplars;
    }
}
/**
 * Wraps the given data supplier so that it additionally provides {@link CldrData} for the
 * pseudo locales {@code en_XA} and {@code ar_XB}. The injected locales are intended to behave
 * in all respects like normal locales, and so can be processed in the same way.
 *
 * @throws IllegalArgumentException if the given supplier already provides a pseudo locale ID.
 */
public static CldrDataSupplier addPseudoLocalesTo(CldrDataSupplier src) {
    CldrDataSupplier withPseudoLocales = new PseudoSupplier(src);
    return withPseudoLocales;
}
/** A supplier which adds the synthetic pseudo locales to those of an underlying supplier. */
private static final class PseudoSupplier extends CldrDataSupplier {
    private final CldrDataSupplier src;
    private final Set<String> srcIds;
    private final CldrData enData;

    PseudoSupplier(CldrDataSupplier src) {
        this.src = checkNotNull(src);
        this.srcIds = src.getAvailableLocaleIds();
        // Resolved data is used so that all values are obtained (e.g. values in "en_001").
        this.enData = src.getDataForLocale("en", RESOLVED);
        // Guard against wrapping a supplier which has already been wrapped.
        for (String pseudoId : PseudoType.getLocaleIds()) {
            checkArgument(!srcIds.contains(pseudoId),
                "pseudo locale %s already supported by given data supplier", pseudoId);
        }
    }

    @Override
    public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
        CldrDataSupplier restricted = src.withDraftStatusAtLeast(draftStatus);
        return new PseudoSupplier(restricted);
    }

    @Override
    public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
        if (!PseudoType.getLocaleIds().contains(localeId)) {
            // Not a pseudo locale, so just defer to the underlying supplier.
            return src.getDataForLocale(localeId, resolution);
        }
        // Pseudo locale data is always derived from the (resolved) "en" data.
        return new PseudoLocaleData(enData, resolution, PseudoType.fromId(localeId));
    }

    @Override
    public Set<String> getAvailableLocaleIds() {
        return Sets.union(src.getAvailableLocaleIds(), PseudoType.getLocaleIds());
    }

    @Override
    public CldrData getDataForType(CldrDataType type) {
        return src.getDataForType(type);
    }
}
// Accumulates the pieces of a single value while it is being pseudo localized. Fragments are
// added in order via addFragment(), and createMessage() reads the final text back by calling
// toString() on the instance.
private interface PseudoText {
// Appends the next fragment of the value. createMessage() passes "isLocalizable" as false for
// fragments such as "{0}" placeholders in non-pattern text and for unquoted sections of
// patterns. NOTE(review): implementations are presumably expected to leave non-localizable
// fragments unchanged - confirm against the implementations.
void addFragment(String text, boolean isLocalizable);
}
/** Locale data derived from "en" in which selected values have been pseudo localized. */
private static final class PseudoLocaleData extends FilteredData {
    private static final PathMatcher AUX_EXEMPLARS =
        PathMatcher.of("ldml/characters/exemplarCharacters[@type=\"auxiliary\"]");

    private static final PathMatcher NUMBERING_SYSTEM =
        PathMatcher.of("ldml/numbers/defaultNumberingSystem");

    // The roots of the paths whose values are pseudo localized. These were mostly derived from
    // the behaviour of the previous implementation and can be modified as needed. Notably there
    // are no "units" here (but they were also excluded in the original code).
    private static final PathMatcher PSEUDO_PATHS = PathMatcher.anyOf(
        ldml("localeDisplayNames"),
        ldml("delimiters"),
        ldml("dates/calendars/calendar"),
        ldml("dates/fields"),
        ldml("dates/timeZoneNames"),
        ldml("listPatterns"),
        ldml("posix/messages"),
        ldml("characterLabels"),
        ldml("typographicNames"));

    // Paths holding non-localizable data. It is important that these catch all the
    // non-localizable sub-paths of the roots listed above, so this list must be accurate.
    private static final PathMatcher EXCLUDE_PATHS = PathMatcher.anyOf(
        ldml("localeDisplayNames/localeDisplayPattern"),
        ldml("dates/timeZoneNames/fallbackFormat"));

    // All non-alias paths with values under these roots are expected to be "date/time pattern
    // like" (such as "E h:mm:ss B"), where care must be taken not to pseudo localize the
    // pattern in such a way as to break it. This list must be accurate.
    private static final PathMatcher PATTERN_PATHS = PathMatcher.anyOf(
        ldml("dates/calendars/calendar/timeFormats"),
        ldml("dates/calendars/calendar/dateFormats"),
        ldml("dates/calendars/calendar/dateTimeFormats"),
        ldml("dates/timeZoneNames/hourFormat"));

    private static PathMatcher ldml(String matcherSuffix) {
        return PathMatcher.of("ldml/" + matcherSuffix);
    }

    // Matches any path attribute with "narrow" in its value. Since "narrow" values come with
    // strong expectations of width, they should not be expanded (but might be altered in some
    // other way).
    private static final Predicate<String> IS_NARROW =
        Pattern.compile("\\[@[a-z]+=\"[^\"]*narrow[^\"]*\"]", CASE_INSENSITIVE).asPredicate();

    private static final Pattern NUMERIC_PLACEHOLDER = Pattern.compile("\\{\\d+\\}");
    private static final Pattern QUOTED_TEXT = Pattern.compile("'.*?'");

    private final PseudoType type;
    private final boolean isResolved;

    private PseudoLocaleData(CldrData srcData, CldrResolution resolution, PseudoType type) {
        super(srcData);
        this.isResolved = checkNotNull(resolution) == RESOLVED;
        this.type = checkNotNull(type);
    }

    @Override
    protected CldrValue filter(CldrValue value) {
        CldrPath path = value.getPath();

        // Special cases come first.

        // The auxiliary exemplar characters are replaced according to the pseudo type.
        if (AUX_EXEMPLARS.matches(path)) {
            return getExemplarValue(path);
        }
        // Force "latn" for the "ar_XB" pseudo locale (since otherwise it inherits from "ar").
        // The value comes from "en" so should already be "latn", but it must be returned
        // explicitly here in order to take effect.
        if (type == PseudoType.BIDI && NUMBERING_SYSTEM.matches(path)) {
            checkArgument(value.getValue().equals("latn"));
            return value;
        }

        // For unresolved data, values which are not modified are removed. This makes it look
        // like there are explicit values only for the included paths.
        CldrValue unmodified = isResolved ? value : null;

        boolean isCandidate =
            PSEUDO_PATHS.matchesPrefixOf(path) && !EXCLUDE_PATHS.matchesPrefixOf(path);
        if (!isCandidate) {
            return unmodified;
        }
        String fullPath = value.getFullPath();
        // For now nothing is done with "narrow" data (this matches the previous behaviour).
        // Something can always be added here later if necessary.
        if (IS_NARROW.test(fullPath)) {
            return unmodified;
        }
        String originalText = value.getValue();
        boolean isPattern = PATTERN_PATHS.matchesPrefixOf(path);
        String pseudoText = createMessage(originalText, isPattern);
        return CldrValue.parseValue(fullPath, pseudoText);
    }

    // It is tempting to think that the existing exemplar list in "en" could be parsed to
    // generate this list automatically (rather than hard coding it in the type) but
    // https://unicode.org/reports/tr35/tr35-general.html#ExemplarSyntax
    // makes it quite clear that this is infeasible, since there are many equivalent
    // representations of the exemplar characters that could appear in the value
    // (e.g. "[a b ... z]", "[a-z]", "[{a} {b} ... {z}]").
    private CldrValue getExemplarValue(CldrPath path) {
        StringBuilder list = new StringBuilder("[");
        for (int codePoint : type.getExemplars().codePoints().toArray()) {
            list.appendCodePoint(codePoint).append(' ');
        }
        // Overwrite the final separator with the closing bracket.
        list.setCharAt(list.length() - 1, ']');
        return CldrValue.parseValue(path.toString(), list.toString());
    }

    private String createMessage(String text, boolean isPattern) {
        // Pattern text is split by its quoted sections (which are localizable) whereas
        // non-pattern text is split by placeholders (e.g. {0}) which are not localizable.
        // This is why "isPattern" doubles as the "isLocalizable" flag for matched sections.
        Pattern splitter = isPattern ? QUOTED_TEXT : NUMERIC_PLACEHOLDER;
        Matcher sections = splitter.matcher(text);
        PseudoText out = type.getText(isPattern);
        // Alternate between the unmatched and matched sections of the text, always localizing
        // one but not the other (depending on the type).
        int unmatchedStart = 0;
        while (sections.find()) {
            out.addFragment(text.substring(unmatchedStart, sections.start()), !isPattern);
            out.addFragment(sections.group(), isPattern);
            unmatchedStart = sections.end();
        }
        // Finally append whatever trails the last match (or the whole text if nothing matched).
        out.addFragment(text.substring(unmatchedStart), !isPattern);
        return out.toString();
    }
}
// ---- Expanding Pseudo-localizer (e.g. "November" --> "[Ñöṽéɱƀéŕ one two]") ----
// A code point mapping built from a string of alternating key/value code points (each source
// code point is immediately followed by its replacement; e.g. '1' -> '①'). Code points which
// do not appear as a key are mapped to themselves (see toCodePointFunction()).
// Note that a subset of this is also used to form the "exemplar" set (see PseudoType).
private static final IntUnaryOperator CONVERT_CODEPOINT = toCodePointFunction(
" \u2003!\u00a1\"\u2033#\u266f$\u20ac%\u2030&\u214b*\u204e+\u207a,\u060c-\u2010.\u00b7"
+ "/\u20440\u24ea1\u24602\u24613\u24624\u24635\u24646\u24657\u24668\u24679\u2468"
+ ":\u2236;\u204f<\u2264=\u2242>\u2265?\u00bf@\u055eA\u00c5B\u0181C\u00c7D\u00d0"
+ "E\u00c9F\u0191G\u011cH\u0124I\u00ceJ\u0134K\u0136L\u013bM\u1e40N\u00d1O\u00d6"
+ "P\u00deQ\u01eaR\u0154S\u0160T\u0162U\u00dbV\u1e7cW\u0174X\u1e8aY\u00ddZ\u017d"
+ "[\u2045\\\u2216]\u2046^\u02c4_\u203f`\u2035a\u00e5b\u0180c\u00e7d\u00f0e\u00e9"
+ "f\u0192g\u011dh\u0125i\u00eej\u0135k\u0137l\u013cm\u0271n\u00f1o\u00f6p\u00fe"
+ "q\u01ebr\u0155s\u0161t\u0163u\u00fbv\u1e7dw\u0175x\u1e8by\u00fdz\u017e|\u00a6"
+ "~\u02de");
// Converts a string of alternating source/target code-points into a mapping function. Code
// points that do not appear as a source are returned unchanged by the function.
private static IntUnaryOperator toCodePointFunction(String s) {
    // There is no neat way to "pair up" successive stream elements without extra library
    // dependencies, so the code points are collected into an array and paired by index.
    int[] pairs = s.codePoints().toArray();
    int count = pairs.length;
    checkArgument(count % 2 == 0, "must have an even number of code points (was %s)", count);
    // Visit the even offsets in order; each one holds a source code point and is immediately
    // followed by its target.
    ImmutableMap<Integer, Integer> mapping =
        IntStream.range(0, count / 2)
            .map(n -> 2 * n)
            .boxed()
            .collect(toImmutableMap(i -> pairs[i], i -> pairs[i + 1]));
    return codePoint -> mapping.getOrDefault(codePoint, codePoint);
}
// A list of words to be added to text when it is expanded. A whole number of words are
// always added (and the fact they are numeric words is irrelevant, could be Lorem Ipsum).
// So far nothing goes above "ten" in en_XA, but this can always be trivially extended.
private static final String PADDING = "one two three four five six seven eight nine ten";
/**
 * Returns a new "expanding" pseudo-localizer which converts localizable fragments using
 * {@code CONVERT_CODEPOINT} and appends a whole number of padding words to the result.
 *
 * @param isPattern whether the text being built is a pattern string, in which case the padding
 *     words are emitted as quoted literals.
 */
private static PseudoText expanding(boolean isPattern) {
    return new PseudoText() {
        // Accumulated (already converted) text. A StringBuilder is used rather than a one-shot
        // IntStream.Builder so that toString() can be called repeatedly and fragments can
        // still be added afterwards.
        private final StringBuilder out = new StringBuilder();

        @Override
        public void addFragment(String text, boolean isLocalizable) {
            text.codePoints()
                .map(isLocalizable ? CONVERT_CODEPOINT : cp -> cp)
                .forEach(out::appendCodePoint);
        }

        @Override
        public String toString() {
            int codePointCount = out.codePointCount(0, out.length());
            // Copy the original code and round up the 50% calculation (it's not important).
            // The start index is clamped because indexIn() rejects a start beyond the end of
            // the sequence, which would otherwise make long text fail.
            int start = Math.min((codePointCount + 1) / 2, PADDING.length());
            int endIndex = CharMatcher.whitespace().indexIn(PADDING, start);
            if (endIndex < 0) {
                // No word boundary at or after the start index: use all available padding.
                endIndex = PADDING.length();
            }
            String suffix = PADDING.substring(0, endIndex);
            // For pattern strings, any literal text must be quoted (the fragment text
            // already was). Note that this is why we don't transform single-quotes.
            if (isPattern) {
                suffix = "'" + suffix.replace(" ", "' '") + "'";
            }
            // Final output is something like "November" --> "[Ñöṽéɱƀéŕ one two]"
            // Where the additional padding adds at least 50% to the length of the text
            // (or as much as the padding list allows for very long text).
            return "[" + out + " " + suffix + "]";
        }
    };
}
// ---- Bidi Pseudo-localizer (e.g. "November" --> "rebmevoN" using BiDi tags)----
// Right-to-left override character.
private static final String RLO = "\u202e";
// Arabic letter mark character.
private static final String ALM = "\u061C";
// Pop direction formatting character.
private static final String PDF = "\u202c";
// Prefix to add before each LTR word.
private static final String BIDI_PREFIX = ALM + RLO;
// Postfix to add after each LTR word.
private static final String BIDI_POSTFIX = PDF + ALM;
// Bidi localization behaves the same for pattern and non-pattern text, so the argument is
// accepted but ignored.
@SuppressWarnings("unused")
private static PseudoText bidi(boolean isPattern) {
    return new PseudoText() {
        private final StringBuilder out = new StringBuilder();

        // This was largely copied from the original CLDRFilePseudolocalizer class.
        // Non-localizable fragments are appended untouched. In localizable fragments each
        // maximal run of code points with left-to-right directionality is preceded by
        // BIDI_PREFIX and followed by BIDI_POSTFIX; all other code points are appended as-is.
        // TODO: Find someone who can explain why these particular control characters are used.
        @Override
        public void addFragment(String text, boolean isLocalizable) {
            if (!isLocalizable) {
                out.append(text);
                return;
            }
            boolean insideLtrRun = false;
            int offset = 0;
            while (offset < text.length()) {
                int codePoint = text.codePointAt(offset);
                boolean isLtr =
                    Character.getDirectionality(codePoint) == DIRECTIONALITY_LEFT_TO_RIGHT;
                if (isLtr != insideLtrRun) {
                    // Entering or leaving a run of left-to-right code points.
                    out.append(isLtr ? BIDI_PREFIX : BIDI_POSTFIX);
                    insideLtrRun = isLtr;
                }
                out.appendCodePoint(codePoint);
                offset += Character.charCount(codePoint);
            }
            // Close a run which extends to the end of the fragment.
            if (insideLtrRun) {
                out.append(BIDI_POSTFIX);
            }
        }

        @Override
        public String toString() {
            return out.toString();
        }
    };
}
// Private constructor: instances of this class cannot be created from outside it.
private PseudoLocales() {}
}

View file

@ -9,19 +9,27 @@ import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.stream.Collectors.joining;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.cldr.api.CldrPath;
import org.unicode.icu.tool.cldrtoicu.AlternateLocaleData;
import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
import org.unicode.icu.tool.cldrtoicu.PseudoLocales;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.base.Ascii;
@ -31,6 +39,7 @@ import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
@ -50,6 +59,11 @@ public final class ConvertIcuDataTask extends Task {
// Per directory overrides (fully specified locale IDs).
private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
// Don't try and resolve actual paths until inside the execute method.
private final Map<String, String> altPathMap = new HashMap<>();
// TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
private boolean includePseudoLocales = false;
private Predicate<String> idFilter = id -> true;
@SuppressWarnings("unused")
public void setOutputDir(Path path) {
@ -83,6 +97,16 @@ public final class ConvertIcuDataTask extends Task {
config.setSpecialsDir(path);
}
/**
 * Sets whether pseudo-locale data should be added to the CLDR data supplier before
 * conversion (the flag is read in {@code execute()}).
 *
 * @param includePseudoLocales true to have pseudo locales added to the source data.
 */
@SuppressWarnings("unused")
public void setIncludePseudoLocales(boolean includePseudoLocales) {
this.includePseudoLocales = includePseudoLocales;
}
/**
 * Sets the regular expression used to select which (fully expanded) locale IDs are generated.
 *
 * <p>The expression must match the whole locale ID, so the value {@code "en"} selects only
 * {@code "en"} and not {@code "en_GB"} or {@code "en_001"}. {@code Pattern.asPredicate()} is
 * deliberately not used here because it has "find" semantics and would accept any ID which
 * merely contains a match. An empty expression (the default in the build file) disables
 * filtering, so every locale ID is accepted.
 *
 * @param idFilterRegex the regular expression to match complete locale IDs, or the empty
 *     string to accept all locale IDs.
 * @throws java.util.regex.PatternSyntaxException if the expression cannot be compiled.
 */
@SuppressWarnings("unused")
public void setLocaleIdFilter(String idFilterRegex) {
    if (idFilterRegex.isEmpty()) {
        this.idFilter = id -> true;
        return;
    }
    Pattern idPattern = Pattern.compile(idFilterRegex);
    this.idFilter = id -> idPattern.matcher(id).matches();
}
@SuppressWarnings("unused")
public void setEmitReport(boolean emit) {
config.setEmitReport(emit);
@ -130,7 +154,7 @@ public final class ConvertIcuDataTask extends Task {
@SuppressWarnings("unused")
public void setDir(String directory) {
this.dir = resolveOpt(IcuLocaleDir.class, directory);
this.dir = resolveDir(directory);
}
@SuppressWarnings("unused")
@ -150,6 +174,28 @@ public final class ConvertIcuDataTask extends Task {
}
}
/**
 * Nested {@code <altPath>} element holding a source/target pair of CLDR path strings.
 *
 * <p>Single quotes in the given attribute values are replaced by double quotes when the
 * values are set. The paths are kept as plain strings here and are only parsed when the
 * enclosing task is executed.
 */
public static final class AltPath extends Task {
    private String source = "";
    private String target = "";

    /** Sets the target path string, replacing single quotes with double quotes. */
    @SuppressWarnings("unused")
    public void setTarget(String target) {
        this.target = target.replace('\'', '"');
    }

    /** Sets the source path string, replacing single quotes with double quotes. */
    @SuppressWarnings("unused")
    public void setSource(String source) {
        this.source = source.replace('\'', '"');
    }

    /**
     * Checks that both the source and target paths are non-empty.
     *
     * @throws BuildException if either path is empty.
     */
    @Override
    public void init() throws BuildException {
        checkBuild(!source.isEmpty(), "Source path must not be empty");
        checkBuild(!target.isEmpty(), "Target path must not be empty");
    }
}
@SuppressWarnings("unused")
public void addConfiguredLocaleIds(LocaleIds localeIds) {
checkBuild(this.localeIdSpec == null, "Cannot add more that one <localeIds> element");
@ -172,23 +218,48 @@ public final class ConvertIcuDataTask extends Task {
}
}
/**
 * Records the source path of the given {@code <altPath>} element against its target path,
 * failing the build if the same target was already configured.
 */
@SuppressWarnings("unused")
public void addConfiguredAltPath(AltPath altPath) {
    // The strings are deliberately not converted to CldrPath here, since doing so triggers a
    // bunch of CLDR data loading for the DTDs. Conversion waits until the "execute()" method,
    // where in future we expect to use the configured CLDR directory explicitly.
    String previousSource = this.altPathMap.put(altPath.target, altPath.source);
    boolean isNewTarget = (previousSource == null);
    checkBuild(isNewTarget, "Duplicate <altPath> elements (same target): %s", altPath.target);
}
@SuppressWarnings("unused")
public void execute() throws BuildException {
CldrDataSupplier src =
CldrDataSupplier.forCldrFilesIn(cldrPath).withDraftStatusAtLeast(minimumDraftStatus);
CldrDataSupplier src = CldrDataSupplier
.forCldrFilesIn(cldrPath)
.withDraftStatusAtLeast(minimumDraftStatus);
// We must do this wrapping of the data supplier _before_ creating the supplemental data
// instance since adding pseudo locales affects the set of available locales.
// TODO: Move some/all of this into the base converter and control it via the config.
if (!altPathMap.isEmpty()) {
Map<CldrPath, CldrPath> pathMap = new HashMap<>();
altPathMap.forEach(
(t, s) -> pathMap.put(parseDistinguishingPath(t), parseDistinguishingPath(s)));
src = AlternateLocaleData.transform(src, pathMap);
}
if (includePseudoLocales) {
src = PseudoLocales.addPseudoLocalesTo(src);
}
SupplementalData supplementalData = SupplementalData.create(src);
ImmutableSet<String> defaultTargetIds =
LocaleIdResolver.expandTargetIds(this.localeIdSpec, supplementalData);
for (IcuLocaleDir dir : IcuLocaleDir.values()) {
config.addLocaleIds(dir, perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds));
Iterable<String> ids = perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds);
config.addLocaleIds(dir, Iterables.filter(ids, idFilter::test));
}
config.setMinimumDraftStatus(minimumDraftStatus);
LdmlConverter.convert(src, supplementalData, config.build());
}
private static void checkBuild(boolean condition, String message) {
private static void checkBuild(boolean condition, String message, Object... args) {
if (!condition) {
throw new BuildException(message);
throw new BuildException(String.format(message, args));
}
}
@ -199,8 +270,8 @@ public final class ConvertIcuDataTask extends Task {
return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
}
private static <T extends Enum<T>> Optional<T> resolveOpt(Class<T> enumClass, String name) {
return !name.isEmpty() ? Optional.of(resolve(enumClass, name)) : Optional.empty();
private static Optional<IcuLocaleDir> resolveDir(String name) {
return !name.isEmpty() ? Optional.of(resolve(IcuLocaleDir.class, name)) : Optional.empty();
}
private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {

View file

@ -55,12 +55,11 @@ abstract class AbstractPathValueMapper {
}
/**
* Returns a new {@code IcuData} instance produced by post-processing a set of results
* generated by calling sub-class method {@link #addResults()}. This is the only method which
* need be directly invoked by the sub-class implementation (other methods are optionally used
* from within the {@link #addResults()} callback).
* Post-processes results generated by calling the subclass method {@link #addResults()}. This
* is the only method which need be directly invoked by the sub-class implementation (other
* methods are optionally used from within the {@link #addResults()} callback).
*/
final IcuData generateIcuData(String icuName, boolean hasFallback) {
final void addIcuData(IcuData icuData) {
// This subclass mostly exists to control the fact that results need to be added in one go
// to the IcuData because of how referenced paths are handled. If results could be added in
// multiple passes, you could have confusing situations in which values have path references
@ -70,9 +69,8 @@ abstract class AbstractPathValueMapper {
checkState(resultsByRbPath.isEmpty(),
"results must not be added outside the call to addResults(): %s", resultsByRbPath);
addResults();
IcuData icuData = addResultsToIcuData(finalizeResults(), new IcuData(icuName, hasFallback));
addResultsToIcuData(finalizeResults(), icuData);
resultsByRbPath.clear();
return icuData;
}
/**
@ -81,7 +79,7 @@ abstract class AbstractPathValueMapper {
* {@link #transformValue(CldrValue)}) and then, after optional post-processing, add the
* results to this mapper using {@link #addResult(RbPath, Result)}.
*
* <p>This method is called once for each call to {@link #generateIcuData(String, boolean)} and
* <p>This method is called once for each call to {@link #addIcuData(IcuData)} and
* is responsible for adding all necessary results for the returned {@link IcuData}.
*/
abstract void addResults();
@ -145,7 +143,7 @@ abstract class AbstractPathValueMapper {
* having an array index) then the referenced value is assumed to be an existing path whose
* value is then substituted.
*/
private static IcuData addResultsToIcuData(
private static void addResultsToIcuData(
ImmutableListMultimap<RbPath, Result> results, IcuData icuData) {
// Ordering of paths should not matter here (IcuData will re-sort them) and ordering of
@ -178,7 +176,6 @@ abstract class AbstractPathValueMapper {
}
// This works because insertion order is maintained for values of each path.
map.forEach((p, v) -> icuData.add(p, v.resolve(map)));
return icuData;
}
/*

View file

@ -2,15 +2,14 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Optional;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
@ -18,7 +17,6 @@ import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.escape.UnicodeEscaper;
/**
@ -52,21 +50,15 @@ public final class BreakIteratorMapper {
* Processes data from the given supplier to generate break-iterator data for a set of locale
* IDs.
*
* @param localeId the locale ID to generate data for.
* @param src the CLDR data supplier to process.
* @param icuData the ICU data to be filled.
* @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @return IcuData containing break-iterator data for the given locale ID.
*/
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
CldrData cldrData = src.getDataForLocale(localeId, UNRESOLVED);
return process(localeId, cldrData, icuSpecialData);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
BreakIteratorMapper mapper = new BreakIteratorMapper(localeId);
BreakIteratorMapper mapper = new BreakIteratorMapper(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, mapper::addSpecials));
cldrData.accept(DTD, mapper::addSuppression);
return mapper.icuData;
@ -75,8 +67,8 @@ public final class BreakIteratorMapper {
// The per-locale ICU data being collected by this visitor.
private final IcuData icuData;
private BreakIteratorMapper(String localeId) {
this.icuData = new IcuData(localeId, true);
private BreakIteratorMapper(IcuData icuData) {
this.icuData = checkNotNull(icuData);
}
private void addSuppression(CldrValue v) {

View file

@ -3,9 +3,9 @@
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Optional;
@ -21,7 +21,6 @@ import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
@ -61,20 +60,15 @@ public final class CollationMapper {
/**
* Processes data from the given supplier to generate collation data for a set of locale IDs.
*
* @param localeId the locale ID to generate data for.
* @param src the CLDR data supplier to process.
* @param icuData the ICU data to be filled.
* @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @return IcuData containing collation data for the given locale ID.
*/
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
CollationVisitor visitor = new CollationVisitor(localeId);
CollationVisitor visitor = new CollationVisitor(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
@ -83,13 +77,13 @@ public final class CollationMapper {
final static class CollationVisitor implements PrefixVisitor {
private final IcuData icuData;
CollationVisitor(String localeId) {
this.icuData = new IcuData(localeId, true);
CollationVisitor(IcuData icuData) {
this.icuData = checkNotNull(icuData);
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty <collation> element that's a non-leaf element and thus not
// visited, but we should add an empty sequence to the output data.
// TODO: Fix CLDR (https://unicode-org.atlassian.net/projects/CLDR/issues/CLDR-13131)
if (localeId.equals("root")) {
if (icuData.getName().equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
icuData.replace(RB_STANDARD_VERSION, CldrDataSupplier.getCldrVersionString());

View file

@ -5,8 +5,6 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.HashSet;
import java.util.List;
@ -14,16 +12,12 @@ import java.util.Optional;
import java.util.Set;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
import com.google.common.annotations.VisibleForTesting;
/**
* Generate locale {@link IcuData} by transforming {@link CldrDataType#LDML LDML} data using a
@ -47,50 +41,30 @@ public final class LocaleMapper extends AbstractPathValueMapper {
* {@link org.unicode.cldr.api.CldrDataType#SUPPLEMENTAL SUPPLEMENTAL} data.
* @return IcuData containing locale data for the given locale ID.
*/
public static IcuData process(
String localeId,
CldrDataSupplier src,
Optional<CldrData> icuSpecialData,
PathValueTransformer transformer,
SupplementalData supplementalData) {
return process(
localeId,
src,
icuSpecialData,
transformer,
supplementalData.getDefaultCalendar(localeId));
}
@VisibleForTesting // Avoids needing to pass a complete SupplementalData instance in tests.
public static IcuData process(
String localeId,
CldrDataSupplier src,
public static void process(
IcuData icuData,
CldrData unresolved,
CldrData resolved,
Optional<CldrData> icuSpecialData,
PathValueTransformer transformer,
Optional<String> defaultCalendar) {
IcuData icuData =
new LocaleMapper(localeId, src, icuSpecialData, transformer)
.generateIcuData(localeId, true);
new LocaleMapper(unresolved, resolved, icuSpecialData, transformer).addIcuData(icuData);
doDateTimeHack(icuData);
defaultCalendar.ifPresent(c -> icuData.add(RB_CALENDAR, c));
return icuData;
}
private final String localeId;
private final CldrDataSupplier src;
private final CldrData unresolved;
private final Optional<CldrData> icuSpecialData;
private LocaleMapper(
String localeId,
CldrDataSupplier src,
CldrData unresolved,
CldrData resolved,
Optional<CldrData> icuSpecialData,
PathValueTransformer transformer) {
super(src.getDataForLocale(localeId, RESOLVED), transformer);
this.localeId = localeId;
this.src = checkNotNull(src);
super(resolved, transformer);
this.unresolved = checkNotNull(unresolved);
this.icuSpecialData = checkNotNull(icuSpecialData);
}
@ -102,7 +76,7 @@ public final class LocaleMapper extends AbstractPathValueMapper {
private Set<RbPath> collectPaths() {
Set<RbPath> validRbPaths = new HashSet<>();
src.getDataForLocale(localeId, UNRESOLVED)
unresolved
.accept(DTD, v -> transformValue(v).forEach(r -> collectResultPath(r, validRbPaths)));
return validRbPaths;
}

View file

@ -2,9 +2,9 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
@ -12,14 +12,12 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathMatcher;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.escape.UnicodeEscaper;
/**
@ -46,23 +44,18 @@ public final class RbnfMapper {
/**
* Processes data from the given supplier to generate RBNF data for a set of locale IDs.
*
* @param localeId the locale ID to generate data for.
* @param src the CLDR data supplier to process.
* @param icuData the ICU data to be filled.
* @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @return IcuData containing RBNF data for the given locale ID.
*/
public static IcuData process(
String localeId, CldrDataSupplier src, Optional<CldrData> icuSpecialData) {
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
return process(localeId, src.getDataForLocale(localeId, UNRESOLVED), icuSpecialData);
}
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
static IcuData process(String localeId, CldrData cldrData, Optional<CldrData> icuSpecialData) {
// Using DTD order is essential here because the RBNF paths contain ordered elements,
// so we must ensure that they appear in sorted order (otherwise we'd have to do more
// work at this end to re-sort the results).
RulesetVisitor visitor = new RulesetVisitor(localeId);
RulesetVisitor visitor = new RulesetVisitor(icuData);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
@ -72,8 +65,8 @@ public final class RbnfMapper {
private final IcuData icuData;
private RulesetVisitor(String localeId) {
this.icuData = new IcuData(localeId, true);
private RulesetVisitor(IcuData icuData) {
this.icuData = checkNotNull(icuData);
}
@Override public void visitPrefixStart(CldrPath prefix, Context context) {

View file

@ -38,7 +38,9 @@ public final class SupplementalMapper extends AbstractPathValueMapper {
public static IcuData process(
CldrDataSupplier src, PathValueTransformer transformer, String icuName, PathMatcher paths) {
return new SupplementalMapper(src, transformer, paths).generateIcuData(icuName, false);
IcuData icuData = new IcuData(icuName, false);
new SupplementalMapper(src, transformer, paths).addIcuData(icuData);
return icuData;
}
private final PathMatcher paths;

View file

@ -0,0 +1,152 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
import com.google.common.collect.ImmutableMap;
/**
 * Tests that {@link AlternateLocaleData} substitutes the value of a source path for the value
 * of a target path, and that unsupported path pairs are rejected.
 */
@RunWith(JUnit4.class)
public class AlternateLocaleDataTest {
    // LDML paths (relative to "//ldml/") shared by the tests below.
    private static final String USD_NAME =
        "numbers/currencies/currency[@type=\"USD\"]/displayName";
    private static final String USD_SHORT_NAME =
        "numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName";
    private static final String USD_SYMBOL =
        "numbers/currencies/currency[@type=\"USD\"]/symbol";

    @Test
    public void testLocaleData() {
        // Target and source values.
        CldrValue target = ldml(USD_NAME, "Full Display Name");
        CldrValue source = ldml(USD_SHORT_NAME, "Name");
        // The target path carrying the source value, as expected in the transformed data.
        CldrValue altValue = ldml(USD_NAME, "Name");
        // A value which is not transformed.
        CldrValue other = ldml(USD_SYMBOL, "US$");
        // A value which should only exist in the resolved data.
        CldrValue inherited =
            ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YYY");

        FakeDataSupplier src = new FakeDataSupplier()
            .addLocaleData("xx", target, source, other)
            .addInheritedData("xx", inherited);
        CldrDataSupplier transformed = transformValues(src, target, source);
        CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
        CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);

        assertValuesUnordered(unresolved, altValue, source, other);
        assertValuesUnordered(resolved, altValue, source, other, inherited);
    }

    @Test
    public void testMissingSource() {
        // Target and source values (only the target is present in the data).
        CldrValue target = ldml(USD_NAME, "Full Display Name");
        CldrValue source = ldml(USD_SHORT_NAME, "Name");

        FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target);
        CldrDataSupplier transformed = transformValues(src, target, source);
        CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
        CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);

        // No change because there's nothing to get an alternate value from.
        assertValuesUnordered(unresolved, target);
        assertValuesUnordered(resolved, target);
    }

    @Test
    public void testMissingTarget() {
        // Target and source values (only the source is present in the data).
        CldrValue target = ldml(USD_NAME, "Full Display Name");
        CldrValue source = ldml(USD_SHORT_NAME, "Name");

        FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source);
        CldrDataSupplier transformed = transformValues(src, target, source);
        CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
        CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);

        // No change because there's nothing to replace.
        assertValuesUnordered(unresolved, source);
        assertValuesUnordered(resolved, source);
    }

    @Test
    public void testBadPaths() {
        // Target and source paths.
        CldrPath target = CldrPath.parseDistinguishingPath("//ldml/" + USD_NAME);
        CldrPath source = CldrPath.parseDistinguishingPath("//ldml/" + USD_SYMBOL);

        IllegalArgumentException e = transformFailure(target, source);

        assertThat(e).hasMessageThat().contains("alternate paths must have the same namespace");
        assertThat(e).hasMessageThat().contains(target.toString());
        assertThat(e).hasMessageThat().contains(source.toString());
    }

    @Test
    public void testNonLdml() {
        // Real supplemental data with "values" in the value attributes:
        //   target: territories=[AG AR AS AU ...]
        //   source: territories=[GB]
        // where GB is also listed as having "mon" as the first day in its primary path.
        //
        // You can see why swapping paths based on 'alt' for supplemental data would be very
        // wrong, because it would remove "XX" and "YY" by replacing the value attribute.
        // Supplemental and BCP-47 data doesn't have a single value per path, so isn't suitable
        // for swapping.
        //
        // The right way to do this would be to merge the 'territories' attribute and remove
        // the alt territory from its original list, but that's very complex and depends on the
        // specific meaning of each path in question, and will probably never be supported.
        CldrPath target = CldrPath.parseDistinguishingPath(
            "//supplementalData/weekData/firstDay[@day=\"sun\"]");
        CldrPath source = CldrPath.parseDistinguishingPath(
            "//supplementalData/weekData/firstDay[@day=\"sun\"][@alt=\"variant\"]");

        IllegalArgumentException e = transformFailure(target, source);

        assertThat(e).hasMessageThat().contains("only locale data (LDML) is supported");
        // At least one of the paths should be in the error message, so look for the common
        // substring.
        assertThat(e).hasMessageThat().contains("/weekData/firstDay[@day=\"sun\"]");
    }

    /** Asserts that the given data contains exactly the given values, in any order. */
    public static void assertValuesUnordered(CldrData data, CldrValue... values) {
        Set<CldrValue> seen = new HashSet<>();
        data.accept(ARBITRARY, seen::add);
        assertThat(seen).containsExactlyElementsIn(values);
    }

    // Wraps the supplier so that the path of "target" takes its value from the path of "source".
    private static CldrDataSupplier transformValues(
        CldrDataSupplier src, CldrValue target, CldrValue source) {
        return AlternateLocaleData.transform(
            src, ImmutableMap.of(target.getPath(), source.getPath()));
    }

    // Attempts to transform an empty supplier with the given path pair and returns the
    // resulting exception.
    private static IllegalArgumentException transformFailure(CldrPath target, CldrPath source) {
        FakeDataSupplier src = new FakeDataSupplier();
        return assertThrows(
            IllegalArgumentException.class,
            () -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
    }

    // Creates a value for the given path, which is relative to the "//ldml/" root.
    private static CldrValue ldml(String path, String value) {
        return CldrValue.parseValue("//ldml/" + path, value);
    }
}

View file

@ -0,0 +1,101 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nullable;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableList;
/**
 * Tests for {@code FilteredData}: values may be kept, removed (by returning {@code null} from
 * the filter) or replaced, but a replacement whose distinguishing path differs from the
 * original is rejected with an {@link IllegalArgumentException}.
 */
@RunWith(JUnit4.class)
public class FilteredDataTest {
    @Test
    public void testSimple() {
        CldrValue kept =
            ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar");
        CldrValue dropped =
            ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "US$");
        // Same path for both values; only the value itself differs.
        CldrValue original =
            ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "YYY");
        CldrValue substitute =
            ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZZ");

        CldrData source = CldrDataSupplier.forValues(ImmutableList.of(kept, dropped, original));
        CldrData filtered = new FilteredData(source) {
            @Nullable @Override protected CldrValue filter(CldrValue value) {
                // Returning null removes the value from the filtered data.
                if (value.equals(dropped)) {
                    return null;
                }
                if (value.equals(original)) {
                    return substitute;
                }
                return value;
            }
        };

        // Visiting the filtered data only sees the kept and substituted values.
        List<CldrValue> seen = new ArrayList<>();
        filtered.accept(ARBITRARY, value -> seen.add(value));
        assertThat(seen).containsExactly(kept, substitute).inOrder();

        // Direct lookup by path reflects the same filtering.
        assertThat(filtered.get(dropped.getPath())).isNull();
        assertThat(filtered.get(kept.getPath())).isEqualTo(kept);
        assertThat(filtered.get(original.getPath())).isEqualTo(substitute);
    }

    @Test
    public void testBadReplacementPath() {
        // The replacement differs from the original in its final path element.
        assertReplacementRejected(
            ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "VALUE"),
            ldml("numbers/currencies/currency[@type=\"USD\"]/symbol", "VALUE"));
    }

    @Test
    public void testBadReplacementAttributes() {
        // The replacement differs from the original only in a path attribute value.
        assertReplacementRejected(
            ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "XXX"),
            ldml("numbers/currencies/currency[@type=\"GBP\"]/displayName", "XXX"));
    }

    /**
     * Filters data containing only {@code original} with a filter that always returns
     * {@code substitute}, and asserts that visiting the result fails with an error message
     * mentioning both values.
     */
    private static void assertReplacementRejected(CldrValue original, CldrValue substitute) {
        CldrData source = CldrDataSupplier.forValues(ImmutableList.of(original));
        CldrData filtered = new FilteredData(source) {
            @Nullable @Override protected CldrValue filter(CldrValue value) {
                return substitute;
            }
        };
        IllegalArgumentException thrown = assertThrows(
            IllegalArgumentException.class, () -> filtered.accept(ARBITRARY, v -> {}));
        assertThat(thrown).hasMessageThat().contains("not permitted to modify distinguishing paths");
        assertThat(thrown).hasMessageThat().contains(original.toString());
        assertThat(thrown).hasMessageThat().contains(substitute.toString());
    }

    /** Parses a CLDR value whose path is {@code path} prefixed with {@code "//ldml/"}. */
    private static CldrValue ldml(String path, String value) {
        String fullPath = "//ldml/" + path;
        return CldrValue.parseValue(fullPath, value);
    }
}

View file

@ -107,4 +107,4 @@ public class IcuDataTest {
icuData.replace(fooBar, "another-value");
assertThat(icuData.get(fooBar)).containsExactly(value2);
}
}
}

View file

@ -0,0 +1,141 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.truth.Truth.assertThat;
import static org.unicode.cldr.api.CldrData.PathOrder.ARBITRARY;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
/**
 * Tests for the pseudo-locale data ("en_XA" and "ar_XB") produced by
 * {@code PseudoLocales.addPseudoLocalesTo()} from fake "en" locale data.
 *
 * <p>Several expectations below are golden strings; they are compared exactly, so they must
 * not be reformatted or normalized.
 */
@RunWith(JUnit4.class)
public class PseudoLocalesTest {
// Checks the "en_XA" output: three of the five source paths appear with transformed values,
// while the two paths whose source value is "Skipped" are absent from the expected output.
@Test
public void testExpansion() {
// Target and source values.
CldrPath included =
ldmlPath("localeDisplayNames/languages/language[@type=\"xx\"]");
CldrPath excluded =
ldmlPath("localeDisplayNames/localeDisplayPattern/localePattern[@alt=\"testing\"]");
CldrPath pattern =
ldmlPath("dates/timeZoneNames/hourFormat[@alt=\"testing\"]");
CldrPath narrow =
ldmlPath("dates/fields/field[@type=\"sun-narrow\"]/relative[@type=\"0\"]");
CldrPath inherited =
ldmlPath("dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard");
FakeDataSupplier src = new FakeDataSupplier()
.addLocaleData("en",
value(included, "{Hello} {0} {World} 100x"),
value(excluded, "Skipped"),
value(pattern, "'plus' HH:mm; 'minus' HH:mm"),
value(narrow, "Skipped"))
.addInheritedData("en",
value(inherited, "UTC"));
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
// Both pseudo locale IDs must be advertised by the wrapped supplier.
assertThat(pseudo.getAvailableLocaleIds()).containsAtLeast("en_XA", "ar_XB");
// The pseudo locale should combine both explicit and inherited data from 'en'.
CldrData unresolved = pseudo.getDataForLocale("en_XA", UNRESOLVED);
assertValuesUnordered(unresolved,
// Note how {n} placeholders are not affected, but digits elsewhere are.
value(included, "[{Ĥéļļö}{0}{Ŵöŕļð} ①⓪⓪ẋ one two three]"),
// Note the quoting of any padding added to a pattern string.
value(pattern, "['þļûš' HH:mm; 'ɱîñûš' HH:mm 'one' 'two' 'three' 'four']"),
// Value obtained from the resolved "en" data is here in unresolved data.
value(inherited, "[ÛŢÇ one]"));
}
// This tests behaviour expected by Android (previously patched in earlier ICU versions).
// https://android-review.googlesource.com/c/platform/external/cldr/+/689949
// In particular the use of "ALM" (U+061c) rather than "RLM" (U+200F) as the BiDi marker.
@Test
public void testBidi() {
// Target and source values (same as above but not including the skipped paths).
CldrPath included =
ldmlPath("localeDisplayNames/languages/language[@type=\"xx\"]");
CldrPath pattern =
ldmlPath("dates/timeZoneNames/hourFormat[@alt=\"testing\"]");
CldrPath inherited =
ldmlPath("dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard");
FakeDataSupplier src = new FakeDataSupplier()
.addLocaleData("en",
value(included, "{Hello} {0} {World} 100x"),
value(pattern, "'plus' HH:mm; 'minus' HH:mm"))
.addInheritedData("en",
value(inherited, "UTC"));
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
// The pseudo locale should combine both explicit and inherited data from 'en'.
CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
// These are a kind of golden data test because it's super hard to really reason about
// what should be coming out (note how direction markers are added for the 'x' in 100x).
// In each expected value the affected text is wrapped as U+061C U+202E <text> U+202C U+061C.
assertValuesUnordered(unresolved,
value(included,
"{\u061C\u202EHello\u202C\u061C} {0}"
+ " {\u061C\u202EWorld\u202C\u061C}"
+ " 100\u061C\u202Ex\u202C\u061C"),
value(pattern,
"'\u061C\u202Eplus\u202C\u061C' HH:mm;"
+ " '\u061C\u202Eminus\u202C\u061C' HH:mm"),
value(inherited, "\u061C\u202EUTC\u202C\u061C"));
}
// This tests behaviour expected by Android (previously patched in earlier ICU versions).
// https://android-review.googlesource.com/c/platform/external/cldr/+/689949
@Test
public void testLatinNumbering() {
// The inherited "latn" default numbering system value is expected to appear unchanged in
// the "ar_XB" data.
CldrValue latn = value(ldmlPath("numbers/defaultNumberingSystem"), "latn");
FakeDataSupplier src = new FakeDataSupplier().addInheritedData("en", latn);
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
CldrData unresolved = pseudo.getDataForLocale("ar_XB", UNRESOLVED);
assertValuesUnordered(unresolved, latn);
}
// Checks that the source exemplar value ("[ignored]") does not appear in either pseudo
// locale; each locale is expected to carry the fixed exemplar string asserted below.
@Test
public void testExemplars() {
CldrPath exemplarsPath = ldmlPath("characters/exemplarCharacters[@type=\"auxiliary\"]");
FakeDataSupplier src =
new FakeDataSupplier().addLocaleData("en", value(exemplarsPath, "[ignored]"));
CldrDataSupplier pseudo = PseudoLocales.addPseudoLocalesTo(src);
assertValuesUnordered(pseudo.getDataForLocale("ar_XB", UNRESOLVED),
value(exemplarsPath, "[a b c d e f g h i j k l m n o p q r s t u v w x y z]"));
assertValuesUnordered(pseudo.getDataForLocale("en_XA", UNRESOLVED),
value(exemplarsPath,
"[a å b ƀ c ç d ð e é f ƒ g ĝ h ĥ i î j ĵ k ķ l ļ m ɱ"
+ " n ñ o ö p þ q ǫ r ŕ s š t ţ u û v ṽ w ŵ x ẋ y ý z ž]"));
}
// Asserts that visiting the given data (in ARBITRARY order) yields exactly the given values,
// ignoring the order in which they are visited.
public static void assertValuesUnordered(CldrData data, CldrValue... values) {
Set<CldrValue> captured = new HashSet<>();
data.accept(ARBITRARY, captured::add);
assertThat(captured).containsExactlyElementsIn(values);
}
// Parses a distinguishing path from the given suffix, prefixed with "//ldml/".
private static CldrPath ldmlPath(String path) {
return CldrPath.parseDistinguishingPath("//ldml/" + path);
}
// Creates a value for the given path by parsing the path's string form with the given value.
private static CldrValue value(CldrPath path, String value) {
return CldrValue.parseValue(path.toString(), value);
}
}

View file

@ -17,38 +17,20 @@ import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.RbValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeResult;
import org.unicode.icu.tool.cldrtoicu.testing.FakeTransformer;
import com.google.common.collect.ImmutableList;
@RunWith(JUnit4.class)
public class AbstractPathValueMapperTest {
// Verifies that the name and fallback flag passed to generateIcuData() are reflected in the
// returned (otherwise empty) data.
@Test
public void testNameAndIcuFallback() {
    IcuData withoutFallback = new FakeMapper().generateIcuData("foo", false);
    IcuData withFallback = new FakeMapper().generateIcuData("bar", true);

    // No results were added to either mapper, so neither has any paths.
    assertThat(withoutFallback).getPaths().isEmpty();
    assertThat(withoutFallback).hasName("foo");
    assertThat(withoutFallback).hasFallback(false);

    assertThat(withFallback).getPaths().isEmpty();
    assertThat(withFallback).hasName("bar");
    assertThat(withFallback).hasFallback(true);
}
@Test
public void testUngroupedConcatenation() {
FakeMapper mapper = new FakeMapper();
mapper.addUngroupedResult("foo/bar", "one", "two");
mapper.addUngroupedResult("foo/baz", "other", "path");
mapper.addUngroupedResult("foo/bar", "three", "four");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("foo/bar", singletonValues("one", "two", "three", "four"));
@ -61,7 +43,7 @@ public class AbstractPathValueMapperTest {
mapper.addGroupedResult("foo/bar", "one", "two");
mapper.addGroupedResult("foo/baz", "other", "path");
mapper.addGroupedResult("foo/bar", "three", "four");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData)
@ -89,25 +71,25 @@ public class AbstractPathValueMapperTest {
.addResult(explicit1)
.addResult(explicit2)
.addResult(explicit3)
.generateIcuData("foo", false);
.addIcuData("foo");
assertThat(noFallback).hasValuesFor("foo/bar", singletonValues("one", "two", "three"));
// Missing explicit results trigger fallbacks.
IcuData firstFallback = new FakeMapper(transformer)
.addResult(explicit2)
.addResult(explicit3)
.generateIcuData("foo", false);
.addIcuData("foo");
assertThat(firstFallback).hasValuesFor("foo/bar", singletonValues("<ONE>", "two", "three"));
// Fallbacks can appear in any part of the result sequence.
IcuData lastFallbacks = new FakeMapper(transformer)
.addResult(explicit1)
.generateIcuData("foo", false);
.addIcuData("foo");
assertThat(lastFallbacks)
.hasValuesFor("foo/bar", singletonValues("one", "<TWO>", "<THREE>"));
// Without a single result to "seed" the fallback group, nothing is emitted.
IcuData allFallbacks = new FakeMapper(transformer).generateIcuData("foo", false);
IcuData allFallbacks = new FakeMapper(transformer).addIcuData("foo");
assertThat(allFallbacks).getPaths().isEmpty();
}
@ -119,7 +101,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("foo/alias-1", "start", "/alias/target[1]", "end");
mapper.addUngroupedResult("foo/alias-2", "start", "/alias/target[2]", "end");
mapper.addUngroupedResult("alias/target", "first", "second", "third");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(5);
assertThat(icuData)
@ -142,7 +124,7 @@ public class AbstractPathValueMapperTest {
mapper.addGroupedResult("foo/bar", "/alias/target[1]");
mapper.addUngroupedResult("alias/target", "first", "second");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData)
.hasValuesFor("foo/bar",
@ -157,7 +139,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("foo/bar:alias", "/alias/target");
mapper.addUngroupedResult("foo/bar", "/alias/target");
mapper.addUngroupedResult("alias/target", "alias-value");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).getPaths().hasSize(3);
assertThat(icuData).hasValuesFor("foo/bar:alias", singletonValues("/alias/target"));
@ -172,7 +154,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("first/alias", "hello");
mapper.addUngroupedResult("foo/bar", "/first/alias", "/last/alias");
mapper.addUngroupedResult("last/alias", "world");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world"));
}
@ -184,7 +166,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("alias/target", "hello");
mapper.addUngroupedResult("foo/bar", "/alias/target[0]", "/alias/target[1]");
mapper.addUngroupedResult("alias/target", "world");
IcuData icuData = mapper.generateIcuData("foo", false);
IcuData icuData = mapper.addIcuData("foo");
assertThat(icuData).hasValuesFor("foo/bar", singletonValues("hello", "world"));
}
@ -195,7 +177,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("alias/target", "value");
mapper.addUngroupedResult("foo/bar", "/no-such-alias/target");
IllegalArgumentException e =
assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("no such alias value");
assertThat(e).hasMessageThat().contains("/no-such-alias/target");
}
@ -206,7 +188,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("alias/target", "value");
mapper.addUngroupedResult("foo/bar", "/alias/target[1]");
IllegalArgumentException e =
assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("out of bounds");
assertThat(e).hasMessageThat().contains("/alias/target[1]");
}
@ -218,7 +200,7 @@ public class AbstractPathValueMapperTest {
mapper.addUngroupedResult("other/alias", "/other/alias");
mapper.addUngroupedResult("foo/bar", "/alias/target");
IllegalStateException e =
assertThrows(IllegalStateException.class, () -> mapper.generateIcuData("foo", false));
assertThrows(IllegalStateException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("recursive alias resolution is not supported");
}
@ -227,7 +209,7 @@ public class AbstractPathValueMapperTest {
FakeMapper mapper = new FakeMapper();
mapper.addUngroupedResult("foo/bar:alias", "first", "second");
IllegalArgumentException e =
assertThrows(IllegalArgumentException.class, () -> mapper.generateIcuData("foo", false));
assertThrows(IllegalArgumentException.class, () -> mapper.addIcuData("foo"));
assertThat(e).hasMessageThat().contains("explicit aliases must be singleton values");
assertThat(e).hasMessageThat().contains("foo/bar:alias");
}
@ -248,25 +230,6 @@ public class AbstractPathValueMapperTest {
}
};
// A stub transformer for tests: both transform() overloads fail if invoked, and no fallback
// results are ever supplied. (Mockito could do this too, but it is not yet a project
// dependency.)
private final PathValueTransformer transformer =
    new PathValueTransformer() {
        @Override
        public ImmutableList<Result> transform(CldrValue value) {
            throw unexpectedCall();
        }

        @Override
        public ImmutableList<Result> transform(CldrValue value, DynamicVars vars) {
            throw unexpectedCall();
        }

        @Override
        public ImmutableList<Result> getFallbackResultsFor(RbPath rbPath, DynamicVars vars) {
            // TODO: Test fallbacks.
            return ImmutableList.of();
        }

        // Builds the exception thrown by the transform() overloads.
        private UnsupportedOperationException unexpectedCall() {
            return new UnsupportedOperationException("should not be called by test");
        }
    };
// This preserves insertion order in a well defined way (good for testing alias order).
private final List<Result> fakeResults = new ArrayList<>();
@ -278,6 +241,13 @@ public class AbstractPathValueMapperTest {
super(EXPLODING_DATA, transformer);
}
// Test convenience: creates a new IcuData for the given locale ID, populates it via
// addIcuData(IcuData) and returns it so that tests can assert on it directly.
IcuData addIcuData(String localeId) {
    IcuData populated = new IcuData(localeId, true);
    addIcuData(populated);
    return populated;
}
FakeMapper addUngroupedResult(String path, String... values) {
int index = fakeResults.size() + 1;
return addResult(FakeResult.of(path, index, false, values));

View file

@ -205,12 +205,12 @@ public class Bcp47MapperTest {
// Only the type-map paths/values are split into the timezone data.
assertThat(tzData).getPaths().hasSize(4);
assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped");
assertThat(tzData).hasValuesFor("/typeMap/tz/foo_bar", "unescaped");
// TODO: Check this is correct! Having foo/bar as the value rather than foo:bar seems wrong.
assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar");
assertThat(tzData).hasValuesFor("/typeAlias/tz/hello_world", "foo_bar");
// TODO: Raise bug - having alias target "foo/bar" not match the key "foo:bar" is a bug!
assertThat(tzData).hasValuesFor("/typeMap/tz/\"foo:bar\"", "escaped");
assertThat(tzData).hasValuesFor("/typeAlias/tz/\"hello:world\"", "foo/bar");
}
private static CldrData cldrData(CldrValue... values) {

View file

@ -50,7 +50,8 @@ public class BreakIteratorMapperTest {
suppression(SENTENCE_BREAK, "Alt.", ++idx),
suppression(SENTENCE_BREAK, "Approx.", ++idx));
IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
BreakIteratorMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
@ -72,7 +73,8 @@ public class BreakIteratorMapperTest {
suppression(LINE_BREAK, "Bar", ++idx),
suppression(LINE_BREAK, "Baz", ++idx));
IcuData icuData = BreakIteratorMapper.process("en", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
BreakIteratorMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/exceptions/SentenceBreak:array",
@ -91,7 +93,8 @@ public class BreakIteratorMapperTest {
dictionary("foo", "<foo deps>"),
dictionary("bar", "<bar deps>"));
IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials));
IcuData icuData = new IcuData("xx", true);
BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/dictionaries/foo:process(dependency)", "<foo deps>");
@ -104,7 +107,8 @@ public class BreakIteratorMapperTest {
boundaries(GRAPHEME, "<grapheme deps>", null),
boundaries(SENTENCE, "<sentence deps>", "altName"));
IcuData icuData = BreakIteratorMapper.process("en", cldrData(), Optional.of(specials));
IcuData icuData = new IcuData("xx", true);
BreakIteratorMapper.process(icuData, cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData)

View file

@ -22,14 +22,17 @@ import com.google.common.base.Joiner;
public class CollationMapperTest {
@Test
public void testEmpty() {
IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.empty());
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData(), Optional.empty());
assertThat(icuData).hasName("xx");
assertThat(icuData).hasFallback(true);
assertThat(icuData).getPaths().isEmpty();
// Root gets a couple of special paths added to it due to the need to work around a CLDR
// data bug.
IcuData rootData = CollationMapper.process("root", cldrData(), Optional.empty());
IcuData rootData = new IcuData("root", true);
CollationMapper.process(rootData, cldrData(), Optional.empty());
assertThat(rootData).hasName("root");
assertThat(rootData).hasFallback(true);
assertThat(rootData).getPaths().hasSize(2);
@ -42,7 +45,8 @@ public class CollationMapperTest {
CldrData cldrData =
cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/collations/default", "any value");
}
@ -61,7 +65,8 @@ public class CollationMapperTest {
collationRule("foo", "alt2", "Second alt rule"),
collationRule("foo", null, "First rule"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString());
assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
@ -78,7 +83,8 @@ public class CollationMapperTest {
"# And more comments to be stripped",
"And another value"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
"Here is a value",
"And another value");
@ -109,7 +115,8 @@ public class CollationMapperTest {
+ "\uD83D\uDE19",
" <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));
IcuData icuData = CollationMapper.process("xx", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
@ -131,7 +138,8 @@ public class CollationMapperTest {
CldrValue.parseValue("//ldml/special/icu:UCARules[@icu:uca_rules=\"special rule\"]", ""),
CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));
IcuData icuData = CollationMapper.process("xx", cldrData(), Optional.of(specials));
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData(), Optional.of(specials));
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");

View file

@ -4,6 +4,8 @@ package org.unicode.icu.tool.cldrtoicu.mapper;
import static com.google.common.truth.Truth.assertThat;
import static java.util.Optional.empty;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.cldr.api.CldrValue.parseValue;
import static org.unicode.icu.tool.cldrtoicu.testing.AssertUtils.assertThrows;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
@ -14,8 +16,6 @@ import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
@ -38,9 +38,7 @@ public class LocaleMapperTest {
ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "Bar"),
simpleResult("/durationUnits/foo", "Bar"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/durationUnits/foo", "Bar");
}
@ -55,9 +53,7 @@ public class LocaleMapperTest {
src.addLocaleData(
"zz", ldml("units/durationUnit[@type=\"foo\"]/durationUnitPattern", "ZZ"));
IcuData icuData =
LocaleMapper.process("yy", src, empty(), transformer, empty());
IcuData icuData = process("yy");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/durationUnits/foo", "YY");
}
@ -73,8 +69,7 @@ public class LocaleMapperTest {
ldml("localeDisplayNames/keys/key[@type=\"sometype\"]", "Value"),
simpleResult("/Keys/sometype", "Value"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
// The 2nd mapping is not used because it does not appear in the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@ -95,8 +90,7 @@ public class LocaleMapperTest {
ldml("numbers/currencies/currency[@type=\"USD\"]/displayName", "US Dollar"),
simpleResult("/Currencies/USD", 2, "US Dollar"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
// Now the inherited mapping is used because the path appeared for the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@ -121,8 +115,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
// Now the inherited mapping is used because the path appeared for the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@ -141,8 +134,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"one\"]", "Bar"),
simpleResult("/calendar/foo/availableFormats/bar/one", "Bar"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
// Now the inherited mapping is used because the path appeared for the unresolved CldrData.
assertThat(icuData).getPaths().hasSize(1);
@ -177,8 +169,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
@ -246,8 +237,7 @@ public class LocaleMapperTest {
+ "/availableFormats/dateFormatItem[@id=\"bar\"][@count=\"many\"]", "Child-2"),
simpleResult("/calendar/foo/availableFormats/bar/<HIDDEN>", 2, "Child-2"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/calendar/foo/availableFormats/bar", "Parent");
@ -255,9 +245,7 @@ public class LocaleMapperTest {
@Test
public void testDefaultCalendar() {
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, Optional.of("pastafarian"));
IcuData icuData = process("xx", Optional.of("pastafarian"));
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/calendar/default", "pastafarian");
}
@ -302,8 +290,7 @@ public class LocaleMapperTest {
format("dateTime", "short", "twelve"),
simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve"));
IcuData icuData =
LocaleMapper.process("xx", src, empty(), transformer, empty());
IcuData icuData = process("xx");
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/calendar/foo/DateTimePatterns",
@ -351,10 +338,7 @@ public class LocaleMapperTest {
format("dateTime", "short", "twelve"),
simpleResult("/calendar/foo/DateTimePatterns", 12, "twelve"));
IllegalStateException e = assertThrows(
IllegalStateException.class,
() -> LocaleMapper.process("xx", src, empty(), transformer, empty()));
IllegalStateException e = assertThrows(IllegalStateException.class, () -> process("xx"));
assertThat(e).hasMessageThat().contains("unexpected");
assertThat(e).hasMessageThat().contains("/calendar/foo/DateTimePatterns");
}
@ -368,6 +352,24 @@ public class LocaleMapperTest {
type, length, pattern));
}
// ---- Helper methods ----
/** Runs the locale mapper for {@code localeId} without specifying a default calendar. */
IcuData process(String localeId) {
    Optional<String> noDefaultCalendar = empty();
    return process(localeId, noDefaultCalendar);
}
/**
 * Runs {@code LocaleMapper.process()} for {@code localeId} against the test's fake data
 * supplier and transformer, and returns the newly created {@link IcuData} it populated.
 *
 * @param localeId the locale whose unresolved and resolved data are fetched from {@code src}.
 * @param defCalendar the optional default calendar passed through to the mapper.
 */
IcuData process(String localeId, Optional<String> defCalendar) {
    IcuData mapped = new IcuData(localeId, true);
    LocaleMapper.process(
        mapped,
        src.getDataForLocale(localeId, UNRESOLVED),
        src.getDataForLocale(localeId, RESOLVED),
        empty(),
        transformer,
        defCalendar);
    return mapped;
}
private void addMapping(String locale, CldrValue value, Result... results) {
src.addLocaleData(locale, value);
transformer.addResults(value, results);
@ -386,16 +388,12 @@ public class LocaleMapperTest {
return FakeResult.of(path, index, false, value);
}
/** Wraps the given values, in the order given, as a {@code CldrData} instance. */
private static CldrData cldrData(CldrValue... values) {
    CldrData data = CldrDataSupplier.forValues(Arrays.asList(values));
    return data;
}
/** Creates a value for the given {@code //ldml/}-relative path with an empty string value. */
private static CldrValue ldml(String path) {
    String noValue = "";
    return ldml(path, noValue);
}
/**
 * Parses a CLDR value for a path given relative to {@code "//ldml/"}.
 *
 * @param path the path suffix appended to the {@code "//ldml/"} prefix.
 * @param value the value to associate with the path.
 * @return the parsed value carrying {@code value}.
 */
private static CldrValue ldml(String path, String value) {
    // Pass the caller's value through. A hard-coded "" here silently discards it, and a
    // second return statement after it would be unreachable.
    return parseValue("//ldml/" + path, value);
}
private static RbValue[] singletonValues(String... values) {

View file

@ -55,7 +55,8 @@ public class RbnfMapperTest {
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "1", "oh-=%first-set=;", ++idx),
rbnfRule(SPELLOUT_RULES, "2d-year", PRIVATE, "10", "=%first-set=;", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
RbnfMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules",
// Double-% prefix for "private" access.
@ -82,7 +83,8 @@ public class RbnfMapperTest {
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "NaN", "seven;", ++idx),
rbnfRule(SPELLOUT_RULES, "second-set", PUBLIC, "0", "eight;", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
RbnfMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules",
// Single-% prefix for "public" access.
@ -115,7 +117,8 @@ public class RbnfMapperTest {
rbnfRule(ORDINAL_RULES, "digits-ordinal", PUBLIC, "0",
"=#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.of(specials));
IcuData icuData = new IcuData("xx", true);
RbnfMapper.process(icuData, cldrData, Optional.of(specials));
assertThat(icuData).hasValuesFor("/RBNFRules/OrdinalRules",
RbValue.of("%digits-ordinal:"),
@ -146,7 +149,8 @@ public class RbnfMapperTest {
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k3", "Ü Umlaut", ++idx),
rbnfRule(SPELLOUT_RULES, "escaping", PUBLIC, "k4", "\uD83D\uDE03 Smiley", ++idx));
IcuData icuData = RbnfMapper.process("en", cldrData, Optional.empty());
IcuData icuData = new IcuData("xx", true);
RbnfMapper.process(icuData, cldrData, Optional.empty());
assertThat(icuData).hasValuesFor("/RBNFRules/SpelloutRules",
RbValue.of("%escaping:"),