ICU-20693 Tidyups and small fixes for ICU conversion code

This commit is contained in:
David Beaumont 2020-01-20 01:13:20 +01:00 committed by David Beaumont
parent 2b3129db44
commit 6a0e9ce0cd
11 changed files with 184 additions and 72 deletions

View file

@ -19,7 +19,11 @@ Important directories
<CLDR_DIR> = The top-level directory for the CLDR production data (typically
the "production" directory in the staging repository).
<OUT_DIR> = The output directory into which ICU data files should be written.
Note that you should not attempt to use data from the CLDR project directory
(were the CLDR API code exists) for conversion into ICU data. The process now
relies on a pre-processing step, and the CLDR data must come from the separate
"staging" repository (i.e. https://github.com/unicode-org/cldr-staging) or be
pre-processed locally into a different directory.
Initial Setup
@ -31,22 +35,45 @@ based system, this should be as simple as:
$ sudo apt-get install maven ant
See also lib/README.txt for instructions on how to install the CLDR JAR files
which contains the CLDR API used by these tools. This step will only need to
be repeated when you update the CLDR project you are using.
You also need to follow the instructions in lib/README.txt to install the CLDR
JAR files, which contain the CLDR API used by these tools. This step will only
need to be repeated if you update the code in the CLDR project you are using.
Generating all ICU data
-----------------------
First edit the Ant build file to
$ export CLDR_DIR="<CLDR_DIR>"
$ ant -f build-icu-data.xml
$ CLDR_DIR=<CLDR_DIR> ant -f build-icu-data.xml
Other Examples (assuming CLDR_DIR is set)
-----------------------------------------
* Outputting a subset of the supplemental data into a specified directory:
$ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DoutputTypes=plurals,dayPeriods
Note: Output types can be listed with mixedCase, lower_underscore or UPPER_UNDERSCORE.
Pass '-DoutputTypes=help' to see the full list.
* Outputting only a subset of locale IDs (and all the supplemental data):
$ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DlocaleIdFilter='(zh|yue).*'
* Overriding the default CLDR version string (which normally matches the CLDR library code):
$ ant -f build-icu-data.xml -DcldrVersion="36.1"
See build-icu-data.xml for documentation of all options and additional customization.
Running unit tests
------------------
$ mvn test -DCLDR_DIR=<CLDR_DIR>
$ mvn test -DCLDR_DIR="$CLDR_DIR"
Importing and running from an IDE

View file

@ -38,6 +38,9 @@
<!-- The directory in which the additional ICU XML data is stored. -->
<property name="specialsDir" value="${basedir}/../../../icu4c/source/data/xml"/>
<!-- An override for the CLDR version string to be used in generated data. -->
<property name="cldrVersion" value=""/>
<!-- The minimum draft status for CLDR data to be used in the conversion. See
CldrDraftStatus for more details. -->
<property name="minDraftStatus" value="contributed"/>
@ -64,11 +67,7 @@
produces locale data for curr/, lang/, main/, region/, unit/, zone/ but NOT
coll/, brkitr/ or rbnf/).
You can also specify by DTD type (e.g. dtdBcp47, dtdSupplemental or dtdLdml)
which is still not quite directly associated with output directories either,
since some supplemental data is also written to the curr/ directory.
See LdmlConverter.OutputType for the full list of valid types. -->
Pass in the value "HELP" (or any invalid value) to see the full list of types. -->
<!-- TODO: Find out what common use cases are and use them. -->
<property name="outputTypes" value=""/>
@ -95,9 +94,9 @@
</classpath>
</taskdef>
<convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}"
localeIdFilter="${localeIdFilter}" includePseudoLocales="${includePseudoLocales}"
emitReport="${emitReport}">
outputTypes="${outputTypes}" cldrVersion="${cldrVersion}"
minimalDraftStatus="${minDraftStatus}" localeIdFilter="${localeIdFilter}"
includePseudoLocales="${includePseudoLocales}" emitReport="${emitReport}">
<!-- The primary set of locale IDs to be generated by default. The IDs in this list are
automatically expanded to include default scripts and all available regions. The
@ -224,12 +223,16 @@
<!-- Collation data is large, but also more sharable than other data, which is why there
are a number of aliases and parent remappings for this directory. -->
<directory dir="coll" inheritLanguageSubtag="bs_Cyrl, sr_Latn, zh_Hant">
<!-- This alias is to avoid needing to copy and maintain the same collation data for
"zh" and "yue". The maximized versions of "yue_Hans" is "yue_Hans_CN" (vs
<!-- These aliases are to avoid needing to copy and maintain the same collation data
for "zh" and "yue". The maximized versions of "yue_Hans" is "yue_Hans_CN" (vs
"zh_Hans_CN"), and for "yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the
aliases are effectively just rewriting the base language. -->
<forcedAlias source="yue_Hans" target="zh_Hans"/>
<forcedAlias source="yue" target="zh_Hant"/>
<forcedAlias source="yue_Hant" target="zh_Hant"/>
<forcedAlias source="yue_CN" target="zh_Hans"/>
<forcedAlias source="yue_Hans" target="zh_Hans"/>
<forcedAlias source="yue_Hans_CN" target="zh_Hans"/>
<!-- TODO: Find out and document this properly. -->
<forcedAlias source="sr_ME" target="sr_Cyrl_ME"/>
@ -258,8 +261,8 @@
ta, te, th, tk, to, tr,
// U-Z
ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans,
yue, zh_CN, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans
yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
</localeIds>
</directory>

View file

@ -54,8 +54,10 @@ $ mvn install:install-file \
-DlocalRepositoryPath=. \
-Dfile=<CLDR_ROOT>/tools/java/libs/utilities.jar
And if you have updated one of these libraries, run:
And if you have updated one of these libraries then from the main project directory
(i.e. the parent of this directory) run:
$ mvn dependency:purge-local-repository -DsnapshotsOnly=true
If you choose to update the version number, then remember to update the root pom.xml.
If you choose to update the version number of the snapshot, then remember to update the
root pom.xml, but this is unlikely to be necessary.

View file

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.stream.Collectors.joining;
import java.io.PrintWriter;
@ -9,17 +10,20 @@ import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.unicode.cldr.api.CldrDataSupplier;
/**
* Stores any explicit locale relationships for a single directory (e.g. "lang" or "coll").
* This class just reflects a concise version of the "%%Parent and %%ALIAS" paths set in files and
* allows them to be written to the dependency graph files in each ICU data directory.
*/
final class DependencyGraph {
private final String cldrVersion;
private final Map<String, String> parentMap = new TreeMap<>();
private final Map<String, String> aliasMap = new TreeMap<>();
public DependencyGraph(String cldrVersion) {
this.cldrVersion = checkNotNull(cldrVersion);
}
void addParent(String localeId, String parentId) {
// Aliases take priority (since they can be forced and will replace empty files). Note
// however that this only happens in a tiny number of places due to the somewhat "hacky"
@ -61,7 +65,7 @@ final class DependencyGraph {
void writeJsonTo(PrintWriter out, List<String> fileHeader) {
fileHeader.forEach(s -> out.println("// " + s));
out.println();
out.format("{\n \"cldrVersion\": \"%s\"", CldrDataSupplier.getCldrVersionString());
out.format("{\n \"cldrVersion\": \"%s\"", cldrVersion);
writeMap(out, "aliases", aliasMap);
writeMap(out, "parents", parentMap);
out.append("\n}\n");

View file

@ -4,6 +4,7 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.stream.Collectors.joining;
import java.nio.file.Files;
import java.nio.file.Path;
@ -12,6 +13,7 @@ import java.util.Arrays;
import java.util.Optional;
import java.util.Set;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDraftStatus;
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
@ -20,6 +22,7 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.collect.TreeBasedTable;
import com.google.common.collect.TreeMultimap;
@ -41,6 +44,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
private Path specialsDir =
DEFAULT_ICU_DIR.map(d -> d.resolve("icu4c/source/data/xml")).orElse(null);
private ImmutableSet<OutputType> outputTypes = OutputType.ALL;
private Optional<String> cldrVersion = Optional.empty();
private CldrDraftStatus minimumDraftStatus = CldrDraftStatus.CONTRIBUTED;
private boolean emitReport = false;
private final SetMultimap<IcuLocaleDir, String> localeIdsMap = TreeMultimap.create();
@ -77,6 +81,13 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
return this;
}
public Builder setCldrVersion(String version) {
if (!version.isEmpty()) {
this.cldrVersion = Optional.of(version);
}
return this;
}
public void setMinimumDraftStatus(CldrDraftStatus minimumDraftStatus) {
this.minimumDraftStatus = checkNotNull(minimumDraftStatus);
}
@ -110,6 +121,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
private final Path outputDir;
private final Path specialsDir;
private final ImmutableSet<OutputType> outputTypes;
private final String cldrVersion;
private final CldrDraftStatus minimumDraftStatus;
private final boolean emitReport;
private final ImmutableSet<String> allLocaleIds;
@ -129,6 +141,8 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
checkArgument(!this.outputTypes.isEmpty(),
"must specify at least one output type to be generated (possible values are: %s)",
Arrays.asList(OutputType.values()));
this.cldrVersion =
builder.cldrVersion.orElse(CldrDataSupplier.getCldrVersionString());
this.minimumDraftStatus = checkNotNull(builder.minimumDraftStatus);
this.emitReport = builder.emitReport;
// getAllLocaleIds() returns the union of all the specified IDs in the map.
@ -157,6 +171,11 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
return specialsDir;
}
@Override
public String getCldrVersion() {
return cldrVersion;
}
@Override
public CldrDraftStatus getMinimumDraftStatus() {
return minimumDraftStatus;

View file

@ -267,9 +267,14 @@ public final class LdmlConverter {
.filter(t -> t.getCldrType() == LDML)
.flatMap(t -> TYPE_TO_DIR.get(t).stream())
.collect(toImmutableList());
if (splitDirs.isEmpty()) {
return;
}
String cldrVersion = config.getCldrVersion();
Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph()));
splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion)));
SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
Path baseDir = config.getOutputDir();
@ -286,7 +291,7 @@ public final class LdmlConverter {
CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
BreakIteratorMapper.process(icuData, unresolved, specials);
CollationMapper.process(icuData, unresolved, specials);
CollationMapper.process(icuData, unresolved, specials, cldrVersion);
RbnfMapper.process(icuData, unresolved, specials);
CldrData resolved = src.getDataForLocale(id, RESOLVED);
@ -332,7 +337,7 @@ public final class LdmlConverter {
});
if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
splitData.setVersion(CldrDataSupplier.getCldrVersionString());
splitData.setVersion(cldrVersion);
write(splitData, outDir, false);
writtenLocaleIds.put(dir, id);
}
@ -510,7 +515,8 @@ public final class LdmlConverter {
// A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
// supplemental data XML files.
if (addCldrVersion) {
icuData.add(RB_CLDR_VERSION, CldrDataSupplier.getCldrVersionString());
// Not the same path as used by "setVersion()"
icuData.add(RB_CLDR_VERSION, config.getCldrVersion());
}
write(icuData, dir);
}
@ -532,7 +538,7 @@ public final class LdmlConverter {
} else {
// These empty files only exist because the target of an alias has a parent locale
// which is itself not in the set of written ICU files. An "indirect alias target".
icuData.setVersion(CldrDataSupplier.getCldrVersionString());
icuData.setVersion(config.getCldrVersion());
}
write(icuData, dir, false);
}

View file

@ -76,6 +76,12 @@ public interface LdmlConverterConfig {
*/
Path getOutputDir();
/**
* Returns a CLDR version String (e.g. {@code "36.1"}) according to either the specified option
* or (as a fallback) the version specified by the CLDR library against which this code is run.
*/
String getCldrVersion();
/** Returns the minimal draft status for CLDR data to be converted. */
CldrDraftStatus getMinimumDraftStatus();

View file

@ -7,6 +7,7 @@ import static com.google.common.base.CharMatcher.is;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableTable.toImmutableTable;
@ -17,12 +18,11 @@ import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
@ -48,9 +48,8 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Table;
import com.google.common.collect.Sets;
import com.google.common.collect.Table.Cell;
import com.google.common.collect.Tables;
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
public final class ConvertIcuDataTask extends Task {
@ -86,6 +85,11 @@ public final class ConvertIcuDataTask extends Task {
this.cldrPath = checkNotNull(path);
}
@SuppressWarnings("unused")
public void setCldrVersion(String cldrVersion) {
config.setCldrVersion(cldrVersion);
}
@SuppressWarnings("unused")
public void setMinimalDraftStatus(String status) {
minimumDraftStatus = resolve(CldrDraftStatus.class, status);
@ -228,9 +232,45 @@ public final class ConvertIcuDataTask extends Task {
@SuppressWarnings("unused")
public void addConfiguredDirectory(Directory filter) {
perDirectoryIds.putAll(filter.dir, filter.localeIds.ids);
checkState(!perDirectoryIds.containsKey(filter.dir),
"directory %s specified twice", filter.dir);
ImmutableSet<String> ids = filter.localeIds.ids;
perDirectoryIds.putAll(filter.dir, ids);
// Check that any locale IDs marked to inherit the base language (instead of root) are
// listed in the set of generated locales.
inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag);
if (!ids.containsAll(filter.inheritLanguageSubtag)) {
log(String.format(
"WARNING: Locale IDs listed in 'inheritLanguageSubtag' should also be listed "
+ "in <localeIds> for that directory (%s): %s",
filter.dir, String.join(", ", Sets.difference(filter.inheritLanguageSubtag, ids))));
perDirectoryIds.putAll(filter.dir, filter.inheritLanguageSubtag);
}
// Check that locales specified for forced aliases in this directory are also listed in
// the set of generated locales.
filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target));
Set<String> sourceIds =
filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet());
if (!ids.containsAll(sourceIds)) {
Set<String> missingIds = Sets.difference(sourceIds, ids);
log(String.format(
"WARNING: Locale IDs listed as sources of a <forcedAlias> should also be listed "
+ "in <localeIds> for that directory (%s): %s",
filter.dir, String.join(", ", missingIds)));
perDirectoryIds.putAll(filter.dir, missingIds);
}
Set<String> targetIds =
filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet());
if (!ids.containsAll(targetIds)) {
Set<String> missingIds = Sets.difference(targetIds, ids);
log(String.format(
"WARNING: Locale IDs listed as targets of a <forcedAlias> should also be listed "
+ "in <localeIds> for that directory (%s): %s",
filter.dir, String.join(", ", missingIds)));
perDirectoryIds.putAll(filter.dir, missingIds);
}
}
// Aliases on the outside are applied to all directories.
@ -322,10 +362,6 @@ public final class ConvertIcuDataTask extends Task {
return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
}
private static Optional<IcuLocaleDir> resolveDir(String name) {
return !name.isEmpty() ? Optional.of(resolve(IcuLocaleDir.class, name)) : Optional.empty();
}
private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
checkArgument(!name.isEmpty(), "enumeration name cannot be empty");
checkArgument(VALID_ENUM_CHAR.matchesAllOf(name),

View file

@ -12,7 +12,6 @@ import java.util.Optional;
import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrData.PrefixVisitor;
import org.unicode.cldr.api.CldrDataSupplier;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
@ -62,13 +61,14 @@ public final class CollationMapper {
*
* @param icuData the ICU data to be filled.
* @param cldrData the unresolved CLDR data to process.
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
* @param icuSpecialData additional ICU data (in the "icu:" namespace).
* @param cldrVersion version string to add to ICU data.
* @return IcuData containing RBNF data for the given locale ID.
*/
public static IcuData process(
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData, String cldrVersion) {
CollationVisitor visitor = new CollationVisitor(icuData);
CollationVisitor visitor = new CollationVisitor(icuData, cldrVersion);
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
cldrData.accept(DTD, visitor);
return visitor.icuData;
@ -76,9 +76,11 @@ public final class CollationMapper {
final static class CollationVisitor implements PrefixVisitor {
private final IcuData icuData;
private final String cldrVersion;
CollationVisitor(IcuData icuData) {
CollationVisitor(IcuData icuData, String cldrVersion) {
this.icuData = checkNotNull(icuData);
this.cldrVersion = checkNotNull(cldrVersion);
// Super special hack case because the XML data is a bit broken for the root collation
// data (there's an empty <collation> element that's a non-leaf element and thus not
// visited, but we should add an empty sequence to the output data.
@ -86,7 +88,7 @@ public final class CollationMapper {
if (icuData.getName().equals("root")) {
icuData.replace(RB_STANDARD_SEQUENCE, "");
// TODO: Collation versioning probably needs to be improved.
icuData.replace(RB_STANDARD_VERSION, CldrDataSupplier.getCldrVersionString());
icuData.replace(RB_STANDARD_VERSION, cldrVersion);
}
}
@ -121,9 +123,7 @@ public final class CollationMapper {
.map(CollationMapper::removeComment)
.filter(s -> !s.isEmpty())::iterator);
icuData.replace(rbPath, rules);
icuData.replace(
RbPath.of("collations", type, "Version"),
CldrDataSupplier.getCldrVersionString());
icuData.replace(RbPath.of("collations", type, "Version"), cldrVersion);
}
} else if (DEFAULT_COLLATION.matchesSuffixOf(p)) {
icuData.add(RB_COLLATIONS_DEFAULT, v.getValue());

View file

@ -21,6 +21,7 @@ import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableTable;
@RunWith(JUnit4.class)
public class AlternateLocaleDataTest {
@ -45,14 +46,16 @@ public class AlternateLocaleDataTest {
FakeDataSupplier src = new FakeDataSupplier()
.addLocaleData("xx", target, source, other)
.addInheritedData("xx", inherited);
CldrDataSupplier transformed =
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
CldrDataSupplier transformed = AlternateLocaleData.transform(
src,
ImmutableMap.of(target.getPath(), source.getPath()),
ImmutableTable.of());
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
assertValuesUnordered(unresolved, altValue, source, other);
assertValuesUnordered(resolved, altValue, source, other, inherited);
assertValuesUnordered(unresolved, altValue, other);
assertValuesUnordered(resolved, altValue, other, inherited);
}
@Test
@ -64,8 +67,10 @@ public class AlternateLocaleDataTest {
ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target);
CldrDataSupplier transformed =
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
CldrDataSupplier transformed = AlternateLocaleData.transform(
src,
ImmutableMap.of(target.getPath(), source.getPath()),
ImmutableTable.of());
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
@ -85,14 +90,15 @@ public class AlternateLocaleDataTest {
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source);
CldrDataSupplier transformed =
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()),
ImmutableTable.of());
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
// No change because there's nothing to replace.
assertValuesUnordered(unresolved, source);
assertValuesUnordered(resolved, source);
// We still remove the source even if there was no target.
assertValuesUnordered(unresolved);
assertValuesUnordered(resolved);
}
@Test
@ -106,7 +112,8 @@ public class AlternateLocaleDataTest {
FakeDataSupplier src = new FakeDataSupplier();
IllegalArgumentException e = assertThrows(
IllegalArgumentException.class,
() -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
() -> AlternateLocaleData.transform(
src, ImmutableMap.of(target, source), ImmutableTable.of()));
assertThat(e).hasMessageThat().contains("alternate paths must have the same namespace");
assertThat(e).hasMessageThat().contains(target.toString());
assertThat(e).hasMessageThat().contains(source.toString());
@ -134,7 +141,8 @@ public class AlternateLocaleDataTest {
FakeDataSupplier src = new FakeDataSupplier();
IllegalArgumentException e = assertThrows(
IllegalArgumentException.class,
() -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
() -> AlternateLocaleData.transform(
src, ImmutableMap.of(target, source), ImmutableTable.of()));
assertThat(e).hasMessageThat().contains("only locale data (LDML) is supported");
// At least one of the paths should be in the error message, so look for common substring.
assertThat(e).hasMessageThat().contains("/weekData/firstDay[@day=\"sun\"]");

View file

@ -2,7 +2,6 @@
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;
import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
import java.util.Arrays;
@ -20,10 +19,12 @@ import com.google.common.base.Joiner;
@RunWith(JUnit4.class)
public class CollationMapperTest {
private static final String CLDR_VERSION = "1.23.4";
@Test
public void testEmpty() {
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData(), Optional.empty());
CollationMapper.process(icuData, cldrData(), Optional.empty(), CLDR_VERSION);
assertThat(icuData).hasName("xx");
assertThat(icuData).hasFallback(true);
@ -32,11 +33,11 @@ public class CollationMapperTest {
// Root gets a couple of special paths added to it due to the need to work around a CLDR
// data bug.
IcuData rootData = new IcuData("root", true);
CollationMapper.process(rootData, cldrData(), Optional.empty());
CollationMapper.process(rootData, cldrData(), Optional.empty(), CLDR_VERSION);
assertThat(rootData).hasName("root");
assertThat(rootData).hasFallback(true);
assertThat(rootData).getPaths().hasSize(2);
assertThat(rootData).hasValuesFor("/collations/standard/Version", getCldrVersionString());
assertThat(rootData).hasValuesFor("/collations/standard/Version", CLDR_VERSION);
assertThat(rootData).hasEmptyValue("/collations/standard/Sequence");
}
@ -46,7 +47,7 @@ public class CollationMapperTest {
cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
assertThat(icuData).getPaths().hasSize(1);
assertThat(icuData).hasValuesFor("/collations/default", "any value");
}
@ -66,9 +67,9 @@ public class CollationMapperTest {
collationRule("foo", null, "First rule"));
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString());
assertThat(icuData).hasValuesFor("/collations/foo/Version", CLDR_VERSION);
assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
}
@ -84,7 +85,7 @@ public class CollationMapperTest {
"And another value"));
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
"Here is a value",
"And another value");
@ -116,10 +117,10 @@ public class CollationMapperTest {
" <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData, Optional.empty());
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
assertThat(icuData).hasValuesFor("/collations/emoji/Version", CLDR_VERSION);
assertThat(icuData).hasValuesFor("/collations/emoji/Sequence",
"& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
+ "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
@ -139,7 +140,7 @@ public class CollationMapperTest {
CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));
IcuData icuData = new IcuData("xx", true);
CollationMapper.process(icuData, cldrData(), Optional.of(specials));
CollationMapper.process(icuData, cldrData(), Optional.of(specials), CLDR_VERSION);
assertThat(icuData).getPaths().hasSize(2);
assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");