mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-20693 Tidyups and small fixes for ICU conversion code
This commit is contained in:
parent
2b3129db44
commit
6a0e9ce0cd
11 changed files with 184 additions and 72 deletions
|
@ -19,7 +19,11 @@ Important directories
|
|||
<CLDR_DIR> = The top-level directory for the CLDR production data (typically
|
||||
the "production" directory in the staging repository).
|
||||
|
||||
<OUT_DIR> = The output directory into which ICU data files should be written.
|
||||
Note that you should not attempt to use data from the CLDR project directory
|
||||
(were the CLDR API code exists) for conversion into ICU data. The process now
|
||||
relies on a pre-processing step, and the CLDR data must come from the separate
|
||||
"staging" repository (i.e. https://github.com/unicode-org/cldr-staging) or be
|
||||
pre-processed locally into a different directory.
|
||||
|
||||
|
||||
Initial Setup
|
||||
|
@ -31,22 +35,45 @@ based system, this should be as simple as:
|
|||
|
||||
$ sudo apt-get install maven ant
|
||||
|
||||
See also lib/README.txt for instructions on how to install the CLDR JAR files
|
||||
which contains the CLDR API used by these tools. This step will only need to
|
||||
be repeated when you update the CLDR project you are using.
|
||||
You also need to follow the instructions in lib/README.txt to install the CLDR
|
||||
JAR files, which contain the CLDR API used by these tools. This step will only
|
||||
need to be repeated if you update the code in the CLDR project you are using.
|
||||
|
||||
Generating all ICU data
|
||||
-----------------------
|
||||
|
||||
First edit the Ant build file to
|
||||
$ export CLDR_DIR="<CLDR_DIR>"
|
||||
$ ant -f build-icu-data.xml
|
||||
|
||||
$ CLDR_DIR=<CLDR_DIR> ant -f build-icu-data.xml
|
||||
|
||||
Other Examples (assuming CLDR_DIR is set)
|
||||
-----------------------------------------
|
||||
|
||||
* Outputting a subset of the supplemental data into a specified directory:
|
||||
|
||||
$ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DoutputTypes=plurals,dayPeriods
|
||||
|
||||
Note: Output types can be listed with mixedCase, lower_underscore or UPPER_UNDERSCORE.
|
||||
Pass '-DoutputTypes=help' to see the full list.
|
||||
|
||||
|
||||
* Outputting only a subset of locale IDs (and all the supplemental data):
|
||||
|
||||
$ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DlocaleIdFilter='(zh|yue).*'
|
||||
|
||||
|
||||
* Overriding the default CLDR version string (which normally matches the CLDR library code):
|
||||
|
||||
$ ant -f build-icu-data.xml -DcldrVersion="36.1"
|
||||
|
||||
|
||||
See build-icu-data.xml for documentation of all options and additional customization.
|
||||
|
||||
|
||||
Running unit tests
|
||||
------------------
|
||||
|
||||
$ mvn test -DCLDR_DIR=<CLDR_DIR>
|
||||
$ mvn test -DCLDR_DIR="$CLDR_DIR"
|
||||
|
||||
|
||||
Importing and running from an IDE
|
||||
|
|
|
@ -38,6 +38,9 @@
|
|||
<!-- The directory in which the additional ICU XML data is stored. -->
|
||||
<property name="specialsDir" value="${basedir}/../../../icu4c/source/data/xml"/>
|
||||
|
||||
<!-- An override for the CLDR version string to be used in generated data. -->
|
||||
<property name="cldrVersion" value=""/>
|
||||
|
||||
<!-- The minimum draft status for CLDR data to be used in the conversion. See
|
||||
CldrDraftStatus for more details. -->
|
||||
<property name="minDraftStatus" value="contributed"/>
|
||||
|
@ -64,11 +67,7 @@
|
|||
produces locale data for curr/, lang/, main/, region/, unit/, zone/ but NOT
|
||||
coll/, brkitr/ or rbnf/).
|
||||
|
||||
You can also specify by DTD type (e.g. dtdBcp47, dtdSupplemental or dtdLdml)
|
||||
which is still not quite directly associated with output directories either,
|
||||
since some supplemental data is also written to the curr/ directory.
|
||||
|
||||
See LdmlConverter.OutputType for the full list of valid types. -->
|
||||
Pass in the value "HELP" (or any invalid value) to see the full list of types. -->
|
||||
<!-- TODO: Find out what common use cases are and use them. -->
|
||||
<property name="outputTypes" value=""/>
|
||||
|
||||
|
@ -95,9 +94,9 @@
|
|||
</classpath>
|
||||
</taskdef>
|
||||
<convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
|
||||
outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}"
|
||||
localeIdFilter="${localeIdFilter}" includePseudoLocales="${includePseudoLocales}"
|
||||
emitReport="${emitReport}">
|
||||
outputTypes="${outputTypes}" cldrVersion="${cldrVersion}"
|
||||
minimalDraftStatus="${minDraftStatus}" localeIdFilter="${localeIdFilter}"
|
||||
includePseudoLocales="${includePseudoLocales}" emitReport="${emitReport}">
|
||||
|
||||
<!-- The primary set of locale IDs to be generated by default. The IDs in this list are
|
||||
automatically expanded to include default scripts and all available regions. The
|
||||
|
@ -224,12 +223,16 @@
|
|||
<!-- Collation data is large, but also more sharable than other data, which is why there
|
||||
are a number of aliases and parent remappings for this directory. -->
|
||||
<directory dir="coll" inheritLanguageSubtag="bs_Cyrl, sr_Latn, zh_Hant">
|
||||
<!-- This alias is to avoid needing to copy and maintain the same collation data for
|
||||
"zh" and "yue". The maximized versions of "yue_Hans" is "yue_Hans_CN" (vs
|
||||
<!-- These aliases are to avoid needing to copy and maintain the same collation data
|
||||
for "zh" and "yue". The maximized versions of "yue_Hans" is "yue_Hans_CN" (vs
|
||||
"zh_Hans_CN"), and for "yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the
|
||||
aliases are effectively just rewriting the base language. -->
|
||||
<forcedAlias source="yue_Hans" target="zh_Hans"/>
|
||||
<forcedAlias source="yue" target="zh_Hant"/>
|
||||
<forcedAlias source="yue_Hant" target="zh_Hant"/>
|
||||
<forcedAlias source="yue_CN" target="zh_Hans"/>
|
||||
<forcedAlias source="yue_Hans" target="zh_Hans"/>
|
||||
<forcedAlias source="yue_Hans_CN" target="zh_Hans"/>
|
||||
|
||||
<!-- TODO: Find out and document this properly. -->
|
||||
<forcedAlias source="sr_ME" target="sr_Cyrl_ME"/>
|
||||
|
||||
|
@ -258,8 +261,8 @@
|
|||
ta, te, th, tk, to, tr,
|
||||
|
||||
// U-Z
|
||||
ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans,
|
||||
yue, zh_CN, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
|
||||
ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans
|
||||
yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
|
||||
</localeIds>
|
||||
</directory>
|
||||
|
||||
|
|
|
@ -54,8 +54,10 @@ $ mvn install:install-file \
|
|||
-DlocalRepositoryPath=. \
|
||||
-Dfile=<CLDR_ROOT>/tools/java/libs/utilities.jar
|
||||
|
||||
And if you have updated one of these libraries, run:
|
||||
And if you have updated one of these libraries then from the main project directory
|
||||
(i.e. the parent of this directory) run:
|
||||
|
||||
$ mvn dependency:purge-local-repository -DsnapshotsOnly=true
|
||||
|
||||
If you choose to update the version number, then remember to update the root pom.xml.
|
||||
If you choose to update the version number of the snapshot, then remember to update the
|
||||
root pom.xml, but this is unlikely to be necessary.
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static java.util.stream.Collectors.joining;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
|
@ -9,17 +10,20 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
|
||||
/**
|
||||
* Stores any explicit locale relationships for a single directory (e.g. "lang" or "coll").
|
||||
* This class just reflects a concise version of the "%%Parent and %%ALIAS" paths set in files and
|
||||
* allows them to be written to the dependency graph files in each ICU data directory.
|
||||
*/
|
||||
final class DependencyGraph {
|
||||
private final String cldrVersion;
|
||||
private final Map<String, String> parentMap = new TreeMap<>();
|
||||
private final Map<String, String> aliasMap = new TreeMap<>();
|
||||
|
||||
public DependencyGraph(String cldrVersion) {
|
||||
this.cldrVersion = checkNotNull(cldrVersion);
|
||||
}
|
||||
|
||||
void addParent(String localeId, String parentId) {
|
||||
// Aliases take priority (since they can be forced and will replace empty files). Note
|
||||
// however that this only happens in a tiny number of places due to the somewhat "hacky"
|
||||
|
@ -61,7 +65,7 @@ final class DependencyGraph {
|
|||
void writeJsonTo(PrintWriter out, List<String> fileHeader) {
|
||||
fileHeader.forEach(s -> out.println("// " + s));
|
||||
out.println();
|
||||
out.format("{\n \"cldrVersion\": \"%s\"", CldrDataSupplier.getCldrVersionString());
|
||||
out.format("{\n \"cldrVersion\": \"%s\"", cldrVersion);
|
||||
writeMap(out, "aliases", aliasMap);
|
||||
writeMap(out, "parents", parentMap);
|
||||
out.append("\n}\n");
|
||||
|
|
|
@ -4,6 +4,7 @@ package org.unicode.icu.tool.cldrtoicu;
|
|||
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static java.util.stream.Collectors.joining;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
@ -12,6 +13,7 @@ import java.util.Arrays;
|
|||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDraftStatus;
|
||||
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
|
||||
|
||||
|
@ -20,6 +22,7 @@ import com.google.common.collect.ImmutableSet;
|
|||
import com.google.common.collect.ImmutableSetMultimap;
|
||||
import com.google.common.collect.ImmutableTable;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.collect.Table;
|
||||
import com.google.common.collect.TreeBasedTable;
|
||||
import com.google.common.collect.TreeMultimap;
|
||||
|
@ -41,6 +44,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
private Path specialsDir =
|
||||
DEFAULT_ICU_DIR.map(d -> d.resolve("icu4c/source/data/xml")).orElse(null);
|
||||
private ImmutableSet<OutputType> outputTypes = OutputType.ALL;
|
||||
private Optional<String> cldrVersion = Optional.empty();
|
||||
private CldrDraftStatus minimumDraftStatus = CldrDraftStatus.CONTRIBUTED;
|
||||
private boolean emitReport = false;
|
||||
private final SetMultimap<IcuLocaleDir, String> localeIdsMap = TreeMultimap.create();
|
||||
|
@ -77,6 +81,13 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
return this;
|
||||
}
|
||||
|
||||
public Builder setCldrVersion(String version) {
|
||||
if (!version.isEmpty()) {
|
||||
this.cldrVersion = Optional.of(version);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public void setMinimumDraftStatus(CldrDraftStatus minimumDraftStatus) {
|
||||
this.minimumDraftStatus = checkNotNull(minimumDraftStatus);
|
||||
}
|
||||
|
@ -110,6 +121,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
private final Path outputDir;
|
||||
private final Path specialsDir;
|
||||
private final ImmutableSet<OutputType> outputTypes;
|
||||
private final String cldrVersion;
|
||||
private final CldrDraftStatus minimumDraftStatus;
|
||||
private final boolean emitReport;
|
||||
private final ImmutableSet<String> allLocaleIds;
|
||||
|
@ -129,6 +141,8 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
checkArgument(!this.outputTypes.isEmpty(),
|
||||
"must specify at least one output type to be generated (possible values are: %s)",
|
||||
Arrays.asList(OutputType.values()));
|
||||
this.cldrVersion =
|
||||
builder.cldrVersion.orElse(CldrDataSupplier.getCldrVersionString());
|
||||
this.minimumDraftStatus = checkNotNull(builder.minimumDraftStatus);
|
||||
this.emitReport = builder.emitReport;
|
||||
// getAllLocaleIds() returns the union of all the specified IDs in the map.
|
||||
|
@ -157,6 +171,11 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
return specialsDir;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCldrVersion() {
|
||||
return cldrVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CldrDraftStatus getMinimumDraftStatus() {
|
||||
return minimumDraftStatus;
|
||||
|
|
|
@ -267,9 +267,14 @@ public final class LdmlConverter {
|
|||
.filter(t -> t.getCldrType() == LDML)
|
||||
.flatMap(t -> TYPE_TO_DIR.get(t).stream())
|
||||
.collect(toImmutableList());
|
||||
if (splitDirs.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String cldrVersion = config.getCldrVersion();
|
||||
|
||||
Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
|
||||
splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph()));
|
||||
splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion)));
|
||||
|
||||
SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
|
||||
Path baseDir = config.getOutputDir();
|
||||
|
@ -286,7 +291,7 @@ public final class LdmlConverter {
|
|||
CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
|
||||
|
||||
BreakIteratorMapper.process(icuData, unresolved, specials);
|
||||
CollationMapper.process(icuData, unresolved, specials);
|
||||
CollationMapper.process(icuData, unresolved, specials, cldrVersion);
|
||||
RbnfMapper.process(icuData, unresolved, specials);
|
||||
|
||||
CldrData resolved = src.getDataForLocale(id, RESOLVED);
|
||||
|
@ -332,7 +337,7 @@ public final class LdmlConverter {
|
|||
});
|
||||
|
||||
if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
|
||||
splitData.setVersion(CldrDataSupplier.getCldrVersionString());
|
||||
splitData.setVersion(cldrVersion);
|
||||
write(splitData, outDir, false);
|
||||
writtenLocaleIds.put(dir, id);
|
||||
}
|
||||
|
@ -510,7 +515,8 @@ public final class LdmlConverter {
|
|||
// A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
|
||||
// supplemental data XML files.
|
||||
if (addCldrVersion) {
|
||||
icuData.add(RB_CLDR_VERSION, CldrDataSupplier.getCldrVersionString());
|
||||
// Not the same path as used by "setVersion()"
|
||||
icuData.add(RB_CLDR_VERSION, config.getCldrVersion());
|
||||
}
|
||||
write(icuData, dir);
|
||||
}
|
||||
|
@ -532,7 +538,7 @@ public final class LdmlConverter {
|
|||
} else {
|
||||
// These empty files only exist because the target of an alias has a parent locale
|
||||
// which is itself not in the set of written ICU files. An "indirect alias target".
|
||||
icuData.setVersion(CldrDataSupplier.getCldrVersionString());
|
||||
icuData.setVersion(config.getCldrVersion());
|
||||
}
|
||||
write(icuData, dir, false);
|
||||
}
|
||||
|
|
|
@ -76,6 +76,12 @@ public interface LdmlConverterConfig {
|
|||
*/
|
||||
Path getOutputDir();
|
||||
|
||||
/**
|
||||
* Returns a CLDR version String (e.g. {@code "36.1"}) according to either the specified option
|
||||
* or (as a fallback) the version specified by the CLDR library against which this code is run.
|
||||
*/
|
||||
String getCldrVersion();
|
||||
|
||||
/** Returns the minimal draft status for CLDR data to be converted. */
|
||||
CldrDraftStatus getMinimumDraftStatus();
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import static com.google.common.base.CharMatcher.is;
|
|||
import static com.google.common.base.CharMatcher.whitespace;
|
||||
import static com.google.common.base.Preconditions.checkArgument;
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.base.Preconditions.checkState;
|
||||
import static com.google.common.collect.ImmutableList.toImmutableList;
|
||||
import static com.google.common.collect.ImmutableMap.toImmutableMap;
|
||||
import static com.google.common.collect.ImmutableTable.toImmutableTable;
|
||||
|
@ -17,12 +18,11 @@ import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
|
|||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.tools.ant.BuildException;
|
||||
import org.apache.tools.ant.Task;
|
||||
|
@ -48,9 +48,8 @@ import com.google.common.collect.ImmutableSet;
|
|||
import com.google.common.collect.ImmutableTable;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
import com.google.common.collect.Table;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.collect.Table.Cell;
|
||||
import com.google.common.collect.Tables;
|
||||
|
||||
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
|
||||
public final class ConvertIcuDataTask extends Task {
|
||||
|
@ -86,6 +85,11 @@ public final class ConvertIcuDataTask extends Task {
|
|||
this.cldrPath = checkNotNull(path);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setCldrVersion(String cldrVersion) {
|
||||
config.setCldrVersion(cldrVersion);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setMinimalDraftStatus(String status) {
|
||||
minimumDraftStatus = resolve(CldrDraftStatus.class, status);
|
||||
|
@ -228,9 +232,45 @@ public final class ConvertIcuDataTask extends Task {
|
|||
|
||||
@SuppressWarnings("unused")
|
||||
public void addConfiguredDirectory(Directory filter) {
|
||||
perDirectoryIds.putAll(filter.dir, filter.localeIds.ids);
|
||||
checkState(!perDirectoryIds.containsKey(filter.dir),
|
||||
"directory %s specified twice", filter.dir);
|
||||
ImmutableSet<String> ids = filter.localeIds.ids;
|
||||
perDirectoryIds.putAll(filter.dir, ids);
|
||||
|
||||
// Check that any locale IDs marked to inherit the base language (instead of root) are
|
||||
// listed in the set of generated locales.
|
||||
inheritLanguageSubtag.putAll(filter.dir, filter.inheritLanguageSubtag);
|
||||
if (!ids.containsAll(filter.inheritLanguageSubtag)) {
|
||||
log(String.format(
|
||||
"WARNING: Locale IDs listed in 'inheritLanguageSubtag' should also be listed "
|
||||
+ "in <localeIds> for that directory (%s): %s",
|
||||
filter.dir, String.join(", ", Sets.difference(filter.inheritLanguageSubtag, ids))));
|
||||
perDirectoryIds.putAll(filter.dir, filter.inheritLanguageSubtag);
|
||||
}
|
||||
|
||||
// Check that locales specified for forced aliases in this directory are also listed in
|
||||
// the set of generated locales.
|
||||
filter.forcedAliases.forEach(a -> config.addForcedAlias(filter.dir, a.source, a.target));
|
||||
Set<String> sourceIds =
|
||||
filter.forcedAliases.stream().map(a -> a.source).collect(Collectors.toSet());
|
||||
if (!ids.containsAll(sourceIds)) {
|
||||
Set<String> missingIds = Sets.difference(sourceIds, ids);
|
||||
log(String.format(
|
||||
"WARNING: Locale IDs listed as sources of a <forcedAlias> should also be listed "
|
||||
+ "in <localeIds> for that directory (%s): %s",
|
||||
filter.dir, String.join(", ", missingIds)));
|
||||
perDirectoryIds.putAll(filter.dir, missingIds);
|
||||
}
|
||||
Set<String> targetIds =
|
||||
filter.forcedAliases.stream().map(a -> a.target).collect(Collectors.toSet());
|
||||
if (!ids.containsAll(targetIds)) {
|
||||
Set<String> missingIds = Sets.difference(targetIds, ids);
|
||||
log(String.format(
|
||||
"WARNING: Locale IDs listed as targets of a <forcedAlias> should also be listed "
|
||||
+ "in <localeIds> for that directory (%s): %s",
|
||||
filter.dir, String.join(", ", missingIds)));
|
||||
perDirectoryIds.putAll(filter.dir, missingIds);
|
||||
}
|
||||
}
|
||||
|
||||
// Aliases on the outside are applied to all directories.
|
||||
|
@ -322,10 +362,6 @@ public final class ConvertIcuDataTask extends Task {
|
|||
return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
|
||||
}
|
||||
|
||||
private static Optional<IcuLocaleDir> resolveDir(String name) {
|
||||
return !name.isEmpty() ? Optional.of(resolve(IcuLocaleDir.class, name)) : Optional.empty();
|
||||
}
|
||||
|
||||
private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
|
||||
checkArgument(!name.isEmpty(), "enumeration name cannot be empty");
|
||||
checkArgument(VALID_ENUM_CHAR.matchesAllOf(name),
|
||||
|
|
|
@ -12,7 +12,6 @@ import java.util.Optional;
|
|||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrData.PrefixVisitor;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
import org.unicode.cldr.api.CldrPath;
|
||||
import org.unicode.cldr.api.CldrValue;
|
||||
|
@ -62,13 +61,14 @@ public final class CollationMapper {
|
|||
*
|
||||
* @param icuData the ICU data to be filled.
|
||||
* @param cldrData the unresolved CLDR data to process.
|
||||
* @param icuSpecialData additional ICU data (in the "icu:" namespace)
|
||||
* @param icuSpecialData additional ICU data (in the "icu:" namespace).
|
||||
* @param cldrVersion version string to add to ICU data.
|
||||
* @return IcuData containing RBNF data for the given locale ID.
|
||||
*/
|
||||
public static IcuData process(
|
||||
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {
|
||||
IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData, String cldrVersion) {
|
||||
|
||||
CollationVisitor visitor = new CollationVisitor(icuData);
|
||||
CollationVisitor visitor = new CollationVisitor(icuData, cldrVersion);
|
||||
icuSpecialData.ifPresent(s -> s.accept(DTD, visitor));
|
||||
cldrData.accept(DTD, visitor);
|
||||
return visitor.icuData;
|
||||
|
@ -76,9 +76,11 @@ public final class CollationMapper {
|
|||
|
||||
final static class CollationVisitor implements PrefixVisitor {
|
||||
private final IcuData icuData;
|
||||
private final String cldrVersion;
|
||||
|
||||
CollationVisitor(IcuData icuData) {
|
||||
CollationVisitor(IcuData icuData, String cldrVersion) {
|
||||
this.icuData = checkNotNull(icuData);
|
||||
this.cldrVersion = checkNotNull(cldrVersion);
|
||||
// Super special hack case because the XML data is a bit broken for the root collation
|
||||
// data (there's an empty <collation> element that's a non-leaf element and thus not
|
||||
// visited, but we should add an empty sequence to the output data.
|
||||
|
@ -86,7 +88,7 @@ public final class CollationMapper {
|
|||
if (icuData.getName().equals("root")) {
|
||||
icuData.replace(RB_STANDARD_SEQUENCE, "");
|
||||
// TODO: Collation versioning probably needs to be improved.
|
||||
icuData.replace(RB_STANDARD_VERSION, CldrDataSupplier.getCldrVersionString());
|
||||
icuData.replace(RB_STANDARD_VERSION, cldrVersion);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,9 +123,7 @@ public final class CollationMapper {
|
|||
.map(CollationMapper::removeComment)
|
||||
.filter(s -> !s.isEmpty())::iterator);
|
||||
icuData.replace(rbPath, rules);
|
||||
icuData.replace(
|
||||
RbPath.of("collations", type, "Version"),
|
||||
CldrDataSupplier.getCldrVersionString());
|
||||
icuData.replace(RbPath.of("collations", type, "Version"), cldrVersion);
|
||||
}
|
||||
} else if (DEFAULT_COLLATION.matchesSuffixOf(p)) {
|
||||
icuData.add(RB_COLLATIONS_DEFAULT, v.getValue());
|
||||
|
|
|
@ -21,6 +21,7 @@ import org.unicode.cldr.api.CldrValue;
|
|||
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableTable;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
public class AlternateLocaleDataTest {
|
||||
|
@ -45,14 +46,16 @@ public class AlternateLocaleDataTest {
|
|||
FakeDataSupplier src = new FakeDataSupplier()
|
||||
.addLocaleData("xx", target, source, other)
|
||||
.addInheritedData("xx", inherited);
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
|
||||
CldrDataSupplier transformed = AlternateLocaleData.transform(
|
||||
src,
|
||||
ImmutableMap.of(target.getPath(), source.getPath()),
|
||||
ImmutableTable.of());
|
||||
|
||||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
||||
assertValuesUnordered(unresolved, altValue, source, other);
|
||||
assertValuesUnordered(resolved, altValue, source, other, inherited);
|
||||
assertValuesUnordered(unresolved, altValue, other);
|
||||
assertValuesUnordered(resolved, altValue, other, inherited);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -64,8 +67,10 @@ public class AlternateLocaleDataTest {
|
|||
ldml("numbers/currencies/currency[@type=\"USD\"][@alt=\"short\"]/displayName", "Name");
|
||||
|
||||
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", target);
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
|
||||
CldrDataSupplier transformed = AlternateLocaleData.transform(
|
||||
src,
|
||||
ImmutableMap.of(target.getPath(), source.getPath()),
|
||||
ImmutableTable.of());
|
||||
|
||||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
@ -85,14 +90,15 @@ public class AlternateLocaleDataTest {
|
|||
|
||||
FakeDataSupplier src = new FakeDataSupplier().addLocaleData("xx", source);
|
||||
CldrDataSupplier transformed =
|
||||
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()));
|
||||
AlternateLocaleData.transform(src, ImmutableMap.of(target.getPath(), source.getPath()),
|
||||
ImmutableTable.of());
|
||||
|
||||
CldrData unresolved = transformed.getDataForLocale("xx", UNRESOLVED);
|
||||
CldrData resolved = transformed.getDataForLocale("xx", RESOLVED);
|
||||
|
||||
// No change because there's nothing to replace.
|
||||
assertValuesUnordered(unresolved, source);
|
||||
assertValuesUnordered(resolved, source);
|
||||
// We still remove the source even if there was no target.
|
||||
assertValuesUnordered(unresolved);
|
||||
assertValuesUnordered(resolved);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -106,7 +112,8 @@ public class AlternateLocaleDataTest {
|
|||
FakeDataSupplier src = new FakeDataSupplier();
|
||||
IllegalArgumentException e = assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
|
||||
() -> AlternateLocaleData.transform(
|
||||
src, ImmutableMap.of(target, source), ImmutableTable.of()));
|
||||
assertThat(e).hasMessageThat().contains("alternate paths must have the same namespace");
|
||||
assertThat(e).hasMessageThat().contains(target.toString());
|
||||
assertThat(e).hasMessageThat().contains(source.toString());
|
||||
|
@ -134,7 +141,8 @@ public class AlternateLocaleDataTest {
|
|||
FakeDataSupplier src = new FakeDataSupplier();
|
||||
IllegalArgumentException e = assertThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> AlternateLocaleData.transform(src, ImmutableMap.of(target, source)));
|
||||
() -> AlternateLocaleData.transform(
|
||||
src, ImmutableMap.of(target, source), ImmutableTable.of()));
|
||||
assertThat(e).hasMessageThat().contains("only locale data (LDML) is supported");
|
||||
// At least one of the paths should be in the error message, so look for common substring.
|
||||
assertThat(e).hasMessageThat().contains("/weekData/firstDay[@day=\"sun\"]");
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu.mapper;
|
||||
|
||||
import static org.unicode.cldr.api.CldrDataSupplier.getCldrVersionString;
|
||||
import static org.unicode.icu.tool.cldrtoicu.testing.IcuDataSubjectFactory.assertThat;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
@ -20,10 +19,12 @@ import com.google.common.base.Joiner;
|
|||
|
||||
@RunWith(JUnit4.class)
|
||||
public class CollationMapperTest {
|
||||
private static final String CLDR_VERSION = "1.23.4";
|
||||
|
||||
@Test
|
||||
public void testEmpty() {
|
||||
IcuData icuData = new IcuData("xx", true);
|
||||
CollationMapper.process(icuData, cldrData(), Optional.empty());
|
||||
CollationMapper.process(icuData, cldrData(), Optional.empty(), CLDR_VERSION);
|
||||
|
||||
assertThat(icuData).hasName("xx");
|
||||
assertThat(icuData).hasFallback(true);
|
||||
|
@ -32,11 +33,11 @@ public class CollationMapperTest {
|
|||
// Root gets a couple of special paths added to it due to the need to work around a CLDR
|
||||
// data bug.
|
||||
IcuData rootData = new IcuData("root", true);
|
||||
CollationMapper.process(rootData, cldrData(), Optional.empty());
|
||||
CollationMapper.process(rootData, cldrData(), Optional.empty(), CLDR_VERSION);
|
||||
assertThat(rootData).hasName("root");
|
||||
assertThat(rootData).hasFallback(true);
|
||||
assertThat(rootData).getPaths().hasSize(2);
|
||||
assertThat(rootData).hasValuesFor("/collations/standard/Version", getCldrVersionString());
|
||||
assertThat(rootData).hasValuesFor("/collations/standard/Version", CLDR_VERSION);
|
||||
assertThat(rootData).hasEmptyValue("/collations/standard/Sequence");
|
||||
}
|
||||
|
||||
|
@ -46,7 +47,7 @@ public class CollationMapperTest {
|
|||
cldrData(CldrValue.parseValue("//ldml/collations/defaultCollation", "any value"));
|
||||
|
||||
IcuData icuData = new IcuData("xx", true);
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty());
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
|
||||
assertThat(icuData).getPaths().hasSize(1);
|
||||
assertThat(icuData).hasValuesFor("/collations/default", "any value");
|
||||
}
|
||||
|
@ -66,9 +67,9 @@ public class CollationMapperTest {
|
|||
collationRule("foo", null, "First rule"));
|
||||
|
||||
IcuData icuData = new IcuData("xx", true);
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty());
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
|
||||
assertThat(icuData).getPaths().hasSize(2);
|
||||
assertThat(icuData).hasValuesFor("/collations/foo/Version", getCldrVersionString());
|
||||
assertThat(icuData).hasValuesFor("/collations/foo/Version", CLDR_VERSION);
|
||||
assertThat(icuData).hasValuesFor("/collations/foo/Sequence", "Second alt rule");
|
||||
}
|
||||
|
||||
|
@ -84,7 +85,7 @@ public class CollationMapperTest {
|
|||
"And another value"));
|
||||
|
||||
IcuData icuData = new IcuData("xx", true);
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty());
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
|
||||
assertThat(icuData).hasValuesFor("/collations/foo/Sequence",
|
||||
"Here is a value",
|
||||
"And another value");
|
||||
|
@ -116,10 +117,10 @@ public class CollationMapperTest {
|
|||
" <*\uD83D\uDE0B\uD83D\uDE1B\uD83D\uDE1C\uD83E\uDD2A\uD83D\uDE1D\uD83E\uDD11"));
|
||||
|
||||
IcuData icuData = new IcuData("xx", true);
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty());
|
||||
CollationMapper.process(icuData, cldrData, Optional.empty(), CLDR_VERSION);
|
||||
|
||||
assertThat(icuData).getPaths().hasSize(2);
|
||||
assertThat(icuData).hasValuesFor("/collations/emoji/Version", getCldrVersionString());
|
||||
assertThat(icuData).hasValuesFor("/collations/emoji/Version", CLDR_VERSION);
|
||||
assertThat(icuData).hasValuesFor("/collations/emoji/Sequence",
|
||||
"& [last primary ignorable]<<*\uD83E\uDDB0\uD83E\uDDB1\uD83E\uDDB3\uD83E\uDDB2"
|
||||
+ "\uD83C\uDFFB\uD83C\uDFFC\uD83C\uDFFD\uD83C\uDFFE\uD83C\uDFFF",
|
||||
|
@ -139,7 +140,7 @@ public class CollationMapperTest {
|
|||
CldrValue.parseValue("//ldml/special/icu:depends[@icu:dependency=\"special deps\"]", ""));
|
||||
|
||||
IcuData icuData = new IcuData("xx", true);
|
||||
CollationMapper.process(icuData, cldrData(), Optional.of(specials));
|
||||
CollationMapper.process(icuData, cldrData(), Optional.of(specials), CLDR_VERSION);
|
||||
assertThat(icuData).getPaths().hasSize(2);
|
||||
assertThat(icuData).hasValuesFor("UCARules:process(uca_rules)", "special rule");
|
||||
assertThat(icuData).hasValuesFor("depends:process(dependency)", "special deps");
|
||||
|
|
Loading…
Add table
Reference in a new issue