diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/DependencyGraph.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/DependencyGraph.java new file mode 100644 index 00000000000..8f6f5b3aa1c --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/DependencyGraph.java @@ -0,0 +1,79 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import static java.util.stream.Collectors.joining; + +import java.io.PrintWriter; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.unicode.cldr.api.CldrDataSupplier; + +/** + * Stores any explicit locale relationships for a single directory (e.g. "lang" or "coll"). + * This class just reflects a concise version of the "%%Parent and %%ALIAS" paths set in files and + * allows them to be written to the dependency graph files in each ICU data directory. + */ +final class DependencyGraph { + private final Map parentMap = new TreeMap<>(); + private final Map aliasMap = new TreeMap<>(); + + void addParent(String localeId, String parentId) { + // Aliases take priority (since they can be forced and will replace empty files). Note + // however that this only happens in a tiny number of places due to the somewhat "hacky" + // forced aliases, and in future it's perfectly possibly that there would never be an + // overlap, and this code could just prohibit overlap between alias and parent mappings. + if (!aliasMap.containsKey(localeId)) { + parentMap.put(localeId, parentId); + } + } + + void addAlias(String sourceId, String targetId) { + parentMap.remove(sourceId); + aliasMap.put(sourceId, targetId); + } + + /** + * Outputs a JSON dictionary containing the parent and alias mappings to the given writer. The + * output contains non-JSON line comments and is of the form: + *
{@code
+     * // <file header line>
+     * {
+     *     "cldrVersion": "<version>",
+     *     "aliases": {
+     *         "<source ID>": "<target ID>",
+     *         ...
+     *     },
+     *     "parents": {
+     *         "<locale ID>": "<parent ID>",
+     *         ...
+     *     }
+     * }
+     * }
+ * where all values (other than the version) are locale IDs. + * + *

Anything reading the produced files must strip the line comments prior to processing the + * JSON data. Line comments only appear as a contiguous block in the header, so comment + * processing can stop at the first non-comment line (i.e. the first bare '{'). + */ + void writeJsonTo(PrintWriter out, List fileHeader) { + fileHeader.forEach(s -> out.println("// " + s)); + out.println(); + out.format("{\n \"cldrVersion\": \"%s\"", CldrDataSupplier.getCldrVersionString()); + writeMap(out, "aliases", aliasMap); + writeMap(out, "parents", parentMap); + out.append("\n}\n"); + out.close(); + } + + private static void writeMap(PrintWriter out, String name, Map map) { + if (!map.isEmpty()) { + out.append( + map.entrySet().stream() + .map(e -> String.format("\n \"%s\": \"%s\"", e.getKey(), e.getValue())) + .collect(joining(",", ",\n \"" + name + "\": {", "\n }"))); + } + } +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java index 0e9c3930aa7..d009cec3d71 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java @@ -5,6 +5,7 @@ package org.unicode.icu.tool.cldrtoicu; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; import static org.unicode.cldr.api.CldrDataType.BCP47; @@ -20,13 +21,16 @@ import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RE import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT; 
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE; +import java.io.BufferedWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -265,6 +269,9 @@ public final class LdmlConverter { .flatMap(t -> TYPE_TO_DIR.get(t).stream()) .collect(toImmutableList()); + Map graphMetadata = new HashMap<>(); + splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph())); + SetMultimap writtenLocaleIds = HashMultimap.create(); Path baseDir = config.getOutputDir(); @@ -294,6 +301,7 @@ public final class LdmlConverter { splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p); } + Optional parent = supplementalData.getExplicitParentLocaleOf(id); // We always write base languages (even if empty). boolean isBaseLanguage = !id.contains("_"); // Run through all directories (not just the keySet() of the split path map) since we @@ -308,13 +316,19 @@ public final class LdmlConverter { } continue; } + Path outDir = baseDir.resolve(dir.getOutputDir()); IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback()); - // The split data can still be empty for this directory, but that's expected. + + // The split data can still be empty for this directory, but that's expected (it + // might only be written because it has an explicit parent added below). splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p))); - // Adding a parent locale makes the data non-empty and forces it to be written. - supplementalData.getExplicitParentLocaleOf(splitData.getName()) - .ifPresent(p -> splitData.add(RB_PARENT, p)); + // If we add an explicit parent locale, it forces the data to be written. 
+ parent.ifPresent(p -> { + splitData.add(RB_PARENT, p); + graphMetadata.get(dir).addParent(id, p); + }); + if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) { splitData.setVersion(CldrDataSupplier.getCldrVersionString()); write(splitData, outDir); @@ -326,18 +340,23 @@ public final class LdmlConverter { for (IcuLocaleDir dir : splitDirs) { Path outDir = baseDir.resolve(dir.getOutputDir()); Set targetIds = config.getTargetLocaleIds(dir); + DependencyGraph depGraph = graphMetadata.get(dir); + // TODO: Maybe calculate alias map directly into the dependency graph? Map aliasMap = getAliasMap(targetIds, dir); aliasMap.forEach((s, t) -> { + depGraph.addAlias(s, t); + writeAliasFile(s, t, outDir); // It's only important to record which alias files are written because of forced // aliases, but since it's harmless otherwise, we just do it unconditionally. // Normal alias files don't affect the empty file calculation, but forced ones can. writtenLocaleIds.put(dir, s); - writeAliasFile(s, t, outDir); }); calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values()) .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values())); + + writeDependencyGraph(outDir, depGraph); } } @@ -513,6 +532,16 @@ public final class LdmlConverter { return dir; } + private void writeDependencyGraph(Path dir, DependencyGraph depGraph) { + try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8); + PrintWriter out = new PrintWriter(w)) { + depGraph.writeJsonTo(out, fileHeader); + out.flush(); + } catch (IOException e) { + throw new RuntimeException("cannot write dependency graph file: " + dir, e); + } + } + // The set of IDs to process is: // * any file that was written // * any alias target (not written)