ICU-20693 Supporting dependency graph generation (first draft)

See #909
This commit is contained in:
David Beaumont 2019-11-06 20:47:06 +00:00 committed by David Beaumont
parent 4c74b3456a
commit 6c41b4b24d
2 changed files with 113 additions and 5 deletions

View file

@ -0,0 +1,79 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static java.util.stream.Collectors.joining;
import java.io.PrintWriter;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.unicode.cldr.api.CldrDataSupplier;
/**
 * Stores any explicit locale relationships for a single directory (e.g. "lang" or "coll").
 *
 * <p>This class just reflects a concise version of the "%%Parent" and "%%ALIAS" paths set in files
 * and allows them to be written to the dependency graph files in each ICU data directory. A locale
 * ID is held in at most one of the two mappings at any time, with aliases taking priority.
 */
final class DependencyGraph {
    // TreeMap keeps entries sorted by locale ID, so the emitted JSON has a stable order.
    private final Map<String, String> parentMap = new TreeMap<>();
    private final Map<String, String> aliasMap = new TreeMap<>();

    /**
     * Records an explicit parent for the given locale ID, unless an alias has already been
     * recorded for that ID (in which case the call is ignored).
     *
     * @param localeId the locale ID whose parent is being recorded.
     * @param parentId the locale ID of the explicit parent.
     */
    void addParent(String localeId, String parentId) {
        // Aliases take priority (since they can be forced and will replace empty files). Note
        // however that this only happens in a tiny number of places due to the somewhat "hacky"
        // forced aliases, and in future it's perfectly possible that there would never be an
        // overlap, and this code could just prohibit overlap between alias and parent mappings.
        if (aliasMap.containsKey(localeId)) {
            return;
        }
        parentMap.put(localeId, parentId);
    }

    /**
     * Records an alias from the source locale ID to the target locale ID, discarding any parent
     * mapping previously recorded for the source ID.
     *
     * @param sourceId the locale ID being aliased.
     * @param targetId the locale ID the alias points to.
     */
    void addAlias(String sourceId, String targetId) {
        parentMap.remove(sourceId);
        aliasMap.put(sourceId, targetId);
    }

    /**
     * Outputs a JSON dictionary containing the parent and alias mappings to the given writer. The
     * output contains non-JSON line comments and is of the form:
     * <pre>{@code
     * // <copyright message>
     *
     * {
     *   "cldrVersion": "<version>",
     *   "aliases": {
     *     "<source>": "<target>",
     *     ...
     *   },
     *   "parents": {
     *     "<id>": "<parent>",
     *     ...
     *   }
     * }
     * }</pre>
     * where all values (other than the version) are locale IDs. The "aliases" and "parents"
     * entries are omitted entirely when the corresponding mapping is empty.
     *
     * <p>Anything reading the produced files must strip the line comments prior to processing the
     * JSON data. Line comments only appear as a contiguous block in the header (followed by one
     * blank line), so comment processing can stop at the first non-comment line (i.e. the first
     * bare '{').
     *
     * <p>The writer is flushed but not closed; closing it remains the caller's responsibility.
     *
     * @param out the writer to which the header and JSON data are written.
     * @param fileHeader the header lines, each of which is emitted as a "// " line comment.
     */
    void writeJsonTo(PrintWriter out, List<String> fileHeader) {
        fileHeader.forEach(s -> out.println("// " + s));
        out.println();
        out.format(
            "{\n \"cldrVersion\": \"%s\"", escapeJson(CldrDataSupplier.getCldrVersionString()));
        writeMap(out, "aliases", aliasMap);
        writeMap(out, "parents", parentMap);
        out.append("\n}\n");
        // The writer belongs to the caller (which typically manages it with try-with-resources),
        // so only push the data through rather than closing a resource this method did not open.
        out.flush();
    }

    /**
     * Appends a named JSON dictionary for the given mapping, preceded by the "," which separates
     * it from the previous top-level entry. Nothing is written if the mapping is empty.
     */
    private static void writeMap(PrintWriter out, String name, Map<String, String> map) {
        if (map.isEmpty()) {
            return;
        }
        out.append(
            map.entrySet().stream()
                .map(e -> String.format(
                    "\n \"%s\": \"%s\"", escapeJson(e.getKey()), escapeJson(e.getValue())))
                .collect(joining(",", ",\n \"" + name + "\": {", "\n }")));
    }

    /**
     * Escapes a value so it can be placed between double quotes in JSON. Quotes and backslashes
     * are backslash-escaped and control characters become "\\uXXXX" sequences; any other text
     * (including every normal locale ID) is returned unchanged.
     */
    private static String escapeJson(String s) {
        // String.valueOf() mirrors how "%s" formatting renders a null value.
        String text = String.valueOf(s);
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '"' || c == '\\') {
                escaped.append('\\').append(c);
            } else if (c < 0x20) {
                escaped.append(String.format("\\u%04x", (int) c));
            } else {
                escaped.append(c);
            }
        }
        return escaped.toString();
    }
}

View file

@ -5,6 +5,7 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
import static org.unicode.cldr.api.CldrDataType.BCP47;
@ -20,13 +21,16 @@ import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RE
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT;
import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
@ -265,6 +269,9 @@ public final class LdmlConverter {
.flatMap(t -> TYPE_TO_DIR.get(t).stream())
.collect(toImmutableList());
Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph()));
SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
Path baseDir = config.getOutputDir();
@ -294,6 +301,7 @@ public final class LdmlConverter {
splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p);
}
Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id);
// We always write base languages (even if empty).
boolean isBaseLanguage = !id.contains("_");
// Run through all directories (not just the keySet() of the split path map) since we
@ -308,13 +316,19 @@ public final class LdmlConverter {
}
continue;
}
Path outDir = baseDir.resolve(dir.getOutputDir());
IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback());
// The split data can still be empty for this directory, but that's expected.
// The split data can still be empty for this directory, but that's expected (it
// might only be written because it has an explicit parent added below).
splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p)));
// Adding a parent locale makes the data non-empty and forces it to be written.
supplementalData.getExplicitParentLocaleOf(splitData.getName())
.ifPresent(p -> splitData.add(RB_PARENT, p));
// If we add an explicit parent locale, it forces the data to be written.
parent.ifPresent(p -> {
splitData.add(RB_PARENT, p);
graphMetadata.get(dir).addParent(id, p);
});
if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
splitData.setVersion(CldrDataSupplier.getCldrVersionString());
write(splitData, outDir);
@ -326,18 +340,23 @@ public final class LdmlConverter {
for (IcuLocaleDir dir : splitDirs) {
Path outDir = baseDir.resolve(dir.getOutputDir());
Set<String> targetIds = config.getTargetLocaleIds(dir);
DependencyGraph depGraph = graphMetadata.get(dir);
// TODO: Maybe calculate alias map directly into the dependency graph?
Map<String, String> aliasMap = getAliasMap(targetIds, dir);
aliasMap.forEach((s, t) -> {
depGraph.addAlias(s, t);
writeAliasFile(s, t, outDir);
// It's only important to record which alias files are written because of forced
// aliases, but since it's harmless otherwise, we just do it unconditionally.
// Normal alias files don't affect the empty file calculation, but forced ones can.
writtenLocaleIds.put(dir, s);
writeAliasFile(s, t, outDir);
});
calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values())
.forEach(id -> writeEmptyFile(id, outDir, aliasMap.values()));
writeDependencyGraph(outDir, depGraph);
}
}
@ -513,6 +532,16 @@ public final class LdmlConverter {
return dir;
}
/**
 * Writes the given dependency graph as a UTF-8 encoded "LOCALE_DEPS.json" file in the given
 * directory.
 *
 * @param dir the directory in which the file is created.
 * @param depGraph the dependency graph whose mappings are written.
 * @throws RuntimeException if the file cannot be opened or an error occurs while writing it.
 */
private void writeDependencyGraph(Path dir, DependencyGraph depGraph) {
    try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8);
        PrintWriter out = new PrintWriter(w)) {
        depGraph.writeJsonTo(out, fileHeader);
        // PrintWriter never throws on write failures, it only records them, so the error state
        // must be checked explicitly to avoid silently leaving a truncated file behind. Note
        // that checkError() flushes the writer if it is still open, and must not be preceded by
        // an explicit flush() (flushing an already closed PrintWriter sets the error state).
        if (out.checkError()) {
            throw new IOException("error while writing dependency graph data");
        }
    } catch (IOException e) {
        throw new RuntimeException("cannot write dependency graph file: " + dir, e);
    }
}
// The set of IDs to process is:
// * any file that was written
// * any alias target (not written)