ICU-22773 Faster generation (4x) with multithreading

This commit is contained in:
Mihai Nita 2024-12-09 14:09:27 -08:00
parent 2fa8a0908c
commit 515d0a7393
6 changed files with 42 additions and 5 deletions

View file

@ -23,6 +23,7 @@ public class Cldr2Icu {
convert.setLocaleIdFilter(options.localeIdFilter);
convert.setIncludePseudoLocales(options.includePseudoLocales);
convert.setEmitReport(options.emitReport);
convert.setParallel(options.parallel);
convert.init();
convert.execute();

View file

@ -144,6 +144,11 @@ class Cldr2IcuCliOptions {
private static final String XML_CONFIG_DEFAULT = "${icuDir}/tools/cldr/cldr-to-icu/config.xml";
String xmlConfig;
private static final String PARALLEL = "parallel";
private static final String PARALLEL_DESC = "Run the generation in parallel (multithreaded), to make it faster.";
private static final String PARALLEL_DEFAULT = "false";
boolean parallel;
// These must be kept in sync with getOptions().
private static final Options options = new Options()
.addOption(Option.builder()
@ -254,6 +259,10 @@ class Cldr2IcuCliOptions {
.argName("path")
.desc(descWithDefault(XML_CONFIG_DESC, XML_CONFIG_DEFAULT))
.build())
.addOption(Option.builder()
.longOpt(PARALLEL)
.desc(descWithDefault(PARALLEL_DESC, PARALLEL_DEFAULT))
.build())
;
void processArgs(String[] args) {
@ -288,6 +297,7 @@ class Cldr2IcuCliOptions {
emitReport = cli.hasOption(EMIT_REPORT);
forceDelete = cli.hasOption(FORCE_DELETE);
xmlConfig = cli.getOptionValue(XML_CONFIG, expandFolders(XML_CONFIG_DEFAULT));
parallel = cli.hasOption(PARALLEL);
if (cli.hasOption(OUTPUT_TYPES_LIST)) {
OutputType[] outTypesToSort = OutputType.values();

View file

@ -47,6 +47,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
private Optional<String> cldrVersion = Optional.empty();
private CldrDraftStatus minimumDraftStatus = CldrDraftStatus.CONTRIBUTED;
private boolean emitReport = false;
private boolean parallel = false;
private final SetMultimap<IcuLocaleDir, String> localeIdsMap = TreeMultimap.create();
private final Table<IcuLocaleDir, String, String> forcedAliases = TreeBasedTable.create();
private final Table<IcuLocaleDir, String, String> forcedParents = TreeBasedTable.create();
@ -111,6 +112,11 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
return this;
}
/**
* Sets whether the conversion should generate data in parallel (multithreaded).
*
* @param parallel the value stored in this builder's {@code parallel} field.
* @return this builder, so that calls can be chained.
*/
public Builder setParallel(boolean parallel) {
this.parallel = parallel;
return this;
}
public Builder addLocaleIds(IcuLocaleDir dir, Iterable<String> localeIds) {
localeIdsMap.putAll(dir, localeIds);
return this;
@ -138,6 +144,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
private final IcuVersionInfo versionInfo;
private final CldrDraftStatus minimumDraftStatus;
private final boolean emitReport;
private final boolean parallel;
private final ImmutableSet<String> allLocaleIds;
private final ImmutableSetMultimap<IcuLocaleDir, String> localeIdsMap;
private final ImmutableTable<IcuLocaleDir, String, String> forcedAliases;
@ -161,6 +168,7 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
builder.cldrVersion.orElse(CldrDataSupplier.getCldrVersionString()));
this.minimumDraftStatus = checkNotNull(builder.minimumDraftStatus);
this.emitReport = builder.emitReport;
this.parallel = builder.parallel;
// getAllLocaleIds() returns the union of all the specified IDs in the map.
this.allLocaleIds = ImmutableSet.copyOf(builder.localeIdsMap.values());
this.localeIdsMap = ImmutableSetMultimap.copyOf(builder.localeIdsMap);
@ -202,6 +210,11 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
return emitReport;
}
/**
* Returns the value of the {@code parallel} flag held by this configuration.
*/
@Override
public boolean parallel() {
return parallel;
}
@Override
public ImmutableMap<String, String> getForcedAliases(IcuLocaleDir dir) {
return forcedAliases.row(dir);

View file

@ -288,22 +288,26 @@ public final class LdmlConverter {
Path baseDir = config.getOutputDir();
System.out.println("processing standard ldml files");
for (String id : config.getAllLocaleIds()) {
Stream<String> localeStream = config.getAllLocaleIds().stream();
if (config.parallel()) {
localeStream = localeStream.parallel();
}
localeStream.forEach(id -> {
// Skip "target" IDs that are aliases (they are handled later).
if (!availableIds.contains(id)) {
continue;
return;
}
// TODO: Remove the following skip when ICU-20997 is fixed
if (id.contains("VALENCIA") || id.contains("TARASK")) {
System.out.println("(skipping " + id + " until ICU-20997 is fixed)");
continue;
return;
}
// Now that former CLDR seed locales are in common, there are some language
// variants that are not at a high enough coverage level to pick up.
// TODO need a better way of handling this.
if (id.contains("POLYTON")) {
System.out.println("(skipping " + id + ", insufficient coverage level)");
continue;
return;
}
IcuData icuData = new IcuData(id, true);
@ -365,7 +369,7 @@ public final class LdmlConverter {
writtenLocaleIds.put(dir, id);
}
}
}
});
System.out.println("processing alias ldml files");
for (IcuLocaleDir dir : splitDirs) {

View file

@ -148,4 +148,9 @@ public interface LdmlConverterConfig {
* Whether to emit a summary report for debug purposes after conversion is complete.
*/
boolean emitReport();
/**
* Whether to generate data in parallel (using multithreading).
*
* @return {@code true} if data should be generated in parallel, {@code false} otherwise.
*/
boolean parallel();
}

View file

@ -136,6 +136,10 @@ public final class ConvertIcuDataTask extends Task {
config.setEmitReport(emit);
}
/**
* Sets the parallel (multithreaded) generation flag by forwarding it to {@code config}.
*
* @param parallel the flag value passed through to {@code config.setParallel}.
*/
public void setParallel(boolean parallel) {
config.setParallel(parallel);
}
public static final class LocaleIds extends Task {
private ImmutableSet<String> ids;