ICU-21569 LSTM Part 1- data file and build tool

See #1688
This commit is contained in:
Frank Yung-Fong Tang 2021-04-23 21:28:30 +00:00
parent 4689706386
commit f3f24f1423
15 changed files with 284230 additions and 2 deletions

View file

@ -25,6 +25,7 @@ def generate(config, io, common_vars):
requests += generate_confusables(config, io, common_vars)
requests += generate_conversion_mappings(config, io, common_vars)
requests += generate_brkitr_brk(config, io, common_vars)
requests += generate_brkitr_lstm(config, io, common_vars)
requests += generate_stringprep(config, io, common_vars)
requests += generate_brkitr_dictionaries(config, io, common_vars)
requests += generate_normalization(config, io, common_vars)
@ -180,7 +181,7 @@ def generate_brkitr_brk(config, io, common_vars):
RepeatedExecutionRequest(
name = "brkitr_brk",
category = "brkitr_rules",
dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout"), DepTarget("lstm_res")],
input_files = input_files,
output_files = output_files,
tool = IcuTool("genbrk"),
@ -457,6 +458,32 @@ def generate_translit(config, io, common_vars):
]
def generate_brkitr_lstm(config, io, common_vars):
    """Create the genrb request for the break-iterator LSTM text files.

    Each file matched by ``brkitr/lstm/*.txt`` is listed as an input and
    paired with an output named ``brkitr/<basename without .txt>.res``.
    Returns a one-element list holding a request named "lstm_res" in the
    "brkitr_lstm" category.
    """
    # Number of leading characters ("brkitr/lstm/") removed from each
    # input filename to obtain its basename.
    prefix_len = len("brkitr/lstm/")

    txt_inputs = [InFile(path) for path in io.glob("brkitr/lstm/*.txt")]
    basenames = [in_file.filename[prefix_len:] for in_file in txt_inputs]
    # base[:-4] drops the last four characters, i.e. the ".txt" that the
    # glob pattern above requires.
    res_outputs = [OutFile("brkitr/%s.res" % base[:-4]) for base in basenames]

    genrb_args = (
        "-s {IN_DIR}/brkitr/lstm -d {OUT_DIR}/brkitr -i {OUT_DIR} "
        "-k {INPUT_BASENAME}"
    )
    request = RepeatedOrSingleExecutionRequest(
        name = "lstm_res",
        category = "brkitr_lstm",
        dep_targets = [],
        input_files = txt_inputs,
        output_files = res_outputs,
        tool = IcuTool("genrb"),
        args = genrb_args,
        format_with = {},
        repeat_with = {
            "INPUT_BASENAME": utils.SpaceSeparatedList(basenames)
        }
    )
    return [request]
def generate_tree(
config,
io,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -22,4 +22,8 @@ root{
Mymr:process(dependency){"burmesedict.dict"}
Thai:process(dependency){"thaidict.dict"}
}
lstm{
Thai:process(dependency){"Thai_graphclust_model4_heavy.res"}
Mymr:process(dependency){"Burmese_graphclust_model5_heavy.res"}
}
}

View file

@ -273,7 +273,10 @@ def _preprocess_file_filters(requests, config, io):
default_filter_json = "exclude" if config.strategy == "additive" else "include"
for category in all_categories:
filter_json = default_filter_json
# Figure out the correct filter to create
# Special default for category "brkitr_lstm" as "exclude" for now.
if "brkitr_lstm" == category:
filter_json = "exclude"
# Figure out the correct filter to create
if "featureFilters" in json_data and category in json_data["featureFilters"]:
filter_json = json_data["featureFilters"][category]
if filter_json == "include" and "localeFilter" in json_data and category.endswith("_tree"):