From a4dd8b370237f20c1402218d5c42d5e4124ba772 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Tue, 4 Dec 2018 19:07:27 -0800 Subject: [PATCH] ICU-10923 Refactoring in preparation for data file filtering. - Adding SpaceSeparatedList abstraction. - Adding ListRequest abstraction. - Adding separate dep_files option to SingleExecutionRequest. - Removing unused variable pkg_exclusions. - Adding IndexTxtRequest abstraction. - Moving curr/supplementalData.txt into its own target. - Changing getters to properties on Config. - Re-indenting and refactoring data/BUILDRULES.py - Adding category to request tuples. Refactoring translit. --- icu4c/source/data/BUILDRULES.py | 844 +++++++++++------- icu4c/source/data/buildtool/__init__.py | 78 +- icu4c/source/data/buildtool/__main__.py | 21 +- .../data/buildtool/renderers/makefile.py | 11 +- icu4c/source/data/buildtool/utils.py | 150 +++- icu4c/source/test/testdata/BUILDRULES.py | 40 +- 6 files changed, 723 insertions(+), 421 deletions(-) diff --git a/icu4c/source/data/BUILDRULES.py b/icu4c/source/data/BUILDRULES.py index a0489414528..6de0b3edd3c 100644 --- a/icu4c/source/data/BUILDRULES.py +++ b/icu4c/source/data/BUILDRULES.py @@ -11,7 +11,6 @@ import sys def generate(config, glob, common_vars): requests = [] - pkg_exclusions = set() if len(glob("misc/*")) == 0: print("Error: Cannot find data directory; please specify --glob_dir", file=sys.stderr) @@ -42,358 +41,523 @@ def generate(config, glob, common_vars): "{TMP_DIR}/brkitr" ] - # UConv Name Aliases - if config.has_feature("cnvalias"): - input_file = InFile("mappings/convrtrs.txt") - output_file = OutFile("cnvalias.icu") - requests += [ - SingleExecutionRequest( - name = "cnvalias", - input_files = [input_file], - output_files = [output_file], - tool = IcuTool("gencnval"), - args = "-s {IN_DIR} -d {OUT_DIR} " - "{INPUT_FILES[0]}", - format_with = {} - ) - ] + requests += generate_cnvalias(config, glob, common_vars) + requests += generate_confusables(config, glob, common_vars) + requests += generate_conversion_mappings(config, glob, common_vars) + requests += generate_brkitr_brk(config, glob, common_vars) + requests += generate_stringprep(config, glob, common_vars) + requests += generate_brkitr_dictionaries(config, glob, common_vars) + requests += generate_normalization(config, glob, common_vars) + requests += generate_coll_ucadata(config, glob, common_vars) + requests += generate_unames(config, glob, common_vars) + requests += generate_misc(config, glob, common_vars) + requests += generate_curr_supplemental(config, glob, common_vars) + requests += generate_translit(config, glob, common_vars) - # CONFUSABLES - if config.has_feature("confusables"): - txt1 = InFile("unidata/confusables.txt") - txt2 = InFile("unidata/confusablesWholeScript.txt") - cfu = OutFile("confusables.cfu") - requests += [ - SingleExecutionRequest( - name = "confusables", - input_files = [txt1, txt2, OutFile("cnvalias.icu")], - output_files = [cfu], - tool = IcuTool("gencfu"), - args = "-d {OUT_DIR} -i {OUT_DIR} " - "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} " - "-o {OUTPUT_FILES[0]}", - format_with = {} - ) - ] - - # UConv Conversion Table Files - if config.has_feature("uconv"): - input_files = [InFile(filename) for filename in glob("mappings/*.ucm")] - output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files] - # TODO: handle BUILD_SPECIAL_CNV_FILES? 
Means to add --ignore-siso-check flag to makeconv - requests += [ - RepeatedOrSingleExecutionRequest( - name = "uconv", - dep_files = [], - input_files = input_files, - output_files = output_files, - tool = IcuTool("makeconv"), - args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}", - format_with = {}, - repeat_with = { - "INPUT_FILE_PLACEHOLDER": [file.filename for file in input_files] - }, - flatten_with = { - "INPUT_FILE_PLACEHOLDER": " ".join(file.filename for file in input_files) - } - ) - ] - - # BRK Files + # FIXME: Clean this up (duplicated logic) brkitr_brk_files = [] - if config.has_feature("brkitr"): - input_files = [InFile(filename) for filename in glob("brkitr/rules/*.txt")] - output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files] - brkitr_brk_files += output_files - requests += [ - RepeatedExecutionRequest( - name = "brkitr_brk", - dep_files = [OutFile("cnvalias.icu")], - input_files = input_files, - output_files = output_files, - tool = IcuTool("genbrk"), - args = "-d {OUT_DIR} -i {OUT_DIR} " - "-c -r {IN_DIR}/{INPUT_FILE} " - "-o {OUTPUT_FILE}", - format_with = {}, - repeat_with = {} - ) - ] - - # SPP FILES - if config.has_feature("stringprep"): - input_files = [InFile(filename) for filename in glob("sprep/*.txt")] - output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files] - bundle_names = [v.filename[6:-4] for v in input_files] - requests += [ - RepeatedExecutionRequest( - name = "stringprep", - dep_files = [], - input_files = input_files, - output_files = output_files, - tool = IcuTool("gensprep"), - args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} " - "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt", - format_with = {}, - repeat_with = { - "BUNDLE_NAME": bundle_names - } - ) - ] - - # Dict Files + input_files = [InFile(filename) for filename in glob("brkitr/rules/*.txt")] + output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files] + brkitr_brk_files += output_files dict_files = [] - if config.has_feature("dictionaries"): - input_files = [InFile(filename) for filename in glob("brkitr/dictionaries/*.txt")] - output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files] - dict_files += output_files - extra_options_map = { - "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000", - "brkitr/dictionaries/cjdict.txt": "--uchars", - "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780", - "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80", - "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00" - } - extra_optionses = [extra_options_map[v.filename] for v in input_files] - requests += [ - RepeatedExecutionRequest( - name = "dictionaries", - dep_files = [], - input_files = input_files, - output_files = output_files, - tool = IcuTool("gendict"), - args = "-i {OUT_DIR} " - "-c {EXTRA_OPTIONS} " - "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}", - format_with = {}, - repeat_with = { - "EXTRA_OPTIONS": extra_optionses - } - ) - ] + input_files = [InFile(filename) for filename in glob("brkitr/dictionaries/*.txt")] + output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files] + dict_files += output_files - # NRM Files - if config.has_feature("normalization"): - input_files = [InFile(filename) for filename in glob("in/*.nrm")] - input_files.remove(InFile("in/nfc.nrm")) # nfc.nrm is pre-compiled into C++ - output_files = [OutFile(v.filename[3:]) for v in input_files] - 
requests += [ - RepeatedExecutionRequest( - name = "normalization", - dep_files = [], - input_files = input_files, - output_files = output_files, - tool = IcuTool("icupkg"), - args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}", - format_with = {}, - repeat_with = {} - ) - ] + # Res Tree Files + # (input dirname, output dirname, resfiles.mk path, mk version var, mk source var, use pool file, dep files) + requests += generate_tree(config, glob, common_vars, + "locales", + None, + "resfiles.mk", + "GENRB_CLDR_VERSION", + "GENRB_SOURCE", + True, + []) - # Collation Dependency File (ucadata.icu) - if config.has_feature("coll"): - input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type()) - output_file = OutFile("coll/ucadata.icu") - requests += [ - SingleExecutionRequest( - name = "coll_ucadata", - input_files = [input_file], - output_files = [output_file], - tool = IcuTool("icupkg"), - args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", - format_with = {} - ) - ] + requests += generate_tree(config, glob, common_vars, + "curr", + "curr", + "resfiles.mk", + "CURR_CLDR_VERSION", + "CURR_SOURCE", + True, + []) - # Unicode Character Names - if config.has_feature("unames"): - input_file = InFile("in/unames.icu") - output_file = OutFile("unames.icu") - requests += [ - SingleExecutionRequest( - name = "unames", - input_files = [input_file], - output_files = [output_file], - tool = IcuTool("icupkg"), - args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", - format_with = {} - ) - ] + requests += generate_tree(config, glob, common_vars, + "lang", + "lang", + "resfiles.mk", + "LANG_CLDR_VERSION", + "LANG_SOURCE", + True, + []) - # Misc Data Res Files - if config.has_feature("misc"): - # TODO: Treat each misc file separately - input_files = [InFile(filename) for filename in glob("misc/*.txt")] - input_basenames = [v.filename[5:] for v in input_files] - output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames] - requests += [ - RepeatedExecutionRequest( - name = "misc", - dep_files = [], - input_files = input_files, - output_files = output_files, - tool = IcuTool("genrb"), - args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} " - "-k -q " - "{INPUT_BASENAME}", - format_with = {}, - repeat_with = { - "INPUT_BASENAME": input_basenames - } - ) - ] + requests += generate_tree(config, glob, common_vars, + "region", + "region", + "resfiles.mk", + "REGION_CLDR_VERSION", + "REGION_SOURCE", + True, + []) - # Specialized Locale Data Res Files - specialized_sub_dirs = [ - # (input dirname, output dirname, resfiles.mk path, mk version var, mk source var, use pool file, dep files) - ("locales", None, "resfiles.mk", "GENRB_CLDR_VERSION", "GENRB_SOURCE", True, - []), - ("curr", "curr", "resfiles.mk", "CURR_CLDR_VERSION", "CURR_SOURCE", True, - []), - ("lang", "lang", "resfiles.mk", "LANG_CLDR_VERSION", "LANG_SOURCE", True, - []), - ("region", "region", "resfiles.mk", "REGION_CLDR_VERSION", "REGION_SOURCE", True, - []), - ("zone", "zone", "resfiles.mk", "ZONE_CLDR_VERSION", "ZONE_SOURCE", True, - []), - ("unit", "unit", "resfiles.mk", "UNIT_CLDR_VERSION", "UNIT_SOURCE", True, - []), - # TODO: We should not need timezoneTypes.res to build collation resource bundles. - # TODO: Maybe keyTypeData.res should be baked into the common library. 
- ("coll", "coll", "colfiles.mk", "COLLATION_CLDR_VERSION", "COLLATION_SOURCE", False, - [OutFile("coll/ucadata.icu"), OutFile("timezoneTypes.res"), OutFile("keyTypeData.res")]), - ("brkitr", "brkitr", "brkfiles.mk", "BRK_RES_CLDR_VERSION", "BRK_RES_SOURCE", False, - brkitr_brk_files + dict_files), - ("rbnf", "rbnf", "rbnffiles.mk", "RBNF_CLDR_VERSION", "RBNF_SOURCE", False, - []), - ("translit", "translit", "trnsfiles.mk", None, "TRANSLIT_SOURCE", False, - []) - ] + requests += generate_tree(config, glob, common_vars, + "zone", + "zone", + "resfiles.mk", + "ZONE_CLDR_VERSION", + "ZONE_SOURCE", + True, + []) - for sub_dir, out_sub_dir, resfile_name, version_var, source_var, use_pool_bundle, dep_files in specialized_sub_dirs: - out_prefix = "%s/" % out_sub_dir if out_sub_dir else "" - if config.has_feature(sub_dir): - # TODO: Clean this up for translit - if sub_dir == "translit": - input_files = [ - InFile("translit/root.txt"), - InFile("translit/en.txt"), - InFile("translit/el.txt") - ] - else: - input_files = [InFile(filename) for filename in glob("%s/*.txt" % sub_dir)] - input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files] - output_files = [ - OutFile("%s%s.res" % (out_prefix, v[:-4])) - for v in input_basenames - ] - if use_pool_bundle: - input_pool_files = [OutFile("%spool.res" % out_prefix)] - use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format( - OUT_PREFIX = out_prefix, - **common_vars - ) - requests += [ - SingleExecutionRequest( - name = "%s_pool_write" % sub_dir, - input_files = dep_files + input_files, - output_files = input_pool_files, - tool = IcuTool("genrb"), - args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " - "--writePoolBundle -k " - "{INPUT_BASENAMES_SPACED}", - format_with = { - "IN_SUB_DIR": sub_dir, - "OUT_PREFIX": out_prefix, - "INPUT_BASENAMES_SPACED": " ".join(input_basenames) - } - ), - ] - else: - input_pool_files = [] - use_pool_bundle_option = "" - requests += [ - RepeatedOrSingleExecutionRequest( - name = "%s_res" % sub_dir, - dep_files = dep_files + input_pool_files, - input_files = input_files, - output_files = output_files, - tool = IcuTool("genrb"), - args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " - "{EXTRA_OPTION} -k " - "{INPUT_BASENAME}", - format_with = { - "IN_SUB_DIR": sub_dir, - "OUT_PREFIX": out_prefix, - "EXTRA_OPTION": use_pool_bundle_option - }, - repeat_with = { - "INPUT_BASENAME": input_basenames, - }, - flatten_with = { - "INPUT_BASENAME": " ".join(input_basenames) - } - ) - ] - # Generate index txt file - if sub_dir != "translit": - # TODO: Change .mk files to .py files so they can be loaded directly. - # Alternatively, figure out a way to require reading this file altogether. - # Right now, it is required for the index list file. - # Reading these files as .py will be required for Bazel. 
- mk_values = parse_makefile("{GLOB_DIR}/{IN_SUB_DIR}/{RESFILE_NAME}".format( - IN_SUB_DIR = sub_dir, - RESFILE_NAME = resfile_name, - **common_vars - )) - cldr_version = mk_values[version_var] if version_var and sub_dir == "locales" else None - locales = [v[:-4] for v in mk_values[source_var].split()] - pkg_exclusions |= set(output_files) - set(OutFile("%s%s.res" % (out_prefix, locale)) for locale in locales) - index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format( - IN_SUB_DIR = sub_dir, - **common_vars - )) - requests += [ - PrintFileRequest( - name = "%s_index_txt" % sub_dir, - output_file = index_file_txt, - content = utils.generate_index_file(locales, cldr_version, common_vars) - ) - ] - # Generate index res file - index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format( - OUT_PREFIX = out_prefix, - **common_vars - )) - requests += [ - SingleExecutionRequest( - name = "%s_index_res" % sub_dir, - input_files = [index_file_txt], - output_files = [index_res_file], - tool = IcuTool("genrb"), - args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " - "-k " - "{INDEX_NAME}.txt", - format_with = { - "IN_SUB_DIR": sub_dir, - "OUT_PREFIX": out_prefix - } - ) - ] + requests += generate_tree(config, glob, common_vars, + "unit", + "unit", + "resfiles.mk", + "UNIT_CLDR_VERSION", + "UNIT_SOURCE", + True, + []) + + # TODO: We should not need timezoneTypes.res to build collation resource bundles. + # TODO: Maybe keyTypeData.res should be baked into the common library. + requests += generate_tree(config, glob, common_vars, + "coll", + "coll", + "colfiles.mk", + "COLLATION_CLDR_VERSION", + "COLLATION_SOURCE", + False, + [OutFile("coll/ucadata.icu"), OutFile("timezoneTypes.res"), OutFile("keyTypeData.res")]) + + requests += generate_tree(config, glob, common_vars, + "brkitr", + "brkitr", + "brkfiles.mk", + "BRK_RES_CLDR_VERSION", + "BRK_RES_SOURCE", + False, + brkitr_brk_files + dict_files) + + requests += generate_tree(config, glob, common_vars, + "rbnf", + "rbnf", + "rbnffiles.mk", + "RBNF_CLDR_VERSION", + "RBNF_SOURCE", + False, + []) - # Finally, make the package. 
- all_output_files = list(sorted(utils.get_all_output_files(requests))) - icudata_list_file = TmpFile("icudata.lst") requests += [ - PrintFileRequest( + ListRequest( name = "icudata_list", - output_file = icudata_list_file, - content = "\n".join(file.filename for file in all_output_files) - ), - VariableRequest( - name = "icudata_all_output_files", - input_files = all_output_files + [icudata_list_file] + variable_name = "icudata_all_output_files", + output_file = TmpFile("icudata.lst"), + include_tmp = False ) ] return (build_dirs, requests) + + +def generate_cnvalias(config, glob, common_vars): + # UConv Name Aliases + input_file = InFile("mappings/convrtrs.txt") + output_file = OutFile("cnvalias.icu") + return [ + SingleExecutionRequest( + name = "cnvalias", + category = "cnvalias", + dep_files = [], + input_files = [input_file], + output_files = [output_file], + tool = IcuTool("gencnval"), + args = "-s {IN_DIR} -d {OUT_DIR} " + "{INPUT_FILES[0]}", + format_with = {} + ) + ] + + +def generate_confusables(config, glob, common_vars): + # CONFUSABLES + txt1 = InFile("unidata/confusables.txt") + txt2 = InFile("unidata/confusablesWholeScript.txt") + cfu = OutFile("confusables.cfu") + return [ + SingleExecutionRequest( + name = "confusables", + category = "confusables", + dep_files = [OutFile("cnvalias.icu")], + input_files = [txt1, txt2], + output_files = [cfu], + tool = IcuTool("gencfu"), + args = "-d {OUT_DIR} -i {OUT_DIR} " + "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} " + "-o {OUTPUT_FILES[0]}", + format_with = {} + ) + ] + + +def generate_conversion_mappings(config, glob, common_vars): + # UConv Conversion Table Files + input_files = [InFile(filename) for filename in glob("mappings/*.ucm")] + output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files] + # TODO: handle BUILD_SPECIAL_CNV_FILES? 
Means to add --ignore-siso-check flag to makeconv + return [ + RepeatedOrSingleExecutionRequest( + name = "conversion_mappings", + category = "conversion_mappings", + dep_files = [], + input_files = input_files, + output_files = output_files, + tool = IcuTool("makeconv"), + args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}", + format_with = {}, + repeat_with = { + "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(file.filename for file in input_files) + } + ) + ] + + +def generate_brkitr_brk(config, glob, common_vars): + # BRK Files + input_files = [InFile(filename) for filename in glob("brkitr/rules/*.txt")] + output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files] + return [ + RepeatedExecutionRequest( + name = "brkitr_brk", + category = "brkitr_rules", + dep_files = [OutFile("cnvalias.icu")], + input_files = input_files, + output_files = output_files, + tool = IcuTool("genbrk"), + args = "-d {OUT_DIR} -i {OUT_DIR} " + "-c -r {IN_DIR}/{INPUT_FILE} " + "-o {OUTPUT_FILE}", + format_with = {}, + repeat_with = {} + ) + ] + + +def generate_stringprep(config, glob, common_vars): + # SPP FILES + input_files = [InFile(filename) for filename in glob("sprep/*.txt")] + output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files] + bundle_names = [v.filename[6:-4] for v in input_files] + return [ + RepeatedExecutionRequest( + name = "stringprep", + category = "stringprep", + dep_files = [], + input_files = input_files, + output_files = output_files, + tool = IcuTool("gensprep"), + args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} " + "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt", + format_with = {}, + repeat_with = { + "BUNDLE_NAME": bundle_names + } + ) + ] + + +def generate_brkitr_dictionaries(config, glob, common_vars): + # Dict Files + input_files = [InFile(filename) for filename in glob("brkitr/dictionaries/*.txt")] + output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files] + extra_options_map = { + "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000", + "brkitr/dictionaries/cjdict.txt": "--uchars", + "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780", + "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80", + "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00" + } + extra_optionses = [extra_options_map[v.filename] for v in input_files] + return [ + RepeatedExecutionRequest( + name = "dictionaries", + category = "brkitr_dictionaries", + dep_files = [], + input_files = input_files, + output_files = output_files, + tool = IcuTool("gendict"), + args = "-i {OUT_DIR} " + "-c {EXTRA_OPTIONS} " + "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}", + format_with = {}, + repeat_with = { + "EXTRA_OPTIONS": extra_optionses + } + ) + ] + + +def generate_normalization(config, glob, common_vars): + # NRM Files + input_files = [InFile(filename) for filename in glob("in/*.nrm")] + input_files.remove(InFile("in/nfc.nrm")) # nfc.nrm is pre-compiled into C++ + output_files = [OutFile(v.filename[3:]) for v in input_files] + return [ + RepeatedExecutionRequest( + name = "normalization", + category = "normalization", + dep_files = [], + input_files = input_files, + output_files = output_files, + tool = IcuTool("icupkg"), + args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}", + format_with = {}, + repeat_with = {} + ) + ] + + +def generate_coll_ucadata(config, glob, common_vars): + # Collation Dependency File 
(ucadata.icu) + input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type) + output_file = OutFile("coll/ucadata.icu") + return [ + SingleExecutionRequest( + name = "coll_ucadata", + category = "coll_ucadata", + dep_files = [], + input_files = [input_file], + output_files = [output_file], + tool = IcuTool("icupkg"), + args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", + format_with = {} + ) + ] + + +def generate_unames(config, glob, common_vars): + # Unicode Character Names + input_file = InFile("in/unames.icu") + output_file = OutFile("unames.icu") + return [ + SingleExecutionRequest( + name = "unames", + category = "unames", + dep_files = [], + input_files = [input_file], + output_files = [output_file], + tool = IcuTool("icupkg"), + args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}", + format_with = {} + ) + ] + + +def generate_misc(config, glob, common_vars): + # Misc Data Res Files + input_files = [InFile(filename) for filename in glob("misc/*.txt")] + input_basenames = [v.filename[5:] for v in input_files] + output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames] + return [ + RepeatedExecutionRequest( + name = "misc_res", + category = "misc", + dep_files = [], + input_files = input_files, + output_files = output_files, + tool = IcuTool("genrb"), + args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} " + "-k -q " + "{INPUT_BASENAME}", + format_with = {}, + repeat_with = { + "INPUT_BASENAME": input_basenames + } + ) + ] + + +def generate_curr_supplemental(config, glob, common_vars): + # Currency Supplemental Res File + input_file = InFile("curr/supplementalData.txt") + input_basename = "supplementalData.txt" + output_file = OutFile("curr/supplementalData.res") + return [ + SingleExecutionRequest( + name = "curr_supplemental_res", + category = "curr_supplemental", + dep_files = [], + input_files = [input_file], + output_files = [output_file], + tool = IcuTool("genrb"), + args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} " + "-k " + "{INPUT_BASENAME}", + format_with = { + "INPUT_BASENAME": input_basename + } + ) + ] + + +def generate_translit(config, glob, common_vars): + input_files = [ + InFile("translit/root.txt"), + InFile("translit/en.txt"), + InFile("translit/el.txt") + ] + input_basenames = [v.filename[9:] for v in input_files] + output_files = [ + OutFile("translit/%s.res" % v[:-4]) + for v in input_basenames + ] + return [ + RepeatedOrSingleExecutionRequest( + name = "translit_res", + category = "translit", + dep_files = [], + input_files = input_files, + output_files = output_files, + tool = IcuTool("genrb"), + args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} " + "-k " + "{INPUT_BASENAME}", + format_with = { + }, + repeat_with = { + "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames) + } + ) + ] + + +def generate_tree( + config, + glob, + common_vars, + sub_dir, + out_sub_dir, + resfile_name, + version_var, + source_var, + use_pool_bundle, + dep_files): + requests = [] + category = "%s_tree" % sub_dir + out_prefix = "%s/" % out_sub_dir if out_sub_dir else "" + # TODO: Clean this up for curr + input_files = [InFile(filename) for filename in glob("%s/*.txt" % sub_dir)] + if sub_dir == "curr": + input_files.remove(InFile("curr/supplementalData.txt")) + input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files] + output_files = [ + OutFile("%s%s.res" % (out_prefix, v[:-4])) + for v in input_basenames + ] + + # Generate Pool Bundle + if use_pool_bundle: + 
input_pool_files = [OutFile("%spool.res" % out_prefix)] + use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format( + OUT_PREFIX = out_prefix, + **common_vars + ) + requests += [ + SingleExecutionRequest( + name = "%s_pool_write" % sub_dir, + category = category, + dep_files = dep_files, + input_files = input_files, + output_files = input_pool_files, + tool = IcuTool("genrb"), + args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " + "--writePoolBundle -k " + "{INPUT_BASENAMES_SPACED}", + format_with = { + "IN_SUB_DIR": sub_dir, + "OUT_PREFIX": out_prefix, + "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames) + } + ), + ] + else: + input_pool_files = [] + use_pool_bundle_option = "" + + # Generate Res File Tree + requests += [ + RepeatedOrSingleExecutionRequest( + name = "%s_res" % sub_dir, + category = category, + dep_files = dep_files + input_pool_files, + input_files = input_files, + output_files = output_files, + tool = IcuTool("genrb"), + args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " + "{EXTRA_OPTION} -k " + "{INPUT_BASENAME}", + format_with = { + "IN_SUB_DIR": sub_dir, + "OUT_PREFIX": out_prefix, + "EXTRA_OPTION": use_pool_bundle_option + }, + repeat_with = { + "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames) + } + ) + ] + + # Generate index txt file + # TODO: Change .mk files to .py files so they can be loaded directly. + # Alternatively, figure out a way to not require reading this file altogether. + # Right now, it is required for the index list file. + # Reading these files as .py will be required for Bazel. + mk_values = parse_makefile("{GLOB_DIR}/{IN_SUB_DIR}/{RESFILE_NAME}".format( + IN_SUB_DIR = sub_dir, + RESFILE_NAME = resfile_name, + **common_vars + )) + cldr_version = mk_values[version_var] if version_var and sub_dir == "locales" else None + index_input_files = [ + InFile("%s/%s" % (sub_dir, basename)) + for basename in mk_values[source_var].split() + ] + index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format( + IN_SUB_DIR = sub_dir, + **common_vars + )) + requests += [ + IndexTxtRequest( + name = "%s_index_txt" % sub_dir, + category = category, + input_files = index_input_files, + output_file = index_file_txt, + cldr_version = cldr_version + ) + ] + + # Generate index res file + index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format( + OUT_PREFIX = out_prefix, + **common_vars + )) + requests += [ + SingleExecutionRequest( + name = "%s_index_res" % sub_dir, + category = category, + dep_files = [], + input_files = [index_file_txt], + output_files = [index_res_file], + tool = IcuTool("genrb"), + args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} " + "-k " + "{INDEX_NAME}.txt", + format_with = { + "IN_SUB_DIR": sub_dir, + "OUT_PREFIX": out_prefix + } + ) + ] + + return requests diff --git a/icu4c/source/data/buildtool/__init__.py b/icu4c/source/data/buildtool/__init__.py index 60855c07a08..cc95cb89712 100644 --- a/icu4c/source/data/buildtool/__init__.py +++ b/icu4c/source/data/buildtool/__init__.py @@ -33,16 +33,67 @@ IcuTool = namedtuple("IcuTool", ["name"]) SystemTool = namedtuple("SystemTool", ["name"]) SingleExecutionRequest = namedtuple("SingleExecutionRequest", [ + # Used for identification purposes "name", + + # The filter category that applies to this request + "category", + + # Dependency files; usually generated + "dep_files", + + # Primary input files "input_files", + + # Output files "output_files", + + # What tool to use "tool", + + # 
Argument string to pass to the tool with optional placeholders "args", + + # Placeholders to substitute into the argument string; if any of these + # have a list type, the list must be equal in length to input_files "format_with" ]) RepeatedExecutionRequest = namedtuple("RepeatedExecutionRequest", [ + # Used for identification purposes "name", + + # The filter category that applies to this request + "category", + + # Dependency files; usually generated + "dep_files", + + # Primary input files + "input_files", + + # Output files + "output_files", + + # What tool to use + "tool", + + # Argument string to pass to the tool with optional placeholders + "args", + + # Placeholders to substitute into the argument string for all executions; + # if any of these have a list type, the list must be equal in length to + # input_files + "format_with", + + # Placeholders to substitute into the argument string unique to each + # iteration; all values must be lists equal in length to input_files + "repeat_with" +]) + +RepeatedOrSingleExecutionRequest = namedtuple("RepeatedOrSingleExecutionRequest", [ + "name", + "category", "dep_files", "input_files", "output_files", @@ -52,18 +103,6 @@ RepeatedExecutionRequest = namedtuple("RepeatedExecutionRequest", [ "repeat_with" ]) -RepeatedOrSingleExecutionRequest = namedtuple("RepeatedOrSingleExecutionRequest", [ - "name", - "dep_files", - "input_files", - "output_files", - "tool", - "args", - "format_with", - "repeat_with", - "flatten_with" -]) - PrintFileRequest = namedtuple("PrintFileRequest", [ "name", "output_file", @@ -80,3 +119,18 @@ VariableRequest = namedtuple("VariableRequest", [ "name", "input_files" ]) + +ListRequest = namedtuple("ListRequest", [ + "name", + "variable_name", + "output_file", + "include_tmp" +]) + +IndexTxtRequest = namedtuple("IndexTxtRequest", [ + "name", + "category", + "input_files", + "output_file", + "cldr_version" +]) diff --git a/icu4c/source/data/buildtool/__main__.py b/icu4c/source/data/buildtool/__main__.py index 95b7a0b3331..54fc351cb55 100644 --- a/icu4c/source/data/buildtool/__main__.py +++ b/icu4c/source/data/buildtool/__main__.py @@ -95,20 +95,14 @@ class Config(object): self._feature_set = set(AVAILABLE_FEATURES) - set(args.blacklist) else: self._feature_set = set(AVAILABLE_FEATURES) - self._max_parallel = (args.seqmode == "parallel") - self._coll_han_type = args.collation_ucadata + self.max_parallel = (args.seqmode == "parallel") + # Either "unihan" or "implicithan" + self.coll_han_type = args.collation_ucadata def has_feature(self, feature_name): assert feature_name in AVAILABLE_FEATURES return feature_name in self._feature_set - def max_parallel(self): - return self._max_parallel - - def coll_han_type(self): - # Either "unihan" or "implicithan" - return self._coll_han_type - def main(): args = flag_parser.parse_args() @@ -144,13 +138,8 @@ def main(): # For the purposes of buildtool, force Unix-style directory separators. 
return [v.replace("\\", "/")[len(args.glob_dir)+1:] for v in sorted(result_paths)] - build_dirs, raw_requests = BUILDRULES.generate(config, glob, common) - requests = [] - for req in raw_requests: - if isinstance(req, RepeatedOrSingleExecutionRequest): - requests.append(utils.flatten(req, config.max_parallel())) - else: - requests.append(req) + build_dirs, requests = BUILDRULES.generate(config, glob, common) + requests = utils.flatten_requests(requests, config, common) if args.format == "gnumake": print(makefile.get_gnumake_rules( diff --git a/icu4c/source/data/buildtool/renderers/makefile.py b/icu4c/source/data/buildtool/renderers/makefile.py index 6d028b58a2b..cf0559ed6c4 100644 --- a/icu4c/source/data/buildtool/renderers/makefile.py +++ b/icu4c/source/data/buildtool/renderers/makefile.py @@ -10,7 +10,7 @@ def get_gnumake_rules(build_dirs, requests, makefile_vars, **kwargs): # Common Variables common_vars = kwargs["common_vars"] - for key, value in makefile_vars.items(): + for key, value in sorted(makefile_vars.items()): makefile_string += "{KEY} = {VALUE}\n".format( KEY = key, VALUE = value @@ -137,6 +137,7 @@ def get_gnumake_rules_helper(request, common_vars, **kwargs): if isinstance(request, SingleExecutionRequest): cmd = utils.format_single_request_command(request, cmd_template, common_vars) + dep_files = utils.get_input_files(request) if len(request.output_files) > 1: # Special case for multiple output files: Makefile rules should have only one @@ -152,7 +153,7 @@ def get_gnumake_rules_helper(request, common_vars, **kwargs): MakeRule( name = "%s_all" % request.name, dep_literals = [], - dep_files = request.input_files, + dep_files = dep_files, output_file = timestamp_file, cmds = [ cmd, @@ -174,13 +175,13 @@ def get_gnumake_rules_helper(request, common_vars, **kwargs): ] return rules - elif len(request.input_files) > 5: + elif len(dep_files) > 5: # For nicer printing, for long input lists, use a helper variable. dep_var_name = "%s_DEPS" % request.name.upper() return [ MakeFilesVar( name = dep_var_name, - files = request.input_files + files = dep_files ), MakeRule( name = request.name, @@ -196,7 +197,7 @@ def get_gnumake_rules_helper(request, common_vars, **kwargs): MakeRule( name = request.name, dep_literals = [], - dep_files = request.input_files, + dep_files = dep_files, output_file = request.output_files[0], cmds = [cmd] ) diff --git a/icu4c/source/data/buildtool/utils.py b/icu4c/source/data/buildtool/utils.py index 0ca3b811604..e01a9c60aa0 100644 --- a/icu4c/source/data/buildtool/utils.py +++ b/icu4c/source/data/buildtool/utils.py @@ -57,34 +57,73 @@ def format_repeated_request_command(request, cmd_template, loop_vars, common_var ) -def flatten(request, max_parallel): - """Flatten a RepeatedOrSingleExecutionRequest +def flatten_requests(raw_requests, config, common_vars): + """Post-processes "meta" requests into normal requests. - Becomes either a SingleExecutionRequest or a RepeatedExecutionRequest. 
+ Affected classes: + - RepeatedOrSingleExecutionRequest becomes either + RepeatedExecutionRequest or SingleExecutionRequest + - ListRequest becomes PrintFileRequest and VariableRequest + - IndexTxtRequest becomes PrintFileRequest """ - if max_parallel: - return RepeatedExecutionRequest( - name = request.name, - dep_files = request.dep_files, - input_files = request.input_files, - output_files = request.output_files, - tool = request.tool, - args = request.args, - format_with = request.format_with, - repeat_with = request.repeat_with - ) - else: - return SingleExecutionRequest( - name = request.name, - input_files = request.dep_files + request.input_files, - output_files = request.output_files, - tool = request.tool, - args = request.args, - format_with = concat_dicts(request.format_with, request.flatten_with) - ) + flattened_requests = [] + for request in raw_requests: + if isinstance(request, RepeatedOrSingleExecutionRequest): + if config.max_parallel: + flattened_requests.append(RepeatedExecutionRequest( + name = request.name, + category = request.category, + dep_files = request.dep_files, + input_files = request.input_files, + output_files = request.output_files, + tool = request.tool, + args = request.args, + format_with = request.format_with, + repeat_with = request.repeat_with + )) + else: + flattened_requests.append(SingleExecutionRequest( + name = request.name, + category = request.category, + input_files = request.dep_files + request.input_files, + output_files = request.output_files, + tool = request.tool, + args = request.args, + format_with = concat_dicts(request.format_with, request.repeat_with) + )) + elif isinstance(request, ListRequest): + list_files = list(sorted(get_all_output_files(raw_requests))) + if request.include_tmp: + variable_files = list(sorted(get_all_output_files(raw_requests, include_tmp=True))) + else: + # Always include the list file itself + variable_files = list_files + [request.output_file] + flattened_requests += [ + PrintFileRequest( + name = request.name, + output_file = request.output_file, + content = "\n".join(file.filename for file in list_files) + ), + VariableRequest( + name = request.variable_name, + input_files = variable_files + ) + ] + elif isinstance(request, IndexTxtRequest): + flattened_requests += [ + PrintFileRequest( + name = request.name, + output_file = request.output_file, + content = generate_index_file(request.input_files, request.cldr_version, common_vars) + ) + ] + else: + flattened_requests.append(request) + return flattened_requests -def generate_index_file(locales, cldr_version, common_vars): +def generate_index_file(input_files, cldr_version, common_vars): + locales = [f.filename[f.filename.rfind("/")+1:-4] for f in input_files] formatted_version = " CLDRVersion { \"%s\" }\n" % cldr_version if cldr_version else "" formatted_locales = "\n".join([" %s {\"\"}" % v for v in locales]) # TODO: CLDRVersion is required only in the base file @@ -101,23 +140,52 @@ def generate_index_file(locales, cldr_version, common_vars): ) +def get_input_files(request): + if isinstance(request, SingleExecutionRequest): + return request.dep_files + request.input_files + elif isinstance(request, RepeatedExecutionRequest): + return request.dep_files + request.input_files + elif isinstance(request, RepeatedOrSingleExecutionRequest): + return request.dep_files + request.input_files + elif isinstance(request, PrintFileRequest): + return [] + elif isinstance(request, CopyRequest): + return [request.input_file] + elif isinstance(request, VariableRequest): 
+ return [] + elif isinstance(request, ListRequest): + return [] + elif isinstance(request, IndexTxtRequest): + return request.input_files + else: + assert False + + +def get_output_files(request): + if isinstance(request, SingleExecutionRequest): + return request.output_files + elif isinstance(request, RepeatedExecutionRequest): + return request.output_files + elif isinstance(request, RepeatedOrSingleExecutionRequest): + return request.output_files + elif isinstance(request, PrintFileRequest): + return [request.output_file] + elif isinstance(request, CopyRequest): + return [request.output_file] + elif isinstance(request, VariableRequest): + return [] + elif isinstance(request, ListRequest): + return [request.output_file] + elif isinstance(request, IndexTxtRequest): + return [request.output_file] + else: + assert False + + def get_all_output_files(requests, include_tmp=False): files = [] for request in requests: - if isinstance(request, SingleExecutionRequest): - files += request.output_files - elif isinstance(request, RepeatedExecutionRequest): - files += request.output_files - elif isinstance(request, RepeatedOrSingleExecutionRequest): - files += request.output_files - elif isinstance(request, PrintFileRequest): - files += [request.output_file] - elif isinstance(request, CopyRequest): - files += [request.output_file] - elif isinstance(request, VariableRequest): - pass - else: - assert False + files += get_output_files(request) # Filter out all files but those in OUT_DIR if necessary. # It is also easy to filter for uniqueness; do it right now and return. @@ -128,3 +196,9 @@ def get_all_output_files(requests, include_tmp=False): # Filter for unique values. NOTE: Cannot use set() because we need to accept same filename as # OutFile and TmpFile as different, and by default they evaluate as equal. return [f for _, f in set((type(f), f) for f in files)] + + +class SpaceSeparatedList(list): + """A list that joins itself with spaces when converted to a string.""" + def __str__(self): + return " ".join(self) diff --git a/icu4c/source/test/testdata/BUILDRULES.py b/icu4c/source/test/testdata/BUILDRULES.py index 9e12ac1ec40..2d31dda703c 100644 --- a/icu4c/source/test/testdata/BUILDRULES.py +++ b/icu4c/source/test/testdata/BUILDRULES.py @@ -16,18 +16,12 @@ def generate(config, glob, common_vars): requests += generate_other(config, glob, common_vars) requests += generate_copy(config, glob, common_vars) - all_output_files = list(sorted(utils.get_all_output_files(requests))) - all_output_files_with_tmp = list(sorted(utils.get_all_output_files(requests, include_tmp=True))) - testdata_list_file = TmpFile("testdata.lst") requests += [ - PrintFileRequest( + ListRequest( name = "testdata_list", - output_file = testdata_list_file, - content = "\n".join(file.filename for file in all_output_files) - ), - VariableRequest( - name = "testdata_all_output_files", - input_files = all_output_files_with_tmp + [testdata_list_file] + variable_name = "testdata_all_output_files", + output_file = TmpFile("testdata.lst"), + include_tmp = True ) ] @@ -64,6 +58,7 @@ def generate_rb(config, glob, common_vars): # TODO: Use option -k? 
RepeatedExecutionRequest( name = "testrb", + category = "tests", dep_files = [], input_files = [InFile("%s.txt" % bn) for bn in basenames], output_files = [OutFile("%s.res" % bn) for bn in basenames], @@ -75,6 +70,8 @@ def generate_rb(config, glob, common_vars): # Other standalone res files SingleExecutionRequest( name = "encoded", + category = "tests", + dep_files = [], input_files = [InFile("encoded.utf16be")], output_files = [OutFile("encoded.res")], tool = IcuTool("genrb"), @@ -83,6 +80,8 @@ def generate_rb(config, glob, common_vars): ), SingleExecutionRequest( name = "idna_rules", + category = "tests", + dep_files = [], input_files = [InFile("idna_rules.txt")], output_files = [OutFile("idna_rules.res")], tool = IcuTool("genrb"), @@ -91,6 +90,8 @@ def generate_rb(config, glob, common_vars): ), SingleExecutionRequest( name = "zoneinfo64", + category = "tests", + dep_files = [], input_files = [InFile("zoneinfo64.txt")], output_files = [TmpFile("zoneinfo64.res")], tool = IcuTool("genrb"), @@ -104,6 +105,8 @@ def generate_sprep(config, glob, common_vars): return [ SingleExecutionRequest( name = "nfscsi", + category = "tests", + dep_files = [], input_files = [InFile("nfs4_cs_prep_ci.txt")], output_files = [OutFile("nfscsi.spp")], tool = IcuTool("gensprep"), @@ -112,6 +115,8 @@ def generate_sprep(config, glob, common_vars): ), SingleExecutionRequest( name = "nfscss", + category = "tests", + dep_files = [], input_files = [InFile("nfs4_cs_prep_cs.txt")], output_files = [OutFile("nfscss.spp")], tool = IcuTool("gensprep"), @@ -120,6 +125,8 @@ def generate_sprep(config, glob, common_vars): ), SingleExecutionRequest( name = "nfscis", + category = "tests", + dep_files = [], input_files = [InFile("nfs4_cis_prep.txt")], output_files = [OutFile("nfscis.spp")], tool = IcuTool("gensprep"), @@ -128,6 +135,8 @@ def generate_sprep(config, glob, common_vars): ), SingleExecutionRequest( name = "nfsmxs", + category = "tests", + dep_files = [], input_files = [InFile("nfs4_mixed_prep_s.txt")], output_files = [OutFile("nfsmxs.spp")], tool = IcuTool("gensprep"), @@ -136,6 +145,8 @@ def generate_sprep(config, glob, common_vars): ), SingleExecutionRequest( name = "nfsmxp", + category = "tests", + dep_files = [], input_files = [InFile("nfs4_mixed_prep_p.txt")], output_files = [OutFile("nfsmxp.spp")], tool = IcuTool("gensprep"), @@ -159,6 +170,7 @@ def generate_conv(config, glob, common_vars): return [ RepeatedExecutionRequest( name = "test_conv", + category = "tests", dep_files = [], input_files = [InFile("%s.ucm" % bn) for bn in basenames], output_files = [OutFile("%s.cnv" % bn) for bn in basenames], @@ -194,6 +206,8 @@ def generate_other(config, glob, common_vars): return [ SingleExecutionRequest( name = "testnorm", + category = "tests", + dep_files = [], input_files = [InFile("testnorm.txt")], output_files = [OutFile("testnorm.nrm")], tool = IcuTool("gennorm2"), @@ -202,6 +216,8 @@ def generate_other(config, glob, common_vars): ), SingleExecutionRequest( name = "test_icu", + category = "tests", + dep_files = [], input_files = [], output_files = [OutFile("test.icu")], tool = IcuTool("gentest"), @@ -210,6 +226,8 @@ def generate_other(config, glob, common_vars): ), SingleExecutionRequest( name = "testtable32_txt", + category = "tests", + dep_files = [], input_files = [], output_files = [TmpFile("testtable32.txt")], tool = IcuTool("gentest"), @@ -218,6 +236,8 @@ def generate_other(config, glob, common_vars): ), SingleExecutionRequest( name = "testtable32_res", + category = "tests", + dep_files = [], input_files = 
[TmpFile("testtable32.txt")], output_files = [OutFile("testtable32.res")], tool = IcuTool("genrb"),
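
Note on the new SpaceSeparatedList helper and request flattening: in this patch, RepeatedOrSingleExecutionRequest carries its per-file placeholders in repeat_with, and flatten_requests() either keeps them as parallel iterations (one tool invocation per input file) or merges them into format_with for a single invocation, where SpaceSeparatedList's __str__ joins the values with spaces. The sketch below is a minimal standalone illustration of that idea only; render_args() and the sample .ucm file names are hypothetical stand-ins for this example, not part of the patch (the real logic lives in flatten_requests() and the makefile renderer).

```python
# Standalone sketch: how a SpaceSeparatedList placeholder behaves in
# parallel mode (one command per file) vs. sequential mode (one command
# with the files space-joined). Assumes Python 3.

class SpaceSeparatedList(list):
    """A list that joins itself with spaces when converted to a string."""
    def __str__(self):
        return " ".join(self)


def render_args(args_template, format_with, repeat_with, max_parallel):
    # Hypothetical helper for this sketch. In parallel mode, substitute the
    # i-th element of every repeat_with list into the template, once per
    # input. In sequential mode, merge repeat_with into format_with; str()
    # on a SpaceSeparatedList then flattens it into a single argument list.
    if max_parallel:
        n = len(next(iter(repeat_with.values())))
        return [
            args_template.format(**format_with,
                                 **{k: v[i] for k, v in repeat_with.items()})
            for i in range(n)
        ]
    merged = dict(format_with, **repeat_with)
    return [args_template.format(**merged)]


if __name__ == "__main__":
    repeat_with = {
        "INPUT_FILE_PLACEHOLDER": SpaceSeparatedList(
            ["mappings/a.ucm", "mappings/b.ucm"])  # made-up inputs
    }
    template = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}"
    fmt = {"IN_DIR": "in", "OUT_DIR": "out"}

    print(render_args(template, fmt, repeat_with, max_parallel=True))
    # ['-s in -d out -c mappings/a.ucm', '-s in -d out -c mappings/b.ucm']
    print(render_args(template, fmt, repeat_with, max_parallel=False))
    # ['-s in -d out -c mappings/a.ucm mappings/b.ucm']
```

With this shape, BUILDRULES.py no longer needs the separate flatten_with dictionary that the old RepeatedOrSingleExecutionRequest required: one SpaceSeparatedList value serves both execution modes.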