From 017c8b762ec148ad6c500296c829e31bef36aaf4 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Fri, 22 Nov 2019 19:09:45 -0800 Subject: [PATCH] ICU-20890 Change locale_dependencies.py into LOCALE_DEPS.json files - Refactors Python to make I/O operations more abstract - Adds stable sample data for Python test --- icu4c/source/data/BUILDRULES.py | 115 ++++---- icu4c/source/data/brkitr/LOCALE_DEPS.json | 6 + icu4c/source/data/coll/LOCALE_DEPS.json | 32 +++ .../curr/LOCALE_DEPS.json} | 269 ++++++++---------- icu4c/source/data/lang/LOCALE_DEPS.json | 193 +++++++++++++ icu4c/source/data/locales/LOCALE_DEPS.json | 193 +++++++++++++ icu4c/source/data/rbnf/LOCALE_DEPS.json | 32 +++ icu4c/source/data/region/LOCALE_DEPS.json | 193 +++++++++++++ icu4c/source/data/unit/LOCALE_DEPS.json | 193 +++++++++++++ icu4c/source/data/zone/LOCALE_DEPS.json | 193 +++++++++++++ .../python/icutools/databuilder/__main__.py | 41 ++- .../python/icutools/databuilder/filtration.py | 101 ++++--- .../databuilder/test/filtration_test.py | 96 ++++++- .../test/sample_data/brkitr/LOCALE_DEPS.json | 10 + .../test/sample_data/locales/LOCALE_DEPS.json | 197 +++++++++++++ .../test/sample_data/rbnf/LOCALE_DEPS.json | 36 +++ .../python/icutools/databuilder/utils.py | 13 + icu4c/source/test/testdata/BUILDRULES.py | 22 +- 18 files changed, 1640 insertions(+), 295 deletions(-) create mode 100644 icu4c/source/data/brkitr/LOCALE_DEPS.json create mode 100644 icu4c/source/data/coll/LOCALE_DEPS.json rename icu4c/source/{python/icutools/databuilder/locale_dependencies.py => data/curr/LOCALE_DEPS.json} (83%) create mode 100644 icu4c/source/data/lang/LOCALE_DEPS.json create mode 100644 icu4c/source/data/locales/LOCALE_DEPS.json create mode 100644 icu4c/source/data/rbnf/LOCALE_DEPS.json create mode 100644 icu4c/source/data/region/LOCALE_DEPS.json create mode 100644 icu4c/source/data/unit/LOCALE_DEPS.json create mode 100644 icu4c/source/data/zone/LOCALE_DEPS.json create mode 100644 icu4c/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json create mode 100644 icu4c/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json create mode 100644 icu4c/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json diff --git a/icu4c/source/data/BUILDRULES.py b/icu4c/source/data/BUILDRULES.py index 08091ee8092..2338afd1f71 100644 --- a/icu4c/source/data/BUILDRULES.py +++ b/icu4c/source/data/BUILDRULES.py @@ -6,7 +6,6 @@ from __future__ import print_function from icutools.databuilder import * -from icutools.databuilder import locale_dependencies from icutools.databuilder import utils from icutools.databuilder.request_types import * @@ -14,76 +13,69 @@ import os import sys -def generate(config, glob, common_vars): +def generate(config, io, common_vars): requests = [] - if len(glob("misc/*")) == 0: + if len(io.glob("misc/*")) == 0: print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr) exit(1) - requests += generate_cnvalias(config, glob, common_vars) - requests += generate_ulayout(config, glob, common_vars) - requests += generate_confusables(config, glob, common_vars) - requests += generate_conversion_mappings(config, glob, common_vars) - requests += generate_brkitr_brk(config, glob, common_vars) - requests += generate_stringprep(config, glob, common_vars) - requests += generate_brkitr_dictionaries(config, glob, common_vars) - requests += generate_normalization(config, glob, common_vars) - requests += generate_coll_ucadata(config, glob, common_vars) - requests += generate_full_unicore_data(config, glob, common_vars) - requests += generate_unames(config, glob, common_vars) - requests += generate_misc(config, glob, common_vars) - requests += generate_curr_supplemental(config, glob, common_vars) - requests += generate_translit(config, glob, common_vars) + requests += generate_cnvalias(config, io, common_vars) + requests += generate_ulayout(config, io, common_vars) + requests += generate_confusables(config, io, common_vars) + requests += generate_conversion_mappings(config, io, common_vars) + requests += generate_brkitr_brk(config, io, common_vars) + requests += generate_stringprep(config, io, common_vars) + requests += generate_brkitr_dictionaries(config, io, common_vars) + requests += generate_normalization(config, io, common_vars) + requests += generate_coll_ucadata(config, io, common_vars) + requests += generate_full_unicore_data(config, io, common_vars) + requests += generate_unames(config, io, common_vars) + requests += generate_misc(config, io, common_vars) + requests += generate_curr_supplemental(config, io, common_vars) + requests += generate_translit(config, io, common_vars) # Res Tree Files # (input dirname, output dirname, resfiles.mk path, mk version var, mk source var, use pool file, dep files) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "locales", None, - "icu-locale-deprecates.xml", config.use_pool_bundle, []) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "curr", "curr", - "icu-locale-deprecates.xml", config.use_pool_bundle, []) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "lang", "lang", - "icu-locale-deprecates.xml", config.use_pool_bundle, []) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "region", "region", - "icu-locale-deprecates.xml", config.use_pool_bundle, []) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "zone", "zone", - "icu-locale-deprecates.xml", config.use_pool_bundle, []) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "unit", "unit", - "icu-locale-deprecates.xml", config.use_pool_bundle, []) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "coll", "coll", - "icu-coll-deprecates.xml", # Never use pool bundle for coll, brkitr, or rbnf False, # Depends on timezoneTypes.res and keyTypeData.res. @@ -91,18 +83,16 @@ def generate(config, glob, common_vars): # TODO: Bake keyTypeData.res into the common library? [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")]) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "brkitr", "brkitr", - "icu-locale-deprecates.xml", # Never use pool bundle for coll, brkitr, or rbnf False, [DepTarget("brkitr_brk"), DepTarget("dictionaries")]) - requests += generate_tree(config, glob, common_vars, + requests += generate_tree(config, io, common_vars, "rbnf", "rbnf", - "icu-rbnf-deprecates.xml", # Never use pool bundle for coll, brkitr, or rbnf False, []) @@ -119,7 +109,7 @@ def generate(config, glob, common_vars): return requests -def generate_cnvalias(config, glob, common_vars): +def generate_cnvalias(config, io, common_vars): # UConv Name Aliases input_file = InFile("mappings/convrtrs.txt") output_file = OutFile("cnvalias.icu") @@ -138,7 +128,7 @@ def generate_cnvalias(config, glob, common_vars): ] -def generate_confusables(config, glob, common_vars): +def generate_confusables(config, io, common_vars): # CONFUSABLES txt1 = InFile("unidata/confusables.txt") txt2 = InFile("unidata/confusablesWholeScript.txt") @@ -159,9 +149,9 @@ def generate_confusables(config, glob, common_vars): ] -def generate_conversion_mappings(config, glob, common_vars): +def generate_conversion_mappings(config, io, common_vars): # UConv Conversion Table Files - input_files = [InFile(filename) for filename in glob("mappings/*.ucm")] + input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")] output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files] # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv return [ @@ -181,9 +171,9 @@ def generate_conversion_mappings(config, glob, common_vars): ] -def generate_brkitr_brk(config, glob, common_vars): +def generate_brkitr_brk(config, io, common_vars): # BRK Files - input_files = [InFile(filename) for filename in glob("brkitr/rules/*.txt")] + input_files = [InFile(filename) for filename in io.glob("brkitr/rules/*.txt")] output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files] return [ RepeatedExecutionRequest( @@ -202,9 +192,9 @@ def generate_brkitr_brk(config, glob, common_vars): ] -def generate_stringprep(config, glob, common_vars): +def generate_stringprep(config, io, common_vars): # SPP FILES - input_files = [InFile(filename) for filename in glob("sprep/*.txt")] + input_files = [InFile(filename) for filename in io.glob("sprep/*.txt")] output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files] bundle_names = [v.filename[6:-4] for v in input_files] return [ @@ -225,9 +215,9 @@ def generate_stringprep(config, glob, common_vars): ] -def generate_brkitr_dictionaries(config, glob, common_vars): +def generate_brkitr_dictionaries(config, io, common_vars): # Dict Files - input_files = [InFile(filename) for filename in glob("brkitr/dictionaries/*.txt")] + input_files = [InFile(filename) for filename in io.glob("brkitr/dictionaries/*.txt")] output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files] extra_options_map = { "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000", @@ -256,9 +246,9 @@ def generate_brkitr_dictionaries(config, glob, common_vars): ] -def generate_normalization(config, glob, common_vars): +def generate_normalization(config, io, common_vars): # NRM Files - input_files = [InFile(filename) for filename in glob("in/*.nrm")] + input_files = [InFile(filename) for filename in io.glob("in/*.nrm")] # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data input_files.remove(InFile("in/nfc.nrm")) output_files = [OutFile(v.filename[3:]) for v in input_files] @@ -277,7 +267,7 @@ def generate_normalization(config, glob, common_vars): ] -def generate_coll_ucadata(config, glob, common_vars): +def generate_coll_ucadata(config, io, common_vars): # Collation Dependency File (ucadata.icu) input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type) output_file = OutFile("coll/ucadata.icu") @@ -295,7 +285,7 @@ def generate_coll_ucadata(config, glob, common_vars): ] -def generate_full_unicore_data(config, glob, common_vars): +def generate_full_unicore_data(config, io, common_vars): # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu) # are hardcoded in the common DLL and therefore not included in the data package any more. # They are not built by default but need to be built for ICU4J data, @@ -325,7 +315,7 @@ def generate_full_unicore_data(config, glob, common_vars): ] -def generate_unames(config, glob, common_vars): +def generate_unames(config, io, common_vars): # Unicode Character Names input_file = InFile("in/unames.icu") output_file = OutFile("unames.icu") @@ -343,7 +333,7 @@ def generate_unames(config, glob, common_vars): ] -def generate_ulayout(config, glob, common_vars): +def generate_ulayout(config, io, common_vars): # Unicode text layout properties basename = "ulayout" input_file = InFile("in/%s.icu" % basename) @@ -362,9 +352,9 @@ def generate_ulayout(config, glob, common_vars): ] -def generate_misc(config, glob, common_vars): +def generate_misc(config, io, common_vars): # Misc Data Res Files - input_files = [InFile(filename) for filename in glob("misc/*.txt")] + input_files = [InFile(filename) for filename in io.glob("misc/*.txt")] input_basenames = [v.filename[5:] for v in input_files] output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames] return [ @@ -386,7 +376,7 @@ def generate_misc(config, glob, common_vars): ] -def generate_curr_supplemental(config, glob, common_vars): +def generate_curr_supplemental(config, io, common_vars): # Currency Supplemental Res File input_file = InFile("curr/supplementalData.txt") input_basename = "supplementalData.txt" @@ -409,13 +399,13 @@ def generate_curr_supplemental(config, glob, common_vars): ] -def generate_translit(config, glob, common_vars): +def generate_translit(config, io, common_vars): input_files = [ InFile("translit/root.txt"), InFile("translit/en.txt"), InFile("translit/el.txt") ] - dep_files = set(InFile(filename) for filename in glob("translit/*.txt")) + dep_files = set(InFile(filename) for filename in io.glob("translit/*.txt")) dep_files -= set(input_files) dep_files = list(sorted(dep_files)) input_basenames = [v.filename[9:] for v in input_files] @@ -445,18 +435,17 @@ def generate_translit(config, glob, common_vars): def generate_tree( config, - glob, + io, common_vars, sub_dir, out_sub_dir, - xml_filename, use_pool_bundle, dep_targets): requests = [] category = "%s_tree" % sub_dir out_prefix = "%s/" % out_sub_dir if out_sub_dir else "" # TODO: Clean this up for curr - input_files = [InFile(filename) for filename in glob("%s/*.txt" % sub_dir)] + input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)] if sub_dir == "curr": input_files.remove(InFile("curr/supplementalData.txt")) input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files] @@ -532,7 +521,11 @@ def generate_tree( "root", ]) # Put alias locales in a separate structure; see ICU-20627 - alias_locales = set(locale_dependencies.data["aliases"].keys()) + dependency_data = io.read_locale_deps(sub_dir) + if "aliases" in dependency_data: + alias_locales = set(dependency_data["aliases"].keys()) + else: + alias_locales = set() alias_files = [] installed_files = [] for f in input_files: @@ -541,7 +534,7 @@ def generate_tree( continue destination = alias_files if file_stem in alias_locales else installed_files destination.append(f) - cldr_version = locale_dependencies.data["cldrVersion"] if sub_dir == "locales" else None + cldr_version = dependency_data["cldrVersion"] if sub_dir == "locales" else None index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format( IN_SUB_DIR = sub_dir, **common_vars diff --git a/icu4c/source/data/brkitr/LOCALE_DEPS.json b/icu4c/source/data/brkitr/LOCALE_DEPS.json new file mode 100644 index 00000000000..da0a80f7b14 --- /dev/null +++ b/icu4c/source/data/brkitr/LOCALE_DEPS.json @@ -0,0 +1,6 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1" +} diff --git a/icu4c/source/data/coll/LOCALE_DEPS.json b/icu4c/source/data/coll/LOCALE_DEPS.json new file mode 100644 index 00000000000..5a56e84ad4e --- /dev/null +++ b/icu4c/source/data/coll/LOCALE_DEPS.json @@ -0,0 +1,32 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "pa_IN": "pa_Guru_IN", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "sr_BA": "sr_Cyrl_BA", + "sr_ME": "sr_Cyrl_ME", + "sr_RS": "sr_Cyrl_RS", + "yue": "zh_Hant", + "yue_CN": "yue_Hans_CN", + "yue_Hans": "zh_Hans", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + } +} diff --git a/icu4c/source/python/icutools/databuilder/locale_dependencies.py b/icu4c/source/data/curr/LOCALE_DEPS.json similarity index 83% rename from icu4c/source/python/icutools/databuilder/locale_dependencies.py rename to icu4c/source/data/curr/LOCALE_DEPS.json index f9c28470390..808163607cf 100644 --- a/icu4c/source/python/icutools/databuilder/locale_dependencies.py +++ b/icu4c/source/data/curr/LOCALE_DEPS.json @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- -# © 2019 and later: Unicode, Inc. and others. -# License & terms of use: http://www.unicode.org/copyright.html#License +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License -data = { +{ "cldrVersion": "36.1", "aliases": { "ars": "ar_SA", @@ -19,9 +18,9 @@ data = { "iw": "he", "iw_IL": "he_IL", "mo": "ro", + "no": "nb", "no_NO": "nb_NO", "no_NO_NY": "nn_NO", - "no": "nb", "pa_IN": "pa_Guru_IN", "pa_PK": "pa_Arab_PK", "sh": "sr_Latn", @@ -31,14 +30,14 @@ data = { "shi_MA": "shi_Tfng_MA", "sr_BA": "sr_Cyrl_BA", "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", "sr_ME": "sr_Latn_ME", "sr_RS": "sr_Cyrl_RS", "sr_XK": "sr_Cyrl_XK", "sr_YU": "sr_Cyrl_RS", - "sr_Cyrl_YU": "sr_Cyrl_RS", - "sr_Cyrl_CS": "sr_Cyrl_RS", - "sr_Latn_YU": "sr_Latn_RS", - "sr_Latn_CS": "sr_Latn_RS", "tl": "fil", "tl_PH": "fil_PH", "uz_AF": "uz_Arab_AF", @@ -53,172 +52,142 @@ data = { "zh_TW": "zh_Hant_TW" }, "parents": { - "ff_Adlm": "root", - "en_CM": "en_001", - "so_Arab": "root", - "en_KY": "en_001", - "en_TC": "en_001", - "yue_Hans": "root", - "en_CX": "en_001", - "es_EC": "es_419", - "es_US": "es_419", - "byn_Latn": "root", - "en_CY": "en_001", - "en_LC": "en_001", - "en_TK": "en_001", - "tg_Arab": "root", - "es_UY": "es_419", - "ky_Latn": "root", - "en_TO": "en_001", - "en_TT": "en_001", - "en_DE": "en_150", - "es_MX": "es_419", - "en_TV": "en_001", - "en_DG": "en_001", - "pt_ST": "pt_PT", - "en_DM": "en_001", - "en_LR": "en_001", - "en_TZ": "en_001", - "en_LS": "en_001", - "en_DK": "en_150", - "blt_Latn": "root", - "es_VE": "es_419", - "es_NI": "es_419", - "sd_Khoj": "root", - "pt_AO": "pt_PT", - "en_UG": "en_001", - "yo_Arab": "root", - "dje_Arab": "root", - "en_MG": "en_001", - "en_MO": "en_001", - "en_MU": "en_001", - "en_MS": "en_001", - "en_MT": "en_001", - "shi_Latn": "root", - "es_BR": "es_419", - "en_AU": "en_001", - "en_ZM": "en_001", - "en_AT": "en_150", - "es_BZ": "es_419", - "uz_Arab": "root", "az_Cyrl": "root", - "es_SV": "es_419", - "en_ZW": "en_001", - "en_JE": "en_001", - "en_BB": "en_001", - "sd_Deva": "root", - "pa_Arab": "root", - "en_RW": "en_001", - "es_CO": "es_419", - "en_JM": "en_001", - "en_BE": "en_150", - "dyo_Arab": "root", - "es_CL": "es_419", - "en_BM": "en_001", - "en_SC": "en_001", - "es_CR": "es_419", + "bs_Cyrl": "root", "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", "en_BS": "en_001", - "en_SD": "en_001", - "pt_GQ": "pt_PT", - "en_SB": "en_001", - "es_CU": "es_419", - "en_SG": "en_001", - "uz_Cyrl": "root", "en_BW": "en_001", - "en_SH": "en_001", - "en_SE": "en_150", - "pt_GW": "pt_PT", - "ky_Arab": "root", "en_BZ": "en_001", - "en_SL": "en_001", - "en_SI": "en_150", - "ff_Arab": "root", - "en_KE": "en_001", - "bm_Nkoo": "root", - "en_CC": "en_001", - "en_SS": "en_001", - "iu_Latn": "root", "en_CA": "en_001", - "en_KI": "en_001", - "es_DO": "es_419", - "en_SX": "en_001", + "en_CC": "en_001", "en_CH": "en_150", - "en_KN": "en_001", "en_CK": "en_001", - "ml_Arab": "root", - "en_SZ": "en_001", - "pt_FR": "pt_PT", - "ug_Cyrl": "root", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", "en_GY": "en_001", - "en_PH": "en_001", - "en_PG": "en_001", - "en_PK": "en_001", - "cu_Glag": "root", - "en_PN": "en_001", - "kk_Arab": "root", "en_HK": "en_001", - "zh_Hant": "root", - "en_PW": "en_001", - "es_AR": "es_419", - "pt_MZ": "pt_PT", - "sd_Sind": "root", - "en_Shaw": "root", "en_IE": "en_001", - "ms_Arab": "root", + "en_IL": "en_001", "en_IM": "en_001", "en_IN": "en_001", - "es_BO": "es_419", - "en_IL": "en_001", - "en_AI": "en_001", - "az_Arab": "root", - "en_AG": "en_001", "en_IO": "en_001", - "en_ZA": "en_001", - "en_MY": "en_001", - "en_ER": "en_001", - "en_VC": "en_001", - "mn_Mong": "root", - "vai_Latn": "root", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", "en_MW": "en_001", - "pt_LU": "pt_PT", - "bs_Cyrl": "root", - "en_VG": "en_001", + "en_MY": "en_001", "en_NA": "en_001", "en_NF": "en_001", "en_NG": "en_001", - "ha_Arab": "root", "en_NL": "en_150", - "zh_Hant_MO": "zh_Hant_HK", - "en_VU": "en_001", - "en_FJ": "en_001", "en_NR": "en_001", - "en_FK": "en_001", - "es_GT": "es_419", - "en_FI": "en_150", - "ku_Arab": "root", - "pt_MO": "pt_PT", - "en_FM": "en_001", "en_NU": "en_001", "en_NZ": "en_001", - "pt_CH": "pt_PT", - "en_Dsrt": "root", - "es_PE": "es_419", - "es_PA": "es_419", - "pt_CV": "pt_PT", - "wo_Arab": "root", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", "en_WS": "en_001", - "en_GD": "en_001", - "en_GB": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", "es_HN": "es_419", - "pt_TL": "pt_PT", - "en_GG": "en_001", - "en_GH": "en_001", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", "es_PR": "es_419", - "sw_Arab": "root", - "en_GI": "en_001", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", "sr_Latn": "root", - "en_GM": "en_001", - "es_PY": "es_419" + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" } } diff --git a/icu4c/source/data/lang/LOCALE_DEPS.json b/icu4c/source/data/lang/LOCALE_DEPS.json new file mode 100644 index 00000000000..808163607cf --- /dev/null +++ b/icu4c/source/data/lang/LOCALE_DEPS.json @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/icu4c/source/data/locales/LOCALE_DEPS.json b/icu4c/source/data/locales/LOCALE_DEPS.json new file mode 100644 index 00000000000..808163607cf --- /dev/null +++ b/icu4c/source/data/locales/LOCALE_DEPS.json @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/icu4c/source/data/rbnf/LOCALE_DEPS.json b/icu4c/source/data/rbnf/LOCALE_DEPS.json new file mode 100644 index 00000000000..e1d005ee12b --- /dev/null +++ b/icu4c/source/data/rbnf/LOCALE_DEPS.json @@ -0,0 +1,32 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "in": "id", + "iw": "he", + "no": "nb", + "sh": "sr_Latn", + "zh_HK": "zh_Hant_HK", + "zh_Hant_HK": "yue", + "zh_MO": "zh_Hant_MO", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "en_IN": "en_001", + "es_DO": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PR": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "sr_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root" + } +} diff --git a/icu4c/source/data/region/LOCALE_DEPS.json b/icu4c/source/data/region/LOCALE_DEPS.json new file mode 100644 index 00000000000..808163607cf --- /dev/null +++ b/icu4c/source/data/region/LOCALE_DEPS.json @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/icu4c/source/data/unit/LOCALE_DEPS.json b/icu4c/source/data/unit/LOCALE_DEPS.json new file mode 100644 index 00000000000..808163607cf --- /dev/null +++ b/icu4c/source/data/unit/LOCALE_DEPS.json @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/icu4c/source/data/zone/LOCALE_DEPS.json b/icu4c/source/data/zone/LOCALE_DEPS.json new file mode 100644 index 00000000000..808163607cf --- /dev/null +++ b/icu4c/source/data/zone/LOCALE_DEPS.json @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/icu4c/source/python/icutools/databuilder/__main__.py b/icu4c/source/python/icutools/databuilder/__main__.py index 5d843d9c081..a6a387d9ad2 100644 --- a/icu4c/source/python/icutools/databuilder/__main__.py +++ b/icu4c/source/python/icutools/databuilder/__main__.py @@ -7,6 +7,7 @@ from __future__ import print_function import argparse import glob as pyglob +import io as pyio import json import os import sys @@ -237,6 +238,29 @@ def add_copy_input_requests(requests, config, common_vars): return result +class IO(object): + """I/O operations required when computing the build actions""" + + def __init__(self, src_dir): + self.src_dir = src_dir + + def glob(self, pattern): + absolute_paths = pyglob.glob(os.path.join(self.src_dir, pattern)) + # Strip off the absolute path suffix so we are left with a relative path. + relative_paths = [v[len(self.src_dir)+1:] for v in sorted(absolute_paths)] + # For the purposes of icutools.databuilder, force Unix-style directory separators. + # Within the Python code, including BUILDRULES.py and user-provided config files, + # directory separators are normalized to '/', including on Windows platforms. + return [v.replace("\\", "/") for v in relative_paths] + + def read_locale_deps(self, tree): + return self._read_json("%s/LOCALE_DEPS.json" % tree) + + def _read_json(self, filename): + with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + def main(argv): args = flag_parser.parse_args(argv) config = Config(args) @@ -252,15 +276,11 @@ def main(argv): key: "$(%s)" % key for key in list(makefile_vars.keys()) + makefile_env } - common["GLOB_DIR"] = args.src_dir common["FILTERS_DIR"] = config.filter_dir common["CWD_DIR"] = os.getcwd() else: makefile_vars = None common = { - # GLOB_DIR is used now, whereas IN_DIR is used during execution phase. - # There is no useful distinction in unix-exec or windows-exec mode. - "GLOB_DIR": args.src_dir, "SRC_DIR": args.src_dir, "IN_DIR": args.src_dir, "OUT_DIR": args.out_dir, @@ -272,14 +292,6 @@ def main(argv): "ICUDATA_CHAR": "l" } - def glob(pattern): - result_paths = pyglob.glob("{GLOB_DIR}/{PATTERN}".format( - GLOB_DIR = args.src_dir, - PATTERN = pattern - )) - # For the purposes of icutools.databuilder, force Unix-style directory separators. - return [v.replace("\\", "/")[len(args.src_dir)+1:] for v in sorted(result_paths)] - # Automatically load BUILDRULES from the src_dir sys.path.append(args.src_dir) try: @@ -288,7 +300,8 @@ def main(argv): print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr) sys.exit(1) - requests = BUILDRULES.generate(config, glob, common) + io = IO(args.src_dir) + requests = BUILDRULES.generate(config, io, common) if "fileReplacements" in config.filters_json_data: tmp_in_dir = "{TMP_DIR}/in".format(**common) @@ -298,7 +311,7 @@ def main(argv): common["IN_DIR"] = tmp_in_dir requests = add_copy_input_requests(requests, config, common) - requests = filtration.apply_filters(requests, config) + requests = filtration.apply_filters(requests, config, io) requests = utils.flatten_requests(requests, config, common) build_dirs = utils.compute_directories(requests) diff --git a/icu4c/source/python/icutools/databuilder/filtration.py b/icu4c/source/python/icutools/databuilder/filtration.py index 0eff5b9c57c..acdba0ee5b2 100644 --- a/icu4c/source/python/icutools/databuilder/filtration.py +++ b/icu4c/source/python/icutools/databuilder/filtration.py @@ -12,7 +12,6 @@ import sys from . import * from . import utils -from .locale_dependencies import data as DEPENDENCY_DATA from .request_types import * @@ -21,7 +20,8 @@ from .request_types import * # TODO(ICU-20301): Make this inherit from abc.ABC. class Filter(object): @staticmethod - def create_from_json(json_data): + def create_from_json(json_data, io): + assert io != None if "filterType" in json_data: filter_type = json_data["filterType"] else: @@ -36,9 +36,9 @@ class Filter(object): elif filter_type == "exclude": return ExclusionFilter() elif filter_type == "union": - return UnionFilter(json_data) + return UnionFilter(json_data, io) elif filter_type == "locale": - return LocaleFilter(json_data) + return LocaleFilter(json_data, io) else: print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr) return None @@ -50,12 +50,19 @@ class Filter(object): assert self.match(file) return [request] - @classmethod - def _file_to_file_stem(cls, file): + @staticmethod + def _file_to_file_stem(file): start = file.filename.rfind("/") limit = file.filename.rfind(".") return file.filename[start+1:limit] + @staticmethod + def _file_to_subdir(file): + limit = file.filename.rfind("/") + if limit == -1: + return None + return file.filename[:limit] + @abstractmethod def match(self, file): pass @@ -133,11 +140,11 @@ class RegexFilter(WhitelistBlacklistFilter): class UnionFilter(Filter): - def __init__(self, json_data): + def __init__(self, json_data, io): # Collect the sub-filters. self.sub_filters = [] for filter_json in json_data["unionOf"]: - self.sub_filters.append(Filter.create_from_json(filter_json)) + self.sub_filters.append(Filter.create_from_json(filter_json, io)) def match(self, file): """Match iff any of the sub-filters match.""" @@ -151,36 +158,31 @@ LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$") LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$") class LocaleFilter(Filter): - def __init__(self, json_data): - self.locales_requested = set() - self.locales_required = set() + def __init__(self, json_data, io): + self.locales_requested = list(json_data["whitelist"]) self.include_children = json_data.get("includeChildren", True) self.include_scripts = json_data.get("includeScripts", False) - # Compute the requested and required locales. - for locale in json_data["whitelist"]: - self._add_locale_and_parents(locale) - - def _add_locale_and_parents(self, locale): - # Store the locale as *requested* - self.locales_requested.add(locale) - # Store the locale and its dependencies as *required* - while locale is not None: - self.locales_required.add(locale) - locale = self._get_parent_locale(locale) + # Load the dependency graph from disk + self.dependency_data_by_tree = { + tree: io.read_locale_deps(tree) + for tree in utils.ALL_TREES + } def match(self, file): + tree = self._file_to_subdir(file) + assert tree is not None locale = self._file_to_file_stem(file) # A locale is *required* if it is *requested* or an ancestor of a # *requested* locale. - if locale in self.locales_required: + if locale in self._locales_required(tree): return True # Resolve include_scripts and include_children. - return self._match_recursive(locale) + return self._match_recursive(locale, tree) - def _match_recursive(self, locale): + def _match_recursive(self, locale, tree): # Base case: return True if we reached a *requested* locale, # or False if we ascend out of the locale tree. if locale is None: @@ -192,42 +194,51 @@ class LocaleFilter(Filter): # This causes sr_Latn to check sr instead of going directly to root. if self.include_scripts: match = LANGUAGE_SCRIPT_REGEX.match(locale) - if match and self._match_recursive(match.group(1)): + if match and self._match_recursive(match.group(1), tree): return True # Check if we are a descendant of a *requested* locale. if self.include_children: - parent = self._get_parent_locale(locale) - if self._match_recursive(parent): + parent = self._get_parent_locale(locale, tree) + if self._match_recursive(parent, tree): return True # No matches. return False - @classmethod - def _get_parent_locale(cls, locale): - if locale in DEPENDENCY_DATA["parents"]: - return DEPENDENCY_DATA["parents"][locale] - if locale in DEPENDENCY_DATA["aliases"]: - return DEPENDENCY_DATA["aliases"][locale] + def _get_parent_locale(self, locale, tree): + """Gets the parent locale in the given tree, according to dependency data.""" + dependency_data = self.dependency_data_by_tree[tree] + if "parents" in dependency_data and locale in dependency_data["parents"]: + return dependency_data["parents"][locale] + if "aliases" in dependency_data and locale in dependency_data["aliases"]: + return dependency_data["aliases"][locale] if LANGUAGE_ONLY_REGEX.match(locale): return "root" i = locale.rfind("_") if i < 0: + assert locale == "root" return None return locale[:i] + def _locales_required(self, tree): + """Returns a generator of all required locales in the given tree.""" + for locale in self.locales_requested: + while locale is not None: + yield locale + locale = self._get_parent_locale(locale, tree) -def apply_filters(requests, config): + +def apply_filters(requests, config, io): """Runs the filters and returns a new list of requests.""" - requests = _apply_file_filters(requests, config) - requests = _apply_resource_filters(requests, config) + requests = _apply_file_filters(requests, config, io) + requests = _apply_resource_filters(requests, config, io) return requests -def _apply_file_filters(old_requests, config): +def _apply_file_filters(old_requests, config, io): """Filters out entire files.""" - filters = _preprocess_file_filters(old_requests, config) + filters = _preprocess_file_filters(old_requests, config, io) new_requests = [] for request in old_requests: category = request.category @@ -238,7 +249,7 @@ def _apply_file_filters(old_requests, config): return new_requests -def _preprocess_file_filters(requests, config): +def _preprocess_file_filters(requests, config, io): all_categories = set( request.category for request in requests @@ -261,7 +272,7 @@ def _preprocess_file_filters(requests, config): elif filter_json == "include": pass # no-op else: - filters[category] = Filter.create_from_json(filter_json) + filters[category] = Filter.create_from_json(filter_json, io) if "featureFilters" in json_data: for category in json_data["featureFilters"]: if category not in all_categories: @@ -363,14 +374,14 @@ class ResourceFilterInfo(object): i += 1 return new_requests - @classmethod - def _generate_resource_filter_txt(cls, rules): + @staticmethod + def _generate_resource_filter_txt(rules): result = "# Caution: This file is automatically generated\n\n" result += "\n".join(rules) return result -def _apply_resource_filters(all_requests, config): +def _apply_resource_filters(all_requests, config, io): """Creates filters for looking within resource bundle files.""" json_data = config.filters_json_data if "resourceFilters" not in json_data: @@ -379,7 +390,7 @@ def _apply_resource_filters(all_requests, config): collected = {} for entry in json_data["resourceFilters"]: if "files" in entry: - file_filter = Filter.create_from_json(entry["files"]) + file_filter = Filter.create_from_json(entry["files"], io) else: file_filter = InclusionFilter() for category in entry["categories"]: diff --git a/icu4c/source/python/icutools/databuilder/test/filtration_test.py b/icu4c/source/python/icutools/databuilder/test/filtration_test.py index 5687006c42d..416223bd7e3 100644 --- a/icu4c/source/python/icutools/databuilder/test/filtration_test.py +++ b/icu4c/source/python/icutools/databuilder/test/filtration_test.py @@ -1,13 +1,19 @@ # Copyright (C) 2018 and later: Unicode, Inc. and others. # License & terms of use: http://www.unicode.org/copyright.html +import io as pyio +import json +import os import unittest from .. import InFile +from ..comment_stripper import CommentStripper from ..filtration import Filter EXAMPLE_FILE_STEMS = [ "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", "af_ZA", "af", "ar", @@ -33,6 +39,7 @@ EXAMPLE_FILE_STEMS = [ "sr_Cyrl", "sr_Latn_BA", "sr_Latn_CS", + "sr_Latn_ME_VARIANT", "sr_Latn_ME", "sr_Latn", "sr_ME", @@ -43,6 +50,7 @@ EXAMPLE_FILE_STEMS = [ "vai_Vaii_LR", "vai_Vaii", "vai", + "yue", "zh_CN", "zh_Hans_CN", "zh_Hans_HK", @@ -60,12 +68,29 @@ EXAMPLE_FILE_STEMS = [ "zh" ] + +class TestIO(object): + def __init__(self): + pass + + def read_locale_deps(self, tree): + if tree not in ("brkitr", "locales", "rbnf"): + return None + with pyio.open(os.path.join( + os.path.dirname(__file__), + "sample_data", + tree, + "LOCALE_DEPS.json" + ), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + class FiltrationTest(unittest.TestCase): def test_exclude(self): self._check_filter(Filter.create_from_json({ "filterType": "exclude" - }), [ + }, TestIO()), [ ]) def test_default_whitelist(self): @@ -74,7 +99,7 @@ class FiltrationTest(unittest.TestCase): "ars", "zh_Hans" ] - }), [ + }, TestIO()), [ "ars", "zh_Hans" ]) @@ -88,7 +113,7 @@ class FiltrationTest(unittest.TestCase): "ars", "zh_Hans" ] - }), expected_matches) + }, TestIO()), expected_matches) def test_language_whitelist(self): self._check_filter(Filter.create_from_json({ @@ -97,9 +122,11 @@ class FiltrationTest(unittest.TestCase): "af", "bs" ] - }), [ + }, TestIO()), [ "root", "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", "af_ZA", "af", "bs_BA", @@ -113,6 +140,8 @@ class FiltrationTest(unittest.TestCase): def test_language_blacklist(self): expected_matches = set(EXAMPLE_FILE_STEMS) expected_matches.remove("af_NA") + expected_matches.remove("af_VARIANT") + expected_matches.remove("af_ZA_VARIANT") expected_matches.remove("af_ZA") expected_matches.remove("af") self._check_filter(Filter.create_from_json({ @@ -120,7 +149,7 @@ class FiltrationTest(unittest.TestCase): "blacklist": [ "af" ] - }), expected_matches) + }, TestIO()), expected_matches) def test_regex_whitelist(self): self._check_filter(Filter.create_from_json({ @@ -129,7 +158,7 @@ class FiltrationTest(unittest.TestCase): r"^ar.*$", r"^zh$" ] - }), [ + }, TestIO()), [ "ar", "ar_SA", "ars", @@ -148,7 +177,7 @@ class FiltrationTest(unittest.TestCase): r"^ar.*$", r"^zh$" ] - }), expected_matches) + }, TestIO()), expected_matches) def test_locale_basic(self): self._check_filter(Filter.create_from_json({ @@ -164,7 +193,7 @@ class FiltrationTest(unittest.TestCase): "vai_Latn", # Language with non-default script "zh_Hans" # Language with default script ] - }), [ + }, TestIO()), [ "root", # bs: should include the full dependency tree of bs_BA "bs_BA", @@ -210,7 +239,7 @@ class FiltrationTest(unittest.TestCase): "vai_Latn", "zh_Hans" ] - }), [ + }, TestIO()), [ "root", "bs_BA", "bs_Latn_BA", @@ -238,7 +267,7 @@ class FiltrationTest(unittest.TestCase): "vai_Latn", "zh_Hans" ] - }), [ + }, TestIO()), [ "root", # bs: includeScripts only works for language-only (without region) "bs_BA", @@ -259,6 +288,7 @@ class FiltrationTest(unittest.TestCase): "sr_Cyrl", "sr_Latn_BA", "sr_Latn_CS", + "sr_Latn_ME_VARIANT", "sr_Latn_ME", "sr_Latn", "sr_ME", @@ -290,7 +320,7 @@ class FiltrationTest(unittest.TestCase): "vai_Latn", "zh_Hans" ] - }), [ + }, TestIO()), [ "root", # bs: includeScripts only works for language-only (without region) "bs_BA", @@ -331,7 +361,7 @@ class FiltrationTest(unittest.TestCase): ] } ] - }), [ + }, TestIO()), [ "ars", "zh_Hans", "bs_BA", @@ -343,9 +373,47 @@ class FiltrationTest(unittest.TestCase): "zh" ]) - def _check_filter(self, filter, expected_matches): + def test_hk_deps_normal(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_Hant", + "zh_Hant_HK", + "zh_HK", + ]) + + def test_hk_deps_rbnf(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "yue", + "zh_Hant_HK", + "zh_HK", + ], "rbnf") + + def test_no_alias_parent_structure(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_HK", + "zh", + ], "brkitr") + + def _check_filter(self, filter, expected_matches, tree="locales"): for file_stem in EXAMPLE_FILE_STEMS: - is_match = filter.match(InFile("locales/%s.txt" % file_stem)) + is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem))) expected_match = file_stem in expected_matches self.assertEqual(is_match, expected_match, file_stem) diff --git a/icu4c/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json b/icu4c/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json new file mode 100644 index 00000000000..89329e87eea --- /dev/null +++ b/icu4c/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json @@ -0,0 +1,10 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1" +} diff --git a/icu4c/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json b/icu4c/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json new file mode 100644 index 00000000000..fd28a741ef6 --- /dev/null +++ b/icu4c/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json @@ -0,0 +1,197 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/icu4c/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json b/icu4c/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json new file mode 100644 index 00000000000..f079619a368 --- /dev/null +++ b/icu4c/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json @@ -0,0 +1,36 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "in": "id", + "iw": "he", + "no": "nb", + "sh": "sr_Latn", + "zh_HK": "zh_Hant_HK", + "zh_Hant_HK": "yue", + "zh_MO": "zh_Hant_MO", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "en_IN": "en_001", + "es_DO": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PR": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "sr_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root" + } +} diff --git a/icu4c/source/python/icutools/databuilder/utils.py b/icu4c/source/python/icutools/databuilder/utils.py index c5965906c6b..3d53d18faeb 100644 --- a/icu4c/source/python/icutools/databuilder/utils.py +++ b/icu4c/source/python/icutools/databuilder/utils.py @@ -52,6 +52,19 @@ def get_local_dirname(dirname): exit(1) +ALL_TREES = [ + "locales", + "curr", + "lang", + "region", + "zone", + "unit", + "coll", + "brkitr", + "rbnf", +] + + def concat_dicts(*dicts): # There is not a super great way to do this in Python: new_dict = {} diff --git a/icu4c/source/test/testdata/BUILDRULES.py b/icu4c/source/test/testdata/BUILDRULES.py index 2a5eed8755c..6e09938f9e6 100644 --- a/icu4c/source/test/testdata/BUILDRULES.py +++ b/icu4c/source/test/testdata/BUILDRULES.py @@ -5,13 +5,13 @@ from icutools.databuilder import * from icutools.databuilder.request_types import * -def generate(config, glob, common_vars): +def generate(config, io, common_vars): requests = [] - requests += generate_rb(config, glob, common_vars) - requests += generate_sprep(config, glob, common_vars) - requests += generate_conv(config, glob, common_vars) - requests += generate_other(config, glob, common_vars) - requests += generate_copy(config, glob, common_vars) + requests += generate_rb(config, io, common_vars) + requests += generate_sprep(config, io, common_vars) + requests += generate_conv(config, io, common_vars) + requests += generate_other(config, io, common_vars) + requests += generate_copy(config, io, common_vars) requests += [ ListRequest( @@ -25,7 +25,7 @@ def generate(config, glob, common_vars): return requests -def generate_rb(config, glob, common_vars): +def generate_rb(config, io, common_vars): basenames = [ "calendar", "casing", @@ -95,7 +95,7 @@ def generate_rb(config, glob, common_vars): ] -def generate_sprep(config, glob, common_vars): +def generate_sprep(config, io, common_vars): return [ SingleExecutionRequest( name = "nfscsi", @@ -145,7 +145,7 @@ def generate_sprep(config, glob, common_vars): ] -def generate_conv(config, glob, common_vars): +def generate_conv(config, io, common_vars): basenames = [ "test1", "test1bmp", @@ -170,7 +170,7 @@ def generate_conv(config, glob, common_vars): ] -def generate_copy(config, glob, common_vars): +def generate_copy(config, io, common_vars): return [ CopyRequest( name = "nam_typ", @@ -190,7 +190,7 @@ def generate_copy(config, glob, common_vars): ] -def generate_other(config, glob, common_vars): +def generate_other(config, io, common_vars): return [ SingleExecutionRequest( name = "testnorm",