ICU-20298 Adding improved locale filtering to buildtool.

- Integrates changes from cldrbug 11802 to ICU.
- Adds test suite for buildtool.
- Adds new filter type "union".

parent 1f85e94068
commit ccba38d382

17 changed files with 761 additions and 21 deletions
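For orientation, the new "union" filter type ORs together the results of several sub-filters. The sketch below is not part of the diff; it assumes icu4c/source/data is the working directory so that the buildtool package touched in this commit is importable.

# Sketch: combining a default (exact file-stem) whitelist with a regex filter
# through the new "union" filter type added by this commit.
from buildtool import InFile
from buildtool.filtration import Filter

union_filter = Filter.create_from_json({
    "filterType": "union",
    "unionOf": [
        {"whitelist": ["ars", "zh_Hans"]},
        {"filterType": "regex", "whitelist": [r"^bs.*$"]}
    ]
})

print(union_filter.match(InFile("locales/bs_Latn_BA.txt")))  # True, via the regex sub-filter
print(union_filter.match(InFile("locales/en_GB.txt")))       # False, no sub-filter matches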
icu4c/source/configure (vendored, 1 change)
@@ -4244,6 +4244,7 @@ fi
done

# Check for the platform make
for ac_prog in gmake gnumake
do
@@ -197,6 +197,7 @@ fi

# TODO(ICU-20301): Remove fallback to Python 2.
AC_CHECK_PROGS(PYTHON, python3 "py -3" python "py")
AC_SUBST(PYTHON)

# Check for the platform make
AC_PATH_PROGS(U_MAKE, gmake gnumake, make)
@@ -135,7 +135,7 @@ install: all-local install-local
clean: clean-local
distclean : distclean-local
dist:
check: all
check: all check-local

check-exhaustive: check
@@ -163,6 +163,7 @@ cleanpackage:
	$(RMV) $(LIBDIR)/*$(LIB_ICUDATA_NAME)*.$(SO)* $(LIBDIR)/$(LIB_STATIC_ICUDATA_NAME).$(A)

check-local:
	@PYTHON@ -m buildtool.test

# Find out if we have a source archive.
# If we have that, then use that instead of building everything from scratch.
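The new check-local target runs the buildtool's test package via "@PYTHON@ -m buildtool.test". A rough Python equivalent, shown only as an illustration and assuming icu4c/source/data is the working directory so the package imports resolve:

# Sketch: drive the same suite that the check-local target invokes.
import unittest

from buildtool.test import filtration_test

runner = unittest.TextTestRunner(verbosity=2)
runner.run(filtration_test.suite)  # "suite" is exported at the bottom of filtration_test.py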
@@ -109,6 +109,7 @@
<arg name="--supplementaldir" value="${env.CLDR_DIR}/common/supplemental" />
<arg name="--type" value="locales"/>
<arg name="--makefile" value="resfiles.mk"/>
<arg name="--depgraphfile" value="../buildtool/locale_dependencies.py"/>
</args>
<remapper>
<remap sourcePath="/Keys" targetDir="lang" />
@@ -373,6 +374,9 @@
<fileset id="resfiles" dir="${env.ICU4C_DIR}/source/data/locales">
    <include name="resfiles.mk" />
</fileset>
<fileset id="dependencies_py" dir="${env.ICU4C_DIR}/source/data/buildtool">
    <include name="locale_dependencies.py" />
</fileset>
<fileset id="locales_split" dir="${env.ICU4C_DIR}/source/data">
    <include name="curr/*.txt" />
    <include name="curr/resfiles.mk" />
@@ -159,6 +159,7 @@ class Config(object):
                ),
                file=sys.stderr)
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
            pass
@@ -12,6 +12,7 @@ import sys

from . import *
from . import utils
from .locale_dependencies import data as DEPENDENCY_DATA
from .request_types import *
@@ -34,6 +35,10 @@ class Filter(object):
            return RegexFilter(json_data)
        elif filter_type == "exclude":
            return ExclusionFilter()
        elif filter_type == "union":
            return UnionFilter(json_data)
        elif filter_type == "locale":
            return LocaleFilter(json_data)
        else:
            print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
            return None
@@ -45,6 +50,12 @@ class Filter(object):
        assert self.match(file)
        return [request]

    @classmethod
    def _file_to_file_stem(cls, file):
        start = file.filename.rfind("/")
        limit = file.filename.rfind(".")
        return file.filename[start+1:limit]

    @abstractmethod
    def match(self, file):
        pass
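The _file_to_file_stem helper hoisted into the Filter base class simply strips the directory and extension from a data file path, yielding the locale ID that the filters compare against. A small illustration, not part of the diff:

# Sketch: "locales/sr_Cyrl_BA.txt" -> "sr_Cyrl_BA"
from buildtool import InFile
from buildtool.filtration import Filter

print(Filter._file_to_file_stem(InFile("locales/sr_Cyrl_BA.txt")))  # sr_Cyrl_BA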
@@ -65,7 +76,8 @@ class WhitelistBlacklistFilter(Filter):
        if "whitelist" in json_data:
            self.is_whitelist = True
            self.whitelist = json_data["whitelist"]
        elif "blacklist" in json_data:
        else:
            assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data)
            self.is_whitelist = False
            self.blacklist = json_data["blacklist"]
@@ -73,12 +85,6 @@
        file_stem = self._file_to_file_stem(file)
        return self._should_include(file_stem)

    @classmethod
    def _file_to_file_stem(cls, file):
        start = file.filename.rfind("/")
        limit = file.filename.rfind(".")
        return file.filename[start+1:limit]

    @abstractmethod
    def _should_include(self, file_stem):
        pass
@@ -126,6 +132,92 @@ class RegexFilter(WhitelistBlacklistFilter):
        return True


class UnionFilter(Filter):
    def __init__(self, json_data):
        # Collect the sub-filters.
        self.sub_filters = []
        for filter_json in json_data["unionOf"]:
            self.sub_filters.append(Filter.create_from_json(filter_json))

    def match(self, file):
        """Match iff any of the sub-filters match."""
        for filter in self.sub_filters:
            if filter.match(file):
                return True
        return False


LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$")
LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$")

class LocaleFilter(Filter):
    def __init__(self, json_data):
        self.locales_requested = set()
        self.locales_required = set()
        self.include_children = json_data.get("includeChildren", True)
        self.include_scripts = json_data.get("includeScripts", False)

        # Compute the requested and required locales.
        for locale in json_data["whitelist"]:
            self._add_locale_and_parents(locale)

    def _add_locale_and_parents(self, locale):
        # Store the locale as *requested*
        self.locales_requested.add(locale)
        # Store the locale and its dependencies as *required*
        while locale is not None:
            self.locales_required.add(locale)
            locale = self._get_parent_locale(locale)

    def match(self, file):
        locale = self._file_to_file_stem(file)

        # A locale is *required* if it is *requested* or an ancestor of a
        # *requested* locale.
        if locale in self.locales_required:
            return True

        # Resolve include_scripts and include_children.
        return self._match_recursive(locale)

    def _match_recursive(self, locale):
        # Base case: return True if we reached a *requested* locale,
        # or False if we ascend out of the locale tree.
        if locale is None:
            return False
        if locale in self.locales_requested:
            return True

        # Check for alternative scripts.
        # This causes sr_Latn to check sr instead of going directly to root.
        if self.include_scripts:
            match = LANGUAGE_SCRIPT_REGEX.match(locale)
            if match and self._match_recursive(match.group(1)):
                return True

        # Check if we are a descendant of a *requested* locale.
        if self.include_children:
            parent = self._get_parent_locale(locale)
            if self._match_recursive(parent):
                return True

        # No matches.
        return False

    @classmethod
    def _get_parent_locale(cls, locale):
        if locale in DEPENDENCY_DATA["parents"]:
            return DEPENDENCY_DATA["parents"][locale]
        if locale in DEPENDENCY_DATA["aliases"]:
            return DEPENDENCY_DATA["aliases"][locale]
        if LANGUAGE_ONLY_REGEX.match(locale):
            return "root"
        i = locale.rfind("_")
        if i < 0:
            return None
        return locale[:i]


def apply_filters(requests, config):
    """Runs the filters and returns a new list of requests."""
    requests = _apply_file_filters(requests, config)
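To make the ancestor/descendant resolution above concrete: requesting a locale marks its whole parent chain as required, while sibling locales stay excluded. A hedged sketch, consistent with test_locale_basic further down and again assuming the buildtool package is importable:

# Sketch: LocaleFilter with a single requested locale.
from buildtool import InFile
from buildtool.filtration import Filter

locale_filter = Filter.create_from_json({
    "filterType": "locale",
    "whitelist": ["en_DE"]
})

for stem in ["en_DE", "en_150", "en_001", "en", "root", "en_GB"]:
    print(stem, locale_filter.match(InFile("locales/%s.txt" % stem)))
# Expected: True for en_DE and its dependency chain (en_150, en_001, en, root);
# False for the sibling en_GB.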
@@ -42,7 +42,9 @@
    "oneOf": [
      {
        "properties": {
          "filterType": { "$ref": "#/definitions/filterType" },
          "filterType": {
            "$ref": "#/definitions/blacklistWhitelistFilterTypes"
          },
          "whitelist": { "$ref": "#/definitions/stringList" }
        },
        "required": ["whitelist"],

@@ -50,7 +52,9 @@
      },
      {
        "properties": {
          "filterType": { "$ref": "#/definitions/filterType" },
          "filterType": {
            "$ref": "#/definitions/blacklistWhitelistFilterTypes"
          },
          "blacklist": { "$ref": "#/definitions/stringList" }
        },
        "required": ["blacklist"],
@@ -58,19 +62,52 @@
      },
      {
        "properties": {
          "filterType": { "$ref": "#/definitions/filterType" }
          "filterType": {
            "type": "string",
            "enum": ["exclude"]
          }
        },
        "required": ["filterType"],
        "additionalProperties": false
      },
      {
        "properties": {
          "filterType": {
            "type": "string",
            "enum": ["locale"]
          },
          "includeChildren": {
            "type": "boolean"
          },
          "includeScripts": {
            "type": "boolean"
          },
          "whitelist": { "$ref": "#/definitions/stringList" }
        },
        "required": ["filterType", "whitelist"],
        "additionalProperties": false
      },
      {
        "properties": {
          "filterType": {
            "type": "string",
            "enum": ["union"]
          },
          "unionOf": {
            "type": "array",
            "items": { "$ref": "#/definitions/filter" }
          }
        },
        "required": ["filterType", "unionOf"],
        "additionalProperties": false
      }
    ]
  },
  "filterType": {
  "blacklistWhitelistFilterTypes": {
    "type": "string",
    "enum": [
      "file-stem",
      "language",
      "regex",
      "exclude"
      "regex"
    ]
  },
  "stringList": {
icu4c/source/data/buildtool/locale_dependencies.py (new file, 198 lines)
@@ -0,0 +1,198 @@
# -*- coding: utf-8 -*-
# © 2019 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License

data = {
    "aliases": {
        "ars": "ar_SA",
        "az_AZ": "az_Latn_AZ",
        "bs_BA": "bs_Latn_BA",
        "en_NH": "en_VU",
        "en_RH": "en_ZW",
        "in": "id",
        "in_ID": "id_ID",
        "iw": "he",
        "iw_IL": "he_IL",
        "mo": "ro_MD",
        "no_NO": "nb_NO",
        "no_NO_NY": "nn_NO",
        "no": "nb",
        "pa_IN": "pa_Guru_IN",
        "pa_PK": "pa_Arab_PK",
        "sh": "sr_Latn",
        "sh_BA": "sr_Latn_BA",
        "sh_CS": "sr_Latn_RS",
        "sh_YU": "sr_Latn_RS",
        "shi_MA": "shi_Tfng_MA",
        "sr_BA": "sr_Cyrl_BA",
        "sr_CS": "sr_Cyrl_RS",
        "sr_ME": "sr_Latn_ME",
        "sr_RS": "sr_Cyrl_RS",
        "sr_XK": "sr_Cyrl_XK",
        "sr_YU": "sr_Cyrl_RS",
        "sr_Cyrl_YU": "sr_Cyrl_RS",
        "sr_Cyrl_CS": "sr_Cyrl_RS",
        "sr_Latn_YU": "sr_Latn_RS",
        "sr_Latn_CS": "sr_Latn_RS",
        "tl": "fil",
        "tl_PH": "fil_PH",
        "uz_AF": "uz_Arab_AF",
        "uz_UZ": "uz_Latn_UZ",
        "vai_LR": "vai_Vaii_LR",
        "yue_CN": "yue_Hans_CN",
        "yue_HK": "yue_Hant_HK",
        "zh_CN": "zh_Hans_CN",
        "zh_HK": "zh_Hant_HK",
        "zh_MO": "zh_Hant_MO",
        "zh_SG": "zh_Hans_SG",
        "zh_TW": "zh_Hant_TW"
    },
    "parents": {
        "ff_Adlm": "root",
        "en_CM": "en_001",
        "en_KY": "en_001",
        "en_TC": "en_001",
        "yue_Hans": "root",
        "en_CX": "en_001",
        "es_EC": "es_419",
        "es_US": "es_419",
        "en_CY": "en_001",
        "en_LC": "en_001",
        "en_TK": "en_001",
        "es_UY": "es_419",
        "en_TO": "en_001",
        "en_TT": "en_001",
        "en_DE": "en_150",
        "es_MX": "es_419",
        "en_TV": "en_001",
        "en_DG": "en_001",
        "pt_ST": "pt_PT",
        "en_DM": "en_001",
        "en_LR": "en_001",
        "en_TZ": "en_001",
        "en_LS": "en_001",
        "en_DK": "en_150",
        "es_VE": "es_419",
        "es_NI": "es_419",
        "pt_AO": "pt_PT",
        "en_UG": "en_001",
        "en_MG": "en_001",
        "en_MO": "en_001",
        "en_MU": "en_001",
        "en_MS": "en_001",
        "en_MT": "en_001",
        "shi_Latn": "root",
        "es_BR": "es_419",
        "en_AU": "en_001",
        "en_ZM": "en_001",
        "en_AT": "en_150",
        "es_BZ": "es_419",
        "uz_Arab": "root",
        "az_Cyrl": "root",
        "es_SV": "es_419",
        "en_ZW": "en_001",
        "en_JE": "en_001",
        "en_BB": "en_001",
        "pa_Arab": "root",
        "en_RW": "en_001",
        "es_CO": "es_419",
        "en_JM": "en_001",
        "en_BE": "en_150",
        "es_CL": "es_419",
        "en_BM": "en_001",
        "en_SC": "en_001",
        "es_CR": "es_419",
        "en_150": "en_001",
        "en_BS": "en_001",
        "en_SD": "en_001",
        "pt_GQ": "pt_PT",
        "en_SB": "en_001",
        "es_CU": "es_419",
        "en_SG": "en_001",
        "uz_Cyrl": "root",
        "en_BW": "en_001",
        "en_SH": "en_001",
        "en_SE": "en_150",
        "pt_GW": "pt_PT",
        "en_BZ": "en_001",
        "en_SL": "en_001",
        "en_SI": "en_150",
        "en_KE": "en_001",
        "bm_Nkoo": "root",
        "en_CC": "en_001",
        "en_SS": "en_001",
        "iu_Latn": "root",
        "en_CA": "en_001",
        "en_KI": "en_001",
        "es_DO": "es_419",
        "en_SX": "en_001",
        "en_CH": "en_150",
        "en_KN": "en_001",
        "en_CK": "en_001",
        "en_SZ": "en_001",
        "en_GY": "en_001",
        "en_PH": "en_001",
        "en_PG": "en_001",
        "en_PK": "en_001",
        "en_PN": "en_001",
        "en_HK": "en_001",
        "zh_Hant": "root",
        "en_PW": "en_001",
        "es_AR": "es_419",
        "pt_MZ": "pt_PT",
        "en_Shaw": "root",
        "en_IE": "en_001",
        "ms_Arab": "root",
        "en_IM": "en_001",
        "en_IN": "en_001",
        "es_BO": "es_419",
        "en_IL": "en_001",
        "en_AI": "en_001",
        "az_Arab": "root",
        "en_AG": "en_001",
        "en_IO": "en_001",
        "en_ZA": "en_001",
        "en_MY": "en_001",
        "en_ER": "en_001",
        "en_VC": "en_001",
        "mn_Mong": "root",
        "vai_Latn": "root",
        "en_MW": "en_001",
        "pt_LU": "pt_PT",
        "bs_Cyrl": "root",
        "en_VG": "en_001",
        "en_NA": "en_001",
        "en_NF": "en_001",
        "en_NG": "en_001",
        "ha_Arab": "root",
        "en_NL": "en_150",
        "zh_Hant_MO": "zh_Hant_HK",
        "en_VU": "en_001",
        "en_FJ": "en_001",
        "en_NR": "en_001",
        "en_FK": "en_001",
        "es_GT": "es_419",
        "en_FI": "en_150",
        "pt_MO": "pt_PT",
        "en_FM": "en_001",
        "en_NU": "en_001",
        "en_NZ": "en_001",
        "pt_CH": "pt_PT",
        "en_Dsrt": "root",
        "es_PE": "es_419",
        "es_PA": "es_419",
        "pt_CV": "pt_PT",
        "en_WS": "en_001",
        "en_GD": "en_001",
        "en_GB": "en_001",
        "es_HN": "es_419",
        "pt_TL": "pt_PT",
        "en_GG": "en_001",
        "en_GH": "en_001",
        "es_PR": "es_419",
        "en_GI": "en_001",
        "sr_Latn": "root",
        "en_GM": "en_001",
        "es_PY": "es_419"
    }
}
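This generated table is what LocaleFilter._get_parent_locale consults. A quick illustrative lookup, not part of the diff; the absolute import assumes the buildtool package is on the path:

# Sketch: resolving aliases and parents from the generated dependency data.
from buildtool.locale_dependencies import data as DEPENDENCY_DATA

print(DEPENDENCY_DATA["aliases"]["zh_TW"])   # zh_Hant_TW
print(DEPENDENCY_DATA["parents"]["en_DE"])   # en_150
print(DEPENDENCY_DATA["parents"]["es_MX"])   # es_419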
icu4c/source/data/buildtool/test/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
icu4c/source/data/buildtool/test/__main__.py (new file, 14 lines)
@@ -0,0 +1,14 @@
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

import unittest

from . import filtration_test

def load_tests(loader, tests, pattern):
    suite = unittest.TestSuite()
    suite.addTest(filtration_test.suite)
    return suite

if __name__ == '__main__':
    unittest.main()
icu4c/source/data/buildtool/test/filtration_test.py (new file, 353 lines)
@@ -0,0 +1,353 @@
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

import unittest

from .. import InFile
from ..filtration import Filter

EXAMPLE_FILE_STEMS = [
    "af_NA",
    "af_ZA",
    "af",
    "ar",
    "ar_SA",
    "ars",
    "bs_BA",
    "bs_Cyrl_BA",
    "bs_Cyrl",
    "bs_Latn_BA",
    "bs_Latn",
    "bs",
    "en_001",
    "en_150",
    "en_DE",
    "en_GB",
    "en_US",
    "root",
    "sr_BA",
    "sr_CS",
    "sr_Cyrl_BA",
    "sr_Cyrl_CS",
    "sr_Cyrl_ME",
    "sr_Cyrl",
    "sr_Latn_BA",
    "sr_Latn_CS",
    "sr_Latn_ME",
    "sr_Latn",
    "sr_ME",
    "sr",
    "vai_Latn_LR",
    "vai_Latn",
    "vai_LR",
    "vai_Vaii_LR",
    "vai_Vaii",
    "vai",
    "zh_CN",
    "zh_Hans_CN",
    "zh_Hans_HK",
    "zh_Hans_MO",
    "zh_Hans_SG",
    "zh_Hans",
    "zh_Hant_HK",
    "zh_Hant_MO",
    "zh_Hant_TW",
    "zh_Hant",
    "zh_HK",
    "zh_MO",
    "zh_SG",
    "zh_TW",
    "zh"
]


class FiltrationTest(unittest.TestCase):

    def test_exclude(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "exclude"
        }), [
        ])

    def test_default_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }), [
            "ars",
            "zh_Hans"
        ])

    def test_default_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ars")
        expected_matches.remove("zh_Hans")
        self._check_filter(Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }), expected_matches)

    def test_language_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }), [
            "root",
            "af_NA",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ])

    def test_language_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("af_NA")
        expected_matches.remove("af_ZA")
        expected_matches.remove("af")
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }), expected_matches)

    def test_regex_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }), [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ])

    def test_regex_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ar")
        expected_matches.remove("ar_SA")
        expected_matches.remove("ars")
        expected_matches.remove("zh")
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }), expected_matches)

    def test_locale_basic(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }), [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ])

    def test_locale_no_children(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }), [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ])

    def test_locale_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ])

    def test_locale_no_children_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ])

    def test_union(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }), [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ])

    def _check_filter(self, filter, expected_matches):
        for file_stem in EXAMPLE_FILE_STEMS:
            is_match = filter.match(InFile("locales/%s.txt" % file_stem))
            expected_match = file_stem in expected_matches
            self.assertEqual(is_match, expected_match, file_stem)

# Export the test for the runner
suite = unittest.makeSuite(FiltrationTest)
@@ -1,7 +1,13 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
CURR_CLDR_VERSION = 34
# A list of txt's to build
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE (January 2019): Please use ICU's new data filtering to select locale
# files. This makefile is no longer used to filter locale files.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#
# Old description: A list of txt's to build
# Note:
#
# If you are thinking of modifying this file, READ THIS.
@@ -1,7 +1,13 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
LANG_CLDR_VERSION = 34
# A list of txt's to build
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE (January 2019): Please use ICU's new data filtering to select locale
# files. This makefile is no longer used to filter locale files.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#
# Old description: A list of txt's to build
# Note:
#
# If you are thinking of modifying this file, READ THIS.
@@ -1,7 +1,13 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
GENRB_CLDR_VERSION = 34
# A list of txt's to build
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE (January 2019): Please use ICU's new data filtering to select locale
# files. This makefile is no longer used to filter locale files.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#
# Old description: A list of txt's to build
# Note:
#
# If you are thinking of modifying this file, READ THIS.
@@ -1,7 +1,13 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
REGION_CLDR_VERSION = 34
# A list of txt's to build
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE (January 2019): Please use ICU's new data filtering to select locale
# files. This makefile is no longer used to filter locale files.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#
# Old description: A list of txt's to build
# Note:
#
# If you are thinking of modifying this file, READ THIS.
@@ -1,7 +1,13 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
UNIT_CLDR_VERSION = 34
# A list of txt's to build
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE (January 2019): Please use ICU's new data filtering to select locale
# files. This makefile is no longer used to filter locale files.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#
# Old description: A list of txt's to build
# Note:
#
# If you are thinking of modifying this file, READ THIS.
@@ -1,7 +1,13 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
ZONE_CLDR_VERSION = 34
# A list of txt's to build
#
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE (January 2019): Please use ICU's new data filtering to select locale
# files. This makefile is no longer used to filter locale files.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#
# Old description: A list of txt's to build
# Note:
#
# If you are thinking of modifying this file, READ THIS.