mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-22285 omit the gb2312han & big5han collation tailorings by default
This commit is contained in:
parent
97510de5d4
commit
2d9fa3fa99
5 changed files with 56 additions and 13 deletions
|
@ -78,6 +78,32 @@ To build ICU4J with custom data, you must first build ICU4C with custom data
|
|||
and then generate the JAR file. For more information on building ICU4J, read the
|
||||
[ICU4J Readme](../icu4j/).
|
||||
|
||||
### Default Configuration
|
||||
|
||||
By default (without a configuration file and without option flags),
|
||||
the ICU data file includes all of the data in the ICU source tree.
|
||||
|
||||
Since ICU 73 (2023q2), there is an exception:
|
||||
By default, the "big5han" and "gb2312han" collation tailorings are omitted.
|
||||
These tailorings mimic the order of their respective charsets, are relatively large, and are rarely used.
|
||||
(See [ICU-22285](https://unicode-org.atlassian.net/browse/ICU-22285).)
|
||||
|
||||
The default configuration is equivalent to a filter file like this:
|
||||
|
||||
{
|
||||
"resourceFilters": [
|
||||
{
|
||||
"categories": [
|
||||
"coll_tree"
|
||||
],
|
||||
"rules": [
|
||||
"-/collations/big5han",
|
||||
"-/collations/gb2312han"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### Locale Slicing
|
||||
|
||||
The simplest way to slice ICU data is by locale. The ICU Data Build Tool
|
||||
|
|
|
@ -159,6 +159,23 @@ class Config(object):
|
|||
if "usePoolBundle" in self.filters_json_data:
|
||||
self.use_pool_bundle = self.filters_json_data["usePoolBundle"]
|
||||
|
||||
# By default, exclude collation data that mimics the order of some large legacy charsets.
|
||||
# In the "subtractive" strategy, we do this by inserting a resourceFilter.
|
||||
# Later rules from an explicit filter file may override this default behavior.
|
||||
# (In the "additive" strategy, this is unnecessary.)
|
||||
if self.strategy == "subtractive":
|
||||
filters = self.filters_json_data.setdefault("resourceFilters", [])
|
||||
omit_charset_collations = {
|
||||
"categories": [
|
||||
"coll_tree"
|
||||
],
|
||||
"rules": [
|
||||
"-/collations/big5han",
|
||||
"-/collations/gb2312han"
|
||||
]
|
||||
}
|
||||
filters.insert(0, omit_charset_collations)
|
||||
|
||||
def _parse_filter_file(self, f):
|
||||
# Use the Hjson parser if it is available; otherwise, use vanilla JSON.
|
||||
try:
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d73b2718b26be3283b897d2780e6c36b64efba9c0d20a04397ade3fa354d21a1
|
||||
size 14443319
|
||||
oid sha256:11da259e78948bcd4daf70a381eed008ac3d9a02aaefeeb904ea2b4b89f29dc1
|
||||
size 14329200
|
||||
|
|
|
@ -49,19 +49,19 @@ public final class ICUResourceBundleCollationTest extends TestFmwk {
|
|||
"f", "zh_MO", "zh@collation=stroke", /* alias of zh_Hant_MO */
|
||||
"t", "zh_Hant_MO", "zh@collation=stroke",
|
||||
"f", "zh_TW_STROKE", "zh@collation=stroke",
|
||||
"f", "zh_TW_STROKE@collation=big5han", "zh@collation=big5han",
|
||||
"f", "zh_TW_STROKE@collation=zhuyin", "zh@collation=zhuyin",
|
||||
"f", "sv_CN@calendar=japanese", "sv",
|
||||
"t", "sv@calendar=japanese", "sv",
|
||||
"f", "zh_TW@collation=big5han", "zh@collation=big5han", /* alias of zh_Hant_TW */
|
||||
"t", "zh_Hant_TW@collation=big5han", "zh@collation=big5han",
|
||||
"f", "zh_TW@collation=gb2312han", "zh@collation=gb2312han", /* alias of zh_Hant_TW */
|
||||
"t", "zh_Hant_TW@collation=gb2312han", "zh@collation=gb2312han",
|
||||
"f", "zh_CN@collation=big5han", "zh@collation=big5han", /* alias of zh_Hans_CN */
|
||||
"t", "zh_Hans_CN@collation=big5han", "zh@collation=big5han",
|
||||
"f", "zh_CN@collation=gb2312han", "zh@collation=gb2312han", /* alias of zh_Hans_CN */
|
||||
"t", "zh_Hans_CN@collation=gb2312han", "zh@collation=gb2312han",
|
||||
"t", "zh@collation=big5han", "zh@collation=big5han",
|
||||
"t", "zh@collation=gb2312han", "zh@collation=gb2312han",
|
||||
"f", "zh_TW@collation=zhuyin", "zh@collation=zhuyin", /* alias of zh_Hant_TW */
|
||||
"t", "zh_Hant_TW@collation=zhuyin", "zh@collation=zhuyin",
|
||||
"f", "zh_TW@collation=unihan", "zh@collation=unihan", /* alias of zh_Hant_TW */
|
||||
"t", "zh_Hant_TW@collation=unihan", "zh@collation=unihan",
|
||||
"f", "zh_CN@collation=zhuyin", "zh@collation=zhuyin", /* alias of zh_Hans_CN */
|
||||
"t", "zh_Hans_CN@collation=zhuyin", "zh@collation=zhuyin",
|
||||
"f", "zh_CN@collation=unihan", "zh@collation=unihan", /* alias of zh_Hans_CN */
|
||||
"t", "zh_Hans_CN@collation=unihan", "zh@collation=unihan",
|
||||
"t", "zh@collation=zhuyin", "zh@collation=zhuyin",
|
||||
"t", "zh@collation=unihan", "zh@collation=unihan",
|
||||
"t", "hi@collation=standard", "hi",
|
||||
"f", "hi_AU@collation=standard;currency=CHF;calendar=buddhist", "hi",
|
||||
"f", "sv_SE@collation=pinyin", "sv", /* bug 4582 tests */
|
||||
|
|
Binary file not shown.
Loading…
Add table
Reference in a new issue