mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-04 21:15:35 +00:00
295 lines
13 KiB
XML
295 lines
13 KiB
XML
<!-- © 2019 and later: Unicode, Inc. and others.
|
|
License & terms of use: http://www.unicode.org/copyright.html -->
|
|
|
|
<config>
|
|
<convert>
|
|
|
|
<!-- The primary set of locale IDs to be generated by default. The IDs in this list are
|
|
automatically expanded to include default scripts and all available regions. The
|
|
rules are:
|
|
|
|
1) Base languages are expanded to include default scripts (e.g. "en" -> "en_Latn").
|
|
2) All region and variant subtags are added for any base language or language+script
|
|
(e.g. "en" -> "en_GB" or "shi_Latn" -> "shi_Latn_MA").
|
|
|
|
If a non-default script is desired it should be listed explicitly (e.g. "sr_Latn").
|
|
|
|
Locale IDs with deprecated subtags (which become aliases) must still be listed in
|
|
full (e.g. "en_RH" or "sr_Latn_YU").
|
|
-->
|
|
<localeIds>
|
|
// A
|
|
af, agq, ak, am, ar, ars, as, asa, ast, az, az_AZ, az_Cyrl
|
|
|
|
// B
|
|
bas, be, bem, bez, bg, bgc, bho, blo, bm, bn, bo, br, brx, bs, bs_BA, bs_Cyrl
|
|
|
|
// C
|
|
ca, ccp, ce, ceb, cgg, chr, ckb, cs, csw, cv, cy
|
|
|
|
// D
|
|
da, dav, de, dje, doi, dsb, dua, dyo, dz
|
|
|
|
// E
|
|
ebu, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo
|
|
|
|
// F
|
|
fa, ff, ff_Adlm, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fo, fr, fur, fy
|
|
|
|
// G
|
|
ga, gaa, gd, gl, gsw, gu, guz, gv
|
|
|
|
// H
|
|
ha, haw, he, hi, hi_Latn, hr, hsb, hu, hy
|
|
|
|
// I
|
|
ia, id, ie, ig, ii, in, in_ID, is, it, iw, iw_IL
|
|
|
|
// J
|
|
ja, jgo, jmc, jv
|
|
|
|
// K
|
|
ka, kab, kam, kde, kea, kgp, khq, ki, kk, kkj, kl, kln, km, kn, ko, kok, kok_Latn, ks
|
|
ks_Deva, ks_IN, ksb, ksf, ksh, ku, kw, kxv, kxv_Deva, kxv_IN, kxv_Orya, kxv_Telu, ky
|
|
|
|
// L
|
|
lag, lb, lg, lij, lkt, lmo, ln, lo, lrc, lt, lu, luo, luy, lv
|
|
|
|
// M
|
|
mai, mas, mer, mfe, mg, mgh, mgo, mi, mk, ml, mn, mni, mni_IN, mo, mr, ms
|
|
mt, mua, my, mzn
|
|
|
|
// N
|
|
naq, nb, nd, nds, ne, nl, nmg, nn, nnh, no, no_NO, no_NO_NY, nqo, nso, nus, nyn
|
|
|
|
// O
|
|
oc, om, or, os
|
|
|
|
// P
|
|
pa, pa_Arab, pa_IN, pa_PK, pcm, pl, prg, ps, pt
|
|
|
|
// Q
|
|
qu
|
|
|
|
// R
|
|
raj, rm, rn, ro, rof, ru, rw, rwk
|
|
|
|
// S
|
|
sa, sah, saq, sat, sat_IN, sbp, sc, sd, sd_Deva, sd_IN, sd_PK, se, seh, ses, sg, sh, sh_BA, sh_CS, sh_YU
|
|
shi, shi_Latn, shi_MA, si, sk, sl, smn, sn, so, sq, sr, sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn
|
|
sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, st, su, su_ID, sv, sw, syr, szl
|
|
|
|
// T
|
|
ta, te, teo, tg, th, ti, tk, tl, tl_PH, tn, to, tok, tr, tt, twq, tzm
|
|
|
|
// U
|
|
ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ
|
|
|
|
// V
|
|
vai, vai_LR, vai_Latn, vec, vi, vmw, vun
|
|
|
|
// W
|
|
wae, wo
|
|
|
|
// X
|
|
xh, xnr, xog
|
|
|
|
// Y
|
|
yav, yi, yo, yrl, yue, yue_CN, yue_HK, yue_Hans
|
|
|
|
// Z
|
|
za, zgh, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu
|
|
</localeIds>
|
|
|
|
<!-- The following elements configure directories in which a subset of the available
|
|
locale IDs should be generated. Unlike the main <localeIds> element, these
|
|
filters must specify all locale IDs in full (but since they mostly select base
|
|
languages, this isn't a big deal).
|
|
|
|
As well as allowing some data directories to have a subset of available data (via
|
|
the <localeIds> element) there are also mechanisms for controlling aliasing and
|
|
the locale parent relation which allows the sharing of some ICU data in cases
|
|
where it would otherwise need to be copied. The two mechanisms are:
|
|
|
|
1: inheritLanguageSubtag: Used to rewrite the parent of a locale ID from "root" to
|
|
its language subtag (e.g. "zh_Hant" has a natural parent of "root", but to allow
|
|
some base language data to be shared it can be made to have a parent of "zh").
|
|
|
|
2: forcedAlias: Used to add aliases for specific directories in order to affect the
|
|
ICU behaviour in special cases.
|
|
|
|
Between them these mechanisms are known as "tailorings" of the affected locales. -->
|
|
<!-- TODO: Explain why these special cases are needed/different. -->
|
|
|
|
<!-- Collation data is large, but also more sharable than other data, which is why there
|
|
are a number of aliases and parent remappings for this directory. -->
|
|
<directory dir="coll" inheritLanguageSubtag="bs_Cyrl, sr_Latn, zh_Hant">
|
|
<!-- These aliases are to avoid needing to copy and maintain the same collation data
|
|
for "zh" and "yue". The maximized version of "yue_Hans" is "yue_Hans_CN" (vs
|
|
"zh_Hans_CN"), and for "yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the
|
|
aliases are effectively just rewriting the base language. -->
|
|
<forcedAlias source="yue" target="zh_Hant"/>
|
|
<forcedAlias source="yue_Hant" target="zh_Hant"/>
|
|
<forcedAlias source="yue_CN" target="zh_Hans"/>
|
|
<forcedAlias source="yue_Hans" target="zh_Hans"/>
|
|
<forcedAlias source="yue_Hans_CN" target="zh_Hans"/>
|
|
<!-- TODO: Find out and document this properly. -->
|
|
<forcedAlias source="sr_ME" target="sr_Cyrl_ME"/>
|
|
|
|
<localeIds>
|
|
root,
|
|
|
|
// A-B
|
|
af, am, ars, ar, as, az, be, bg, bn, bo, br, bs_Cyrl, bs,
|
|
|
|
// C-F
|
|
ca, ceb, chr, cs, cy, da, de_AT, de, dsb, dz, ee, el, en,
|
|
en_US_POSIX, en_US, eo, es, et, fa_AF, fa, ff_Adlm, ff, fil, fi, fo, fr_CA, fr, fy,
|
|
|
|
// G-J
|
|
ga, gl, gu, ha, haw, he, hi, hr, hsb, hu, hy,
|
|
id_ID, id, ig, in, in_ID, is, it, iw_IL, iw, ja,
|
|
|
|
// K-P
|
|
ka, kk, kl, km, kn, kok, ko, ku, ky, lb, lij, lkt, ln, lo, lt, lv,
|
|
mk, ml, mn, mo, mr, ms, mt, my, nb, nb_NO, ne, nl, nn, no, no_NO, nso,
|
|
om, or, pa_IN, pa, pa_Guru, pl, ps, pt,
|
|
|
|
// R-T
|
|
ro, ru, sa, se, sh_BA, sh_CS, sh, sh_YU, si, sk, sl, smn, sq,
|
|
sr_BA, sr_Cyrl_ME, sr_Latn, sr_ME, sr_RS, sr, st, sv, sw,
|
|
ta, te, th, tk, tn, to, tr,
|
|
|
|
// U-Z
|
|
ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans,
|
|
yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
|
|
</localeIds>
|
|
</directory>
|
|
|
|
<directory dir="rbnf">
|
|
<!-- It is not at all clear why this is being done. It's certainly not exactly the
|
|
same as above, since (a) the alias is reversed (b) "zh_Hant" does exist, with
|
|
different data than "yue", so this alias is not just rewriting the base
|
|
language. -->
|
|
<!-- TODO: Find out and document this properly. -->
|
|
<forcedAlias source="zh_Hant_HK" target="yue"/>
|
|
|
|
<localeIds>
|
|
root,
|
|
|
|
// A-E
|
|
af, ak, am, ars, ar, az, be, bg, bs, ca, ccp, chr, cs, cy,
|
|
da, de_CH, de, ee, el, en_001, en_IN, en, eo, es_419, es_DO,
|
|
es_GT, es_HN, es_MX, es_NI, es_PA, es_PR, es_SV, es, es_US, et,
|
|
|
|
// F-P
|
|
fa_AF, fa, ff, fil, fi, fo, fr_BE, fr_CH, fr, ga, he, hi, hr,
|
|
hu, hy, id, in, is, it, iw, ja, ka, kk, kl, km, ko, ky, lb,
|
|
lo, lrc, lt, lv, mk, ms, mt, my, nb, ne, nl, nn, no, pl, pt_PT, pt,
|
|
|
|
// Q-Z
|
|
qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, su, sv, sw, ta, th, tr,
|
|
uk, vec, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh
|
|
</localeIds>
|
|
</directory>
|
|
|
|
<directory dir="brkitr" inheritLanguageSubtag="zh_Hant">
|
|
<localeIds>
|
|
root,
|
|
de, el, en, en_US_POSIX, en_US, es, fi, fr, it, ja, ko, pt, ru, sv, zh_Hant, zh
|
|
</localeIds>
|
|
</directory>
|
|
|
|
<!-- GLOBAL ALIASES -->
|
|
|
|
<!-- Some spoken languages (e.g. "ars") inherit all their data from a written language
|
|
(e.g. "ar_SA"). However CLDR doesn't currently support a way to represent that
|
|
relationship. Unlike deprecated languages for which an alias can be inferred from
|
|
the "languageAlias" CLDR data, there's no way in CLDR to represent the fact that
|
|
we want "ars" (a non-deprecated language) to inherit the data of "ar_SA".
|
|
|
|
This alias is the first example of potentially many cases where ICU needs to
|
|
generate an alias in order to effect "sideways inheritance" for spoken languages,
|
|
and at some stage it should probably be supported properly in the CLDR data. -->
|
|
<forcedAlias source="ars" target="ar_SA"/>
|
|
|
|
<!-- A legacy global alias (note that "no_NO_NY" is not even structurally valid). -->
|
|
<forcedAlias source="no_NO_NY" target="nn_NO"/>
|
|
|
|
<!-- This one is a bit silly, it is just to generate a stub for no_NO, which is
|
|
not in CLDR. If we do not do this, then including it in localeIds will generate
|
|
empty no_Latn and no_Latn_NO and then no_NO aliasing to no_Latn_NO. -->
|
|
<forcedAlias source="no_NO" target="no"/>
|
|
|
|
<!-- ALTERNATE VALUES -->
|
|
|
|
<!-- The following elements configure alternate values for some special case paths.
|
|
The target path will only be replaced if both it, and the source path, exist in
|
|
the CLDR data (paths will not be modified if only the source path exists).
|
|
|
|
Since the paths must represent the same semantic type of data, they must be in the
|
|
same "namespace" (same element names) and must not contain value attributes. Thus
|
|
they can only differ by distinguishing attributes (either added or modified).
|
|
|
|
This feature is typically used to select alternate translations (e.g. short forms)
|
|
for certain paths. -->
|
|
<!-- <altPath target="//path/to/value[@attr='foo']"
|
|
source="//path/to/value[@attr='bar']"
|
|
locales="xx,yy_ZZ"/> -->
|
|
</convert>
|
|
|
|
<!-- If a directory is listed here, then every file in it is assumed to be automatically
|
|
generated by the conversion tool, unless it is explicitly listed in a <retain> element.
|
|
The tool then checks every file to determine if it has the expected header present,
|
|
indicating that it was automatically generated, before deleting it.
|
|
|
|
If unexpected files are found, the "clean" task will fail without deleting anything
|
|
(unless 'forceDelete' is set to override this). Note that even if 'forceDelete' is set,
|
|
the files listed explicitly below will never be deleted by this process.
|
|
|
|
This two-step approach minimizes the risk that the conversion process will ever
|
|
accidentally delete a manually maintained file.
|
|
-->
|
|
<outputDirectories root="${outDir}" forceDelete="${forceDelete}">
|
|
<dir name="brkitr">
|
|
<retain path="adaboost"/>
|
|
<retain path="dictionaries"/>
|
|
<retain path="lstm"/>
|
|
<retain path="rules"/>
|
|
</dir>
|
|
<dir name="coll">
|
|
<!-- Legacy files whose file names aren't supported for automatic generation.
|
|
Simple to maintain manually and unlikely to ever change again. -->
|
|
<retain path="de__PHONEBOOK.txt"/>
|
|
<retain path="de_.txt"/>
|
|
<retain path="es__TRADITIONAL.txt"/>
|
|
<retain path="es_.txt"/>
|
|
</dir>
|
|
<dir name="curr"/>
|
|
<dir name="lang"/>
|
|
<dir name="locales"/>
|
|
<dir name="misc">
|
|
<!-- Machine generated files produced by different tools.
|
|
Possibly worth moving into the new LDML conversion tool one day. -->
|
|
<retain path="currencyNumericCodes.txt"/>
|
|
<retain path="zoneinfo64.txt"/>
|
|
<!-- Project file (not ICU data), unlikely to ever be auto-generated. -->
|
|
<retain path="icudata.rc"/>
|
|
<!-- Small high-level metadata file, stable and easy to maintain manually. -->
|
|
<retain path="icustd.txt"/>
|
|
</dir>
|
|
<dir name="rbnf"/>
|
|
<dir name="region"/>
|
|
<dir name="translit">
|
|
<!-- Small, easy to maintain, special case top-level files. -->
|
|
<retain path="en.txt"/>
|
|
<retain path="el.txt"/>
|
|
</dir>
|
|
<dir name="unit"/>
|
|
<dir name="zone">
|
|
<!-- Manually edited to support TZ database name compatibility. -->
|
|
<retain path="tzdbNames.txt"/>
|
|
</dir>
|
|
</outputDirectories>
|
|
</config>
|
|
|