mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-20693 Refactoring for inferred IDs.
This commit is contained in:
parent
142c90afcc
commit
7078e19070
12 changed files with 449 additions and 299 deletions
|
@ -78,184 +78,117 @@
|
|||
</classpath>
|
||||
</taskdef>
|
||||
<convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
|
||||
minimalDraftStatus="${minDraftStatus}" emitReport="${emitReport}">
|
||||
outputTypes="${outputTypes}" minimalDraftStatus="${minDraftStatus}" emitReport="${emitReport}">
|
||||
|
||||
<!-- It is not at all clear why this is being done (we expect "sr_Latn_ME" normally).
|
||||
TODO: Find out and document this properly. -->
|
||||
<forcedAlias dir="coll" source="sr_ME" target="sr_Cyrl_ME"/>
|
||||
<!-- The primary set of locale IDs to be generated by default. The IDs in this list are
|
||||
automatically expanded to include default scripts and all available regions. The
|
||||
rules are:
|
||||
|
||||
<!-- This appears to be a hack to avoid needing to copy and maintain the same "zh"
|
||||
data for "yue". The files for "yue" in this directory should be empty otherwise.
|
||||
The maximized versions of "yue_Hans" is "yue_Hans_CN" (vs "zh_Hans_CN"), and for
|
||||
"yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the aliases are effectively just
|
||||
rewriting the base language. -->
|
||||
<forcedAlias dir="coll" source="yue_Hans" target="zh_Hans"/>
|
||||
<forcedAlias dir="coll" source="yue" target="zh_Hant"/>
|
||||
1) Base languages are expanded to include default scripts (e.g. "en" -> "en_Latn").
|
||||
2) All region and variant subtags are added for any base language or language+script
|
||||
(e.g. "en" -> "en_GB" or "shi_Latn" -> "shi_Latn_MA").
|
||||
|
||||
<!-- It is not at all clear why this is being done. It's certainly not exactly the same
|
||||
as above, since (a) the alias is reversed (b) "zh_Hant" does exist, with different
|
||||
data than "yue", so this alias is not just rewriting the base language.
|
||||
TODO: Find out and document this properly. -->
|
||||
<forcedAlias dir="rbnf" source="zh_Hant_HK" target="yue"/>
|
||||
|
||||
<!-- The primary set of locale IDs to be generated. Other, directory specific, sets exist
|
||||
and do not have to be subsets of this. Some of these ID are aliases, so XML files
|
||||
may not exist for all of them. -->
|
||||
<!-- TODO: Add locale ID inference to reduce this list considerably. -->
|
||||
<localeIds dirs="curr,lang,locales,region,unit,zone">
|
||||
root,
|
||||
If a non-default script is desired it should be listed explicitly (e.g. "sr_Latn").
|
||||
|
||||
Locale IDs with deprecated subtags (which become aliases) must still be listed in
|
||||
full (e.g. "en_RH" or "sr_Latn_YU").
|
||||
-->
|
||||
<localeIds>
|
||||
// A
|
||||
af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001,
|
||||
ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ,
|
||||
ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS,
|
||||
ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, ars,
|
||||
as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_AZ, az_Cyrl, az_Cyrl_AZ,
|
||||
az_Latn, az_Latn_AZ,
|
||||
af, agq, agq_CM, ak, am, ar, ars, as, asa, asa_TZ, ast, ast_ES, az, az_AZ, az_Cyrl
|
||||
|
||||
// B
|
||||
bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm,
|
||||
bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN,
|
||||
bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, bs_BA,
|
||||
bas, bas_CM, be, bem, bem_ZM, bez, bez_TZ, bg, bm, bn, bo, br, brx, brx_IN, bs, bs_BA
|
||||
bs_Cyrl
|
||||
|
||||
// C
|
||||
ca, ca_AD, ca_ES, ca_FR, ca_IT, ccp, ccp_BD, ccp_IN, ce, ce_RU,
|
||||
ceb, ceb_PH, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs,
|
||||
cs_CZ, cy, cy_GB,
|
||||
ca, ccp, ccp_BD, ccp_IN, ce, ceb, ceb_PH, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cy
|
||||
|
||||
// D
|
||||
da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE,
|
||||
de_IT, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo,
|
||||
dyo_SN, dz, dz_BT,
|
||||
da, dav, dav_KE, de, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz
|
||||
|
||||
// E
|
||||
ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001,
|
||||
en_150, en_AE, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE,
|
||||
en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK,
|
||||
en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI,
|
||||
en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM,
|
||||
en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE,
|
||||
en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG,
|
||||
en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA,
|
||||
en_NF, en_NG, en_NH, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH,
|
||||
en_PK, en_PN, en_PR, en_PW, en_RH, en_RW, en_SB, en_SC, en_SD,
|
||||
en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC,
|
||||
en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX,
|
||||
en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo,
|
||||
eo_001, es, es_419, es_AR, es_BO, es_BR, es_BZ, es_CL, es_CO,
|
||||
es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN,
|
||||
es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV,
|
||||
es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM,
|
||||
ebu, ebu_KE, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo, ewo_CM
|
||||
|
||||
// F
|
||||
fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_Latn, ff_Latn_BF, ff_Latn_CM,
|
||||
ff_Latn_GH, ff_Latn_GM, ff_Latn_GN, ff_Latn_GW, ff_Latn_LR, ff_Latn_MR,
|
||||
ff_Latn_NE, ff_Latn_NG, ff_Latn_SL, ff_Latn_SN, ff_MR, ff_SN, fi,
|
||||
fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI,
|
||||
fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM,
|
||||
fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT,
|
||||
fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR,
|
||||
fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN,
|
||||
fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT,
|
||||
fy, fy_NL,
|
||||
fa, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fil_PH, fo, fr, fur, fur_IT, fy
|
||||
|
||||
// G
|
||||
ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI,
|
||||
gu, gu_IN, guz, guz_KE, gv, gv_IM,
|
||||
ga, gd, gl, gsw, gsw_CH, gsw_FR, gsw_LI, gu, guz, guz_KE, gv
|
||||
|
||||
// H
|
||||
ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN,
|
||||
hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM,
|
||||
ha, haw, haw_US, he, hi, hr, hsb, hsb_DE, hu, hy
|
||||
|
||||
// I
|
||||
ia, ia_001, id, id_ID, ig, ig_NG, ii, ii_CN, in, in_ID, is,
|
||||
is_IS, it, it_CH, it_IT, it_SM, it_VA, iw, iw_IL,
|
||||
ia, id, ig, ii, in, in_ID, is, it, iw, iw_IL
|
||||
|
||||
// J
|
||||
ja, ja_JP, ja_JP_TRADITIONAL, jgo, jgo_CM, jmc, jmc_TZ, jv, jv_ID,
|
||||
ja, jgo, jgo_CM, jmc, jmc_TZ, jv
|
||||
|
||||
// K
|
||||
ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV,
|
||||
khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln,
|
||||
kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN,
|
||||
ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, ku, ku_TR,
|
||||
kw, kw_GB, ky, ky_KG,
|
||||
ka, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, kk, kkj, kkj_CM, kl
|
||||
kln, kln_KE, km, kn, ko, kok, kok_IN, ks, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, ku, kw
|
||||
ky
|
||||
|
||||
// L
|
||||
lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO,
|
||||
ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT,
|
||||
lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV,
|
||||
lag, lag_TZ, lb, lg, lkt, lkt_US, ln, lo, lrc, lrc_IQ, lrc_IR, lt, lu, luo, luo_KE, luy
|
||||
luy_KE, lv
|
||||
|
||||
// M
|
||||
mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh,
|
||||
mgh_MZ, mgo, mgo_CM, mi, mi_NZ, mk, mk_MK, ml, ml_IN, mn,
|
||||
mn_MN, mo, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua,
|
||||
mua_CM, my, my_MM, mzn, mzn_IR,
|
||||
mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mgh, mgh_MZ, mgo, mgo_CM, mi, mk, ml, mn
|
||||
mo, mr, ms, mt, mua, mua_CM, my, mzn, mzn_IR
|
||||
|
||||
// N
|
||||
naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, nds, nds_DE, nds_NL,
|
||||
ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR,
|
||||
nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, no, no_NO, no_NO_NY,
|
||||
nus, nus_SS, nyn, nyn_UG,
|
||||
naq, naq_NA, nb, nd, nds, nds_DE, nds_NL, ne, nl, nmg, nmg_CM, nn, nnh, nnh_CM, no, no_NO
|
||||
no_NO_NY, nus, nus_SS, nyn, nyn_UG
|
||||
|
||||
// O
|
||||
om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU,
|
||||
om, or, os
|
||||
|
||||
// P
|
||||
pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pa_IN, pa_PK, pl,
|
||||
pl_PL, ps, ps_AF, ps_PK, pt, pt_AO, pt_BR, pt_CH, pt_CV, pt_GQ,
|
||||
pt_GW, pt_LU, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL,
|
||||
pa, pa_Arab, pa_IN, pa_PK, pl, ps, pt
|
||||
|
||||
// Q
|
||||
qu, qu_BO, qu_EC, qu_PE,
|
||||
qu
|
||||
|
||||
// R
|
||||
rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, ru,
|
||||
ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ,
|
||||
rm, rn, ro, rof, rof_TZ, ru, rw, rwk, rwk_TZ
|
||||
|
||||
// S
|
||||
sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, sd, sd_PK, se, se_FI,
|
||||
se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, sh, sh_BA,
|
||||
sh_CS, sh_YU, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA,
|
||||
shi_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW,
|
||||
so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr,
|
||||
sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_CS, sr_Cyrl_XK,
|
||||
sr_Cyrl_YU, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_CS,
|
||||
sr_Latn_XK, sr_Latn_YU, sr_BA, sr_ME, sr_RS, sr_CS, sr_XK, sr_YU,
|
||||
sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG,
|
||||
sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, sd, se, seh, seh_MZ, ses, ses_ML, sg, sh, sh_BA, sh_CS
|
||||
sh_YU, shi, shi_Latn, shi_Latn_MA, shi_MA, shi_Tfng, shi_Tfng_MA, si, sk, sl, smn, smn_FI, sn, so, sq, sr
|
||||
sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn, sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, sv, sw
|
||||
|
||||
// T
|
||||
ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG,
|
||||
tg, tg_TJ, th, th_TH, th_TH_TRADITIONAL, ti, ti_ER, ti_ET, tk,
|
||||
tk_TM, tl, tl_PH, to, to_TO, tr, tr_CY, tr_TR, tt, tt_RU,
|
||||
twq, twq_NE, tzm, tzm_MA,
|
||||
ta, te, teo, teo_KE, teo_UG, tg, th, ti, tk, tl, tl_PH, to, tr, tt, twq, twq_NE
|
||||
tzm, tzm_MA
|
||||
|
||||
// U
|
||||
ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_AF, uz_Arab,
|
||||
uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, uz_UZ,
|
||||
ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ
|
||||
|
||||
// V
|
||||
vai, vai_Latn, vai_Latn_LR, vai_LR, vai_Vaii, vai_Vaii_LR, vi,
|
||||
vi_VN, vun, vun_TZ,
|
||||
vai, vai_LR, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vun, vun_TZ
|
||||
|
||||
// W
|
||||
wae, wae_CH, wo, wo_SN,
|
||||
wae, wae_CH, wo
|
||||
|
||||
// X
|
||||
xh, xh_ZA, xog, xog_UG,
|
||||
xh, xog, xog_UG
|
||||
|
||||
// Y
|
||||
yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, yue, yue_CN, yue_HK,
|
||||
yue_Hans, yue_Hans_CN, yue_Hant, yue_Hant_HK,
|
||||
yav, yav_CM, yi, yo, yue, yue_CN, yue_HK, yue_Hans, yue_Hans_CN, yue_Hant, yue_Hant_HK
|
||||
|
||||
// Z
|
||||
zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO,
|
||||
zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zh_CN,
|
||||
zh_HK, zh_MO, zh_SG, zh_TW, zu, zu_ZA
|
||||
zgh, zgh_MA, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu
|
||||
</localeIds>
|
||||
|
||||
<!-- The following elements configure directories in which a subset of the available
|
||||
locale IDs should be generated. Unlike the main <localeIds> element, these
|
||||
filters must specify all locale IDs in full (but since they mostly select base
|
||||
languages, this isn't a big deal). -->
|
||||
<!-- TODO: Explain why these special cases are needed/different. -->
|
||||
<localeIds dirs="coll">
|
||||
|
||||
<directoryFilter dir="coll">
|
||||
root,
|
||||
|
||||
// A-B
|
||||
|
@ -282,9 +215,9 @@
|
|||
// U-Z
|
||||
ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans,
|
||||
yue, zh_CN, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu
|
||||
</localeIds>
|
||||
</directoryFilter>
|
||||
|
||||
<localeIds dirs="rbnf">
|
||||
<directoryFilter dir="rbnf">
|
||||
root,
|
||||
|
||||
// A-E
|
||||
|
@ -300,12 +233,56 @@
|
|||
// Q-Z
|
||||
qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, sv, sw, ta, th, tr,
|
||||
uk, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh
|
||||
</localeIds>
|
||||
</directoryFilter>
|
||||
|
||||
<localeIds dirs="brkitr">
|
||||
<directoryFilter dir="brkitr">
|
||||
root,
|
||||
de, el, en, en_US_POSIX, en_US, es, fr, it, ja, pt, ru, zh_Hant, zh
|
||||
</localeIds>
|
||||
</directoryFilter>
|
||||
|
||||
<!-- The following elements configure some very special case locale alias behaviour,
|
||||
mainly to support situations where the natural alias relationship is not wanted
|
||||
for a particular type of data. -->
|
||||
|
||||
<!-- GLOBAL ALIASES -->
|
||||
|
||||
<!-- Some spoken languages (e.g. "ars") inherit all their data from a written language
|
||||
(e.g. "ar_SA"). However CLDR doesn't currently support a way to represent that
|
||||
relationship. Unlike deprecated languages for which an alias can be inferred from
|
||||
the "languageAlias" element, there's no way in CLDR to represent the fact that we
|
||||
want "ars" (a non-deprecated language) to inherit the data of "ar_SA".
|
||||
|
||||
This alias is the first example of potentially many cases where ICU needs to
|
||||
generate an alias in order to effect "sideways inheritance" for spoken languages,
|
||||
and at some stage it should be supported properly in the CLDR data. -->
|
||||
<forcedAlias source="ars" target="ar_SA"/>
|
||||
|
||||
<!-- A legacy global alias (note that "no_NO_NY" is not even structurally valid). -->
|
||||
<forcedAlias source="no_NO_NY" target="nn_NO"/>
|
||||
|
||||
<!-- PER-DIRECTORY ALIASES (these are really special cases) -->
|
||||
|
||||
<!-- It is not at all clear why this is being done (we expect "sr_Latn_ME" normally). -->
|
||||
<!-- TODO: Find out and document this properly. -->
|
||||
<forcedAlias dir="coll" source="sr_ME" target="sr_Cyrl_ME"/>
|
||||
|
||||
<!-- This alias is to avoid needing to copy and maintain the same "zh" data for "yue".
|
||||
The maximized version of "yue_Hans" is "yue_Hans_CN" (vs "zh_Hans_CN"), and for
|
||||
"yue" it's "yue_Hant_HK" (vs "zh_Hant_HK"), so the aliases are effectively just
|
||||
rewriting the base language.
|
||||
|
||||
This is similar to the case for "ars"/"ar_SA" but it is not done globally, since
|
||||
CLDR data does exist for "yue" and "yue_Hans" which is NOT the same as "zh_Hant"
|
||||
and "zh_Hans"/"zh". This mapping is a bit more of a "hack" for the purposes of
|
||||
reducing data duplication in ICU. -->
|
||||
<forcedAlias dir="coll" source="yue_Hans" target="zh_Hans"/>
|
||||
<forcedAlias dir="coll" source="yue" target="zh_Hant"/>
|
||||
|
||||
<!-- It is not at all clear why this is being done. It's certainly not exactly the same
|
||||
as above, since (a) the alias is reversed (b) "zh_Hant" does exist, with different
|
||||
data than "yue", so this alias is not just rewriting the base language. -->
|
||||
<!-- TODO: Find out and document this properly. -->
|
||||
<forcedAlias dir="rbnf" source="zh_Hant_HK" target="yue"/>
|
||||
</convert>
|
||||
</target>
|
||||
</project>
|
||||
</project>
|
||||
|
|
|
@ -9,13 +9,13 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
import org.unicode.cldr.api.CldrDraftStatus;
|
||||
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.ImmutableSetMultimap;
|
||||
import com.google.common.collect.ImmutableTable;
|
||||
|
@ -29,11 +29,6 @@ import com.google.common.collect.TreeMultimap;
|
|||
* that was configured by text files such as "icu-locale-deprecates.xml" and "icu-config.
|
||||
*/
|
||||
public final class IcuConverterConfig implements LdmlConverterConfig {
|
||||
|
||||
private static final Optional<Path> DEFAULT_CLDR_DIR =
|
||||
Optional.ofNullable(System.getProperty("CLDR_DIR", null))
|
||||
.map(d -> Paths.get(d).toAbsolutePath());
|
||||
|
||||
private static final Optional<Path> DEFAULT_ICU_DIR =
|
||||
Optional.ofNullable(System.getProperty("ICU_DIR", null))
|
||||
.map(d -> Paths.get(d).toAbsolutePath());
|
||||
|
@ -41,26 +36,16 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
/** The builder with which to specify configuration for the {@link LdmlConverter}. */
|
||||
@SuppressWarnings("UnusedReturnValue")
|
||||
public static final class Builder {
|
||||
private Path cldrDir = DEFAULT_CLDR_DIR.orElse(null);
|
||||
private Path outputDir =
|
||||
DEFAULT_ICU_DIR.map(d -> d.resolve("icu4c/source/data")).orElse(null);
|
||||
private Path specialsDir =
|
||||
DEFAULT_ICU_DIR.map(d -> d.resolve("icu4c/source/data/xml")).orElse(null);
|
||||
private ImmutableSet<OutputType> outputTypes = OutputType.ALL;
|
||||
private CldrDraftStatus minimalDraftStatus = CldrDraftStatus.CONTRIBUTED;
|
||||
private CldrDraftStatus minimumDraftStatus = CldrDraftStatus.CONTRIBUTED;
|
||||
private boolean emitReport = false;
|
||||
private final SetMultimap<IcuLocaleDir, String> localeIdsMap = TreeMultimap.create();
|
||||
private final Table<IcuLocaleDir, String, String> forcedAliases = TreeBasedTable.create();
|
||||
|
||||
/**
|
||||
* Sets the CLDR base directory from which to load all CLDR data. This is optional if the
|
||||
* {@code CLDR_DIR} environment variable is set, which will be used instead.
|
||||
*/
|
||||
public Builder setCldrDir(Path cldrDir) {
|
||||
this.cldrDir = checkNotNull(cldrDir.toAbsolutePath());
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the output directory in which the ICU data directories and files will go. This is
|
||||
* optional if the {@code ICU_DIR} system property is set, which will be used to generate
|
||||
|
@ -91,14 +76,8 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the minimum draft status for CLDR data to be converted (paths below this status are
|
||||
* ignored during conversion). This is optional and defaults to {@link
|
||||
* CldrDraftStatus#CONTRIBUTED}.
|
||||
*/
|
||||
public Builder setMinimalDraftStatus(CldrDraftStatus minimalDraftStatus) {
|
||||
this.minimalDraftStatus = checkNotNull(minimalDraftStatus);
|
||||
return this;
|
||||
public void setMinimumDraftStatus(CldrDraftStatus minimumDraftStatus) {
|
||||
this.minimumDraftStatus = checkNotNull(minimumDraftStatus);
|
||||
}
|
||||
|
||||
public Builder setEmitReport(boolean emitReport) {
|
||||
|
@ -122,26 +101,16 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
}
|
||||
}
|
||||
|
||||
private final Path cldrDir;
|
||||
private final Path outputDir;
|
||||
private final Path specialsDir;
|
||||
private final ImmutableSet<OutputType> outputTypes;
|
||||
private final CldrDraftStatus minimalDraftStatus;
|
||||
private final CldrDraftStatus minimumDraftStatus;
|
||||
private final boolean emitReport;
|
||||
private final ImmutableSet<String> allLocaleIds;
|
||||
private final ImmutableSetMultimap<IcuLocaleDir, String> localeIdsMap;
|
||||
private final ImmutableTable<IcuLocaleDir, String, String> forcedAliases;
|
||||
|
||||
private IcuConverterConfig(Builder builder) {
|
||||
this.cldrDir = checkNotNull(builder.cldrDir,
|
||||
"must set a CLDR directory, or the CLDR_DIR system property");
|
||||
if (DEFAULT_CLDR_DIR.isPresent() && !this.cldrDir.equals(DEFAULT_CLDR_DIR.get())) {
|
||||
System.err.format(
|
||||
"Warning: Specified CLDR base directory does not appear to match the"
|
||||
+ " directory inferred by the 'CLDR_DIR' system property.\n"
|
||||
+ "Specified: %s\n"
|
||||
+ "Inferred: %s\n",
|
||||
this.cldrDir, DEFAULT_CLDR_DIR.get());
|
||||
}
|
||||
this.outputDir = checkNotNull(builder.outputDir);
|
||||
checkArgument(!Files.isRegularFile(outputDir),
|
||||
"specified output directory if not a directory: %s", outputDir);
|
||||
|
@ -153,8 +122,10 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
checkArgument(!this.outputTypes.isEmpty(),
|
||||
"must specify at least one output type to be generated (possible values are: %s)",
|
||||
Arrays.asList(OutputType.values()));
|
||||
this.minimalDraftStatus = builder.minimalDraftStatus;
|
||||
this.minimumDraftStatus = checkNotNull(builder.minimumDraftStatus);
|
||||
this.emitReport = builder.emitReport;
|
||||
// getAllLocaleIds() returns the union of all the specified IDs in the map.
|
||||
this.allLocaleIds = ImmutableSet.copyOf(builder.localeIdsMap.values());
|
||||
this.localeIdsMap = ImmutableSetMultimap.copyOf(builder.localeIdsMap);
|
||||
this.forcedAliases = ImmutableTable.copyOf(builder.forcedAliases);
|
||||
}
|
||||
|
@ -163,11 +134,6 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
return new Builder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getCldrDirectory() {
|
||||
return cldrDir;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getOutputDir() {
|
||||
return outputDir;
|
||||
|
@ -179,13 +145,13 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
}
|
||||
|
||||
@Override
|
||||
public CldrDraftStatus getMinimumDraftStatus() {
|
||||
return minimalDraftStatus;
|
||||
public Path getSpecialsDir() {
|
||||
return specialsDir;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getSpecialsDir() {
|
||||
return specialsDir;
|
||||
public CldrDraftStatus getMinimumDraftStatus() {
|
||||
return minimumDraftStatus;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -194,10 +160,14 @@ public final class IcuConverterConfig implements LdmlConverterConfig {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getForcedAliases(IcuLocaleDir dir) {
|
||||
public ImmutableMap<String, String> getForcedAliases(IcuLocaleDir dir) {
|
||||
return forcedAliases.row(dir);
|
||||
}
|
||||
|
||||
@Override public ImmutableSet<String> getAllLocaleIds() {
|
||||
return allLocaleIds;
|
||||
}
|
||||
|
||||
@Override public ImmutableSet<String> getTargetLocaleIds(IcuLocaleDir dir) {
|
||||
return localeIdsMap.get(dir);
|
||||
}
|
||||
|
|
|
@ -148,7 +148,8 @@ final class IcuTextWriter {
|
|||
// TODO: Sort this out so there isn't a messy mix of comment styles in the data files.
|
||||
private static void writeHeaderAndComments(
|
||||
PrintWriter out, List<String> header, List<String> comments) {
|
||||
header.forEach(out::println);
|
||||
|
||||
header.forEach(s -> out.println("// " + s));
|
||||
if (!comments.isEmpty()) {
|
||||
// TODO: Don't use /* */ block quotes, just use inline // quotes.
|
||||
out.println(
|
||||
|
|
|
@ -27,7 +27,6 @@ import java.util.Arrays;
|
|||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
@ -61,6 +60,7 @@ import com.google.common.collect.ImmutableMap;
|
|||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.LinkedListMultimap;
|
||||
import com.google.common.collect.ListMultimap;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.io.CharStreams;
|
||||
|
@ -112,21 +112,6 @@ public final class LdmlConverter {
|
|||
private static final PathMatcher WINDOWS_ZONES_PATHS =
|
||||
supplementalMatcher("windowsZones");
|
||||
|
||||
// Special IDs which are not supported via CLDR, but for which synthetic data is injected.
|
||||
// The "TRADITIONAL" variants are here because their calendar differs from the non-variant
|
||||
// locale. However CLDR cannot represent this currently because calendar defaults are in
|
||||
// supplemental data (rather than locale data) and are keyed only on territory.
|
||||
private static final ImmutableSet<String> PHANTOM_LOCALE_IDS =
|
||||
ImmutableSet.of("ja_JP_TRADITIONAL", "th_TH_TRADITIONAL");
|
||||
|
||||
// Special alias mapping which exists in ICU even though "no_NO_NY" is simply not a
|
||||
// structurally valid locale ID. This is injected manually when creating the alias map.
|
||||
// This does mean that nobody can ever parse the _keys_ of the alias map, but so far there
|
||||
// has been no need for that.
|
||||
// TODO: Get "ars" into CLDR and remove this hack.
|
||||
private static final Map<String, String> PHANTOM_ALIASES =
|
||||
ImmutableMap.of("ars", "ar_SA", "no_NO_NY", "nn_NO");
|
||||
|
||||
private static PathMatcher supplementalMatcher(String... spec) {
|
||||
checkArgument(spec.length > 0, "must supply at least one matcher spec");
|
||||
if (spec.length == 1) {
|
||||
|
@ -223,42 +208,35 @@ public final class LdmlConverter {
|
|||
}
|
||||
|
||||
/** Converts CLDR data according to the given configuration. */
|
||||
public static void convert(LdmlConverterConfig config) {
|
||||
CldrDataSupplier src = CldrDataSupplier
|
||||
.forCldrFilesIn(config.getCldrDirectory())
|
||||
.withDraftStatusAtLeast(config.getMinimumDraftStatus());
|
||||
new LdmlConverter(config, src).convertAll(config);
|
||||
public static void convert(
|
||||
CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
|
||||
new LdmlConverter(src, supplementalData, config).convertAll();
|
||||
}
|
||||
|
||||
// The configuration controlling conversion behaviour.
|
||||
private final LdmlConverterConfig config;
|
||||
// The supplier for all data to be converted.
|
||||
private final CldrDataSupplier src;
|
||||
// The set of available locale IDs.
|
||||
// TODO: Make available IDs include specials files (or fail if specials are not available).
|
||||
private final ImmutableSet<String> availableIds;
|
||||
// Supplemental data available to mappers if needed.
|
||||
private final SupplementalData supplementalData;
|
||||
// The configuration controlling conversion behaviour.
|
||||
private final LdmlConverterConfig config;
|
||||
// The set of expanded target locale IDs.
|
||||
// TODO: Make available IDs include specials files (or fail if specials are not available).
|
||||
private final ImmutableSet<String> availableIds;
|
||||
// Transformer for locale data.
|
||||
private final PathValueTransformer localeTransformer;
|
||||
// Transformer for supplemental data.
|
||||
private final PathValueTransformer supplementalTransformer;
|
||||
// Header string to go into every ICU data file.
|
||||
private final ImmutableList<String> icuFileHeader;
|
||||
// Header string to go into every ICU data and transliteration rule file (comment prefixes
|
||||
// are not present and must be added by the code writing the file).
|
||||
private final ImmutableList<String> fileHeader;
|
||||
|
||||
private LdmlConverter(LdmlConverterConfig config, CldrDataSupplier src) {
|
||||
this.config = checkNotNull(config);
|
||||
private LdmlConverter(
|
||||
CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
|
||||
this.src = checkNotNull(src);
|
||||
this.supplementalData = SupplementalData.create(src.getDataForType(SUPPLEMENTAL));
|
||||
// Sort the set of available locale IDs but add "root" at the front. This is the
|
||||
// set of non-alias locale IDs to be processed.
|
||||
Set<String> localeIds = new LinkedHashSet<>();
|
||||
localeIds.add("root");
|
||||
localeIds.addAll(
|
||||
Sets.intersection(src.getAvailableLocaleIds(), config.getTargetLocaleIds(LOCALES)));
|
||||
localeIds.addAll(PHANTOM_LOCALE_IDS);
|
||||
this.availableIds = ImmutableSet.copyOf(localeIds);
|
||||
|
||||
this.supplementalData = checkNotNull(supplementalData);
|
||||
this.config = checkNotNull(config);
|
||||
this.availableIds = ImmutableSet.copyOf(
|
||||
Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds()));
|
||||
// Load the remaining path value transformers.
|
||||
this.supplementalTransformer =
|
||||
RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"),
|
||||
|
@ -270,10 +248,10 @@ public final class LdmlConverter {
|
|||
this.localeTransformer =
|
||||
RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"),
|
||||
IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN);
|
||||
this.icuFileHeader = ImmutableList.copyOf(readLinesFromResource("/ldml2icu_header.txt"));
|
||||
this.fileHeader = ImmutableList.copyOf(readLinesFromResource("/ldml2icu_header.txt"));
|
||||
}
|
||||
|
||||
private void convertAll(LdmlConverterConfig config) {
|
||||
private void convertAll() {
|
||||
ListMultimap<CldrDataType, OutputType> groupByType = LinkedListMultimap.create();
|
||||
for (OutputType t : config.getOutputTypes()) {
|
||||
groupByType.put(t.getCldrType(), t);
|
||||
|
@ -361,7 +339,7 @@ public final class LdmlConverter {
|
|||
SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
|
||||
Path baseDir = config.getOutputDir();
|
||||
|
||||
for (String id : config.getTargetLocaleIds(LOCALES)) {
|
||||
for (String id : config.getAllLocaleIds()) {
|
||||
// Skip "target" IDs that are aliases (they are handled later).
|
||||
if (!availableIds.contains(id)) {
|
||||
continue;
|
||||
|
@ -429,13 +407,17 @@ public final class LdmlConverter {
|
|||
// and must be manually mapped (e.g. legacy locale IDs which don't even parse).
|
||||
// 4: It is a "super special" forced alias, which might replace existing aliases in
|
||||
// some output directories.
|
||||
|
||||
// Even forced aliases only apply if they are in the set of locale IDs for the directory.
|
||||
Map<String, String> forcedAliases =
|
||||
Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains);
|
||||
|
||||
Map<String, String> aliasMap = new LinkedHashMap<>();
|
||||
for (String id : localeIds) {
|
||||
if (PHANTOM_ALIASES.keySet().contains(id)) {
|
||||
checkArgument(!availableIds.contains(id),
|
||||
"phantom aliases should never be otherwise supported: %s\n"
|
||||
+ "(maybe the phantom alias can now be removed?)", id);
|
||||
aliasMap.put(id, PHANTOM_ALIASES.get(id));
|
||||
if (forcedAliases.keySet().contains(id)) {
|
||||
// Forced aliases will be added later and don't need to be processed here. This
|
||||
// is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY")
|
||||
// since that cannot be processed by the code below.
|
||||
continue;
|
||||
}
|
||||
String canonicalId = supplementalData.replaceDeprecatedTags(id);
|
||||
|
@ -459,7 +441,7 @@ public final class LdmlConverter {
|
|||
// Important that we overwrite entries which might already exist here, since we might have
|
||||
// already calculated a "natural" alias for something that we want to force (and we should
|
||||
// replace the existing target, since that affects how we determine empty files later).
|
||||
aliasMap.putAll(config.getForcedAliases(dir));
|
||||
aliasMap.putAll(forcedAliases);
|
||||
return aliasMap;
|
||||
}
|
||||
|
||||
|
@ -490,7 +472,7 @@ public final class LdmlConverter {
|
|||
|
||||
private void processTransforms() {
|
||||
Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
|
||||
write(TransformsMapper.process(src, transformDir), transformDir);
|
||||
write(TransformsMapper.process(src, transformDir, fileHeader), transformDir);
|
||||
}
|
||||
|
||||
private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
|
||||
|
@ -533,7 +515,7 @@ public final class LdmlConverter {
|
|||
|
||||
private void write(IcuData icuData, Path dir) {
|
||||
createDirectory(dir);
|
||||
IcuTextWriter.writeToFile(icuData, dir, icuFileHeader);
|
||||
IcuTextWriter.writeToFile(icuData, dir, fileHeader);
|
||||
}
|
||||
|
||||
private Path createDirectory(Path dir) {
|
||||
|
|
|
@ -63,9 +63,6 @@ public interface LdmlConverterConfig {
|
|||
*/
|
||||
Set<OutputType> getOutputTypes();
|
||||
|
||||
/** Returns the root directory in which the CLDR release is located. */
|
||||
Path getCldrDirectory();
|
||||
|
||||
/**
|
||||
* Returns an additional "specials" directory containing additional ICU specific XML
|
||||
* files depending on the given output type. This is where the converter finds any XML
|
||||
|
@ -83,12 +80,19 @@ public interface LdmlConverterConfig {
|
|||
CldrDraftStatus getMinimumDraftStatus();
|
||||
|
||||
/**
|
||||
* Returns the set of locale IDs to be processed for the given directory.
|
||||
* Returns the complete set of locale IDs which should be considered for processing for this
|
||||
* configuration.
|
||||
*
|
||||
* <p>This set can contain IDs which have no ICU data associated with them if they are
|
||||
* suitable aliases (e.g. they are deprecated versions of locale IDs for which data does
|
||||
* <p>Note that this set can contain IDs which have no CLDR data associated with them if they
|
||||
* are suitable aliases (e.g. they are deprecated versions of locale IDs for which data does
|
||||
* exist).
|
||||
*/
|
||||
Set<String> getAllLocaleIds();
|
||||
|
||||
/**
|
||||
* Returns the set of locale IDs to be processed for the given directory. This set must always
|
||||
* be a subset of {@link #getAllLocaleIds()}.
|
||||
*/
|
||||
Set<String> getTargetLocaleIds(IcuLocaleDir dir);
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,20 +17,24 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.unicode.cldr.api.AttributeKey;
|
||||
import org.unicode.cldr.api.CldrData;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDataType;
|
||||
|
||||
import com.google.common.base.Ascii;
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.HashBasedTable;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.ImmutableTable;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.collect.Table;
|
||||
|
||||
/**
|
||||
|
@ -43,6 +47,13 @@ import com.google.common.collect.Table;
|
|||
*/
|
||||
// TODO: This should be moved into the API and leverage some of the existing utility functions.
|
||||
public final class SupplementalData {
|
||||
// Special IDs which are not supported via CLDR, but for which synthetic data is injected.
|
||||
// The "TRADITIONAL" variants are here because their calendar differs from the non-variant
|
||||
// locale. However CLDR cannot represent this currently because calendar defaults are in
|
||||
// supplemental data (rather than locale data) and are keyed only on territory.
|
||||
private static final ImmutableSet<String> PHANTOM_LOCALE_IDS =
|
||||
ImmutableSet.of("ja_JP_TRADITIONAL", "th_TH_TRADITIONAL");
|
||||
|
||||
private static final Pattern SCRIPT_SUBTAG = Pattern.compile("[A-Z][a-z]{3}");
|
||||
|
||||
private static final PathMatcher ALIAS =
|
||||
|
@ -88,18 +99,18 @@ public final class SupplementalData {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates a supplemental data API instance from the given CLDR data.
|
||||
* Creates a supplemental data API instance from the given CLDR data supplier.
|
||||
*
|
||||
* @param supplementalData the raw CLDR supplemental data instance.
|
||||
* @param src the CLDR data supplier.
|
||||
* @return the supplemental data API.
|
||||
*/
|
||||
public static SupplementalData create(CldrData supplementalData) {
|
||||
public static SupplementalData create(CldrDataSupplier src) {
|
||||
Table<Alias, String, String> aliasTable = HashBasedTable.create();
|
||||
Map<String, String> parentLocaleMap = new HashMap<>();
|
||||
Map<String, String> defaultCalendarMap = new HashMap<>();
|
||||
Map<String, String> likelySubtagMap = new HashMap<>();
|
||||
|
||||
supplementalData.accept(
|
||||
src.getDataForType(CldrDataType.SUPPLEMENTAL).accept(
|
||||
ARBITRARY,
|
||||
v -> {
|
||||
if (ALIAS.matches(v.getPath())) {
|
||||
|
@ -122,17 +133,9 @@ public final class SupplementalData {
|
|||
}
|
||||
});
|
||||
|
||||
// WARNING: The original mapper code determines the full set of deprecated territories and
|
||||
// then removes the following hard-coded list without any explanation as to why. While this
|
||||
// is presumably to "undeprecate" them for the purposes of the locale processing, there's
|
||||
// no explanation of where this list comes from, and thus no way to maintain it.
|
||||
//
|
||||
// asList("062", "172", "200", "830", "AN", "CS", "QU")
|
||||
// .forEach(t -> aliasTable.remove(Alias.TERRITORY, t));
|
||||
// TODO: Understand and document what on Earth this is all about or delete this comment.
|
||||
|
||||
Set<String> availableIds = Sets.union(src.getAvailableLocaleIds(), PHANTOM_LOCALE_IDS);
|
||||
return new SupplementalData(
|
||||
aliasTable, parentLocaleMap, defaultCalendarMap, likelySubtagMap);
|
||||
availableIds, aliasTable, parentLocaleMap, defaultCalendarMap, likelySubtagMap);
|
||||
}
|
||||
|
||||
// A simple-as-possible, mutable, locale ID data "struct" to handle the IDs used during ICU
|
||||
|
@ -256,22 +259,30 @@ public final class SupplementalData {
|
|||
}
|
||||
}
|
||||
|
||||
private final ImmutableSet<String> availableIds;
|
||||
private final ImmutableTable<Alias, String, String> aliasTable;
|
||||
private final ImmutableMap<String, String> parentLocaleMap;
|
||||
private final ImmutableMap<String, String> defaultCalendarMap;
|
||||
private final ImmutableMap<String, String> likelySubtagMap;
|
||||
|
||||
private SupplementalData(
|
||||
Set<String> availableIds,
|
||||
Table<Alias, String, String> aliasTable,
|
||||
Map<String, String> parentLocaleMap,
|
||||
Map<String, String> defaultCalendarMap,
|
||||
Map<String, String> likelySubtagMap) {
|
||||
|
||||
this.availableIds = ImmutableSet.copyOf(availableIds);
|
||||
this.aliasTable = ImmutableTable.copyOf(aliasTable);
|
||||
this.parentLocaleMap = ImmutableMap.copyOf(parentLocaleMap);
|
||||
this.defaultCalendarMap = ImmutableMap.copyOf(defaultCalendarMap);
|
||||
this.likelySubtagMap = ImmutableMap.copyOf(likelySubtagMap);
|
||||
}
|
||||
|
||||
public ImmutableSet<String> getAvailableLocaleIds() {
|
||||
return availableIds;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the "maximized" form of a given locale ID, by adding likely subtags where possible.
|
||||
*/
|
||||
|
|
|
@ -12,19 +12,26 @@ import static java.util.stream.Collectors.joining;
|
|||
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.tools.ant.BuildException;
|
||||
import org.apache.tools.ant.Task;
|
||||
import org.unicode.cldr.api.CldrDataSupplier;
|
||||
import org.unicode.cldr.api.CldrDraftStatus;
|
||||
import org.unicode.icu.tool.cldrtoicu.IcuConverterConfig;
|
||||
import org.unicode.icu.tool.cldrtoicu.LdmlConverter;
|
||||
import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType;
|
||||
import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
|
||||
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
|
||||
|
||||
import com.google.common.base.Ascii;
|
||||
import com.google.common.base.CaseFormat;
|
||||
import com.google.common.base.CharMatcher;
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.SetMultimap;
|
||||
|
||||
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
|
||||
public final class ConvertIcuDataTask extends Task {
|
||||
|
@ -36,6 +43,12 @@ public final class ConvertIcuDataTask extends Task {
|
|||
private static final CharMatcher LOWER_UNDERSCORE = inRange('a', 'z').or(DIGIT_OR_UNDERSCORE);
|
||||
private static final CharMatcher VALID_ENUM_CHAR = LOWER_UNDERSCORE.or(UPPER_UNDERSCORE);
|
||||
|
||||
private Path cldrPath;
|
||||
private CldrDraftStatus minimumDraftStatus;
|
||||
// Set of default locale ID specifiers (wildcard IDs which are expanded).
|
||||
private ImmutableSet<String> localeIdSpec;
|
||||
// Per directory overrides (fully specified locale IDs).
|
||||
private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
|
||||
private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
|
@ -45,21 +58,24 @@ public final class ConvertIcuDataTask extends Task {
|
|||
|
||||
@SuppressWarnings("unused")
|
||||
public void setCldrDir(Path path) {
|
||||
config.setCldrDir(path);
|
||||
this.cldrPath = checkNotNull(path);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setMinimalDraftStatus(String status) {
|
||||
config.setMinimalDraftStatus(resolve(CldrDraftStatus.class, status));
|
||||
minimumDraftStatus = resolve(CldrDraftStatus.class, status);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setOutputTypes(String types) {
|
||||
config.setOutputTypes(
|
||||
ImmutableList<OutputType> typeList =
|
||||
LIST_SPLITTER
|
||||
.splitToList(types).stream()
|
||||
.map(s -> resolve(LdmlConverter.OutputType.class, s))
|
||||
.collect(toImmutableList()));
|
||||
.map(s -> resolve(OutputType.class, s))
|
||||
.collect(toImmutableList());
|
||||
if (!typeList.isEmpty()) {
|
||||
config.setOutputTypes(typeList);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
|
@ -73,59 +89,118 @@ public final class ConvertIcuDataTask extends Task {
|
|||
}
|
||||
|
||||
public static final class LocaleIds extends Task {
|
||||
private ImmutableList<IcuLocaleDir> dirs = ImmutableList.of();
|
||||
private ImmutableList<String> ids = ImmutableList.of();
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setDirs(String directories) {
|
||||
this.dirs = LIST_SPLITTER.splitToList(directories).stream()
|
||||
.map(s -> resolve(IcuLocaleDir.class, s))
|
||||
.collect(toImmutableList());
|
||||
}
|
||||
private ImmutableSet<String> ids;
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void addText(String localeIds) {
|
||||
// Need to filter out '//' style end-of-line comments first (replace with \n to avoid
|
||||
// inadvertently joining two elements).
|
||||
localeIds = localeIds.replaceAll("//[^\n]*\n", "\n");
|
||||
this.ids = ImmutableList.copyOf(LIST_SPLITTER.splitToList(localeIds));
|
||||
this.ids = parseLocaleIds(localeIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() throws BuildException {
|
||||
checkBuild(!ids.isEmpty(), "Locale IDs must be specified");
|
||||
}
|
||||
}
|
||||
|
||||
public static final class ForcedAlias extends Task {
|
||||
public static final class DirectoryFilter extends Task {
|
||||
private IcuLocaleDir dir;
|
||||
private String source;
|
||||
private String target;
|
||||
private ImmutableSet<String> ids;
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setDir(String directory) {
|
||||
this.dir = resolve(IcuLocaleDir.class, directory);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void addText(String localeIds) {
|
||||
this.ids = parseLocaleIds(localeIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() throws BuildException {
|
||||
checkBuild(dir != null, "Directory must be specified");
|
||||
checkBuild(!ids.isEmpty(), "Locale IDs must be specified");
|
||||
}
|
||||
}
|
||||
|
||||
public static final class ForcedAlias extends Task {
|
||||
private Optional<IcuLocaleDir> dir = Optional.empty();
|
||||
private String source = "";
|
||||
private String target = "";
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setDir(String directory) {
|
||||
this.dir = resolveOpt(IcuLocaleDir.class, directory);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setSource(String source) {
|
||||
this.source = checkNotNull(source);
|
||||
this.source = whitespace().trimFrom(source);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void setTarget(String target) {
|
||||
this.target = checkNotNull(target);
|
||||
this.target = whitespace().trimFrom(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() throws BuildException {
|
||||
checkBuild(!source.isEmpty(), "Alias source must not be empty");
|
||||
checkBuild(!target.isEmpty(), "Alias target must not be empty");
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void addConfiguredLocaleIds(LocaleIds localeIds) {
|
||||
localeIds.dirs.forEach(d -> config.addLocaleIds(d, localeIds.ids));
|
||||
checkBuild(this.localeIdSpec == null, "Cannot add more that one <localeIds> element");
|
||||
this.localeIdSpec = localeIds.ids;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void addConfiguredDirectoryFilter(DirectoryFilter filter) {
|
||||
perDirectoryIds.putAll(filter.dir, filter.ids);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void addConfiguredForcedAlias(ForcedAlias alias) {
|
||||
config.addForcedAlias(alias.dir, alias.source, alias.target);
|
||||
if (alias.dir.isPresent()) {
|
||||
config.addForcedAlias(alias.dir.get(), alias.source, alias.target);
|
||||
} else {
|
||||
for (IcuLocaleDir dir : IcuLocaleDir.values()) {
|
||||
config.addForcedAlias(dir, alias.source, alias.target);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
public void execute() throws BuildException {
|
||||
LdmlConverter.convert(config.build());
|
||||
CldrDataSupplier src =
|
||||
CldrDataSupplier.forCldrFilesIn(cldrPath).withDraftStatusAtLeast(minimumDraftStatus);
|
||||
SupplementalData supplementalData = SupplementalData.create(src);
|
||||
ImmutableSet<String> defaultTargetIds =
|
||||
LocaleIdResolver.expandTargetIds(this.localeIdSpec, supplementalData);
|
||||
for (IcuLocaleDir dir : IcuLocaleDir.values()) {
|
||||
config.addLocaleIds(dir, perDirectoryIds.asMap().getOrDefault(dir, defaultTargetIds));
|
||||
}
|
||||
config.setMinimumDraftStatus(minimumDraftStatus);
|
||||
LdmlConverter.convert(src, supplementalData, config.build());
|
||||
}
|
||||
|
||||
private static void checkBuild(boolean condition, String message) {
|
||||
if (!condition) {
|
||||
throw new BuildException(message);
|
||||
}
|
||||
}
|
||||
|
||||
private static ImmutableSet<String> parseLocaleIds(String localeIds) {
|
||||
// Need to filter out '//' style end-of-line comments first (replace with \n to avoid
|
||||
// inadvertently joining two elements).
|
||||
localeIds = localeIds.replaceAll("//[^\n]*\n", "\n");
|
||||
return ImmutableSet.copyOf(LIST_SPLITTER.splitToList(localeIds));
|
||||
}
|
||||
|
||||
private static <T extends Enum<T>> Optional<T> resolveOpt(Class<T> enumClass, String name) {
|
||||
return !name.isEmpty() ? Optional.of(resolve(enumClass, name)) : Optional.empty();
|
||||
}
|
||||
|
||||
private static <T extends Enum<T>> T resolve(Class<T> enumClass, String name) {
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package org.unicode.icu.tool.cldrtoicu.ant;
|
||||
|
||||
import static com.google.common.base.Preconditions.checkNotNull;
|
||||
import static com.google.common.collect.ImmutableSet.toImmutableSet;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.unicode.icu.tool.cldrtoicu.SupplementalData;
|
||||
|
||||
import com.google.common.base.Ascii;
|
||||
import com.google.common.collect.ImmutableListMultimap;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Multimaps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
/** Helper class to resolve ID configuration. */
|
||||
final class LocaleIdResolver {
|
||||
/** Returns the expanded set of target locale IDs based on the given ID specifications. */
|
||||
public static ImmutableSet<String> expandTargetIds(
|
||||
Set<String> idSpecs, SupplementalData supplementalData) {
|
||||
return new LocaleIdResolver(supplementalData).resolve(idSpecs);
|
||||
}
|
||||
|
||||
private final SupplementalData supplementalData;
|
||||
|
||||
private LocaleIdResolver(SupplementalData supplementalData) {
|
||||
this.supplementalData = checkNotNull(supplementalData);
|
||||
}
|
||||
|
||||
// ---- Code below here is to expand the incoming set of locale IDs ----
|
||||
|
||||
private static final Pattern WILDCARD_LOCALE = Pattern.compile("[a-z]{2}(?:_[A-Z][a-z]{3})?");
|
||||
|
||||
private ImmutableSet<String> resolve(Set<String> idSpecs) {
|
||||
ImmutableSet<String> allAvailableIds = supplementalData.getAvailableLocaleIds();
|
||||
// Get the minimized wildcard set, converting things like "en_Latn" --> "en".
|
||||
ImmutableSet<String> wildcardIds = idSpecs.stream()
|
||||
.filter(supplementalData.getAvailableLocaleIds()::contains)
|
||||
.filter(id -> WILDCARD_LOCALE.matcher(id).matches())
|
||||
.map(this::removeDefaultScript)
|
||||
.collect(toImmutableSet());
|
||||
|
||||
// Get the set of IDs which are implied by the wildcard IDs.
|
||||
Set<String> targetIds = new TreeSet<>();
|
||||
allAvailableIds.forEach(id -> addWildcardMatches(id, wildcardIds::contains, targetIds));
|
||||
|
||||
// Get the IDs which don't need to be in the config (because they are implied).
|
||||
Set<String> redundant = Sets.intersection(idSpecs, targetIds);
|
||||
if (!redundant.isEmpty()) {
|
||||
System.err.println("Configuration lists redundant locale IDs");
|
||||
System.err.println("The following IDs should be removed from the configuration:");
|
||||
Iterables.partition(redundant, 16)
|
||||
.forEach(ids -> System.err.println(String.join(", ", ids)));
|
||||
|
||||
// Note that the minimal configuration includes aliases.
|
||||
Set<String> minimalConfigIds = new TreeSet<>(Sets.difference(idSpecs, targetIds));
|
||||
minimalConfigIds.remove("root");
|
||||
ImmutableListMultimap<Character, String> idsByFirstChar =
|
||||
Multimaps.index(minimalConfigIds, s -> s.charAt(0));
|
||||
|
||||
System.err.println("Canonical ID list is:");
|
||||
for (char c: idsByFirstChar.keySet()) {
|
||||
System.err.println(" // " + Ascii.toUpperCase(c));
|
||||
Iterables.partition(idsByFirstChar.get(c), 16)
|
||||
.forEach(ids -> System.err.println(" " + String.join(", ", ids)));
|
||||
System.err.println();
|
||||
}
|
||||
System.err.flush();
|
||||
throw new IllegalStateException("Non-canonical configuration");
|
||||
}
|
||||
|
||||
// We return the set of IDs made up of:
|
||||
// 1: The original IDs specified by the configuration (and any parent IDs).
|
||||
// 2: IDs expanded from wildcard IDs (e.g. "en_Latn_GB" & "en_Latn" from "en").
|
||||
// (this is what's already in targetIds).
|
||||
// 3: The "root" ID.
|
||||
idSpecs.forEach(id -> addRecursively(id, targetIds));
|
||||
return ImmutableSet.<String>builder().add("root").addAll(targetIds).build();
|
||||
}
|
||||
|
||||
// E.g. "xx_Fooo" --> "xx" --> "xx_Baar_YY" ==> "xx_Fooo"
|
||||
// E.g. "xx_Fooo" --> "xx" --> "xx_Fooo_YY" ==> "xx"
|
||||
private String removeDefaultScript(String id) {
|
||||
if (id.contains("_")) {
|
||||
String lang = id.substring(0, 2);
|
||||
String maxId = supplementalData.maximize(lang)
|
||||
.orElseThrow(
|
||||
() -> new IllegalStateException("cannot maximize language subtag: " + lang));
|
||||
if (maxId.startsWith(id)) {
|
||||
return lang;
|
||||
}
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
private void addRecursively(String id, Set<String> dst) {
|
||||
while (!id.equals("root") && dst.add(id)) {
|
||||
id = supplementalData.getParent(id);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean addWildcardMatches(
|
||||
String id, Predicate<String> isWildcard, Set<String> dst) {
|
||||
if (id.equals("root")) {
|
||||
return false;
|
||||
}
|
||||
String parentId = supplementalData.getParent(id);
|
||||
if (isWildcard.test(parentId) || addWildcardMatches(parentId, isWildcard, dst)) {
|
||||
dst.add(id);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -15,6 +15,7 @@ import java.io.PrintWriter;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
|
||||
|
@ -30,6 +31,7 @@ import org.unicode.icu.tool.cldrtoicu.RbPath;
|
|||
import org.unicode.icu.tool.cldrtoicu.RbValue;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
||||
/**
|
||||
|
@ -78,7 +80,9 @@ public final class TransformsMapper {
|
|||
* @param ruleFileOutputDir the directory into which transliteration rule files will be written.
|
||||
* @return the IcuData instance to be written to a file.
|
||||
*/
|
||||
public static IcuData process(CldrDataSupplier src, Path ruleFileOutputDir) {
|
||||
public static IcuData process(
|
||||
CldrDataSupplier src, Path ruleFileOutputDir, List<String> header) {
|
||||
|
||||
Function<Path, PrintWriter> fileWriterFn = p -> {
|
||||
Path file = ruleFileOutputDir.resolve(p);
|
||||
try {
|
||||
|
@ -88,12 +92,14 @@ public final class TransformsMapper {
|
|||
}
|
||||
};
|
||||
CldrData cldrData = src.getDataForType(SUPPLEMENTAL);
|
||||
return process(cldrData, fileWriterFn);
|
||||
return process(cldrData, fileWriterFn, header);
|
||||
}
|
||||
|
||||
@VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
|
||||
static IcuData process(CldrData cldrData, Function<Path, PrintWriter> fileWriterFn) {
|
||||
RuleVisitor visitor = new RuleVisitor(fileWriterFn);
|
||||
static IcuData process(
|
||||
CldrData cldrData, Function<Path, PrintWriter> fileWriterFn, List<String> header) {
|
||||
|
||||
RuleVisitor visitor = new RuleVisitor(fileWriterFn, header);
|
||||
cldrData.accept(DTD, visitor);
|
||||
addSpecialCaseValues(visitor.icuData);
|
||||
return visitor.icuData;
|
||||
|
@ -102,9 +108,11 @@ public final class TransformsMapper {
|
|||
private static class RuleVisitor implements ValueVisitor {
|
||||
private final IcuData icuData = new IcuData("root", false);
|
||||
private final Function<Path, PrintWriter> outFn;
|
||||
private final ImmutableList<String> header;
|
||||
|
||||
RuleVisitor(Function<Path, PrintWriter> outFn) {
|
||||
RuleVisitor(Function<Path, PrintWriter> outFn, List<String> header) {
|
||||
this.outFn = checkNotNull(outFn);
|
||||
this.header = ImmutableList.copyOf(header);
|
||||
icuData.setFileComment("File: root.txt");
|
||||
}
|
||||
|
||||
|
@ -124,8 +132,8 @@ public final class TransformsMapper {
|
|||
|
||||
private void writeDataFile(String filename, CldrValue value) {
|
||||
try (PrintWriter out = outFn.apply(Paths.get(filename))) {
|
||||
out.println("\uFEFF# © 2016 and later: Unicode, Inc. and others.");
|
||||
out.println("# License & terms of use: http://www.unicode.org/copyright.html#License");
|
||||
out.print("\uFEFF");
|
||||
header.forEach(s -> out.println("# " + s));
|
||||
out.println("#");
|
||||
out.println("# File: " + filename);
|
||||
out.println("# Generated from CLDR");
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
© 2016 and later: Unicode, Inc. and others.
|
||||
License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
|
|
|
@ -5,8 +5,6 @@ package org.unicode.icu.tool.cldrtoicu;
|
|||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static com.google.common.truth.Truth.assertWithMessage;
|
||||
import static com.google.common.truth.Truth8.assertThat;
|
||||
import static java.util.Arrays.asList;
|
||||
import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
|
||||
import static org.unicode.cldr.api.CldrValue.parseValue;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
@ -25,6 +23,7 @@ import org.unicode.cldr.tool.LikelySubtags;
|
|||
import org.unicode.cldr.util.LanguageTagCanonicalizer;
|
||||
import org.unicode.cldr.util.LocaleIDParser;
|
||||
import org.unicode.cldr.util.SupplementalDataInfo;
|
||||
import org.unicode.icu.tool.cldrtoicu.testing.FakeDataSupplier;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
@ -41,8 +40,7 @@ public class SupplementalDataTest {
|
|||
@BeforeClass
|
||||
public static void loadRegressionData() {
|
||||
Path cldrRoot = Paths.get(System.getProperty("CLDR_DIR"));
|
||||
regressionData = SupplementalData
|
||||
.create(CldrDataSupplier.forCldrFilesIn(cldrRoot).getDataForType(SUPPLEMENTAL));
|
||||
regressionData = SupplementalData.create(CldrDataSupplier.forCldrFilesIn(cldrRoot));
|
||||
SupplementalDataInfo sdi =
|
||||
SupplementalDataInfo.getInstance(cldrRoot.resolve("common/supplemental").toString());
|
||||
likelySubtags = new LikelySubtags(sdi);
|
||||
|
@ -348,6 +346,6 @@ public class SupplementalDataTest {
|
|||
}
|
||||
|
||||
private static SupplementalData fakeSupplementalData(CldrValue... values) {
|
||||
return SupplementalData.create(CldrDataSupplier.forValues(asList(values)));
|
||||
return SupplementalData.create(new FakeDataSupplier().addSupplementalData(values));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.util.Arrays;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
@ -38,10 +37,14 @@ import com.google.common.collect.ImmutableList;
|
|||
@RunWith(JUnit4.class)
|
||||
public class TransformsMapperTest {
|
||||
|
||||
private static final ImmutableList<String> FILE_HEADER = ImmutableList.of(
|
||||
"\uFEFF# © 2016 and later: Unicode, Inc. and others.",
|
||||
"# License & terms of use: http://www.unicode.org/copyright.html#License",
|
||||
"#");
|
||||
private static final ImmutableList<String> HEADER_LINES = ImmutableList.of(
|
||||
"First header line",
|
||||
"Second header line");
|
||||
|
||||
private static final String FILE_HEADER =
|
||||
"\uFEFF# First header line\n"
|
||||
+ "# Second header line\n"
|
||||
+ "#\n";
|
||||
|
||||
private static final int DEFAULT_PATH_COUNT = 7;
|
||||
|
||||
|
@ -64,7 +67,7 @@ public class TransformsMapperTest {
|
|||
@Test
|
||||
public void testDefaultContent() {
|
||||
Map<String, String> fileMap = new TreeMap<>();
|
||||
IcuData icuData = TransformsMapper.process(cldrData(), wrap(fileMap));
|
||||
IcuData icuData = TransformsMapper.process(cldrData(), wrap(fileMap), HEADER_LINES);
|
||||
|
||||
assertThat(fileMap).isEmpty();
|
||||
|
||||
|
@ -88,7 +91,7 @@ public class TransformsMapperTest {
|
|||
cldrData(oneWay("foo", "bar", FORWARD, null, INTERNAL, "first second third", ++idx));
|
||||
|
||||
Map<String, String> fileMap = new TreeMap<>();
|
||||
IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap));
|
||||
IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap), HEADER_LINES);
|
||||
|
||||
assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5);
|
||||
assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/first/alias", "foo-bar");
|
||||
|
@ -118,7 +121,7 @@ public class TransformsMapperTest {
|
|||
cldrData(oneWay("foo", "bar", BACKWARD, "variant", EXTERNAL, "one two three", ++idx));
|
||||
|
||||
Map<String, String> fileMap = new TreeMap<>();
|
||||
IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap));
|
||||
IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap), HEADER_LINES);
|
||||
|
||||
assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 5);
|
||||
assertThat(icuData).hasValuesFor("RuleBasedTransliteratorIDs/one/alias", "bar-foo/variant");
|
||||
|
@ -149,7 +152,7 @@ public class TransformsMapperTest {
|
|||
both("foo", "bar", null, INTERNAL, "forward-alias", "backward-alias", ++idx));
|
||||
|
||||
Map<String, String> fileMap = new TreeMap<>();
|
||||
IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap));
|
||||
IcuData icuData = TransformsMapper.process(cldrData, wrap(fileMap), HEADER_LINES);
|
||||
|
||||
// 3 for each direction.
|
||||
assertThat(icuData).getPaths().hasSize(DEFAULT_PATH_COUNT + 6);
|
||||
|
@ -188,9 +191,7 @@ public class TransformsMapperTest {
|
|||
private String headerPlusLines(String... lines) {
|
||||
// For now the files always contain a blank line at the end (to match legacy behaviour) but
|
||||
// this can, and probably should be changed.
|
||||
return Stream
|
||||
.concat(FILE_HEADER.stream(), Arrays.stream(lines))
|
||||
.collect(joining("\n", "", "\n\n"));
|
||||
return Arrays.stream(lines).collect(joining("\n", FILE_HEADER, "\n\n"));
|
||||
}
|
||||
|
||||
private static CldrData cldrData(CldrValue... values) {
|
||||
|
|
Loading…
Add table
Reference in a new issue