ICU-22922 Integrate CLDR 46.1 beta1 to ICU main, part 2: source data/test generated or copied from CLDR

This commit is contained in:
Peter Edberg 2024-12-08 22:46:09 -08:00 committed by Peter Edberg
parent c3929d1595
commit e2581fd1ac
48 changed files with 2511 additions and 2450 deletions

File diff suppressed because it is too large Load diff

View file

@ -25,5 +25,5 @@ root{
lstm{
Thai{"Thai_graphclust_model4_heavy.res"}
Mymr{"Burmese_graphclust_model5_heavy.res"}
}
}
}

View file

@ -42,7 +42,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<!ATTLIST version number CDATA #REQUIRED >
<!--@MATCH:regex/\$Revision.*\$-->
<!--@METADATA-->
<!ATTLIST version cldrVersion CDATA #FIXED "46" >
<!ATTLIST version cldrVersion CDATA #FIXED "46.1" >
<!--@MATCH:any-->
<!--@VALUE-->
<!ATTLIST version draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >

View file

@ -283,8 +283,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GGGGyMEEEEd",
"GGGGyMd",
"GGGGyMMMEEEEd",
"GGGGyMMMd",
"GyMMdd",
"GyMMdd",
}
@ -761,8 +761,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GGGGGyMMdd",
"GGGGGyMd",
}
@ -1002,8 +1002,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"yMEEEEd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMdd",
"yMMdd",
}
@ -1326,8 +1326,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMdd",
"GyMMdd",
}
@ -1398,8 +1398,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMdd",
"GyMMdd",
}
@ -1466,10 +1466,10 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
{
"GyMd",
"GyMMMd",
"y=jpanyear",
}
"GGGGGyMd",
@ -2005,8 +2005,8 @@ ja{
"Hmmssz",
"Hmmss",
"Hmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMdd",
"GyMMdd",
}

View file

@ -856,8 +856,8 @@ ko{
"ahmsz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMd",
"GyMd",
}
@ -1085,8 +1085,8 @@ ko{
"ahmsz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMMMMEEEEd",
"yMMMMd",
"yMd",
"yyMd",
}

View file

@ -24,8 +24,8 @@ ko_CN{
"HHmmssz",
"HHmmss",
"HHmm",
"yMEEEEd",
"yMd",
"yMMMMEEEEd",
"yMMMMd",
"yMd",
"yyMd",
}

View file

@ -770,9 +770,9 @@ yue{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -981,9 +981,9 @@ yue{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
availableFormats{

View file

@ -206,9 +206,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -766,9 +766,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMMd",
"GyMMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -972,9 +972,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
appendItems{
@ -1287,9 +1287,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
eras{
@ -1364,9 +1364,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -1446,9 +1446,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMMdd",
}
availableFormats{
@ -1756,9 +1756,9 @@ yue_Hans{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -2,4 +2,33 @@
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
yue_Hant_CN{
calendar{
gregorian{
DateTimePatterns{
"HH:mm:ss [zzzz]",
"HH:mm:ss [z]",
"HH:mm:ss",
"HH:mm",
"y年M月d日 EEEE",
"y年M月d日",
"y年M月d日",
"y/M/d",
"{1} {0}",
"{1} {0}",
"{1} {0}",
"{1} {0}",
"{1} {0}",
}
DateTimeSkeletons{
"HHmmsszzzz",
"HHmmssz",
"HHmmss",
"HHmm",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
}
}
}

View file

@ -677,8 +677,8 @@ zh{
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMd",
"GyMd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
eras{
@ -744,8 +744,8 @@ zh{
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMd",
"GyMd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
eras{
@ -821,9 +821,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -1022,9 +1022,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
appendItems{
@ -1417,9 +1417,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMMEEEEd",
"GyMMd",
"GyMMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -1542,9 +1542,9 @@ zh{
"HHmmssz",
"HHmmss",
"HHmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMMdd",
}
availableFormats{

View file

@ -24,9 +24,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{
@ -107,9 +107,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GGGGGyyMd",
}
availableFormats{
@ -173,9 +173,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yyMd",
}
availableFormats{
@ -240,9 +240,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
}
@ -267,9 +267,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{
@ -298,9 +298,9 @@ zh_Hans_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -77,9 +77,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GGGGGyyMd",
}
availableFormats{
@ -149,9 +149,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yyMd",
}
availableFormats{
@ -224,9 +224,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
}
@ -251,9 +251,9 @@ zh_Hans_MO{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -24,9 +24,9 @@ zh_Hans_MY{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
}

View file

@ -24,9 +24,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
intervalFormats{
@ -102,9 +102,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GGGGGyyMMdd",
}
availableFormats{
@ -171,9 +171,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yyMMdd",
}
availableFormats{
@ -244,9 +244,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
}
@ -271,9 +271,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{
@ -302,9 +302,9 @@ zh_Hans_SG{
"ahmmssz",
"ahmmss",
"ahmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyyMd",
}
availableFormats{

View file

@ -807,9 +807,9 @@ zh_Hant{
"Bhmmssz",
"Bhmmss",
"Bhmm",
"GyMEEEEd",
"GyMd",
"GyMd",
"GyMMMEEEEd",
"GyMMMd",
"GyMMMd",
"GyMd",
}
availableFormats{
@ -967,9 +967,9 @@ zh_Hant{
"Bhmmssz",
"Bhmmss",
"Bhmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
availableFormats{

View file

@ -315,9 +315,9 @@ zh_Hant_HK{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
availableFormats{

View file

@ -24,9 +24,9 @@ zh_Hant_MY{
"ahmmssz",
"ahmmss",
"ahmm",
"yMEEEEd",
"yMd",
"yMd",
"yMMMEEEEd",
"yMMMd",
"yMMMd",
"yMd",
}
}

File diff suppressed because it is too large Load diff

View file

@ -13324,6 +13324,11 @@ supplementalData:table(nofallback){
scripts{"Latn"}
}
}
ecy{
secondary{
scripts{"Cprt"}
}
}
ee{
primary{
scripts{"Latn"}
@ -13983,6 +13988,11 @@ supplementalData:table(nofallback){
scripts{"Latn"}
}
}
gmy{
secondary{
scripts{"Linb"}
}
}
gn{
primary{
scripts{"Latn"}
@ -14030,11 +14040,7 @@ supplementalData:table(nofallback){
}
grc{
secondary{
scripts{
"Cprt",
"Grek",
"Linb",
}
scripts{"Grek"}
}
}
grt{
@ -14572,6 +14578,15 @@ supplementalData:table(nofallback){
scripts{"Latn"}
}
}
kaw{
secondary{
scripts{
"Bali",
"Java",
"Kawi",
}
}
}
kbd{
primary{
scripts{"Cyrl"}
@ -28522,6 +28537,9 @@ supplementalData:table(nofallback){
ar{
populationShareF:int{46980000}
}
ecy{
populationShareF:int{44100000}
}
el{
officialStatus{"official"}
populationShareF:int{49950000}
@ -29490,6 +29508,12 @@ supplementalData:table(nofallback){
fr{
populationShareF:int{48900000}
}
gmy{
populationShareF:int{0}
}
grc{
populationShareF:int{0}
}
mk{
populationShareF:int{48160000}
}
@ -29721,6 +29745,9 @@ supplementalData:table(nofallback){
literacyShareF:int{49100000}
populationShareF:int{48180000}
}
ban_Bali{
populationShareF:int{0}
}
bbc{
populationShareF:int{47920000}
}
@ -29749,6 +29776,9 @@ supplementalData:table(nofallback){
literacyShareF:int{49100000}
populationShareF:int{49340000}
}
kaw{
populationShareF:int{0}
}
kge{
populationShareF:int{47320000}
}

View file

@ -18,7 +18,7 @@
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).
:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
:: [[:Arabic:][:Block=Arabic:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
:: NFKD (NFC);
$disambig = \u0331 ;
$disambig2 = \u0330 ;

View file

@ -6,7 +6,7 @@
# Generated from CLDR
#
::[[:script=bengali:][।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3-৺ৎ]];
::[[:Script=Bengali:][।-॥\u0981-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ\u09BC-\u09C4ে-ৈো-\u09CDৗড়-ঢ়য়-\u09E3-৺ৎ]];
::NFD;
::Bengali-InterIndic;
::InterIndic-Latin;

View file

@ -10,7 +10,7 @@
# Should add variants for Russian-English, Russian-German
# Those can use this as a base, and then remap cases
# like a $hat to ya or ja.
# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:]] ;
### WARNING, \u0308 must be added to the generated filters, in both directions ###
# MINIMAL FILTER
# Cyrillic-Latin
@ -275,13 +275,13 @@ $ignore = [[:Mark:]''] * ;
| K ← Q ;
| u ← w ;
| U ← W ;
| KS ← X } $ignore [:UppercaseLetter:] ;
| KS ← [:UppercaseLetter:] $ignore { X ;
| KS ← X } $ignore [:Uppercase_Letter:] ;
| KS ← [:Uppercase_Letter:] $ignore { X ;
| Ks ← X ;
| ks ← x ;
:: NFC (NFD) ;
# note: a global filter is more efficient, but MUST include all source chars!!
# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:] ‧]);
# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:] ‧]);
# MINIMAL FILTER: Latin-Cyrillic
:: ( [ḫḪhH‧ˌ\u0308A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ;

View file

@ -7,7 +7,7 @@
#
# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:Nonspacing_Mark:]] ;
# MINIMAL FILTER GENERATED FOR: Greek-Latin
:: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
:: NFD (NFC) ;
@ -20,9 +20,9 @@
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]];
$glower = [[:greek:] & [:Ll:]];
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$lower = [[:Latin:][:Greek:] & [:Ll:]];
$glower = [[:Greek:] & [:Ll:]];
$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once interation is fixed
@ -228,8 +228,8 @@ $ignore = [[:Mark:]''] * ;
| B ← W } $vowel ;
| U ← V ;
| U ← W ;
$rough } $ignore [:UppercaseLetter:] → H ;
$ignore [:UppercaseLetter:] { $rough → H ;
$rough } $ignore [:Uppercase_Letter:] → H ;
$ignore [:Uppercase_Letter:] { $rough → H ;
$rough ← H ;
$rough ↔ h ;
# Completeness for Greek
@ -251,8 +251,8 @@ $rough ↔ h ;
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:Nonspacing_Mark:]]) ;
# ([\u0000-\u007F · [:Latin:] [:Nonspacing_Mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;

View file

@ -14,8 +14,8 @@
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
::NFD (NFC) ;
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]] ;
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$lower = [[:Latin:][:Greek:] & [:Ll:]] ;
$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
$accent = [[:Mn:][:Me:]] ;
$macron = \u0304 ;
$ddot = \u0308 ;

View file

@ -15,7 +15,7 @@
# Do this before ::Han-Spacedhan() to catch Han after space in original text,
# and to apply before all other rules.
$startOfHanMarker = \uFDD1;
[:^script=Han:] { ([:script=Han:]) → $startOfHanMarker $1;
[:^Script=Han:] { ([:Script=Han:]) → $startOfHanMarker $1;
# Need Spacedhan so the name transliterations get spaced properly
::Han-Spacedhan();
# Convert special name readings that depend on next character

View file

@ -8,9 +8,9 @@
# Only intended for internal use
# Make sure Han are normalized, including characters that contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:Ideographic:]-[:sc=Han:]
# Where XXX is the resolved [:Ideographic:][:sc=Han:]. It needs updating with each Unicode release!
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:Ideographic:][:sc=Han:]] nfkc;
:: fullwidth-halfwidth;
。 → '.';
。→ '.';
@ -31,7 +31,7 @@
々→ '⓶';
〜→ '~';
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
$initialPunct = [:Ps:][:Pi:];
$initialPunct = [[:Ps:][:Pi:]];
# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;

View file

@ -7,7 +7,7 @@
#
# note: a global filter is more efficient, but MUST include all source chars
:: [[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]];
:: [[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]];
:: NFKC (NFC);
# Hiragana-Katakana
# This is largely a one-to-one mapping, but it has a
@ -181,6 +181,6 @@ $xo = [
お ← $xo {ー};
:: NFC (NFKC) ;
# note: a global filter is more efficient, but MUST include all source chars!!
:: ([[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]);
:: ([[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]]);
# eof

View file

@ -31,6 +31,6 @@ $digit = [1-5];
$1 &NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit);
$1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
$1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit);
&NumericPinyin-Pinyin($1) ← [:letter:] {($digit)};
&NumericPinyin-Pinyin($1) ← [:Letter:] {($digit)};
::NFC (NFD);

View file

@ -6,9 +6,9 @@
# Generated from CLDR
#
::[[:script=Latin:][:M:]-];
::[[:Script=Latin:][:M:]-];
::NFD;
::Lower;
::Latin-ConjoiningJamo;
::[[:script=Latin:][:M:]] NFC;
::[[:Script=Latin:][:M:]] NFC;

View file

@ -7,7 +7,7 @@
#
# note: a global filter is more efficient, but MUST include all source chars
#:: [\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
#:: [\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:Nonspacing_Mark:]] ;
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
### WARNING -- must add width filter, both here and below!!! ###
:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」\u3099-\u309Aァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
@ -378,11 +378,11 @@ x → | ks ;
# Final cleanup
'~' → ; # delete stray tildes between letters
[:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
# [ʾ[:Nonspacing Mark:]-[\u3099-゜]] → ; # delete any non-spacing marks that we didn't use
# [ʾ[:Nonspacing_Mark:]-[\u3099-゜]] → ; # delete any non-spacing marks that we didn't use
:: NFC (NFD) ;
:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
# note: a global filter is more efficient, but MUST include all source chars!!
#:: ([\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]);
#:: ([\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:Nonspacing_Mark:]]);
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
:: ( [[\ -~¢-£¥-¦¬\u0304₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ\u3099-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
# eof

View file

@ -26,8 +26,8 @@
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
#\uE000 → o\u0323 ;
# ← o\u0323 ;
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
$notAbove = [^\p{ccc=0}\p{ccc=Above}] ;
$notBelow = [^\p{ccc=0}\p{ccc=Below}] ;
# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't

View file

@ -6,7 +6,7 @@
# Generated from CLDR
#
::[[:thai:] ก-\u0E3Aเ-๛];
::[[:Thai:] ก-\u0E3Aเ-๛];
::NFD;
::Thai-ThaiSemi;
::Any-BreakInternal;

View file

@ -9,7 +9,7 @@
# This reverses the Thai LogicalOrderException vowels, and does (part of) spaces
# The rules that convert space into semicolon are in another file;
# since they have to come BEFORE the break iterator
$thai = [[:thai:] ก-\u0E3Aเ-๛] ;
$thai = [[:Thai:] ก-\u0E3Aเ-๛] ;
# First convert the semicolon back
' ' ← $thai { '; ' } $thai;
# Remove any other spaces between thai letters

View file

@ -8,6 +8,6 @@
# The rules that convert space into semicolon are in this file;
# since they have to come BEFORE the break iterator.
$thai = [[:thai:] ก-\u0E3Aเ-๛] ;
$thai = [[:Thai:] ก-\u0E3Aเ-๛] ;
$thai { ' ' } $thai → '; ' ;

View file

@ -20,7 +20,7 @@
#
# MINIMAL FILTER: Ethiopic-Morse Code
#
:: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"$=\-[:Ethiopic:]] ;
:: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"\$=\-[:Ethiopic:]] ;
([:Lo:])([:Zs:]+)([:Lo:]) → | $1$2$3 ; # ⁄⁂⁄ is assumed to be a sufficiently weird enough sequence that won't naturally appear in any normal content
#
########################################################################

View file

@ -22,7 +22,7 @@
#
# MINIMAL FILTER: Arabic-Latin
#
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ;
:: [[:Arabic:][:Block=Arabic:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ;
:: NFKD (NFC) ;
#
#

View file

@ -8,9 +8,9 @@
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# Make any string of letters after a cased letter be lower, with rules for i
[:cased:] [:case-ignorable:]* { İ → i;
[:cased:] [:case-ignorable:]* { I → ı;
[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
[:Cased:] [:Case_Ignorable:]* { İ → i;
[:Cased:] [:Case_Ignorable:]* { I → ı;
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
# Otherwise all lowercase go to upper (titlecase stay as is)
i→İ ;
([:Lowercase:]) → &Any-Upper($1) ;

View file

@ -18,5 +18,5 @@ $UE = [Ü {U \u0308}];
$AE → AE;
$OE → OE;
$UE → UE;
::Any-ASCII;
::Latin-ASCII;

View file

@ -18,7 +18,7 @@
# In our rules, we also convert Arabic punctuation characters to Latin.
# These appears to be used in Maldivian text, for example in the Universal
# Declaration of Human Rights.
::[[:block=thaana:][،؛؟٪٫٬]\uFDF2] ;
::[[:Block=Thaana:][،؛؟٪٫٬]\uFDF2] ;
::NFD;
$wordBoundary = [^[:L:][:M:][:N:]] ;
$vowel = [\u07A6-\u07AF] ;

View file

@ -12,8 +12,8 @@
# and C is not followed by a sequence consisting of zero or more case-ignorable characters and then a cased letter.
# 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
# With translit rules, easiest is to handle the negative condition first, mapping in that case to the regular sigma.
Σ } [:case-ignorable:]* [:cased:] → σ;
[:cased:] [:case-ignorable:]* { Σ → ς;
Σ } [:Case_Ignorable:]* [:Cased:] → σ;
[:Cased:] [:Case_Ignorable:]* { Σ → ς;
::Any-Lower;
::NFC();

View file

@ -10,9 +10,9 @@
# Remove \0301 following Greek, with possible intervening 0308 marks.
# [[:Greek:] & [:Ll:]] [\u0308]? { \u0301 → ;
# Make any string of letters after a cased letter be lower, with rules for sigma
[:cased:] [:case-ignorable:]* { Σ } [:case-ignorable:]* [:cased:] → σ;
[:cased:] [:case-ignorable:]* { Σ → ς;
[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
[:Cased:] [:Case_Ignorable:]* { Σ } [:Case_Ignorable:]* [:Cased:] → σ;
[:Cased:] [:Case_Ignorable:]* { Σ → ς;
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
# Otherwise all lowercase go to upper (titlecase stay as is)
([:Lowercase:]) → &Any-Title($1) ;
::NFC();

View file

@ -19,7 +19,7 @@
#
# MINIMAL FILTER: Persian-Latin
#
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویي\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩پچژگی]] ;
:: [[:Arabic:][:Block=Arabic:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویي\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩پچژگی]] ;
:: NFKD (NFC) ;
#
#

View file

@ -259,6 +259,6 @@ y → | i;
z → ዝ;
#
#
[:nonspacing mark:] → ;
[:Nonspacing_Mark:] → ;
::NFC(NFD);

View file

@ -261,6 +261,6 @@ z → ツ;
\- → ;
#
#
[:nonspacing mark:] → ;
[:Nonspacing_Mark:] → ;
::NFC(NFD);

View file

@ -8,13 +8,13 @@
# Make any string of letters after a cased letter be lower
::NFD();
[:cased:] [:case-ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
[:cased:] [:case-ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
[:cased:] [:case-ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
[:cased:] [:case-ignorable:]* {I \u0300 → i \u0307 \u0300;
[:cased:] [:case-ignorable:]* {I \u0301 → i \u0307 \u0301;
[:cased:] [:case-ignorable:]* {I \u0303 → i \u0307 \u0303;
[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
[:Cased:] [:Case_Ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
[:Cased:] [:Case_Ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
[:Cased:] [:Case_Ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
[:Cased:] [:Case_Ignorable:]* {I \u0300 → i \u0307 \u0300;
[:Cased:] [:Case_Ignorable:]* {I \u0301 → i \u0307 \u0301;
[:Cased:] [:Case_Ignorable:]* {I \u0303 → i \u0307 \u0303;
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
# Otherwise all lowercase go to upper (titlecase stay as is)
[:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
([:Lowercase:]) → &Any-Upper($1) ;

View file

@ -8,9 +8,9 @@
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# Make any string of letters after a cased letter be lower, with rules for i
[:cased:] [:case-ignorable:]* { İ → i;
[:cased:] [:case-ignorable:]* { I → ı;
[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
[:Cased:] [:Case_Ignorable:]* { İ → i;
[:Cased:] [:Case_Ignorable:]* { I → ı;
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
# Otherwise all lowercase go to upper (titlecase stay as is)
i→İ ;
([:Lowercase:]) → &Any-Upper($1) ;

View file

@ -318,7 +318,7 @@ class LocaleFallbackData {
t.put("gon", "Deva");
t.put("got", "Goth");
t.put("gra", "Deva");
t.put("grc", "Cprt");
t.put("grc", "Grek");
t.put("grt", "Beng");
t.put("gru", "Ethi");
t.put("gu", "Gujr");
@ -409,7 +409,7 @@ class LocaleFallbackData {
t.put("ka", "Geor");
t.put("kaa", "Cyrl");
t.put("kap", "Cyrl");
t.put("kaw", "Kawi");
t.put("kaw", "Bali");
t.put("kbd", "Cyrl");
t.put("kbg", "Tibt");
t.put("kbu", "Arab");

View file

@ -151,6 +151,7 @@ mass ; ton ; kilogram ; 907.18474 * x ; 907184.7
mass ; tonne ; kilogram ; 1,000 * x ; 1000000.0
mass ; earth-mass ; kilogram ; 5,972,200,000,000,000,000,000,000 * x ; 5.9722E27
mass ; solar-mass ; kilogram ; 1,988,470,000,000,000,000,000,000,000,000 * x ; 1.98847E33
night-duration ; night ; night ; 1 * x ; 1,000.00
portion ; permillion ; portion ; 0.000001 * x ; 0.001
portion ; permyriad ; portion ; 0.0001 * x ; 0.1
portion ; permille ; portion ; 0.001 * x ; 1.0
@ -187,6 +188,7 @@ speed ; kilometer-per-hour ; meter-per-second ; 2.5/9 * x ; 277.7778
speed ; mile-per-hour ; meter-per-second ; 0.44704 * x ; 447.04
speed ; knot ; meter-per-second ; 4.63/9 * x ; 514.4444
speed ; meter-per-second ; meter-per-second ; 1 * x ; 1,000.00
speed ; light-speed ; meter-per-second ; 299,792,458 * x ; 2.997925E11
substance-amount ; item ; item ; 1 * x ; 1,000.00
substance-amount ; mole ; item ; 602,214,076,000,000,000,000,000 * x ; 6.022141E26
temperature ; rankine ; kelvin ; 5/9 * x ; 555.5556