From 72099ee64cef53b326dc9941f567f4d57733f788 Mon Sep 17 00:00:00 2001 From: DraganBesevic Date: Tue, 3 Oct 2023 17:51:05 -0700 Subject: [PATCH] ICU-22325 CLDR 44 beta2 integration to ICU part two, source files generated or copied from CLDR --- icu4c/source/data/locales/ar_001.txt | 4 - icu4c/source/data/locales/fil.txt | 18 +- icu4c/source/data/translit/Cyrl_Latn.txt | 19 +- icu4c/source/data/zone/en.txt | 21 -- icu4c/source/data/zone/root.txt | 14 +- .../cldr/localeIdentifiers/_readme.txt | 11 + .../cldr/localeIdentifiers/likelySubtags.txt | 20 +- .../localeCanonicalization.txt | 1 - .../localeIdentifiers/localeDisplayName.txt | 335 ++++++++++++++++++ .../data/cldr/localeIdentifiers/_readme.txt | 11 + .../cldr/localeIdentifiers/likelySubtags.txt | 20 +- .../localeCanonicalization.txt | 1 - .../localeIdentifiers/localeDisplayName.txt | 335 ++++++++++++++++++ 13 files changed, 741 insertions(+), 69 deletions(-) create mode 100644 icu4c/source/test/testdata/cldr/localeIdentifiers/_readme.txt create mode 100644 icu4c/source/test/testdata/cldr/localeIdentifiers/localeDisplayName.txt create mode 100644 icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/_readme.txt create mode 100644 icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeDisplayName.txt diff --git a/icu4c/source/data/locales/ar_001.txt b/icu4c/source/data/locales/ar_001.txt index 4d59c44619d..5a1bf2a4c33 100644 --- a/icu4c/source/data/locales/ar_001.txt +++ b/icu4c/source/data/locales/ar_001.txt @@ -2,8 +2,4 @@ // License & terms of use: http://www.unicode.org/copyright.html // Generated using tools/cldr/cldr-to-icu/build-icu-data.xml ar_001{ - NumberElements{ - default{"arab"} - default_latn{"latn"} - } } diff --git a/icu4c/source/data/locales/fil.txt b/icu4c/source/data/locales/fil.txt index b9283742747..5e958854c6c 100644 --- a/icu4c/source/data/locales/fil.txt +++ b/icu4c/source/data/locales/fil.txt @@ -505,7 +505,7 @@ fil{ midnight{"hatinggabi"} morning1{"nang umaga"} morning2{"madaling-araw"} - night1{"gabi"} + night1{"ng gabi"} noon{"tanghaling-tapat"} } narrow{ @@ -514,15 +514,6 @@ fil{ midnight{"hatinggabi"} morning1{"umaga"} morning2{"madaling-araw"} - night1{"gabi"} - noon{"tanghaling-tapat"} - } - wide{ - afternoon1{"tanghali"} - evening1{"ng gabi"} - midnight{"hatinggabi"} - morning1{"nang umaga"} - morning2{"madaling-araw"} night1{"ng gabi"} noon{"tanghaling-tapat"} } @@ -535,6 +526,13 @@ fil{ morning2{"madaling-araw"} night1{"gabi"} } + wide{ + afternoon1{"hapon"} + evening1{"gabi"} + morning1{"umaga"} + morning2{"madaling-araw"} + night1{"gabi"} + } } } eras{ diff --git a/icu4c/source/data/translit/Cyrl_Latn.txt b/icu4c/source/data/translit/Cyrl_Latn.txt index 7d4ea45ec1c..420191f7ef2 100644 --- a/icu4c/source/data/translit/Cyrl_Latn.txt +++ b/icu4c/source/data/translit/Cyrl_Latn.txt @@ -13,6 +13,7 @@ # :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ; ### WARNING, \u0308 must be added to the generated filters, in both directions ### # MINIMAL FILTER +# Cyrillic-Latin :: [ҺһңҢҰұҮүөӨҚқ\u0308Ă-ăĔ-ĕĞ-ğĬ-ĭŎ-ŏŬ-ŭ\u0306Ѐ-џҐ-ҕҘ-ҙӁ-ӂӐ-ӟӢ-ӧӬ-ӵӸ-ӹḜ-ḝẮ-ặᾰᾸῐῘῠῨ] ; :: NFD (NFC) ; $modprime = ʹ; @@ -69,13 +70,10 @@ $beforeLower = $ignoreForCase * $lower ; Ћ ↔ C $acute ; # CYRILLIC CAPITAL LETTER TSHE џ ↔ d $hat ; # CYRILLIC SMALL LETTER DZHE Џ ↔ D $hat ; # CYRILLIC CAPITAL LETTER DZHE -х ↔ kh ; # CYRILLIC SMALL LETTER HA -Х } $beforeLower ↔ Kh ; -Х ↔ KH; # CYRILLIC CAPITAL LETTER HA -# Insert separator between K and characters that result in h -# And delete going the other way -[Kk] { } [Һһ] → ‧ ; -← ‧ ; +# https://www.eki.ee/wgrs/v2_2/rom2_az.htm +# but modified to not collide with Cyrillic HA +һ ↔ h $breveBelow ; # CYRILLIC SMALL LETTER SHHA +Һ ↔ H $breveBelow; # CYRILLIC CAPITAL LETTER SHHA # Normal order а ↔ a ; # CYRILLIC SMALL LETTER A А ↔ A ; # CYRILLIC CAPITAL LETTER A @@ -170,9 +168,8 @@ $beforeLower = $ignoreForCase * $lower ; # Ѹ ↔ XXX ; # CYRILLIC CAPITAL LETTER UK ф ↔ f ; # CYRILLIC SMALL LETTER EF Ф ↔ F ; # CYRILLIC CAPITAL LETTER EF -#https://www.eki.ee/wgrs/v2_2/rom2_az.htm -һ ↔ h ; # CYRILLIC SMALL LETTER SHHA -Һ ↔ H ; # CYRILLIC CAPITAL LETTER SHHA +х ↔ h ; # CYRILLIC SMALL LETTER HA +Х ↔ H; # CYRILLIC CAPITAL LETTER HA # ҳ ↔ XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER # Ҳ ↔ XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER # ѡ ↔ XXX ; # CYRILLIC SMALL LETTER OMEGA @@ -286,5 +283,5 @@ $ignore = [[:Mark:]''] * ; # note: a global filter is more efficient, but MUST include all source chars!! # :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:] ‧]); # MINIMAL FILTER: Latin-Cyrillic -:: ( [hH‧ˌ\u0308A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ; +:: ( [ḫḪhH‧ˌ\u0308A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ; diff --git a/icu4c/source/data/zone/en.txt b/icu4c/source/data/zone/en.txt index d45e2178364..fe3812ae198 100644 --- a/icu4c/source/data/zone/en.txt +++ b/icu4c/source/data/zone/en.txt @@ -3,21 +3,6 @@ // Generated using tools/cldr/cldr-to-icu/build-icu-data.xml en{ zoneStrings{ - "Africa:Sao_Tome"{ - ec{"São Tomé"} - } - "America:Asuncion"{ - ec{"Asunción"} - } - "America:Curacao"{ - ec{"Curaçao"} - } - "America:St_Barthelemy"{ - ec{"St. Barthélemy"} - } - "Antarctica:DumontDUrville"{ - ec{"Dumont d’Urville"} - } "Asia:Qostanay"{ ec{"Kostanay"} } @@ -33,18 +18,12 @@ en{ "Europe:Dublin"{ ld{"Irish Standard Time"} } - "Europe:Kiev"{ - ec{"Kyiv"} - } "Europe:London"{ ld{"British Summer Time"} } "Europe:Uzhgorod"{ ec{"Uzhhorod"} } - "Indian:Reunion"{ - ec{"Réunion"} - } "Pacific:Honolulu"{ sd{"HDT"} sg{"HST"} diff --git a/icu4c/source/data/zone/root.txt b/icu4c/source/data/zone/root.txt index 656042e5aff..e0ee45de9b9 100644 --- a/icu4c/source/data/zone/root.txt +++ b/icu4c/source/data/zone/root.txt @@ -7,6 +7,12 @@ root{ "Africa:Asmera"{ ec{"Asmara"} } + "Africa:Sao_Tome"{ + ec{"São Tomé"} + } + "America:Asuncion"{ + ec{"Asunción"} + } "America:Bahia_Banderas"{ ec{"Bahía de Banderas"} } @@ -19,6 +25,9 @@ root{ "America:Coral_Harbour"{ ec{"Atikokan"} } + "America:Curacao"{ + ec{"Curaçao"} + } "America:Godthab"{ ec{"Nuuk"} } @@ -65,7 +74,7 @@ root{ ec{"Ittoqqortoormiit"} } "America:St_Barthelemy"{ - ec{"St. Barthelemy"} + ec{"St. Barthélemy"} } "America:St_Johns"{ ec{"St. John’s"} @@ -115,6 +124,9 @@ root{ "Europe:Kiev"{ ec{"Kyiv"} } + "Indian:Reunion"{ + ec{"Réunion"} + } "Pacific:Ponape"{ ec{"Pohnpei"} } diff --git a/icu4c/source/test/testdata/cldr/localeIdentifiers/_readme.txt b/icu4c/source/test/testdata/cldr/localeIdentifiers/_readme.txt new file mode 100644 index 00000000000..fbbef7a583a --- /dev/null +++ b/icu4c/source/test/testdata/cldr/localeIdentifiers/_readme.txt @@ -0,0 +1,11 @@ +# Test data for grapheme locale validity, canonicalization, and name generation +# Copyright © 1991-2020 Unicode, Inc. +# For terms of use, see http://www.unicode.org/copyright.html +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# The format may vary between folders or files. +# Format: +# Generally has the following structure, but the values depend on the file. +# @= +# ; + diff --git a/icu4c/source/test/testdata/cldr/localeIdentifiers/likelySubtags.txt b/icu4c/source/test/testdata/cldr/localeIdentifiers/likelySubtags.txt index 23bceab6ce6..1f9e3557fff 100644 --- a/icu4c/source/test/testdata/cldr/localeIdentifiers/likelySubtags.txt +++ b/icu4c/source/test/testdata/cldr/localeIdentifiers/likelySubtags.txt @@ -1259,7 +1259,7 @@ und-Latn-001 ; en-Latn-001 ; en-001 ; und-Latn-150 ; en-Latn-150 ; en-150 ; und-Latn-419 ; es-Latn-419 ; es-419 ; und-Latn-AD ; ca-Latn-AD ; ca-AD ; -und-Latn-AE ; ar-Latn-AE ; ; +und-Latn-AE ; en-Latn-AE ; en-AE ; und-Latn-AG ; en-Latn-AG ; en-AG ; und-Latn-AI ; en-Latn-AI ; en-AI ; und-Latn-AL ; sq-Latn-AL ; sq ; @@ -1288,7 +1288,7 @@ und-Latn-BS ; en-Latn-BS ; en-BS ; und-Latn-BW ; en-Latn-BW ; en-BW ; und-Latn-BZ ; en-Latn-BZ ; en-BZ ; und-Latn-CA ; en-Latn-CA ; en-CA ; -und-Latn-CC ; ms-Latn-CC ; ; +und-Latn-CC ; en-Latn-CC ; en-CC ; und-Latn-CD ; sw-Latn-CD ; sw-CD ; und-Latn-CF ; fr-Latn-CF ; fr-CF ; und-Latn-CG ; fr-Latn-CG ; fr-CG ; @@ -1316,7 +1316,7 @@ und-Latn-DZ ; fr-Latn-DZ ; fr-DZ ; und-Latn-EA ; es-Latn-EA ; es-EA ; und-Latn-EC ; es-Latn-EC ; es-EC ; und-Latn-EE ; et-Latn-EE ; et ; -und-Latn-ER ; ti-Latn-ER ; ; +und-Latn-ER ; en-Latn-ER ; en-ER ; und-Latn-ES ; es-Latn-ES ; es ; und-Latn-ET ; en-Latn-ET ; en-ET ; und-Latn-FI ; fi-Latn-FI ; fi ; @@ -1341,7 +1341,7 @@ und-Latn-GT ; es-Latn-GT ; es-GT ; und-Latn-GU ; en-Latn-GU ; en-GU ; und-Latn-GW ; pt-Latn-GW ; pt-GW ; und-Latn-GY ; en-Latn-GY ; en-GY ; -und-Latn-HK ; zh-Latn-HK ; ; +und-Latn-HK ; en-Latn-HK ; en-HK ; und-Latn-HN ; es-Latn-HN ; es-HN ; und-Latn-HR ; hr-Latn-HR ; hr ; und-Latn-HT ; ht-Latn-HT ; ht ; @@ -1349,9 +1349,9 @@ und-Latn-HU ; hu-Latn-HU ; hu ; und-Latn-IC ; es-Latn-IC ; es-IC ; und-Latn-ID ; id-Latn-ID ; id ; und-Latn-IE ; en-Latn-IE ; en-IE ; -und-Latn-IL ; he-Latn-IL ; he-Latn ; +und-Latn-IL ; en-Latn-IL ; en-IL ; und-Latn-IM ; en-Latn-IM ; en-IM ; -und-Latn-IN ; hi-Latn-IN ; hi-Latn ; +und-Latn-IN ; en-Latn-IN ; en-IN ; und-Latn-IO ; en-Latn-IO ; en-IO ; und-Latn-IS ; is-Latn-IS ; is ; und-Latn-IT ; it-Latn-IT ; it ; @@ -1385,7 +1385,7 @@ und-Latn-MR ; fr-Latn-MR ; fr-MR ; und-Latn-MS ; en-Latn-MS ; en-MS ; und-Latn-MT ; mt-Latn-MT ; mt ; und-Latn-MU ; mfe-Latn-MU ; mfe ; -und-Latn-MV ; dv-Latn-MV ; dv-Latn ; +und-Latn-MV ; en-Latn-MV ; en-MV ; und-Latn-MW ; en-Latn-MW ; en-MW ; und-Latn-MX ; es-Latn-MX ; es-MX ; und-Latn-MY ; ms-Latn-MY ; ms ; @@ -1406,7 +1406,7 @@ und-Latn-PE ; es-Latn-PE ; es-PE ; und-Latn-PF ; fr-Latn-PF ; fr-PF ; und-Latn-PG ; tpi-Latn-PG ; tpi ; und-Latn-PH ; fil-Latn-PH ; fil ; -und-Latn-PK ; ur-Latn-PK ; ur-Latn ; +und-Latn-PK ; en-Latn-PK ; en-PK ; und-Latn-PL ; pl-Latn-PL ; pl ; und-Latn-PM ; fr-Latn-PM ; fr-PM ; und-Latn-PN ; en-Latn-PN ; en-PN ; @@ -1420,7 +1420,7 @@ und-Latn-RS ; sr-Latn-RS ; sr-Latn ; und-Latn-RW ; rw-Latn-RW ; rw ; und-Latn-SB ; en-Latn-SB ; en-SB ; und-Latn-SC ; fr-Latn-SC ; fr-SC ; -und-Latn-SD ; ar-Latn-SD ; ; +und-Latn-SD ; en-Latn-SD ; en-SD ; und-Latn-SE ; sv-Latn-SE ; sv ; und-Latn-SG ; en-Latn-SG ; en-SG ; und-Latn-SH ; en-Latn-SH ; en-SH ; @@ -1432,7 +1432,7 @@ und-Latn-SM ; it-Latn-SM ; it-SM ; und-Latn-SN ; fr-Latn-SN ; fr-SN ; und-Latn-SO ; so-Latn-SO ; so ; und-Latn-SR ; nl-Latn-SR ; nl-SR ; -und-Latn-SS ; ar-Latn-SS ; ; +und-Latn-SS ; en-Latn-SS ; en-SS ; und-Latn-ST ; pt-Latn-ST ; pt-ST ; und-Latn-SV ; es-Latn-SV ; es-SV ; und-Latn-SX ; en-Latn-SX ; en-SX ; diff --git a/icu4c/source/test/testdata/cldr/localeIdentifiers/localeCanonicalization.txt b/icu4c/source/test/testdata/cldr/localeIdentifiers/localeCanonicalization.txt index f2f3249fb5d..bce4a8a4ff7 100644 --- a/icu4c/source/test/testdata/cldr/localeIdentifiers/localeCanonicalization.txt +++ b/icu4c/source/test/testdata/cldr/localeIdentifiers/localeCanonicalization.txt @@ -1,4 +1,3 @@ -# File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt # Test data for locale identifier canonicalization # Copyright © 1991-2023 Unicode, Inc. # For terms of use, see http://www.unicode.org/copyright.html diff --git a/icu4c/source/test/testdata/cldr/localeIdentifiers/localeDisplayName.txt b/icu4c/source/test/testdata/cldr/localeIdentifiers/localeDisplayName.txt new file mode 100644 index 00000000000..fd9986c0097 --- /dev/null +++ b/icu4c/source/test/testdata/cldr/localeIdentifiers/localeDisplayName.txt @@ -0,0 +1,335 @@ +# Test data for locale display name generation +# Copyright © 1991-2023 Unicode, Inc. +# For terms of use, see http://www.unicode.org/copyright.html +# SPDX-License-Identifier: Unicode-DFS-2016 +# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# Format: +# @locale= +# @compound= +# ; + +@locale=en +@compound=false + + +# Simple cases: Language, script, region, variants + +es; Spanish +es-419; Spanish (Latin America) +es-Cyrl-MX; Spanish (Cyrillic, Mexico) +hi-Latn; Hindi (Latin) + +#Note that the order of the variants is alphabetized before generating names + +en-Latn-GB-scouse-fonipa; English (Latin, United Kingdom, IPA Phonetics, Scouse) + +# Add extensions, and verify their order + +en-u-nu-thai-ca-islamic-civil; English (Hijri Calendar [tabular, civil epoch], Thai Digits) +hi-u-nu-latn-t-en-h0-hybrid; Hindi (Hybrid: English, Western Digits) +en-u-nu-deva-t-de; English (Transform: German, Devanagari Digits) + +# Test ordering of extensions (include well-formed but invalid cases) + +fr-z-zz-zzz-v-vv-vvv-u-uu-uuu-t-ru-Cyrl-s-ss-sss-a-aa-aaa-x-u-x; French (Transform: Russian [Cyrillic], uu: uuu, a: aa-aaa, s: ss-sss, v: vv-vvv, x: u-x, z: zz-zzz) + +# Comprehensive list (mostly comprehensive: currencies, subdivisions, timezones have abbreviated lists) + +en-u-ca-buddhist; English (Buddhist Calendar) +en-u-ca-chinese; English (Chinese Calendar) +en-u-ca-coptic; English (Coptic Calendar) +en-u-ca-dangi; English (Dangi Calendar) +en-u-ca-ethioaa; English (Ethiopic Amete Alem Calendar) +en-u-ca-ethiopic; English (Ethiopic Calendar) +en-u-ca-gregory; English (Gregorian Calendar) +en-u-ca-hebrew; English (Hebrew Calendar) +en-u-ca-indian; English (Indian National Calendar) +en-u-ca-islamic; English (Hijri Calendar) +en-u-ca-islamic-civil; English (Hijri Calendar [tabular, civil epoch]) +en-u-ca-islamic-rgsa; English (Hijri Calendar [Saudi Arabia, sighting]) +en-u-ca-islamic-tbla; English (Hijri Calendar [tabular, astronomical epoch]) +en-u-ca-islamic-umalqura; English (Hijri Calendar [Umm al-Qura]) +en-u-ca-iso8601; English (ISO-8601 Calendar) +en-u-ca-japanese; English (Japanese Calendar) +en-u-ca-persian; English (Persian Calendar) +en-u-ca-roc; English (Minguo Calendar) +en-u-cf-account; English (Accounting Currency Format) +en-u-cf-standard; English (Standard Currency Format) +en-u-co-big5han; English (Traditional Chinese Sort Order - Big5) +en-u-co-compat; English (Previous Sort Order, for compatibility) +en-u-co-dict; English (Dictionary Sort Order) +en-u-co-ducet; English (Default Unicode Sort Order) +en-u-co-emoji; English (Emoji Sort Order) +en-u-co-eor; English (European Ordering Rules) +en-u-co-gb2312; English (Simplified Chinese Sort Order - GB2312) +en-u-co-phonebk; English (Phonebook Sort Order) +en-u-co-phonetic; English (Phonetic Sort Order) +en-u-co-pinyin; English (Pinyin Sort Order) +en-u-co-reformed; English (Reformed Sort Order) +en-u-co-search; English (General-Purpose Search) +en-u-co-searchjl; English (Search By Hangul Initial Consonant) +en-u-co-standard; English (Standard Sort Order) +en-u-co-stroke; English (Stroke Sort Order) +en-u-co-trad; English (Traditional Sort Order) +en-u-co-unihan; English (Radical-Stroke Sort Order) +en-u-co-zhuyin; English (Zhuyin Sort Order) +en-u-cu-eur; English (Currency: €) +en-u-cu-jpy; English (Currency: ¥) +en-u-cu-usd; English (Currency: $) +en-u-cu-chf; English (Currency: CHF) +en-t-d0-accents; English (To Accented Characters From ASCII Sequence) +en-t-d0-ascii; English (To ASCII) +en-t-d0-casefold; English (To Casefolded) +en-t-d0-charname; English (To Unicode Character Names) +en-t-d0-digit; English (To Digit Form Of Accent) +en-t-d0-fcc; English (To Unicode FCC) +en-t-d0-fcd; English (To Unicode FCD) +en-t-d0-fwidth; English (To Fullwidth) +en-t-d0-hex; English (To Hexadecimal Codes) +en-t-d0-hwidth; English (To Halfwidth) +en-t-d0-lower; English (To Lowercase) +en-t-d0-morse; English (To Morse Code) +en-t-d0-nfc; English (To Unicode NFC) +en-t-d0-nfd; English (To Unicode NFD) +en-t-d0-nfkc; English (To Unicode NFKC) +en-t-d0-nfkd; English (To Unicode NFKD) +en-t-d0-npinyin; English (To Pinyin With Numeric Tones) +en-t-d0-null; English (No Change) +en-t-d0-publish; English (To Publishing Characters From ASCII) +en-t-d0-remove; English (To Empty String) +en-t-d0-title; English (To Titlecase) +en-t-d0-upper; English (To Uppercase) +en-t-d0-zawgyi; English (To Zawgyi Myanmar Encoding) +en-u-dx-thai; English (Dictionary Break Exclusions: thai) +en-u-em-default; English (Use Default Presentation For Emoji Characters) +en-u-em-emoji; English (Prefer Emoji Presentation For Emoji Characters) +en-u-em-text; English (Prefer Text Presentation For Emoji Characters) +en-u-fw-fri; English (First Day of Week Is Friday) +en-u-fw-mon; English (First Day of Week Is Monday) +en-u-fw-sat; English (First Day of Week Is Saturday) +en-u-fw-sun; English (First Day of Week Is Sunday) +en-u-fw-thu; English (First Day of Week Is Thursday) +en-u-fw-tue; English (First Day of Week Is Tuesday) +en-u-fw-wed; English (First Day of Week Is Wednesday) +en-t-h0-hybrid; English +en-u-hc-h11; English (12 Hour System [0–11]) +en-u-hc-h12; English (12 Hour System [1–12]) +en-u-hc-h23; English (24 Hour System [0–23]) +en-u-hc-h24; English (24 Hour System [1–24]) +en-t-i0-handwrit; English (Handwriting Input Method) +en-t-i0-pinyin; English (Pinyin Input Method) +en-t-i0-und; English (Unspecified Input Method) +en-t-i0-wubi; English (Wubi Input Method) +en-t-k0-101key; English (101-Key Keyboard) +en-t-k0-102key; English (102-Key Keyboard) +en-t-k0-600dpi; English (600 dpi Keyboard) +en-t-k0-768dpi; English (768 dpi Keyboard) +en-t-k0-android; English (Android Keyboard) +en-t-k0-azerty; English (AZERTY-Based Keyboard) +en-t-k0-chromeos; English (ChromeOS Keyboard) +en-t-k0-colemak; English (Colemak Keyboard) +en-t-k0-dvorak; English (Dvorak Keyboard) +en-t-k0-dvorakl; English (Dvorak Left-Handed Keyboard) +en-t-k0-dvorakr; English (Dvorak Right-Handed Keyboard) +en-t-k0-el220; English (Greek 220 Keyboard) +en-t-k0-el319; English (Greek 319 Keyboard) +en-t-k0-extended; English (Keyboard With Many Extra Characters) +en-t-k0-googlevk; English (Google Virtual Keyboard) +en-t-k0-isiri; English (Persian ISIRI Keyboard) +en-t-k0-legacy; English (Legacy Keyboard) +en-t-k0-lt1205; English (Lithuanian LST 1205 Keyboard) +en-t-k0-lt1582; English (Lithuanian LST 1582 Keyboard) +en-t-k0-nutaaq; English (Inuktitut Nutaaq Keyboard) +en-t-k0-osx; English (macOS Keyboard) +en-t-k0-patta; English (Thai Pattachote Keyboard) +en-t-k0-qwerty; English (QWERTY-Based Keyboard) +en-t-k0-qwertz; English (QWERTZ-Based Keyboard) +en-t-k0-ta99; English (Tamil 99 Keyboard) +en-t-k0-und; English (Unspecified Keyboard) +en-t-k0-var; English (Keyboard Variant) +en-t-k0-viqr; English (Vietnamese VIQR Keyboard) +en-t-k0-windows; English (Windows Keyboard) +en-u-ka-noignore; English (Sort Symbols) +en-u-ka-shifted; English (Sort Ignoring Symbols) +en-u-kb-false; English (Sort Accents Normally) +en-u-kb-true; English (Sort Accents Reversed) +en-u-kc-false; English (Sort Case Insensitive) +en-u-kc-true; English (Sort Case Sensitive) +en-u-kf-false; English (Sort Normal Case Order) +en-u-kf-lower; English (Sort Lowercase First) +en-u-kf-upper; English (Sort Uppercase First) +en-u-kk-false; English (Sort Without Normalization) +en-u-kk-true; English (Sort Unicode Normalized) +en-u-kn-false; English (Sort Digits Individually) +en-u-kn-true; English (Sort Digits Numerically) +en-u-kr-arab; English (Script/Block Reordering: Arabic) +en-u-kr-digit-deva-latn; English (Script/Block Reordering: Digits, Devanagari, Latin) +en-u-kr-currency; English (Currency) +en-u-kr-digit; English (Digits) +en-u-kr-punct; English (Punctuation) +en-u-kr-space; English (Whitespace) +en-u-kr-symbol; English (Symbol) +en-u-ks-identic; English (Sort All) +en-u-ks-level1; English (Sort Base Letters Only) +en-u-ks-level2; English (Sort Accents) +en-u-ks-level3; English (Sort Accents/Case/Width) +en-u-ks-level4; English (Sort Accents/Case/Width/Kana) +en-u-kv-currency; English (Ignore Symbols affects spaces, punctuation, all symbols) +en-u-kv-punct; English (Ignore Symbols affects spaces and punctuation only) +en-u-kv-space; English (Ignore Symbols affects spaces only) +en-u-kv-symbol; English (Ignore Symbols affects spaces, punctuation, non-currency symbols) +en-u-lb-loose; English (Loose Line Break Style) +en-u-lb-normal; English (Normal Line Break Style) +en-u-lb-strict; English (Strict Line Break Style) +en-u-lw-breakall; English (Allow Line Breaks In All Words) +en-u-lw-keepall; English (Prevent Line Breaks In All Words) +en-u-lw-normal; English (Normal Line Breaks For Words) +en-u-lw-phrase; English (Prevent Line Breaks In Phrases) +en-t-m0-aethiopi; English (Encylopedia Aethiopica Transliteration) +en-t-m0-alaloc; English (US ALA-LOC Transliteration) +en-t-m0-betamets; English (Beta Maṣāḥǝft Transliteration) +en-t-m0-bgn; English (US BGN Transliteration) +en-t-m0-buckwalt; English (Buckwalter Arabic Transliteration) +en-t-m0-c11; English (Hex transform using C11 syntax) +en-t-m0-css; English (Hex transform using CSS syntax) +en-t-m0-din; English (German DIN Transliteration) +en-t-m0-es3842; English (Ethiopian Standards Agency ES 3842:2014 Ethiopic-Latin Transliteration) +en-t-m0-ewts; English (Extended Wylie Transliteration Scheme) +en-t-m0-gost; English (CIS GOST Transliteration) +en-t-m0-gurage; English (Gurage Legacy to Modern Transliteration) +en-t-m0-gutgarts; English (Yaros Gutgarts Ethiopic-Cyrillic Transliteration) +en-t-m0-iast; English (International Alphabet of Sanskrit Transliteration) +en-t-m0-iesjes; English (IES/JES Amharic Transliteration) +en-t-m0-iso; English (ISO Transliteration) +en-t-m0-java; English (Hex transform using Java syntax) +en-t-m0-lambdin; English (Thomas Oden Lambdin Ethiopic-Latin Transliteration) +en-t-m0-mcst; English (Korean MCST Transliteration) +en-t-m0-mns; English (Mongolian National Standard Transliteration) +en-t-m0-percent; English (Hex transform using percent syntax) +en-t-m0-perl; English (Hex transform using Perl syntax) +en-t-m0-plain; English (Hex transform with no surrounding syntax) +en-t-m0-prprname; English (Personal name transliteration variant) +en-t-m0-satts; English (Standard Arabic Technical Transliteration) +en-t-m0-sera; English (System for Ethiopic Representation in ASCII) +en-t-m0-tekieali; English (Tekie Alibekit Blin-Latin Transliteration) +en-t-m0-ungegn; English (UN GEGN Transliteration) +en-t-m0-unicode; English (Hex transform using Unicode syntax) +en-t-m0-xaleget; English (Eritrean Ministry of Education Blin-Latin Transliteration) +en-t-m0-xml; English (Hex transform using XML syntax) +en-t-m0-xml10; English (Hex transform using XML decimal syntax) +en-u-ms-metric; English (Metric System) +en-u-ms-uksystem; English (Imperial Measurement System) +en-u-ms-ussystem; English (US Measurement System) +en-u-mu-celsius; English (Celsius) +en-u-mu-fahrenhe; English (Fahrenheit) +en-u-mu-kelvin; English (Kelvin) +en-u-nu-adlm; English (Adlam Digits) +en-u-nu-ahom; English (Ahom Digits) +en-u-nu-arab; English (Arabic-Indic Digits) +en-u-nu-arabext; English (Extended Arabic-Indic Digits) +en-u-nu-armn; English (Armenian Numerals) +en-u-nu-armnlow; English (Armenian Lowercase Numerals) +en-u-nu-bali; English (Balinese Digits) +en-u-nu-beng; English (Bangla Digits) +en-u-nu-bhks; English (Bhaiksuki Digits) +en-u-nu-brah; English (Brahmi Digits) +en-u-nu-cakm; English (Chakma Digits) +en-u-nu-cham; English (Cham Digits) +en-u-nu-cyrl; English (Cyrillic Numerals) +en-u-nu-deva; English (Devanagari Digits) +en-u-nu-diak; English (Dives Akuru Digits) +en-u-nu-ethi; English (Ethiopic Numerals) +en-u-nu-finance; English (Financial Numerals) +en-u-nu-fullwide; English (Full-Width Digits) +en-u-nu-geor; English (Georgian Numerals) +en-u-nu-gong; English (Gunjala Gondi digits) +en-u-nu-gonm; English (Masaram Gondi digits) +en-u-nu-grek; English (Greek Numerals) +en-u-nu-greklow; English (Greek Lowercase Numerals) +en-u-nu-gujr; English (Gujarati Digits) +en-u-nu-guru; English (Gurmukhi Digits) +en-u-nu-hanidays; English (Chinese Calendar Day-of-Month Numerals) +en-u-nu-hanidec; English (Chinese Decimal Numerals) +en-u-nu-hans; English (Simplified Chinese Numerals) +en-u-nu-hansfin; English (Simplified Chinese Financial Numerals) +en-u-nu-hant; English (Traditional Chinese Numerals) +en-u-nu-hantfin; English (Traditional Chinese Financial Numerals) +en-u-nu-hebr; English (Hebrew Numerals) +en-u-nu-hmng; English (Pahawh Hmong Digits) +en-u-nu-hmnp; English (Nyiakeng Puachue Hmong Digits) +en-u-nu-java; English (Javanese Digits) +en-u-nu-jpan; English (Japanese Numerals) +en-u-nu-jpanfin; English (Japanese Financial Numerals) +en-u-nu-jpanyear; English (Japanese Calendar Gannen Year Numerals) +en-u-nu-kali; English (Kayah Li Digits) +en-u-nu-kawi; English (Kawi Digits) +en-u-nu-khmr; English (Khmer Digits) +en-u-nu-knda; English (Kannada Digits) +en-u-nu-lana; English (Tai Tham Hora Digits) +en-u-nu-lanatham; English (Tai Tham Tham Digits) +en-u-nu-laoo; English (Lao Digits) +en-u-nu-latn; English (Western Digits) +en-u-nu-lepc; English (Lepcha Digits) +en-u-nu-limb; English (Limbu Digits) +en-u-nu-mathbold; English (Mathematical Bold Digits) +en-u-nu-mathdbl; English (Mathematical Double-Struck Digits) +en-u-nu-mathmono; English (Mathematical Monospace Digits) +en-u-nu-mathsanb; English (Mathematical Sans-Serif Bold Digits) +en-u-nu-mathsans; English (Mathematical Sans-Serif Digits) +en-u-nu-mlym; English (Malayalam Digits) +en-u-nu-modi; English (Modi Digits) +en-u-nu-mong; English (Mongolian Digits) +en-u-nu-mroo; English (Mro Digits) +en-u-nu-mtei; English (Meetei Mayek Digits) +en-u-nu-mymr; English (Myanmar Digits) +en-u-nu-mymrshan; English (Myanmar Shan Digits) +en-u-nu-mymrtlng; English (Myanmar Tai Laing Digits) +en-u-nu-nagm; English (Nag Mundari Digits) +en-u-nu-native; English (Native Digits) +en-u-nu-newa; English (Newa Digits) +en-u-nu-nkoo; English (N’Ko Digits) +en-u-nu-olck; English (Ol Chiki Digits) +en-u-nu-orya; English (Odia Digits) +en-u-nu-osma; English (Osmanya Digits) +en-u-nu-rohg; English (Hanifi Rohingya digits) +en-u-nu-roman; English (Roman Numerals) +en-u-nu-romanlow; English (Roman Lowercase Numerals) +en-u-nu-saur; English (Saurashtra Digits) +en-u-nu-segment; English (Segmented Digits) +en-u-nu-shrd; English (Sharada Digits) +en-u-nu-sind; English (Khudawadi Digits) +en-u-nu-sinh; English (Sinhala Lith Digits) +en-u-nu-sora; English (Sora Sompeng Digits) +en-u-nu-sund; English (Sundanese Digits) +en-u-nu-takr; English (Takri Digits) +en-u-nu-talu; English (New Tai Lue Digits) +en-u-nu-taml; English (Traditional Tamil Numerals) +en-u-nu-tamldec; English (Tamil Digits) +en-u-nu-telu; English (Telugu Digits) +en-u-nu-thai; English (Thai Digits) +en-u-nu-tibt; English (Tibetan Digits) +en-u-nu-tirh; English (Tirhuta Digits) +en-u-nu-tnsa; English (Tangsa Digits) +en-u-nu-traditio; English (Traditional Numerals) +en-u-nu-vaii; English (Vai Digits) +en-u-nu-wara; English (Warang Citi Digits) +en-u-nu-wcho; English (Wancho Digits) +en-u-rg-gbsct; English (Region For Supplemental Data: Scotland) +en-u-rg-gbeng; English (Region For Supplemental Data: England) +en-t-s0-accents; English (From Accented Characters To ASCII Sequence) +en-t-s0-ascii; English (From ASCII) +en-t-s0-hex; English (From Hexadecimal Codes) +en-t-s0-morse; English (From Morse Code) +en-t-s0-npinyin; English (From Pinyin With Numeric Tones) +en-t-s0-publish; English (From Publishing Punctuation To ASCII) +en-t-s0-zawgyi; English (From Zawgyi Myanmar Encoding) +en-u-sd-gbsct; English (Region Subdivision: Scotland) +en-u-sd-gbwls; English (Region Subdivision: Wales) +en-u-ss-none; English (Sentence Breaks Without Abbreviation Handling) +en-u-ss-standard; English (Suppress Sentence Breaks After Standard Abbreviations) +en-t-t0-und; English (Unspecified Machine Translation) +en-u-tz-uslax; English (Time Zone: Los Angeles Time) +en-u-tz-gblon; English (Time Zone: United Kingdom Time) +en-u-tz-chzrh; English (Time Zone: Switzerland Time) +en-u-va-posix; English (POSIX Compliant Locale) +en-t-x0-foobar2; English (Private-Use Transform: foobar2) diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/_readme.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/_readme.txt new file mode 100644 index 00000000000..fbbef7a583a --- /dev/null +++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/_readme.txt @@ -0,0 +1,11 @@ +# Test data for grapheme locale validity, canonicalization, and name generation +# Copyright © 1991-2020 Unicode, Inc. +# For terms of use, see http://www.unicode.org/copyright.html +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# The format may vary between folders or files. +# Format: +# Generally has the following structure, but the values depend on the file. +# @= +# ; + diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/likelySubtags.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/likelySubtags.txt index 23bceab6ce6..1f9e3557fff 100644 --- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/likelySubtags.txt +++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/likelySubtags.txt @@ -1259,7 +1259,7 @@ und-Latn-001 ; en-Latn-001 ; en-001 ; und-Latn-150 ; en-Latn-150 ; en-150 ; und-Latn-419 ; es-Latn-419 ; es-419 ; und-Latn-AD ; ca-Latn-AD ; ca-AD ; -und-Latn-AE ; ar-Latn-AE ; ; +und-Latn-AE ; en-Latn-AE ; en-AE ; und-Latn-AG ; en-Latn-AG ; en-AG ; und-Latn-AI ; en-Latn-AI ; en-AI ; und-Latn-AL ; sq-Latn-AL ; sq ; @@ -1288,7 +1288,7 @@ und-Latn-BS ; en-Latn-BS ; en-BS ; und-Latn-BW ; en-Latn-BW ; en-BW ; und-Latn-BZ ; en-Latn-BZ ; en-BZ ; und-Latn-CA ; en-Latn-CA ; en-CA ; -und-Latn-CC ; ms-Latn-CC ; ; +und-Latn-CC ; en-Latn-CC ; en-CC ; und-Latn-CD ; sw-Latn-CD ; sw-CD ; und-Latn-CF ; fr-Latn-CF ; fr-CF ; und-Latn-CG ; fr-Latn-CG ; fr-CG ; @@ -1316,7 +1316,7 @@ und-Latn-DZ ; fr-Latn-DZ ; fr-DZ ; und-Latn-EA ; es-Latn-EA ; es-EA ; und-Latn-EC ; es-Latn-EC ; es-EC ; und-Latn-EE ; et-Latn-EE ; et ; -und-Latn-ER ; ti-Latn-ER ; ; +und-Latn-ER ; en-Latn-ER ; en-ER ; und-Latn-ES ; es-Latn-ES ; es ; und-Latn-ET ; en-Latn-ET ; en-ET ; und-Latn-FI ; fi-Latn-FI ; fi ; @@ -1341,7 +1341,7 @@ und-Latn-GT ; es-Latn-GT ; es-GT ; und-Latn-GU ; en-Latn-GU ; en-GU ; und-Latn-GW ; pt-Latn-GW ; pt-GW ; und-Latn-GY ; en-Latn-GY ; en-GY ; -und-Latn-HK ; zh-Latn-HK ; ; +und-Latn-HK ; en-Latn-HK ; en-HK ; und-Latn-HN ; es-Latn-HN ; es-HN ; und-Latn-HR ; hr-Latn-HR ; hr ; und-Latn-HT ; ht-Latn-HT ; ht ; @@ -1349,9 +1349,9 @@ und-Latn-HU ; hu-Latn-HU ; hu ; und-Latn-IC ; es-Latn-IC ; es-IC ; und-Latn-ID ; id-Latn-ID ; id ; und-Latn-IE ; en-Latn-IE ; en-IE ; -und-Latn-IL ; he-Latn-IL ; he-Latn ; +und-Latn-IL ; en-Latn-IL ; en-IL ; und-Latn-IM ; en-Latn-IM ; en-IM ; -und-Latn-IN ; hi-Latn-IN ; hi-Latn ; +und-Latn-IN ; en-Latn-IN ; en-IN ; und-Latn-IO ; en-Latn-IO ; en-IO ; und-Latn-IS ; is-Latn-IS ; is ; und-Latn-IT ; it-Latn-IT ; it ; @@ -1385,7 +1385,7 @@ und-Latn-MR ; fr-Latn-MR ; fr-MR ; und-Latn-MS ; en-Latn-MS ; en-MS ; und-Latn-MT ; mt-Latn-MT ; mt ; und-Latn-MU ; mfe-Latn-MU ; mfe ; -und-Latn-MV ; dv-Latn-MV ; dv-Latn ; +und-Latn-MV ; en-Latn-MV ; en-MV ; und-Latn-MW ; en-Latn-MW ; en-MW ; und-Latn-MX ; es-Latn-MX ; es-MX ; und-Latn-MY ; ms-Latn-MY ; ms ; @@ -1406,7 +1406,7 @@ und-Latn-PE ; es-Latn-PE ; es-PE ; und-Latn-PF ; fr-Latn-PF ; fr-PF ; und-Latn-PG ; tpi-Latn-PG ; tpi ; und-Latn-PH ; fil-Latn-PH ; fil ; -und-Latn-PK ; ur-Latn-PK ; ur-Latn ; +und-Latn-PK ; en-Latn-PK ; en-PK ; und-Latn-PL ; pl-Latn-PL ; pl ; und-Latn-PM ; fr-Latn-PM ; fr-PM ; und-Latn-PN ; en-Latn-PN ; en-PN ; @@ -1420,7 +1420,7 @@ und-Latn-RS ; sr-Latn-RS ; sr-Latn ; und-Latn-RW ; rw-Latn-RW ; rw ; und-Latn-SB ; en-Latn-SB ; en-SB ; und-Latn-SC ; fr-Latn-SC ; fr-SC ; -und-Latn-SD ; ar-Latn-SD ; ; +und-Latn-SD ; en-Latn-SD ; en-SD ; und-Latn-SE ; sv-Latn-SE ; sv ; und-Latn-SG ; en-Latn-SG ; en-SG ; und-Latn-SH ; en-Latn-SH ; en-SH ; @@ -1432,7 +1432,7 @@ und-Latn-SM ; it-Latn-SM ; it-SM ; und-Latn-SN ; fr-Latn-SN ; fr-SN ; und-Latn-SO ; so-Latn-SO ; so ; und-Latn-SR ; nl-Latn-SR ; nl-SR ; -und-Latn-SS ; ar-Latn-SS ; ; +und-Latn-SS ; en-Latn-SS ; en-SS ; und-Latn-ST ; pt-Latn-ST ; pt-ST ; und-Latn-SV ; es-Latn-SV ; es-SV ; und-Latn-SX ; en-Latn-SX ; en-SX ; diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeCanonicalization.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeCanonicalization.txt index f2f3249fb5d..bce4a8a4ff7 100644 --- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeCanonicalization.txt +++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeCanonicalization.txt @@ -1,4 +1,3 @@ -# File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt # Test data for locale identifier canonicalization # Copyright © 1991-2023 Unicode, Inc. # For terms of use, see http://www.unicode.org/copyright.html diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeDisplayName.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeDisplayName.txt new file mode 100644 index 00000000000..fd9986c0097 --- /dev/null +++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/cldr/localeIdentifiers/localeDisplayName.txt @@ -0,0 +1,335 @@ +# Test data for locale display name generation +# Copyright © 1991-2023 Unicode, Inc. +# For terms of use, see http://www.unicode.org/copyright.html +# SPDX-License-Identifier: Unicode-DFS-2016 +# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# Format: +# @locale= +# @compound= +# ; + +@locale=en +@compound=false + + +# Simple cases: Language, script, region, variants + +es; Spanish +es-419; Spanish (Latin America) +es-Cyrl-MX; Spanish (Cyrillic, Mexico) +hi-Latn; Hindi (Latin) + +#Note that the order of the variants is alphabetized before generating names + +en-Latn-GB-scouse-fonipa; English (Latin, United Kingdom, IPA Phonetics, Scouse) + +# Add extensions, and verify their order + +en-u-nu-thai-ca-islamic-civil; English (Hijri Calendar [tabular, civil epoch], Thai Digits) +hi-u-nu-latn-t-en-h0-hybrid; Hindi (Hybrid: English, Western Digits) +en-u-nu-deva-t-de; English (Transform: German, Devanagari Digits) + +# Test ordering of extensions (include well-formed but invalid cases) + +fr-z-zz-zzz-v-vv-vvv-u-uu-uuu-t-ru-Cyrl-s-ss-sss-a-aa-aaa-x-u-x; French (Transform: Russian [Cyrillic], uu: uuu, a: aa-aaa, s: ss-sss, v: vv-vvv, x: u-x, z: zz-zzz) + +# Comprehensive list (mostly comprehensive: currencies, subdivisions, timezones have abbreviated lists) + +en-u-ca-buddhist; English (Buddhist Calendar) +en-u-ca-chinese; English (Chinese Calendar) +en-u-ca-coptic; English (Coptic Calendar) +en-u-ca-dangi; English (Dangi Calendar) +en-u-ca-ethioaa; English (Ethiopic Amete Alem Calendar) +en-u-ca-ethiopic; English (Ethiopic Calendar) +en-u-ca-gregory; English (Gregorian Calendar) +en-u-ca-hebrew; English (Hebrew Calendar) +en-u-ca-indian; English (Indian National Calendar) +en-u-ca-islamic; English (Hijri Calendar) +en-u-ca-islamic-civil; English (Hijri Calendar [tabular, civil epoch]) +en-u-ca-islamic-rgsa; English (Hijri Calendar [Saudi Arabia, sighting]) +en-u-ca-islamic-tbla; English (Hijri Calendar [tabular, astronomical epoch]) +en-u-ca-islamic-umalqura; English (Hijri Calendar [Umm al-Qura]) +en-u-ca-iso8601; English (ISO-8601 Calendar) +en-u-ca-japanese; English (Japanese Calendar) +en-u-ca-persian; English (Persian Calendar) +en-u-ca-roc; English (Minguo Calendar) +en-u-cf-account; English (Accounting Currency Format) +en-u-cf-standard; English (Standard Currency Format) +en-u-co-big5han; English (Traditional Chinese Sort Order - Big5) +en-u-co-compat; English (Previous Sort Order, for compatibility) +en-u-co-dict; English (Dictionary Sort Order) +en-u-co-ducet; English (Default Unicode Sort Order) +en-u-co-emoji; English (Emoji Sort Order) +en-u-co-eor; English (European Ordering Rules) +en-u-co-gb2312; English (Simplified Chinese Sort Order - GB2312) +en-u-co-phonebk; English (Phonebook Sort Order) +en-u-co-phonetic; English (Phonetic Sort Order) +en-u-co-pinyin; English (Pinyin Sort Order) +en-u-co-reformed; English (Reformed Sort Order) +en-u-co-search; English (General-Purpose Search) +en-u-co-searchjl; English (Search By Hangul Initial Consonant) +en-u-co-standard; English (Standard Sort Order) +en-u-co-stroke; English (Stroke Sort Order) +en-u-co-trad; English (Traditional Sort Order) +en-u-co-unihan; English (Radical-Stroke Sort Order) +en-u-co-zhuyin; English (Zhuyin Sort Order) +en-u-cu-eur; English (Currency: €) +en-u-cu-jpy; English (Currency: ¥) +en-u-cu-usd; English (Currency: $) +en-u-cu-chf; English (Currency: CHF) +en-t-d0-accents; English (To Accented Characters From ASCII Sequence) +en-t-d0-ascii; English (To ASCII) +en-t-d0-casefold; English (To Casefolded) +en-t-d0-charname; English (To Unicode Character Names) +en-t-d0-digit; English (To Digit Form Of Accent) +en-t-d0-fcc; English (To Unicode FCC) +en-t-d0-fcd; English (To Unicode FCD) +en-t-d0-fwidth; English (To Fullwidth) +en-t-d0-hex; English (To Hexadecimal Codes) +en-t-d0-hwidth; English (To Halfwidth) +en-t-d0-lower; English (To Lowercase) +en-t-d0-morse; English (To Morse Code) +en-t-d0-nfc; English (To Unicode NFC) +en-t-d0-nfd; English (To Unicode NFD) +en-t-d0-nfkc; English (To Unicode NFKC) +en-t-d0-nfkd; English (To Unicode NFKD) +en-t-d0-npinyin; English (To Pinyin With Numeric Tones) +en-t-d0-null; English (No Change) +en-t-d0-publish; English (To Publishing Characters From ASCII) +en-t-d0-remove; English (To Empty String) +en-t-d0-title; English (To Titlecase) +en-t-d0-upper; English (To Uppercase) +en-t-d0-zawgyi; English (To Zawgyi Myanmar Encoding) +en-u-dx-thai; English (Dictionary Break Exclusions: thai) +en-u-em-default; English (Use Default Presentation For Emoji Characters) +en-u-em-emoji; English (Prefer Emoji Presentation For Emoji Characters) +en-u-em-text; English (Prefer Text Presentation For Emoji Characters) +en-u-fw-fri; English (First Day of Week Is Friday) +en-u-fw-mon; English (First Day of Week Is Monday) +en-u-fw-sat; English (First Day of Week Is Saturday) +en-u-fw-sun; English (First Day of Week Is Sunday) +en-u-fw-thu; English (First Day of Week Is Thursday) +en-u-fw-tue; English (First Day of Week Is Tuesday) +en-u-fw-wed; English (First Day of Week Is Wednesday) +en-t-h0-hybrid; English +en-u-hc-h11; English (12 Hour System [0–11]) +en-u-hc-h12; English (12 Hour System [1–12]) +en-u-hc-h23; English (24 Hour System [0–23]) +en-u-hc-h24; English (24 Hour System [1–24]) +en-t-i0-handwrit; English (Handwriting Input Method) +en-t-i0-pinyin; English (Pinyin Input Method) +en-t-i0-und; English (Unspecified Input Method) +en-t-i0-wubi; English (Wubi Input Method) +en-t-k0-101key; English (101-Key Keyboard) +en-t-k0-102key; English (102-Key Keyboard) +en-t-k0-600dpi; English (600 dpi Keyboard) +en-t-k0-768dpi; English (768 dpi Keyboard) +en-t-k0-android; English (Android Keyboard) +en-t-k0-azerty; English (AZERTY-Based Keyboard) +en-t-k0-chromeos; English (ChromeOS Keyboard) +en-t-k0-colemak; English (Colemak Keyboard) +en-t-k0-dvorak; English (Dvorak Keyboard) +en-t-k0-dvorakl; English (Dvorak Left-Handed Keyboard) +en-t-k0-dvorakr; English (Dvorak Right-Handed Keyboard) +en-t-k0-el220; English (Greek 220 Keyboard) +en-t-k0-el319; English (Greek 319 Keyboard) +en-t-k0-extended; English (Keyboard With Many Extra Characters) +en-t-k0-googlevk; English (Google Virtual Keyboard) +en-t-k0-isiri; English (Persian ISIRI Keyboard) +en-t-k0-legacy; English (Legacy Keyboard) +en-t-k0-lt1205; English (Lithuanian LST 1205 Keyboard) +en-t-k0-lt1582; English (Lithuanian LST 1582 Keyboard) +en-t-k0-nutaaq; English (Inuktitut Nutaaq Keyboard) +en-t-k0-osx; English (macOS Keyboard) +en-t-k0-patta; English (Thai Pattachote Keyboard) +en-t-k0-qwerty; English (QWERTY-Based Keyboard) +en-t-k0-qwertz; English (QWERTZ-Based Keyboard) +en-t-k0-ta99; English (Tamil 99 Keyboard) +en-t-k0-und; English (Unspecified Keyboard) +en-t-k0-var; English (Keyboard Variant) +en-t-k0-viqr; English (Vietnamese VIQR Keyboard) +en-t-k0-windows; English (Windows Keyboard) +en-u-ka-noignore; English (Sort Symbols) +en-u-ka-shifted; English (Sort Ignoring Symbols) +en-u-kb-false; English (Sort Accents Normally) +en-u-kb-true; English (Sort Accents Reversed) +en-u-kc-false; English (Sort Case Insensitive) +en-u-kc-true; English (Sort Case Sensitive) +en-u-kf-false; English (Sort Normal Case Order) +en-u-kf-lower; English (Sort Lowercase First) +en-u-kf-upper; English (Sort Uppercase First) +en-u-kk-false; English (Sort Without Normalization) +en-u-kk-true; English (Sort Unicode Normalized) +en-u-kn-false; English (Sort Digits Individually) +en-u-kn-true; English (Sort Digits Numerically) +en-u-kr-arab; English (Script/Block Reordering: Arabic) +en-u-kr-digit-deva-latn; English (Script/Block Reordering: Digits, Devanagari, Latin) +en-u-kr-currency; English (Currency) +en-u-kr-digit; English (Digits) +en-u-kr-punct; English (Punctuation) +en-u-kr-space; English (Whitespace) +en-u-kr-symbol; English (Symbol) +en-u-ks-identic; English (Sort All) +en-u-ks-level1; English (Sort Base Letters Only) +en-u-ks-level2; English (Sort Accents) +en-u-ks-level3; English (Sort Accents/Case/Width) +en-u-ks-level4; English (Sort Accents/Case/Width/Kana) +en-u-kv-currency; English (Ignore Symbols affects spaces, punctuation, all symbols) +en-u-kv-punct; English (Ignore Symbols affects spaces and punctuation only) +en-u-kv-space; English (Ignore Symbols affects spaces only) +en-u-kv-symbol; English (Ignore Symbols affects spaces, punctuation, non-currency symbols) +en-u-lb-loose; English (Loose Line Break Style) +en-u-lb-normal; English (Normal Line Break Style) +en-u-lb-strict; English (Strict Line Break Style) +en-u-lw-breakall; English (Allow Line Breaks In All Words) +en-u-lw-keepall; English (Prevent Line Breaks In All Words) +en-u-lw-normal; English (Normal Line Breaks For Words) +en-u-lw-phrase; English (Prevent Line Breaks In Phrases) +en-t-m0-aethiopi; English (Encylopedia Aethiopica Transliteration) +en-t-m0-alaloc; English (US ALA-LOC Transliteration) +en-t-m0-betamets; English (Beta Maṣāḥǝft Transliteration) +en-t-m0-bgn; English (US BGN Transliteration) +en-t-m0-buckwalt; English (Buckwalter Arabic Transliteration) +en-t-m0-c11; English (Hex transform using C11 syntax) +en-t-m0-css; English (Hex transform using CSS syntax) +en-t-m0-din; English (German DIN Transliteration) +en-t-m0-es3842; English (Ethiopian Standards Agency ES 3842:2014 Ethiopic-Latin Transliteration) +en-t-m0-ewts; English (Extended Wylie Transliteration Scheme) +en-t-m0-gost; English (CIS GOST Transliteration) +en-t-m0-gurage; English (Gurage Legacy to Modern Transliteration) +en-t-m0-gutgarts; English (Yaros Gutgarts Ethiopic-Cyrillic Transliteration) +en-t-m0-iast; English (International Alphabet of Sanskrit Transliteration) +en-t-m0-iesjes; English (IES/JES Amharic Transliteration) +en-t-m0-iso; English (ISO Transliteration) +en-t-m0-java; English (Hex transform using Java syntax) +en-t-m0-lambdin; English (Thomas Oden Lambdin Ethiopic-Latin Transliteration) +en-t-m0-mcst; English (Korean MCST Transliteration) +en-t-m0-mns; English (Mongolian National Standard Transliteration) +en-t-m0-percent; English (Hex transform using percent syntax) +en-t-m0-perl; English (Hex transform using Perl syntax) +en-t-m0-plain; English (Hex transform with no surrounding syntax) +en-t-m0-prprname; English (Personal name transliteration variant) +en-t-m0-satts; English (Standard Arabic Technical Transliteration) +en-t-m0-sera; English (System for Ethiopic Representation in ASCII) +en-t-m0-tekieali; English (Tekie Alibekit Blin-Latin Transliteration) +en-t-m0-ungegn; English (UN GEGN Transliteration) +en-t-m0-unicode; English (Hex transform using Unicode syntax) +en-t-m0-xaleget; English (Eritrean Ministry of Education Blin-Latin Transliteration) +en-t-m0-xml; English (Hex transform using XML syntax) +en-t-m0-xml10; English (Hex transform using XML decimal syntax) +en-u-ms-metric; English (Metric System) +en-u-ms-uksystem; English (Imperial Measurement System) +en-u-ms-ussystem; English (US Measurement System) +en-u-mu-celsius; English (Celsius) +en-u-mu-fahrenhe; English (Fahrenheit) +en-u-mu-kelvin; English (Kelvin) +en-u-nu-adlm; English (Adlam Digits) +en-u-nu-ahom; English (Ahom Digits) +en-u-nu-arab; English (Arabic-Indic Digits) +en-u-nu-arabext; English (Extended Arabic-Indic Digits) +en-u-nu-armn; English (Armenian Numerals) +en-u-nu-armnlow; English (Armenian Lowercase Numerals) +en-u-nu-bali; English (Balinese Digits) +en-u-nu-beng; English (Bangla Digits) +en-u-nu-bhks; English (Bhaiksuki Digits) +en-u-nu-brah; English (Brahmi Digits) +en-u-nu-cakm; English (Chakma Digits) +en-u-nu-cham; English (Cham Digits) +en-u-nu-cyrl; English (Cyrillic Numerals) +en-u-nu-deva; English (Devanagari Digits) +en-u-nu-diak; English (Dives Akuru Digits) +en-u-nu-ethi; English (Ethiopic Numerals) +en-u-nu-finance; English (Financial Numerals) +en-u-nu-fullwide; English (Full-Width Digits) +en-u-nu-geor; English (Georgian Numerals) +en-u-nu-gong; English (Gunjala Gondi digits) +en-u-nu-gonm; English (Masaram Gondi digits) +en-u-nu-grek; English (Greek Numerals) +en-u-nu-greklow; English (Greek Lowercase Numerals) +en-u-nu-gujr; English (Gujarati Digits) +en-u-nu-guru; English (Gurmukhi Digits) +en-u-nu-hanidays; English (Chinese Calendar Day-of-Month Numerals) +en-u-nu-hanidec; English (Chinese Decimal Numerals) +en-u-nu-hans; English (Simplified Chinese Numerals) +en-u-nu-hansfin; English (Simplified Chinese Financial Numerals) +en-u-nu-hant; English (Traditional Chinese Numerals) +en-u-nu-hantfin; English (Traditional Chinese Financial Numerals) +en-u-nu-hebr; English (Hebrew Numerals) +en-u-nu-hmng; English (Pahawh Hmong Digits) +en-u-nu-hmnp; English (Nyiakeng Puachue Hmong Digits) +en-u-nu-java; English (Javanese Digits) +en-u-nu-jpan; English (Japanese Numerals) +en-u-nu-jpanfin; English (Japanese Financial Numerals) +en-u-nu-jpanyear; English (Japanese Calendar Gannen Year Numerals) +en-u-nu-kali; English (Kayah Li Digits) +en-u-nu-kawi; English (Kawi Digits) +en-u-nu-khmr; English (Khmer Digits) +en-u-nu-knda; English (Kannada Digits) +en-u-nu-lana; English (Tai Tham Hora Digits) +en-u-nu-lanatham; English (Tai Tham Tham Digits) +en-u-nu-laoo; English (Lao Digits) +en-u-nu-latn; English (Western Digits) +en-u-nu-lepc; English (Lepcha Digits) +en-u-nu-limb; English (Limbu Digits) +en-u-nu-mathbold; English (Mathematical Bold Digits) +en-u-nu-mathdbl; English (Mathematical Double-Struck Digits) +en-u-nu-mathmono; English (Mathematical Monospace Digits) +en-u-nu-mathsanb; English (Mathematical Sans-Serif Bold Digits) +en-u-nu-mathsans; English (Mathematical Sans-Serif Digits) +en-u-nu-mlym; English (Malayalam Digits) +en-u-nu-modi; English (Modi Digits) +en-u-nu-mong; English (Mongolian Digits) +en-u-nu-mroo; English (Mro Digits) +en-u-nu-mtei; English (Meetei Mayek Digits) +en-u-nu-mymr; English (Myanmar Digits) +en-u-nu-mymrshan; English (Myanmar Shan Digits) +en-u-nu-mymrtlng; English (Myanmar Tai Laing Digits) +en-u-nu-nagm; English (Nag Mundari Digits) +en-u-nu-native; English (Native Digits) +en-u-nu-newa; English (Newa Digits) +en-u-nu-nkoo; English (N’Ko Digits) +en-u-nu-olck; English (Ol Chiki Digits) +en-u-nu-orya; English (Odia Digits) +en-u-nu-osma; English (Osmanya Digits) +en-u-nu-rohg; English (Hanifi Rohingya digits) +en-u-nu-roman; English (Roman Numerals) +en-u-nu-romanlow; English (Roman Lowercase Numerals) +en-u-nu-saur; English (Saurashtra Digits) +en-u-nu-segment; English (Segmented Digits) +en-u-nu-shrd; English (Sharada Digits) +en-u-nu-sind; English (Khudawadi Digits) +en-u-nu-sinh; English (Sinhala Lith Digits) +en-u-nu-sora; English (Sora Sompeng Digits) +en-u-nu-sund; English (Sundanese Digits) +en-u-nu-takr; English (Takri Digits) +en-u-nu-talu; English (New Tai Lue Digits) +en-u-nu-taml; English (Traditional Tamil Numerals) +en-u-nu-tamldec; English (Tamil Digits) +en-u-nu-telu; English (Telugu Digits) +en-u-nu-thai; English (Thai Digits) +en-u-nu-tibt; English (Tibetan Digits) +en-u-nu-tirh; English (Tirhuta Digits) +en-u-nu-tnsa; English (Tangsa Digits) +en-u-nu-traditio; English (Traditional Numerals) +en-u-nu-vaii; English (Vai Digits) +en-u-nu-wara; English (Warang Citi Digits) +en-u-nu-wcho; English (Wancho Digits) +en-u-rg-gbsct; English (Region For Supplemental Data: Scotland) +en-u-rg-gbeng; English (Region For Supplemental Data: England) +en-t-s0-accents; English (From Accented Characters To ASCII Sequence) +en-t-s0-ascii; English (From ASCII) +en-t-s0-hex; English (From Hexadecimal Codes) +en-t-s0-morse; English (From Morse Code) +en-t-s0-npinyin; English (From Pinyin With Numeric Tones) +en-t-s0-publish; English (From Publishing Punctuation To ASCII) +en-t-s0-zawgyi; English (From Zawgyi Myanmar Encoding) +en-u-sd-gbsct; English (Region Subdivision: Scotland) +en-u-sd-gbwls; English (Region Subdivision: Wales) +en-u-ss-none; English (Sentence Breaks Without Abbreviation Handling) +en-u-ss-standard; English (Suppress Sentence Breaks After Standard Abbreviations) +en-t-t0-und; English (Unspecified Machine Translation) +en-u-tz-uslax; English (Time Zone: Los Angeles Time) +en-u-tz-gblon; English (Time Zone: United Kingdom Time) +en-u-tz-chzrh; English (Time Zone: Switzerland Time) +en-u-va-posix; English (POSIX Compliant Locale) +en-t-x0-foobar2; English (Private-Use Transform: foobar2)