diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index fe97edb6cf6..08684d82cc0 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -42,6 +42,7 @@ #include "bytesinkutil.h" #include "charstr.h" +#include "charstrmap.h" #include "cmemory.h" #include "cstring.h" #include "mutex.h" @@ -51,7 +52,9 @@ #include "uhash.h" #include "ulocimp.h" #include "umutex.h" +#include "uniquecharstr.h" #include "ustr_imp.h" +#include "uvector.h" U_CDECL_BEGIN static UBool U_CALLCONV locale_cleanup(void); @@ -246,6 +249,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale) // '_' // In the platform codepage. #define SEP_CHAR '_' +#define NULL_CHAR '\0' Locale::~Locale() { @@ -500,38 +504,1110 @@ Locale::operator==( const Locale& other) const return (uprv_strcmp(other.fullName, fullName) == 0); } -#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) - namespace { -CharString& AppendLSCVE(CharString& out, const char* language, const char* script, - const char* country, const char* variants, const char* extension, - UErrorCode& status) { - out.append(language, status); - if (script && script[0] != '\0') { - out.append('_', status); - out.append(script, status); +UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER; +UHashtable *gKnownCanonicalized = nullptr; + +static const char* const KNOWN_CANONICALIZED[] = { + "c", + // Commonly used locales known are already canonicalized + "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ", + "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES", + "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR", + "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu", + "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR", + "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN", + "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS", + 
"it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ", + "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA", + "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN", + "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP", + "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", + "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si", + "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr", + "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta", + "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk", + "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant", + "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", + "zh_Hant_TW", "zh_TW", "zu", "zu_ZA" +}; + +static UBool U_CALLCONV cleanupKnownCanonicalized() { + gKnownCanonicalizedInitOnce.reset(); + if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); } + return TRUE; +} + +static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) { + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED, + cleanupKnownCanonicalized); + LocalUHashtablePointer newKnownCanonicalizedMap( + uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status)); + for (int32_t i = 0; + U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED); + i++) { + uhash_puti(newKnownCanonicalizedMap.getAlias(), + (void*)KNOWN_CANONICALIZED[i], + 1, &status); } - if (country && country[0] != '\0') { - out.append('_', status); - out.append(country, status); + if (U_FAILURE(status)) { + return; } - if (variants && variants[0] != '\0') { - if ((script == nullptr || script[0] == '\0') && - (country == nullptr || country[0] == '\0')) { - out.append('_', status); + + gKnownCanonicalized = newKnownCanonicalizedMap.orphan(); +} + +class AliasData; + +/** + * A Builder class to build the alias data. 
+ */ +class AliasDataBuilder { +public: + AliasDataBuilder() { + } + + // Build the AliasData from resource. + AliasData* build(UErrorCode &status); + +private: + void readAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + void (*checkType)(const char* type), + void (*checkReplacement)(const UnicodeString& replacement), + UErrorCode &status); + + // Read the languageAlias data from alias to + // strings+types+replacementIndexes + // The number of record will be stored into length. + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement script. + void readLanguageAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status); + + // Read the scriptAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement script. + void readScriptAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, UErrorCode &status); + + // Read the territoryAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement script. + void readTerritoryAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, UErrorCode &status); + + // Read the variantAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. 
+ // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement variant. + void readVariantAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, UErrorCode &status); +}; + +/** + * A class to hold the Alias Data. + */ +class AliasData : public UMemory { +public: + static const AliasData* singleton(UErrorCode status) { + umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status); + return gSingleton; + } + + const CharStringMap& languageMap() const { return language; } + const CharStringMap& scriptMap() const { return script; } + const CharStringMap& territoryMap() const { return territory; } + const CharStringMap& variantMap() const { return variant; } + + static void U_CALLCONV loadData(UErrorCode &status); + static UBool U_CALLCONV cleanup(); + + static UInitOnce gInitOnce; + +private: + AliasData(CharStringMap languageMap, + CharStringMap scriptMap, + CharStringMap territoryMap, + CharStringMap variantMap, + CharString* strings) + : language(std::move(languageMap)), + script(std::move(scriptMap)), + territory(std::move(territoryMap)), + variant(std::move(variantMap)), + strings(strings) { + } + + ~AliasData() { + delete strings; + } + + static const AliasData* gSingleton; + + CharStringMap language; + CharStringMap script; + CharStringMap territory; + CharStringMap variant; + CharString* strings; + + friend class AliasDataBuilder; +}; + + +const AliasData* AliasData::gSingleton = nullptr; +UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV +AliasData::cleanup() +{ + gInitOnce.reset(); + delete gSingleton; + return TRUE; +} + +void +AliasDataBuilder::readAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + void (*checkType)(const char* type), + void (*checkReplacement)(const UnicodeString& replacement), + UErrorCode 
&status) { + if (U_FAILURE(status)) { + return; + } + length = ures_getSize(alias); + const char** rawTypes = types.allocateInsteadAndCopy(length); + if (rawTypes == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length); + if (rawIndexes == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + int i = 0; + while (ures_hasNext(alias)) { + LocalUResourceBundlePointer res( + ures_getNextResource(alias, nullptr, &status)); + const char* aliasFrom = ures_getKey(res.getAlias()); + UnicodeString aliasTo = + ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status); + + checkType(aliasFrom); + checkReplacement(aliasTo); + + rawTypes[i] = aliasFrom; + rawIndexes[i] = strings->add(aliasTo, status); + i++; + } +} + +/** + * Read the languageAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement language. + */ +void +AliasDataBuilder::readLanguageAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + // Assert the aliasFrom only contains the following possibilties + // language_REGION_variant + // language_REGION + // language_variant + // language + // und_variant + Locale test(type); + // Assert no script in aliasFrom + U_ASSERT(test.getScript()[0] == '\0'); + // Assert when language is und, no REGION in aliasFrom. + U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0'); + }, +#else + [](const char*) {}, +#endif + [](const UnicodeString&) {}, status); +} + +/** + * Read the scriptAlias data from alias to strings+types+replacementIndexes. 
+ * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement script. + */ +void +AliasDataBuilder::readScriptAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) == 4); + }, + [](const UnicodeString& replacement) { + U_ASSERT(replacement.length() == 4); + }, +#else + [](const char*) {}, + [](const UnicodeString&) { }, +#endif + status); +} + +/** + * Read the territoryAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement regions. + */ +void +AliasDataBuilder::readTerritoryAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3); + }, +#else + [](const char*) {}, +#endif + [](const UnicodeString&) { }, + status); +} + +/** + * Read the variantAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement variant. 
+ */ +void +AliasDataBuilder::readVariantAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8); + U_ASSERT(uprv_strlen(type) != 4 || + (type[0] >= '0' && type[0] <= '9')); + }, + [](const UnicodeString& replacement) { + U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8); + U_ASSERT(replacement.length() != 4 || + (replacement.charAt(0) >= u'0' && + replacement.charAt(0) <= u'9')); + }, +#else + [](const char*) {}, + [](const UnicodeString&) { }, +#endif + status); +} + +/** + * Initializes the alias data from the ICU resource bundles. The alias data + * contains alias of language, country, script and variants. + * + * If the alias data has already loaded, then this method simply returns without + * doing anything meaningful. + */ +void U_CALLCONV +AliasData::loadData(UErrorCode &status) +{ +#ifdef LOCALE_CANONICALIZATION_DEBUG + UDate start = uprv_getRawUTCtime(); +#endif // LOCALE_CANONICALIZATION_DEBUG + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup); + AliasDataBuilder builder; + gSingleton = builder.build(status); +#ifdef LOCALE_CANONICALIZATION_DEBUG + UDate end = uprv_getRawUTCtime(); + printf("AliasData::loadData took total %f ms\n", end - start); +#endif // LOCALE_CANONICALIZATION_DEBUG + if (gSingleton == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } +} + +/** + * Build the alias data from resources. 
+ */ +AliasData* +AliasDataBuilder::build(UErrorCode &status) { + LocalUResourceBundlePointer metadata( + ures_openDirect(nullptr, "metadata", &status)); + LocalUResourceBundlePointer metadataAlias( + ures_getByKey(metadata.getAlias(), "alias", nullptr, &status)); + LocalUResourceBundlePointer languageAlias( + ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status)); + LocalUResourceBundlePointer scriptAlias( + ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status)); + LocalUResourceBundlePointer territoryAlias( + ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status)); + LocalUResourceBundlePointer variantAlias( + ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status)); + + int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0, + variantLength = 0; + + // Read the languageAlias into languageTypes, languageReplacementIndexes + // and strings + UniqueCharStrings strings(status); + LocalMemory languageTypes; + LocalMemory languageReplacementIndexes; + readLanguageAlias(languageAlias.getAlias(), + &strings, + languageTypes, + languageReplacementIndexes, + languagesLength, + status); + + // Read the scriptAlias into scriptTypes, scriptReplacementIndexes + // and strings + LocalMemory scriptTypes; + LocalMemory scriptReplacementIndexes; + readScriptAlias(scriptAlias.getAlias(), + &strings, + scriptTypes, + scriptReplacementIndexes, + scriptLength, + status); + + // Read the territoryAlias into territoryTypes, territoryReplacementIndexes + // and strings + LocalMemory territoryTypes; + LocalMemory territoryReplacementIndexes; + readTerritoryAlias(territoryAlias.getAlias(), + &strings, + territoryTypes, + territoryReplacementIndexes, + territoryLength, status); + + // Read the variantAlias into variantTypes, variantReplacementIndexes + // and strings + LocalMemory variantTypes; + LocalMemory variantReplacementIndexes; + readVariantAlias(variantAlias.getAlias(), + &strings, + variantTypes, + 
variantReplacementIndexes, + variantLength, status); + + if (U_FAILURE(status)) { + return nullptr; + } + + // We can only use strings after freeze it. + strings.freeze(); + + // Build the languageMap from languageTypes & languageReplacementIndexes + CharStringMap languageMap(490, status); + for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) { + languageMap.put(languageTypes[i], + strings.get(languageReplacementIndexes[i]), + status); + } + + // Build the scriptMap from scriptTypes & scriptReplacementIndexes + CharStringMap scriptMap(1, status); + for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) { + scriptMap.put(scriptTypes[i], + strings.get(scriptReplacementIndexes[i]), + status); + } + + // Build the territoryMap from territoryTypes & territoryReplacementIndexes + CharStringMap territoryMap(650, status); + for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) { + territoryMap.put(territoryTypes[i], + strings.get(territoryReplacementIndexes[i]), + status); + } + + // Build the variantMap from variantTypes & variantReplacementIndexes. + CharStringMap variantMap(2, status); + for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) { + variantMap.put(variantTypes[i], + strings.get(variantReplacementIndexes[i]), + status); + } + + // copy hashtables + return new AliasData( + std::move(languageMap), + std::move(scriptMap), + std::move(territoryMap), + std::move(variantMap), + strings.orphanCharStrings()); +} + +/** + * A class that find the replacement values of locale fields by using AliasData. + */ +class AliasReplacer { +public: + AliasReplacer(UErrorCode status) : + language(nullptr), script(nullptr), region(nullptr), + extensions(nullptr), variants(status), + data(nullptr) { + } + ~AliasReplacer() { + } + + // Check the fields inside locale, if need to replace fields, + // place the the replaced locale ID in out and return true. + // Otherwise return false for no replacement or error. 
+ bool replace( + const Locale& locale, CharString& out, UErrorCode status); + +private: + const char* language; + const char* script; + const char* region; + const char* extensions; + UVector variants; + + const AliasData* data; + + inline bool notEmpty(const char* str) { + return str && str[0] != NULL_CHAR; + } + + /** + * If replacement is neither null nor empty and input is either null or empty, + * return replacement. + * If replacement is neither null nor empty but input is not empty, return input. + * If replacement is either null or empty and type is either null or empty, + * return input. + * Otherwise return null. + * replacement input type return + * AAA nullptr * AAA + * AAA BBB * BBB + * nullptr || "" CCC nullptr CCC + * nullptr || "" * DDD nullptr + */ + inline const char* deleteOrReplace( + const char* input, const char* type, const char* replacement) { + return notEmpty(replacement) ? + ((input == nullptr) ? replacement : input) : + ((type == nullptr) ? input : nullptr); + } + + inline bool same(const char* a, const char* b) { + if (a == nullptr && b == nullptr) { + return true; } - out.append('_', status); - out.append(variants, status); + if ((a == nullptr && b != nullptr) || + (a != nullptr && b == nullptr)) { + return false; + } + return uprv_strcmp(a, b) == 0; } - if (extension && extension[0] != '\0') { - out.append(extension, status); + + // Gather fields and generate locale ID into out. + CharString& outputToString(CharString& out, UErrorCode status); + + // Generate the lookup key. + CharString& generateKey(const char* language, const char* region, + const char* variant, CharString& out, + UErrorCode status); + + void parseLanguageReplacement(const char* replacement, + const char*& replaceLanguage, + const char*& replaceScript, + const char*& replaceRegion, + const char*& replaceVariant, + const char*& replaceExtensions, + UVector& toBeFreed, + UErrorCode& status); + + // Replace by using languageAlias. 
+ bool replaceLanguage(bool checkLanguage, bool checkRegion, + bool checkVariants, UVector& toBeFreed, + UErrorCode& status); + + // Replace by using territoryAlias. + bool replaceTerritory(UVector& toBeFreed, UErrorCode& status); + + // Replace by using scriptAlias. + bool replaceScript(UErrorCode& status); + + // Replace by using variantAlias. + bool replaceVariant(UErrorCode& status); +}; + +CharString& +AliasReplacer::generateKey( + const char* language, const char* region, const char* variant, + CharString& out, UErrorCode status) +{ + out.append(language, status); + if (notEmpty(region)) { + out.append(SEP_CHAR, status) + .append(region, status); + } + if (notEmpty(variant)) { + out.append(SEP_CHAR, status) + .append(variant, status); } return out; } +void +AliasReplacer::parseLanguageReplacement( + const char* replacement, + const char*& replacedLanguage, + const char*& replacedScript, + const char*& replacedRegion, + const char*& replacedVariant, + const char*& replacedExtensions, + UVector& toBeFreed, + UErrorCode& status) +{ + if (U_FAILURE(status)) { + return; + } + replacedScript = replacedRegion = replacedVariant + = replacedExtensions = nullptr; + if (uprv_strchr(replacement, '_') == nullptr) { + replacedLanguage = replacement; + // reach the end, just return it. + return; + } + // We have multiple field so we have to allocate and parse + CharString* str = new CharString( + replacement, (int32_t)uprv_strlen(replacement), status); + if (U_FAILURE(status)) { + return; + } + if (str == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + toBeFreed.addElement(str, status); + char* data = str->data(); + replacedLanguage = (const char*) data; + char* endOfField = uprv_strchr(data, '_'); + *endOfField = '\0'; // null terminiate it. 
+ endOfField++; + const char* start = endOfField; + endOfField = (char*) uprv_strchr(start, '_'); + size_t len = 0; + if (endOfField == nullptr) { + len = uprv_strlen(start); + } else { + len = endOfField - start; + *endOfField = '\0'; // null terminiate it. + } + if (len == 4 && uprv_isASCIILetter(*start)) { + // Got a script + replacedScript = start; + if (endOfField == nullptr) { + return; + } + start = endOfField++; + endOfField = (char*)uprv_strchr(start, '_'); + if (endOfField == nullptr) { + len = uprv_strlen(start); + } else { + len = endOfField - start; + *endOfField = '\0'; // null terminiate it. + } + } + if (len >= 2 && len <= 3) { + // Got a region + replacedRegion = start; + if (endOfField == nullptr) { + return; + } + start = endOfField++; + endOfField = (char*)uprv_strchr(start, '_'); + if (endOfField == nullptr) { + len = uprv_strlen(start); + } else { + len = endOfField - start; + *endOfField = '\0'; // null terminiate it. + } + } + if (len >= 4) { + // Got a variant + replacedVariant = start; + if (endOfField == nullptr) { + return; + } + start = endOfField++; + } + replacedExtensions = start; +} + +bool +AliasReplacer::replaceLanguage( + bool checkLanguage, bool checkRegion, + bool checkVariants, UVector& toBeFreed, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + if ( (checkRegion && region == nullptr) || + (checkVariants && variants.size() == 0)) { + // Nothing to search. + return false; + } + int32_t variant_size = checkVariants ? variants.size() : 1; + // Since we may have more than one variant, we need to loop through them. + const char* searchLanguage = checkLanguage ? language : "und"; + const char* searchRegion = checkRegion ? 
region : nullptr; + const char* searchVariant = nullptr; + for (int32_t variant_index = 0; + variant_index < variant_size; + variant_index++) { + if (checkVariants) { + U_ASSERT(variant_index < variant_size); + searchVariant = (const char*)(variants.elementAt(variant_index)); + } + + if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) { + // Do not consider ill-formed variant subtag. + searchVariant = nullptr; + } + CharString typeKey; + generateKey(searchLanguage, searchRegion, searchVariant, typeKey, + status); + if (U_FAILURE(status)) { + return false; + } + const char *replacement = data->languageMap().get(typeKey.data()); + if (replacement == nullptr) { + // Found no replacement data. + continue; + } + + const char* replacedLanguage; + const char* replacedScript; + const char* replacedRegion; + const char* replacedVariant; + const char* replacedExtensions; + parseLanguageReplacement(replacement, + replacedLanguage, + replacedScript, + replacedRegion, + replacedVariant, + replacedExtensions, + toBeFreed, + status); + replacedLanguage = + uprv_strcmp(replacedLanguage, "und") == 0 ? + language : replacedLanguage; + replacedScript = deleteOrReplace(script, nullptr, replacedScript); + replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion); + replacedVariant = deleteOrReplace( + searchVariant, searchVariant, replacedVariant); + + if ( same(language, replacedLanguage) && + same(script, replacedScript) && + same(region, replacedRegion) && + same(searchVariant, replacedVariant) && + replacedExtensions == nullptr) { + // Replacement produce no changes. 
+ continue; + } + + language = replacedLanguage; + region = replacedRegion; + script = replacedScript; + if (searchVariant != nullptr) { + if (notEmpty(replacedVariant)) { + variants.setElementAt((void*)replacedVariant, variant_index); + } else { + variants.removeElementAt(variant_index); + } + } + if (replacedExtensions != nullptr) { + // TODO(ICU-21292) + // DO NOTHING + // UTS35 does not specifiy what should we do if we have extensions in the + // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have + // extensions in them languageAlias: + // i_default => en_x_i_default + // i_enochian => und_x_i_enochian + // i_mingo => see_x_i_mingo + // zh_min => nan_x_zh_min + // But all of them are already changed by code inside ultag_parse() before + // hitting this code. + } + + // Something changed by language alias data. + return true; + } + // Nothing changed by language alias data. + return false; +} + +bool +AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + if (region == nullptr) { + // No region to search. + return false; + } + const char *replacement = data->territoryMap().get(region); + if (replacement == nullptr) { + // Found no replacement data for this region. + return false; + } + const char* replacedRegion = replacement; + const char* firstSpace = uprv_strchr(replacement, ' '); + if (firstSpace != nullptr) { + // If there are are more than one region in the replacement. + // We need to check which one match based on the language. 
+ Locale l(language, nullptr, script); + l.addLikelySubtags(status); + const char* likelyRegion = l.getCountry(); + CharString* item = nullptr; + if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) { + size_t len = uprv_strlen(likelyRegion); + const char* foundInReplacement = uprv_strstr(replacement, + likelyRegion); + if (foundInReplacement != nullptr) { + // Assuming the case there are no three letter region code in + // the replacement of territoryAlias + U_ASSERT(foundInReplacement == replacement || + *(foundInReplacement-1) == ' '); + U_ASSERT(foundInReplacement[len] == ' ' || + foundInReplacement[len] == '\0'); + item = new CharString(foundInReplacement, (int32_t)len, status); + } + } + if (item == nullptr) { + item = new CharString(replacement, + (int32_t)(firstSpace - replacement), status); + } + if (U_FAILURE(status)) { return false; } + if (item == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } + replacedRegion = item->data(); + toBeFreed.addElement(item, status); + } + U_ASSERT(!same(region, replacedRegion)); + region = replacedRegion; + // The region is changed by data in territory alias. + return true; +} + +bool +AliasReplacer::replaceScript(UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + if (script == nullptr) { + // No script to search. + return false; + } + const char *replacement = data->scriptMap().get(script); + if (replacement == nullptr) { + // Found no replacement data for this script. + return false; + } + U_ASSERT(!same(script, replacement)); + script = replacement; + // The script is changed by data in script alias. + return true; +} + +bool +AliasReplacer::replaceVariant(UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + // Since we may have more than one variant, we need to loop through them. 
+ for (int32_t i = 0; i < variants.size(); i++) { + const char *variant = (const char*)(variants.elementAt(i)); + const char *replacement = data->variantMap().get(variant); + if (replacement == nullptr) { + // Found no replacement data for this variant. + continue; + } + U_ASSERT((uprv_strlen(replacement) >= 5 && + uprv_strlen(replacement) <= 8) || + (uprv_strlen(replacement) == 4 && + replacement[0] >= '0' && + replacement[0] <= '9')); + if (!same(variant, replacement)) { + variants.setElementAt((void*)replacement, i); + // Special hack to handle hepburn-heploc => alalc97 + if (uprv_strcmp(variant, "heploc") == 0) { + for (int32_t j = 0; j < variants.size(); j++) { + if (uprv_strcmp((const char*)(variants.elementAt(j)), + "hepburn") == 0) { + variants.removeElementAt(j); + } + } + } + return true; + } + } + return false; +} + +CharString& +AliasReplacer::outputToString( + CharString& out, UErrorCode status) +{ + out.append(language, status); + if (notEmpty(script)) { + out.append(SEP_CHAR, status) + .append(script, status); + } + if (notEmpty(region)) { + out.append(SEP_CHAR, status) + .append(region, status); + } + if (variants.size() > 0) { + if (!notEmpty(script) && !notEmpty(region)) { + out.append(SEP_CHAR, status); + } + variants.sort([](UElement e1, UElement e2) -> int8_t { + return uprv_strcmp( + (const char*)e1.pointer, (const char*)e2.pointer); + }, status); + int32_t variantsStart = out.length(); + for (int32_t i = 0; i < variants.size(); i++) { + out.append(SEP_CHAR, status) + .append((const char*)((UVector*)variants.elementAt(i)), + status); + } + T_CString_toUpperCase(out.data() + variantsStart); + } + if (notEmpty(extensions)) { + CharString tmp("und_", status); + tmp.append(extensions, status); + Locale tmpLocale(tmp.data()); + // only support x extension inside CLDR for now. 
+ U_ASSERT(extensions[0] == 'x'); + out.append(tmpLocale.getName() + 1, status); + } + return out; +} + +bool +AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) +{ + data = AliasData::singleton(status); + if (U_FAILURE(status)) { + return false; + } + out.clear(); + language = locale.getLanguage(); + if (!notEmpty(language)) { + language = nullptr; + } + script = locale.getScript(); + if (!notEmpty(script)) { + script = nullptr; + } + region = locale.getCountry(); + if (!notEmpty(region)) { + region = nullptr; + } + const char* variantsStr = locale.getVariant(); + const char* extensionsStr = locale_getKeywordsStart(locale.getName()); + CharString variantsBuff(variantsStr, -1, status); + if (!variantsBuff.isEmpty()) { + if (U_FAILURE(status)) { return false; } + char* start = variantsBuff.data(); + T_CString_toLowerCase(start); + char* end; + while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr && + U_SUCCESS(status)) { + *end = NULL_CHAR; // null terminate inside variantsBuff + variants.addElement(start, status); + start = end + 1; + } + variants.addElement(start, status); + } + if (U_FAILURE(status)) { return false; } + + // Sort the variants + variants.sort([](UElement e1, UElement e2) -> int8_t { + return uprv_strcmp( + (const char*)e1.pointer, (const char*)e2.pointer); + }, status); + + // A changed count to assert when loop too many times. + int changed = 0; + // A UVector to to hold CharString allocated by the replace* method + // and freed when out of scope from his function. + UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); }, + nullptr, 10, status); + while (U_SUCCESS(status)) { + // Something wrong with the data cause looping here more than 10 times + // already. 
+ U_ASSERT(changed < 5); + // From observation of key in data/misc/metadata.txt + // we know currently we only need to search in the following combination + // of fields for type in languageAlias: + // * lang_region_variant + // * lang_region + // * lang_variant + // * lang + // * und_variant + // This assumption is ensured by the U_ASSERT in readLanguageAlias + // + // lang REGION variant + if ( replaceLanguage(true, true, true, stringsToBeFreed, status) || + replaceLanguage(true, true, false, stringsToBeFreed, status) || + replaceLanguage(true, false, true, stringsToBeFreed, status) || + replaceLanguage(true, false, false, stringsToBeFreed, status) || + replaceLanguage(false,false, true, stringsToBeFreed, status) || + replaceTerritory(stringsToBeFreed, status) || + replaceScript(status) || + replaceVariant(status)) { + // Some values in data is changed, try to match from the beginning + // again. + changed++; + continue; + } + // Nothing changed. Break out. + break; + } // while(1) + + if (U_FAILURE(status)) { return false; } + // Nothing changed and we know the order of the vaiants are not change + // because we have no variant or only one. + if (changed == 0 && variants.size() <= 1) { + return false; + } + outputToString(out, status); + if (extensionsStr != nullptr) { + out.append(extensionsStr, status); + } + if (U_FAILURE(status)) { + return false; + } + // If the tag is not changed, return. + if (uprv_strcmp(out.data(), locale.getName()) == 0) { + U_ASSERT(changed == 0); + U_ASSERT(variants.size() > 1); + out.clear(); + return false; + } + return true; +} + +// Return true if the locale is changed during canonicalization. +// The replaced value then will be put into out. 
+bool
+canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
+{
+    // A fresh AliasReplacer is created for every call; the result of
+    // replace() is returned unchanged.
+    AliasReplacer replacer(status);
+    return replacer.replace(locale, out, status);
+}
+
+// Fast check for locale IDs that are known to be canonical already, so that
+// the caller can skip the alias-data lookup (and the resource loading it
+// triggers) until it is really needed.
+// Returns true when `locale` is one of the known canonical IDs. Returns false
+// when it is not in the list or when building the lookup table failed, in
+// which case `status` carries the failure.
+bool
+isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
+{
+    // The most common IDs are answered without touching the hash table.
+    if (    uprv_strcmp(locale, "c") == 0 ||
+            uprv_strcmp(locale, "en") == 0 ||
+            uprv_strcmp(locale, "en_US") == 0) {
+        return true;
+    }
+
+    // common well-known Canonicalized.
+    // The table is built once, on first use, by loadKnownCanonicalized.
+    umtx_initOnce(gKnownCanonicalizedInitOnce,
+                  &loadKnownCanonicalized, status);
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    U_ASSERT(gKnownCanonicalized != nullptr);
+    // Entries are stored with the value 1, so 0 means "not present".
+    return uhash_geti(gKnownCanonicalized, locale) != 0;
+}
+
 } // namespace

+// Function for testing.
+// Exposes the KNOWN_CANONICALIZED table: stores its element count in *length
+// and returns a pointer to its first entry.
+U_CAPI const char* const*
+ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
+{
+    *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+    return KNOWN_CANONICALIZED;
+}
+
+// Function for testing.
+U_CAPI bool +ulocimp_isCanonicalizedLocaleForTest(const char* localeName) +{ + Locale l(localeName); + UErrorCode status = U_ZERO_ERROR; + CharString temp; + return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status); +} + /*This function initializes a Locale from a C locale ID*/ Locale& Locale::init(const char* localeID, UBool canonicalize) { @@ -626,9 +1702,9 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) uprv_memcpy(language, fullName, fieldLen[0]); language[fieldLen[0]] = 0; } - if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) && - ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) && - ISASCIIALPHA(field[1][3])) { + if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) && + uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) && + uprv_isASCIILetter(field[1][3])) { /* We have at least a script */ uprv_memcpy(script, field[1], fieldLen[1]); script[fieldLen[1]] = 0; @@ -656,193 +1732,18 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) } if (canonicalize) { - UErrorCode status = U_ZERO_ERROR; - // TODO: Try to use ResourceDataValue and ures_getValueWithFallback() etc. - LocalUResourceBundlePointer metadata(ures_openDirect(NULL, "metadata", &status)); - LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(), "alias", NULL, &status)); - // Look up the metadata:alias:language:$key:replacement entries - // key could be one of the following: - // language - // language_Script_REGION - // language_REGION - // language_variant - do { - // The resource structure looks like - // metadata { - // alias { - // language { - // art_lojban { - // replacement{"jbo"} - // } - // ... - // ks_Arab_IN { - // replacement{"ks_IN"} - // } - // ... - // no { - // replacement{"nb"} - // } - // .... - // zh_CN { - // replacement{"zh_Hans_CN"} - // } - // } - // ... 
- // } - // } - LocalUResourceBundlePointer languageAlias(ures_getByKey(metadataAlias.getAlias(), "language", NULL, &status)); - if (U_FAILURE(status)) + if (!isKnownCanonicalizedLocale(fullName, err)) { + CharString replaced; + // Not sure it is already canonicalized + if (canonicalizeLocale(*this, replaced, err)) { + U_ASSERT(U_SUCCESS(err)); + // If need replacement, call init again. + init(replaced.data(), false); + } + if (U_FAILURE(err)) { break; - CharString temp; - // Handle cases of key pattern "language _ variant" - // ex: Map "art_lojban" to "jbo" - const char* variants = getVariant(); - if (variants != nullptr && variants[0] != '\0') { - const char* begin = variants; - const char* end = begin; - // We may have multiple variants, need to look at each of - // them. - for (;;) { - status = U_ZERO_ERROR; - end = uprv_strchr(begin, '_'); - int32_t len = (end == nullptr) ? int32_t(uprv_strlen(begin)) : int32_t(end - begin); - temp.clear().append(getLanguage(), status).append("_", status).append(begin, len, status); - LocalUResourceBundlePointer languageVariantAlias( - ures_getByKey(languageAlias.getAlias(), - temp.data(), - NULL, &status)); - temp.clear().appendInvariantChars( - UnicodeString(ures_getStringByKey(languageVariantAlias.getAlias(), "replacement", nullptr, &status)), status); - if (U_SUCCESS(status)) { - CharString newVar; - if (begin != variants) { - newVar.append(variants, static_cast(begin - variants - 1), status); - } - if (end != nullptr) { - if (begin != variants) { - newVar.append("_", status); - } - newVar.append(end + 1, status); - } - Locale l(temp.data()); - init(AppendLSCVE(temp.clear(), - l.getLanguage(), - (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(), - (getCountry() != nullptr && getCountry()[0] != '\0') ? 
getCountry() : l.getCountry(), - newVar.data(), - uprv_strchr(fullName, '@'), status).data(), false); - break; - } - if (end == nullptr) break; - begin = end + 1; - } - } // End of handle language _ variant - // Handle cases of key pattern "language _ Script _ REGION" - // ex: Map "ks_Arab_IN" to "ks_IN" - if (getScript() != nullptr && getScript()[0] != '\0' && - getCountry() != nullptr && getCountry()[0] != '\0') { - status = U_ZERO_ERROR; - LocalUResourceBundlePointer replacedAlias( - ures_getByKey(languageAlias.getAlias(), - AppendLSCVE(temp.clear(), getLanguage(), getScript(), getCountry(), - nullptr, nullptr, status).data(), NULL, &status)); - temp.clear().appendInvariantChars( - UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status); - if (U_SUCCESS(status)) { - Locale l(temp.data()); - init(AppendLSCVE(temp.clear(), - l.getLanguage(), - l.getScript(), - l.getCountry(), - getVariant(), - uprv_strchr(fullName, '@'), status).data(), false); - } - } // End of handle language _ Script _ REGION - // Handle cases of key pattern "language _ REGION" - // ex: Map "zh_CN" to "zh_Hans_CN" - if (getCountry() != nullptr && getCountry()[0] != '\0') { - status = U_ZERO_ERROR; - LocalUResourceBundlePointer replacedAlias( - ures_getByKey(languageAlias.getAlias(), - AppendLSCVE(temp.clear(), getLanguage(), nullptr, getCountry(), - nullptr, nullptr, status).data(), NULL, &status)); - temp.clear().appendInvariantChars( - UnicodeString(ures_getStringByKey(replacedAlias.getAlias(), "replacement", nullptr, &status)), status); - if (U_SUCCESS(status)) { - Locale l(temp.data()); - init(AppendLSCVE(temp.clear(), - l.getLanguage(), - (getScript() != nullptr && getScript()[0] != '\0') ? 
getScript() : l.getScript(), - l.getCountry(), - getVariant(), - uprv_strchr(fullName, '@'), status).data(), false); - } - } // End of handle "language _ REGION" - // Handle cases of key pattern "language" - // ex: Map "no" to "nb" - { - status = U_ZERO_ERROR; - LocalUResourceBundlePointer replaceLanguageAlias(ures_getByKey(languageAlias.getAlias(), getLanguage(), NULL, &status)); - temp.clear().appendInvariantChars( - UnicodeString(ures_getStringByKey(replaceLanguageAlias.getAlias(), "replacement", nullptr, &status)), status); - if (U_SUCCESS(status)) { - Locale l(temp.data()); - init(AppendLSCVE(temp.clear(), - l.getLanguage(), - (getScript() != nullptr && getScript()[0] != '\0') ? getScript() : l.getScript(), - (getCountry() != nullptr && getCountry()[0] != '\0') ? getCountry() : l.getCountry(), - getVariant(), - uprv_strchr(fullName, '@'), status).data(), false); - } - } // End of handle "language" - - // Look up the metadata:alias:territory:$key:replacement entries - // key is region code. - if (getCountry() != nullptr) { - status = U_ZERO_ERROR; - // The resource structure looks like - // metadata { - // alias { - // ... - // territory: { - // 172 { - // replacement{"RU AM AZ BY GE KG KZ MD TJ TM UA UZ"} - // } - // ... 
- // 554 { - // replacement{"NZ"} - // } - // } - // } - // } - LocalUResourceBundlePointer territoryAlias(ures_getByKey(metadataAlias.getAlias(), "territory", NULL, &status)); - LocalUResourceBundlePointer countryAlias(ures_getByKey(territoryAlias.getAlias(), getCountry(), NULL, &status)); - UnicodeString replacements( - ures_getStringByKey(countryAlias.getAlias(), "replacement", nullptr, &status)); - if (U_SUCCESS(status)) { - CharString replacedCountry; - int32_t delPos = replacements.indexOf(' '); - if (delPos == -1) { - replacedCountry.appendInvariantChars(replacements, status); - } else { - Locale l(AppendLSCVE(temp.clear(), getLanguage(), nullptr, getScript(), - nullptr, nullptr, status).data()); - l.addLikelySubtags(status); - if (replacements.indexOf(UnicodeString(l.getCountry())) != -1) { - replacedCountry.append(l.getCountry(), status); - } else { - replacedCountry.appendInvariantChars(replacements.getBuffer(), delPos, status); - } - } - init(AppendLSCVE(temp.clear(), - getLanguage(), - getScript(), - replacedCountry.data(), - getVariant(), - uprv_strchr(fullName, '@'), status).data(), false); - } - } // End of handle REGION - } while (0); + } + } } // if (canonicalize) { // successful end of init() diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h index b837fb94629..44b73e94da7 100644 --- a/icu4c/source/common/ucln_cmn.h +++ b/icu4c/source/common/ucln_cmn.h @@ -38,6 +38,8 @@ typedef enum ECleanupCommonType { UCLN_COMMON_SERVICE, UCLN_COMMON_LOCALE_KEY_TYPE, UCLN_COMMON_LOCALE, + UCLN_COMMON_LOCALE_ALIAS, + UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED, UCLN_COMMON_LOCALE_AVAILABLE, UCLN_COMMON_LIKELY_SUBTAGS, UCLN_COMMON_LOCALE_DISTANCE, diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index b9070f8ac89..5691fe9a77f 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -298,4 +298,10 @@ ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* i U_CFUNC const 
char* ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); +/* Function for testing purpose */ +U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length); + +// Return true if the value is already canonicalized. +U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); + #endif diff --git a/icu4c/source/data/cldr-icu-readme.txt b/icu4c/source/data/cldr-icu-readme.txt index 039550b6fee..93b9ef391a7 100644 --- a/icu4c/source/data/cldr-icu-readme.txt +++ b/icu4c/source/data/cldr-icu-readme.txt @@ -192,6 +192,12 @@ ant -f build-icu-data.xml -DcldrDataDir="$CLDR_TMP_DIR/production" | tee /tmp/cl cd $TOOLS_ROOT/cldr ant copy-cldr-testdata +# 4d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt +# into icu4c/source/test/testdata/localeCanonicalization.txt +# and icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt +# and add the following line to the begginning of these two files +# # File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt + # 5. Check which data files have modifications, which have been added or removed # (if there are no changes, you may not need to proceed further). Make sure the # list seems reasonable. 
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp index c5ed9fa2edc..b80711951a2 100644 --- a/icu4c/source/test/intltest/loctest.cpp +++ b/icu4c/source/test/intltest/loctest.cpp @@ -32,6 +32,8 @@ #include "putilimp.h" #include "hash.h" #include "locmap.h" +#include "uparse.h" +#include "ulocimp.h" static const char* const rawData[33][8] = { @@ -257,6 +259,8 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c TESTCASE_AUTO(TestBug13554); TESTCASE_AUTO(TestBug20410); TESTCASE_AUTO(TestBug20900); + TESTCASE_AUTO(TestLocaleCanonicalizationFromFile); + TESTCASE_AUTO(TestKnownCanonicalizedListCorrect); TESTCASE_AUTO(TestConstructorAcceptsBCP47); TESTCASE_AUTO(TestForLanguageTag); TESTCASE_AUTO(TestToLanguageTag); @@ -4707,10 +4711,10 @@ void LocaleTest::TestCanonicalization(void) } testCases[] = { { "ca_ES-with-extra-stuff-that really doesn't make any sense-unless-you're trying to increase code coverage", "ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE", - "ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"}, + "ca_ES_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_WITH_YOU'RE TRYING TO INCREASE CODE COVERAGE"}, { "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" }, - { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" }, - { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" }, + { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" }, + { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" }, { "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" }, { "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" }, { "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ }, @@ -4729,13 +4733,17 @@ void LocaleTest::TestCanonicalization(void) 
{ "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML" }, { "i-cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US" }, { "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" }, - { "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */ + { "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_B_NY" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */ /* fleshing out canonicalization */ /* trim space and sort keywords, ';' is separator so not present at end in canonical form */ - { "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" }, + { "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;", + "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", + "en_Hant_IL_GIRL_VALLEY@calendar=Japanese;currency=EUR" }, /* already-canonical ids are not changed */ - { "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" }, + { "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", + "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", + "en_Hant_IL_GIRL_VALLEY@calendar=Japanese;currency=EUR" }, /* norwegian is just too weird, if we handle things in their full generality */ { "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "nb_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ }, @@ -4776,13 +4784,13 @@ void LocaleTest::TestCanonicalization(void) { "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" }, { "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" }, { "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" }, - { "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" }, + { "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" }, { "zh__PINYIN", "zh__PINYIN", 
"zh__PINYIN" }, { "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */ { "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */ { "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */ - { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */ - { "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */ + { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */ + { "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */ { "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */ { "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */ /* PRE_EURO and EURO conversions don't affect other keywords */ @@ -4799,13 +4807,6 @@ void LocaleTest::TestCanonicalization(void) for (i=0; i < UPRV_LENGTHOF(testCases); i++) { for (j=0; j<3; ++j) { - if (j==1 && logKnownIssue("21236", "skip some canonicalization tests until code fixed")) { - if (uprv_strncmp(testCases[i].localeID, "zh_CN", 5) == 0 || - uprv_strncmp(testCases[i].localeID, "zh_TW", 5) == 0 || - uprv_strncmp(testCases[i].localeID, "uz-UZ", 5) == 0 ) { - continue; - } - } const char* expected = (j==1) ? testCases[i].canonicalID : testCases[i].getNameID; Locale loc = _canonicalize(j, testCases[i].localeID); const char* getName = loc.isBogus() ? 
"BOGUS" : loc.getName(); @@ -4858,17 +4859,18 @@ void LocaleTest::TestCanonicalize(void) // also test with script, variants and extensions { "prs-Cyrl-1009-u-ca-roc", "fa-Cyrl-AF-1009-u-ca-roc" }, - // language _ country -> language _ script _ country - { "pa-IN", "pa-Guru-IN" }, + { "pa-IN", "pa-IN" }, // also test with script { "pa-Latn-IN", "pa-Latn-IN" }, // also test with variants and extensions - { "pa-IN-5678-u-ca-hindi", "pa-Guru-IN-5678-u-ca-hindi" }, + { "pa-IN-5678-u-ca-hindi", "pa-IN-5678-u-ca-hindi" }, - // language _ script _ country -> language _ country - { "ky-Cyrl-KG", "ky-KG" }, + { "ky-Cyrl-KG", "ky-Cyrl-KG" }, // also test with variants and extensions - { "ky-Cyrl-KG-3456-u-ca-roc", "ky-KG-3456-u-ca-roc" }, + { "ky-Cyrl-KG-3456-u-ca-roc", "ky-Cyrl-KG-3456-u-ca-roc" }, + + // Test replacement of scriptAlias + { "en-Qaai", "en-Zinh" }, // Test replacement of territoryAlias // 554 has one replacement @@ -4887,18 +4889,14 @@ void LocaleTest::TestCanonicalize(void) { "uz-Cyrl-172-5678-u-nu-latn", "uz-Cyrl-UZ-5678-u-nu-latn" }, // a language not used in this region { "fr-172", "fr-RU" }, + + // variant + { "ja-Latn-hepburn-heploc", "ja-Latn-alalc97"}, + + { "aaa-Fooo-SU", "aaa-Fooo-RU"}, }; int32_t i; for (i=0; i < UPRV_LENGTHOF(testCases); i++) { - if (logKnownIssue("21236", "skip some canonicalization tests until code fixed")) { - if (uprv_strstr(testCases[i].localeID, "-BOKMAL") != 0 || - uprv_strstr(testCases[i].localeID, "-NYNORSK") != 0 || - uprv_strstr(testCases[i].localeID, "-SAAHO") != 0 || - uprv_strncmp(testCases[i].localeID, "pa-IN", 5) == 0 || - uprv_strncmp(testCases[i].localeID, "ky-Cyrl", 7) == 0 ) { - continue; - } - } UErrorCode status = U_ZERO_ERROR; std::string otag = testCases[i].localeID; Locale loc = Locale::forLanguageTag(otag.c_str(), status); @@ -5351,6 +5349,73 @@ void LocaleTest::TestBug20900() { } } +U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); +void LocaleTest::TestLocaleCanonicalizationFromFile() +{ 
+    // Data-driven test: every data line of localeCanonicalization.txt holds
+    // a source locale ID and, after a ';', the canonical form expected from
+    // Locale::createCanonical(), compared here as a language tag.
+    IcuTestErrorCode status(*this, "TestLocaleCanonicalizationFromFile");
+    const char *sourceTestDataPath=getSourceTestData(status);
+    if(status.errIfFailureAndReset("unable to find the source/test/testdata "
+                                      "folder (getSourceTestData())")) {
+        return;
+    }
+    // Build the path in a std::string so that a long source directory
+    // cannot overflow a fixed-size buffer.
+    std::string testPath(sourceTestDataPath);
+    testPath += "localeCanonicalization.txt";
+    char line[256];
+    LocalStdioFilePointer testFile(fopen(testPath.c_str(), "r"));
+    if(testFile.isNull()) {
+        errln("unable to open %s", testPath.c_str());
+        return;
+    }
+    // Format of a data line:
+    //   source locale ID ; expected canonicalized locale ID
+    while (fgets(line, (int)sizeof(line), testFile.getAlias())!=NULL) {
+        if (line[0] == '#') {
+            // ignore any lines start with #
+            continue;
+        }
+        char *semi = strchr(line, ';');
+        if (semi == nullptr) {
+            // ignore any lines without ;
+            continue;
+        }
+        *semi = '\0';  // null terminate at the position of the semicolon
+        const char* from = u_skipWhitespace((const char*)line);
+        u_rtrim((char*)from);
+        const char* to = u_skipWhitespace((const char*)semi + 1);
+        u_rtrim((char*)to);
+        std::string expect(to);
+        // Change the _ to -
+        std::transform(expect.begin(), expect.end(), expect.begin(),
+                       [](unsigned char c){ return c == '_' ? '-' : c; });
+
+        Locale loc = Locale::createCanonical(from);
+        std::string result = loc.toLanguageTag<std::string>(status);
+        const char* tag = loc.isBogus() ? "BOGUS" : result.c_str();
+        // Report the expected tag (not the actual one) next to "expected".
+        status.errIfFailureAndReset(
+            "FAIL: createCanonical(%s).toLanguageTag() expected \"%s\" locale is %s",
+            from, expect.c_str(), loc.getName());
+        std::string msg("createCanonical(");
+        msg += from;
+        msg += ") locale = ";
+        msg += loc.getName();
+        assertEquals(msg.c_str(), expect.c_str(), tag);
+    }
+}
+
+// Every entry of the list returned by
+// ulocimp_getKnownCanonicalizedLocaleForTest() must be reported as already
+// canonical by ulocimp_isCanonicalizedLocaleForTest().
+void LocaleTest::TestKnownCanonicalizedListCorrect()
+{
+    IcuTestErrorCode status(*this, "TestKnownCanonicalizedListCorrect");
+    int32_t numOfKnownCanonicalized;
+    const char* const* knownCanonicalized =
+        ulocimp_getKnownCanonicalizedLocaleForTest(&numOfKnownCanonicalized);
+    for (int32_t i = 0; i < numOfKnownCanonicalized; i++) {
+        std::string msg("Known Canonicalized Locale is not canonicalized: ");
+        assertTrue((msg + knownCanonicalized[i]).c_str(),
+            ulocimp_isCanonicalizedLocaleForTest(knownCanonicalized[i]));
+    }
+}
+
 void LocaleTest::TestConstructorAcceptsBCP47()
 {
     IcuTestErrorCode status(*this, "TestConstructorAcceptsBCP47");
diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h
index b217ce26c6a..a3a1ebc1071 100644
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@@ -122,6 +122,8 @@ public:
     void TestBug13554();
     void TestBug20410();
     void TestBug20900();
+    void TestLocaleCanonicalizationFromFile();
+    void TestKnownCanonicalizedListCorrect();
     void TestConstructorAcceptsBCP47();
 
     void TestAddLikelySubtags();
diff --git a/icu4c/source/test/testdata/localeCanonicalization.txt b/icu4c/source/test/testdata/localeCanonicalization.txt
new file mode 100644
index 00000000000..e41eaac05a7
--- /dev/null
+++ b/icu4c/source/test/testdata/localeCanonicalization.txt
@@ -0,0 +1,1648 @@
+# File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
+# Test data for locale identifier canonicalization
+# Copyright © 1991-2020 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/copyright.html +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# +# Format: +# ; +# +# The data lines are divided into 4 sets: +# explicit: a short list of explicit test cases. +# fromAliases: test cases generated from the alias data. +# decanonicalized: test cases generated by reversing the normalization process. +# withIrrelevants: test cases generated from the others by adding irrelevant fields where possible, +# to ensure that the canonicalization implementation is not sensitive to irrelevant fields. These include: +# Language: aaa +# Script: Adlm +# Region: AC +# Variant: fonipa +###### + + + +# explicit + +art_lojban ; jbo +en_US_aaland ; en_US +en_US_heploc ; en_US_alalc97 +en_US_polytoni ; en_US_polyton +en_aaland ; en_AX +en_arevela ; en +en_arevmda_arevela ; en +en_lojban ; en +hy_arevela ; hy +hy_arevmda ; hyw +hy_arevmda_arevela ; hyw +hye_arevmda ; hyw +no_bokmal_nynorsk ; nb +no_nynorsk_bokmal ; nb +zh_guoyu_hakka_xiang ; hak +zh_hakka_xiang ; hak + +# fromAliases + +aa_saaho ; ssy +aam ; aas +aar ; aa +abk ; ab +adp ; dz +afr ; af +aju ; jrb +aka ; ak +alb ; sq +als ; sq +amh ; am +ara ; ar +arb ; ar +arg ; an +arm ; hy +asd ; snz +asm ; as +aue ; ktz +ava ; av +ave ; ae +aym ; ay +ayr ; ay +ayx ; nun +aze ; az +azj ; az +bak ; ba +bam ; bm +baq ; eu +bcc ; bal +bcl ; bik +bel ; be +ben ; bn +bgm ; bcg +bh ; bho +bih ; bho +bis ; bi +bjd ; drl +bod ; bo +bos ; bs +bre ; br +bul ; bg +bur ; my +bxk ; luy +bxr ; bua +cat ; ca +ccq ; rki +cel_gaulish ; xtg +ces ; cs +cha ; ch +che ; ce +chi ; zh +chu ; cu +chv ; cv +cjr ; mom +cka ; cmr +cld ; syr +cmk ; xch +cmn ; zh +cnr ; sr_ME +cor ; kw +cos ; co +coy ; pij +cqu ; quh +cre ; cr +cwd ; cr +cym ; cy +cze ; cs +dan ; da +deu ; de +dgo ; doi +dhd ; mwr +dik ; din +diq ; zza +dit ; dif +div ; 
dv +drh ; mn +drw ; fa_AF +dut ; nl +dzo ; dz +ekk ; et +ell ; el +emk ; man +eng ; en +epo ; eo +esk ; ik +est ; et +eus ; eu +ewe ; ee +fao ; fo +fas ; fa +fat ; ak +fij ; fj +fin ; fi +fra ; fr +fre ; fr +fry ; fy +fuc ; ff +ful ; ff +gav ; dev +gaz ; om +gbo ; grb +geo ; ka +ger ; de +gfx ; vaj +ggn ; gvr +gla ; gd +gle ; ga +glg ; gl +glv ; gv +gno ; gon +gre ; el +grn ; gn +gti ; nyc +gug ; gn +guj ; gu +guv ; duz +gya ; gba +hat ; ht +hau ; ha +hbs ; sr_Latn +hdn ; hai +hea ; hmn +heb ; he +her ; hz +him ; srx +hin ; hi +hmo ; ho +hrr ; jal +hrv ; hr +hun ; hu +hye ; hy +ibi ; opa +ibo ; ig +ice ; is +ido ; io +iii ; ii +ike ; iu +iku ; iu +ile ; ie +ilw ; gal +in ; id +ina ; ia +ind ; id +ipk ; ik +isl ; is +ita ; it +iw ; he +jav ; jv +jeg ; oyb +ji ; yi +jpn ; ja +jw ; jv +kal ; kl +kan ; kn +kas ; ks +kat ; ka +kau ; kr +kaz ; kk +kgc ; tdf +kgh ; kml +khk ; mn +khm ; km +kik ; ki +kin ; rw +kir ; ky +kmr ; ku +knc ; kr +kng ; kg +knn ; kok +koj ; kwv +kom ; kv +kon ; kg +kor ; ko +kpv ; kv +krm ; bmf +ktr ; dtp +kua ; kj +kur ; ku +kvs ; gdj +kwq ; yam +kxe ; tvd +kzj ; dtp +kzt ; dtp +lao ; lo +lat ; la +lav ; lv +lbk ; bnc +lii ; raq +lim ; li +lin ; ln +lit ; lt +llo ; ngt +lmm ; rmx +ltz ; lb +lub ; lu +lug ; lg +lvs ; lv +mac ; mk +mah ; mh +mal ; ml +mao ; mi +mar ; mr +may ; ms +meg ; cir +mhr ; chm +mkd ; mk +mlg ; mg +mlt ; mt +mnk ; man +mo ; ro +mol ; ro +mon ; mn +mri ; mi +msa ; ms +mst ; mry +mup ; raj +mwj ; vaj +mya ; my +myd ; aog +myt ; mry +nad ; xny +nau ; na +nav ; nv +nbl ; nr +ncp ; kdz +nde ; nd +ndo ; ng +nep ; ne +nld ; nl +nno ; nn +nns ; nbr +nnx ; ngv +no ; nb +no_bokmal ; nb +no_nynorsk ; nn +nob ; nb +nor ; nb +npi ; ne +nts ; pij +nya ; ny +oci ; oc +ojg ; oj +oji ; oj +ori ; or +orm ; om +ory ; or +oss ; os +oun ; vaj +pan ; pa +pbu ; ps +pcr ; adx +per ; fa +pes ; fa +pli ; pi +plt ; mg +pmc ; huw +pmu ; phr +pnb ; lah +pol ; pl +por ; pt +ppa ; bfy +ppr ; lcq +prs ; fa_AF +pry ; prt +pus ; ps +puz ; pub +que ; qu +quz 
; qu +rmy ; rom +roh ; rm +ron ; ro +rum ; ro +run ; rn +rus ; ru +sag ; sg +san ; sa +sca ; hle +scc ; sr +scr ; hr +sgn_BR ; bzs +sgn_CO ; csn +sgn_DE ; gsg +sgn_DK ; dsl +sgn_FR ; fsl +sgn_GB ; bfi +sgn_GR ; gss +sgn_IE ; isg +sgn_IT ; ise +sgn_JP ; jsl +sgn_MX ; mfs +sgn_NI ; ncs +sgn_NL ; dse +sgn_NO ; nsi +sgn_PT ; psr +sgn_SE ; swl +sgn_US ; ase +sgn_ZA ; sfs +sh ; sr_Latn +sin ; si +skk ; oyb +slk ; sk +slo ; sk +slv ; sl +sme ; se +smo ; sm +sna ; sn +snd ; sd +som ; so +sot ; st +spa ; es +spy ; kln +sqi ; sq +src ; sc +srd ; sc +srp ; sr +ssw ; ss +sun ; su +swa ; sw +swc ; sw_CD +swe ; sv +swh ; sw +tah ; ty +tam ; ta +tat ; tt +tdu ; dtp +tel ; te +tgk ; tg +tgl ; fil +tha ; th +thc ; tpo +thx ; oyb +tib ; bo +tie ; ras +tir ; ti +tkk ; twm +tl ; fil +tlw ; weo +tmp ; tyj +tne ; kak +tnf ; fa_AF +ton ; to +tsf ; taj +tsn ; tn +tso ; ts +ttq ; tmh +tuk ; tk +tur ; tr +tw ; ak +twi ; ak +uig ; ug +ukr ; uk +umu ; del +und_004 ; und_AF +und_008 ; und_AL +und_010 ; und_AQ +und_012 ; und_DZ +und_016 ; und_AS +und_020 ; und_AD +und_024 ; und_AO +und_028 ; und_AG +und_031 ; und_AZ +und_032 ; und_AR +und_036 ; und_AU +und_040 ; und_AT +und_044 ; und_BS +und_048 ; und_BH +und_050 ; und_BD +und_051 ; und_AM +und_052 ; und_BB +und_056 ; und_BE +und_060 ; und_BM +und_062 ; und_034 +und_064 ; und_BT +und_068 ; und_BO +und_070 ; und_BA +und_072 ; und_BW +und_074 ; und_BV +und_076 ; und_BR +und_084 ; und_BZ +und_086 ; und_IO +und_090 ; und_SB +und_092 ; und_VG +und_096 ; und_BN +und_100 ; und_BG +und_104 ; und_MM +und_108 ; und_BI +und_112 ; und_BY +und_116 ; und_KH +und_120 ; und_CM +und_124 ; und_CA +und_132 ; und_CV +und_136 ; und_KY +und_140 ; und_CF +und_144 ; und_LK +und_148 ; und_TD +und_152 ; und_CL +und_156 ; und_CN +und_158 ; und_TW +und_162 ; und_CX +und_166 ; und_CC +und_170 ; und_CO +und_172 ; und_RU +und_174 ; und_KM +und_175 ; und_YT +und_178 ; und_CG +und_180 ; und_CD +und_184 ; und_CK +und_188 ; und_CR +und_191 ; und_HR +und_192 ; und_CU +und_196 ; 
und_CY +und_200 ; und_CZ +und_203 ; und_CZ +und_204 ; und_BJ +und_208 ; und_DK +und_212 ; und_DM +und_214 ; und_DO +und_218 ; und_EC +und_222 ; und_SV +und_226 ; und_GQ +und_230 ; und_ET +und_231 ; und_ET +und_232 ; und_ER +und_233 ; und_EE +und_234 ; und_FO +und_238 ; und_FK +und_239 ; und_GS +und_242 ; und_FJ +und_246 ; und_FI +und_248 ; und_AX +und_249 ; und_FR +und_250 ; und_FR +und_254 ; und_GF +und_258 ; und_PF +und_260 ; und_TF +und_262 ; und_DJ +und_266 ; und_GA +und_268 ; und_GE +und_270 ; und_GM +und_275 ; und_PS +und_276 ; und_DE +und_278 ; und_DE +und_280 ; und_DE +und_288 ; und_GH +und_292 ; und_GI +und_296 ; und_KI +und_300 ; und_GR +und_304 ; und_GL +und_308 ; und_GD +und_312 ; und_GP +und_316 ; und_GU +und_320 ; und_GT +und_324 ; und_GN +und_328 ; und_GY +und_332 ; und_HT +und_334 ; und_HM +und_336 ; und_VA +und_340 ; und_HN +und_344 ; und_HK +und_348 ; und_HU +und_352 ; und_IS +und_356 ; und_IN +und_360 ; und_ID +und_364 ; und_IR +und_368 ; und_IQ +und_372 ; und_IE +und_376 ; und_IL +und_380 ; und_IT +und_384 ; und_CI +und_388 ; und_JM +und_392 ; und_JP +und_398 ; und_KZ +und_400 ; und_JO +und_404 ; und_KE +und_408 ; und_KP +und_410 ; und_KR +und_414 ; und_KW +und_417 ; und_KG +und_418 ; und_LA +und_422 ; und_LB +und_426 ; und_LS +und_428 ; und_LV +und_430 ; und_LR +und_434 ; und_LY +und_438 ; und_LI +und_440 ; und_LT +und_442 ; und_LU +und_446 ; und_MO +und_450 ; und_MG +und_454 ; und_MW +und_458 ; und_MY +und_462 ; und_MV +und_466 ; und_ML +und_470 ; und_MT +und_474 ; und_MQ +und_478 ; und_MR +und_480 ; und_MU +und_484 ; und_MX +und_492 ; und_MC +und_496 ; und_MN +und_498 ; und_MD +und_499 ; und_ME +und_500 ; und_MS +und_504 ; und_MA +und_508 ; und_MZ +und_512 ; und_OM +und_516 ; und_NA +und_520 ; und_NR +und_524 ; und_NP +und_528 ; und_NL +und_530 ; und_CW +und_531 ; und_CW +und_532 ; und_CW +und_533 ; und_AW +und_534 ; und_SX +und_535 ; und_BQ +und_536 ; und_SA +und_540 ; und_NC +und_548 ; und_VU +und_554 ; und_NZ +und_558 ; und_NI +und_562 ; 
und_NE +und_566 ; und_NG +und_570 ; und_NU +und_574 ; und_NF +und_578 ; und_NO +und_580 ; und_MP +und_581 ; und_UM +und_582 ; und_FM +und_583 ; und_FM +und_584 ; und_MH +und_585 ; und_PW +und_586 ; und_PK +und_591 ; und_PA +und_598 ; und_PG +und_600 ; und_PY +und_604 ; und_PE +und_608 ; und_PH +und_612 ; und_PN +und_616 ; und_PL +und_620 ; und_PT +und_624 ; und_GW +und_626 ; und_TL +und_630 ; und_PR +und_634 ; und_QA +und_638 ; und_RE +und_642 ; und_RO +und_643 ; und_RU +und_646 ; und_RW +und_652 ; und_BL +und_654 ; und_SH +und_659 ; und_KN +und_660 ; und_AI +und_662 ; und_LC +und_663 ; und_MF +und_666 ; und_PM +und_670 ; und_VC +und_674 ; und_SM +und_678 ; und_ST +und_682 ; und_SA +und_686 ; und_SN +und_688 ; und_RS +und_690 ; und_SC +und_694 ; und_SL +und_702 ; und_SG +und_703 ; und_SK +und_704 ; und_VN +und_705 ; und_SI +und_706 ; und_SO +und_710 ; und_ZA +und_716 ; und_ZW +und_720 ; und_YE +und_724 ; und_ES +und_728 ; und_SS +und_729 ; und_SD +und_732 ; und_EH +und_736 ; und_SD +und_740 ; und_SR +und_744 ; und_SJ +und_748 ; und_SZ +und_752 ; und_SE +und_756 ; und_CH +und_760 ; und_SY +und_762 ; und_TJ +und_764 ; und_TH +und_768 ; und_TG +und_772 ; und_TK +und_776 ; und_TO +und_780 ; und_TT +und_784 ; und_AE +und_788 ; und_TN +und_792 ; und_TR +und_795 ; und_TM +und_796 ; und_TC +und_798 ; und_TV +und_800 ; und_UG +und_804 ; und_UA +und_807 ; und_MK +und_810 ; und_RU +und_818 ; und_EG +und_826 ; und_GB +und_830 ; und_JE +und_831 ; und_GG +und_832 ; und_JE +und_833 ; und_IM +und_834 ; und_TZ +und_840 ; und_US +und_850 ; und_VI +und_854 ; und_BF +und_858 ; und_UY +und_860 ; und_UZ +und_862 ; und_VE +und_876 ; und_WF +und_882 ; und_WS +und_886 ; und_YE +und_887 ; und_YE +und_890 ; und_RS +und_891 ; und_RS +und_894 ; und_ZM +und_958 ; und_AA +und_959 ; und_QM +und_960 ; und_QN +und_962 ; und_QP +und_963 ; und_QQ +und_964 ; und_QR +und_965 ; und_QS +und_966 ; und_QT +und_967 ; und_EU +und_968 ; und_QV +und_969 ; und_QW +und_970 ; und_QX +und_971 ; und_QY +und_972 ; 
und_QZ +und_973 ; und_XA +und_974 ; und_XB +und_975 ; und_XC +und_976 ; und_XD +und_977 ; und_XE +und_978 ; und_XF +und_979 ; und_XG +und_980 ; und_XH +und_981 ; und_XI +und_982 ; und_XJ +und_983 ; und_XK +und_984 ; und_XL +und_985 ; und_XM +und_986 ; und_XN +und_987 ; und_XO +und_988 ; und_XP +und_989 ; und_XQ +und_990 ; und_XR +und_991 ; und_XS +und_992 ; und_XT +und_993 ; und_XU +und_994 ; und_XV +und_995 ; und_XW +und_996 ; und_XX +und_997 ; und_XY +und_998 ; und_XZ +und_999 ; und_ZZ +und_AN ; und_CW +und_BU ; und_MM +und_CS ; und_RS +und_CT ; und_KI +und_DD ; und_DE +und_DY ; und_BJ +und_FQ ; und_AQ +und_FX ; und_FR +und_HV ; und_BF +und_JT ; und_UM +und_MI ; und_UM +und_NH ; und_VU +und_NQ ; und_AQ +und_NT ; und_SA +und_PC ; und_FM +und_PU ; und_UM +und_PZ ; und_PA +und_QU ; und_EU +und_Qaai ; und_Zinh +und_RH ; und_ZW +und_SU ; und_RU +und_TP ; und_TL +und_UK ; und_GB +und_VD ; und_VN +und_WK ; und_UM +und_YD ; und_YE +und_YU ; und_RS +und_ZR ; und_CD +und_aaland ; und_AX +und_arevela ; und +und_arevmda ; und +und_bokmal ; und +und_hakka ; und +und_heploc ; und_alalc97 +und_lojban ; und +und_nynorsk ; und +und_polytoni ; und_polyton +und_saaho ; und +und_xiang ; und +uok ; ema +urd ; ur +uzb ; uz +uzn ; uz +ven ; ve +vie ; vi +vol ; vo +wel ; cy +wln ; wa +wol ; wo +xba ; cax +xho ; xh +xia ; acn +xkh ; waw +xpe ; kpe +xsj ; suj +xsl ; den +ybd ; rki +ydd ; yi +yid ; yi +yma ; lrr +ymt ; mtm +yor ; yo +yos ; zom +yuu ; yug +zai ; zap +zh_guoyu ; zh +zh_hakka ; hak +zh_xiang ; hsn +zha ; za +zho ; zh +zsm ; ms +zul ; zu +zyb ; za + +# decanonicalized + +aar_saaho ; ssy +arm_arevela ; hy +arm_arevela_arevmda ; hyw +arm_arevmda ; hyw +chi_guoyu ; zh +chi_guoyu_hakka_xiang ; hak +chi_hakka ; hak +chi_hakka_xiang ; hak +chi_xiang ; hsn +cmn_guoyu ; zh +cmn_guoyu_hakka_xiang ; hak +cmn_hakka ; hak +cmn_hakka_xiang ; hak +cmn_xiang ; hsn +en_840_aaland ; en_US +en_840_heploc ; en_US_alalc97 +en_840_polytoni ; en_US_polyton +eng_840_aaland ; en_US +eng_840_heploc ; 
en_US_alalc97 +eng_840_polytoni ; en_US_polyton +eng_US_aaland ; en_US +eng_US_heploc ; en_US_alalc97 +eng_US_polytoni ; en_US_polyton +eng_aaland ; en_AX +eng_arevela ; en +eng_arevela_arevmda ; en +eng_lojban ; en +hye_arevela ; hy +hye_arevela_arevmda ; hyw +sgn_076 ; bzs +sgn_170 ; csn +sgn_208 ; dsl +sgn_249 ; fsl +sgn_250 ; fsl +sgn_276 ; gsg +sgn_278 ; gsg +sgn_280 ; gsg +sgn_300 ; gss +sgn_372 ; isg +sgn_380 ; ise +sgn_392 ; jsl +sgn_484 ; mfs +sgn_528 ; dse +sgn_558 ; ncs +sgn_578 ; nsi +sgn_620 ; psr +sgn_710 ; sfs +sgn_752 ; swl +sgn_826 ; bfi +sgn_840 ; ase +sgn_DD ; gsg +sgn_FX ; fsl +sgn_UK ; bfi +zho_guoyu ; zh +zho_guoyu_hakka_xiang ; hak +zho_hakka ; hak +zho_hakka_xiang ; hak +zho_xiang ; hsn + +# withIrrelevants + +aa_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa +aaa_Adlm_004_fonipa ; aaa_Adlm_AF_fonipa +aaa_Adlm_008_fonipa ; aaa_Adlm_AL_fonipa +aaa_Adlm_010_fonipa ; aaa_Adlm_AQ_fonipa +aaa_Adlm_012_fonipa ; aaa_Adlm_DZ_fonipa +aaa_Adlm_016_fonipa ; aaa_Adlm_AS_fonipa +aaa_Adlm_020_fonipa ; aaa_Adlm_AD_fonipa +aaa_Adlm_024_fonipa ; aaa_Adlm_AO_fonipa +aaa_Adlm_028_fonipa ; aaa_Adlm_AG_fonipa +aaa_Adlm_031_fonipa ; aaa_Adlm_AZ_fonipa +aaa_Adlm_032_fonipa ; aaa_Adlm_AR_fonipa +aaa_Adlm_036_fonipa ; aaa_Adlm_AU_fonipa +aaa_Adlm_040_fonipa ; aaa_Adlm_AT_fonipa +aaa_Adlm_044_fonipa ; aaa_Adlm_BS_fonipa +aaa_Adlm_048_fonipa ; aaa_Adlm_BH_fonipa +aaa_Adlm_050_fonipa ; aaa_Adlm_BD_fonipa +aaa_Adlm_051_fonipa ; aaa_Adlm_AM_fonipa +aaa_Adlm_052_fonipa ; aaa_Adlm_BB_fonipa +aaa_Adlm_056_fonipa ; aaa_Adlm_BE_fonipa +aaa_Adlm_060_fonipa ; aaa_Adlm_BM_fonipa +aaa_Adlm_062_fonipa ; aaa_Adlm_034_fonipa +aaa_Adlm_064_fonipa ; aaa_Adlm_BT_fonipa +aaa_Adlm_068_fonipa ; aaa_Adlm_BO_fonipa +aaa_Adlm_070_fonipa ; aaa_Adlm_BA_fonipa +aaa_Adlm_072_fonipa ; aaa_Adlm_BW_fonipa +aaa_Adlm_074_fonipa ; aaa_Adlm_BV_fonipa +aaa_Adlm_076_fonipa ; aaa_Adlm_BR_fonipa +aaa_Adlm_084_fonipa ; aaa_Adlm_BZ_fonipa +aaa_Adlm_086_fonipa ; aaa_Adlm_IO_fonipa +aaa_Adlm_090_fonipa ; 
aaa_Adlm_SB_fonipa +aaa_Adlm_092_fonipa ; aaa_Adlm_VG_fonipa +aaa_Adlm_096_fonipa ; aaa_Adlm_BN_fonipa +aaa_Adlm_100_fonipa ; aaa_Adlm_BG_fonipa +aaa_Adlm_104_fonipa ; aaa_Adlm_MM_fonipa +aaa_Adlm_108_fonipa ; aaa_Adlm_BI_fonipa +aaa_Adlm_112_fonipa ; aaa_Adlm_BY_fonipa +aaa_Adlm_116_fonipa ; aaa_Adlm_KH_fonipa +aaa_Adlm_120_fonipa ; aaa_Adlm_CM_fonipa +aaa_Adlm_124_fonipa ; aaa_Adlm_CA_fonipa +aaa_Adlm_132_fonipa ; aaa_Adlm_CV_fonipa +aaa_Adlm_136_fonipa ; aaa_Adlm_KY_fonipa +aaa_Adlm_140_fonipa ; aaa_Adlm_CF_fonipa +aaa_Adlm_144_fonipa ; aaa_Adlm_LK_fonipa +aaa_Adlm_148_fonipa ; aaa_Adlm_TD_fonipa +aaa_Adlm_152_fonipa ; aaa_Adlm_CL_fonipa +aaa_Adlm_156_fonipa ; aaa_Adlm_CN_fonipa +aaa_Adlm_158_fonipa ; aaa_Adlm_TW_fonipa +aaa_Adlm_162_fonipa ; aaa_Adlm_CX_fonipa +aaa_Adlm_166_fonipa ; aaa_Adlm_CC_fonipa +aaa_Adlm_170_fonipa ; aaa_Adlm_CO_fonipa +aaa_Adlm_172_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_174_fonipa ; aaa_Adlm_KM_fonipa +aaa_Adlm_175_fonipa ; aaa_Adlm_YT_fonipa +aaa_Adlm_178_fonipa ; aaa_Adlm_CG_fonipa +aaa_Adlm_180_fonipa ; aaa_Adlm_CD_fonipa +aaa_Adlm_184_fonipa ; aaa_Adlm_CK_fonipa +aaa_Adlm_188_fonipa ; aaa_Adlm_CR_fonipa +aaa_Adlm_191_fonipa ; aaa_Adlm_HR_fonipa +aaa_Adlm_192_fonipa ; aaa_Adlm_CU_fonipa +aaa_Adlm_196_fonipa ; aaa_Adlm_CY_fonipa +aaa_Adlm_200_fonipa ; aaa_Adlm_CZ_fonipa +aaa_Adlm_203_fonipa ; aaa_Adlm_CZ_fonipa +aaa_Adlm_204_fonipa ; aaa_Adlm_BJ_fonipa +aaa_Adlm_208_fonipa ; aaa_Adlm_DK_fonipa +aaa_Adlm_212_fonipa ; aaa_Adlm_DM_fonipa +aaa_Adlm_214_fonipa ; aaa_Adlm_DO_fonipa +aaa_Adlm_218_fonipa ; aaa_Adlm_EC_fonipa +aaa_Adlm_222_fonipa ; aaa_Adlm_SV_fonipa +aaa_Adlm_226_fonipa ; aaa_Adlm_GQ_fonipa +aaa_Adlm_230_fonipa ; aaa_Adlm_ET_fonipa +aaa_Adlm_231_fonipa ; aaa_Adlm_ET_fonipa +aaa_Adlm_232_fonipa ; aaa_Adlm_ER_fonipa +aaa_Adlm_233_fonipa ; aaa_Adlm_EE_fonipa +aaa_Adlm_234_fonipa ; aaa_Adlm_FO_fonipa +aaa_Adlm_238_fonipa ; aaa_Adlm_FK_fonipa +aaa_Adlm_239_fonipa ; aaa_Adlm_GS_fonipa +aaa_Adlm_242_fonipa ; aaa_Adlm_FJ_fonipa 
+aaa_Adlm_246_fonipa ; aaa_Adlm_FI_fonipa +aaa_Adlm_248_fonipa ; aaa_Adlm_AX_fonipa +aaa_Adlm_249_fonipa ; aaa_Adlm_FR_fonipa +aaa_Adlm_250_fonipa ; aaa_Adlm_FR_fonipa +aaa_Adlm_254_fonipa ; aaa_Adlm_GF_fonipa +aaa_Adlm_258_fonipa ; aaa_Adlm_PF_fonipa +aaa_Adlm_260_fonipa ; aaa_Adlm_TF_fonipa +aaa_Adlm_262_fonipa ; aaa_Adlm_DJ_fonipa +aaa_Adlm_266_fonipa ; aaa_Adlm_GA_fonipa +aaa_Adlm_268_fonipa ; aaa_Adlm_GE_fonipa +aaa_Adlm_270_fonipa ; aaa_Adlm_GM_fonipa +aaa_Adlm_275_fonipa ; aaa_Adlm_PS_fonipa +aaa_Adlm_276_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_278_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_280_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_288_fonipa ; aaa_Adlm_GH_fonipa +aaa_Adlm_292_fonipa ; aaa_Adlm_GI_fonipa +aaa_Adlm_296_fonipa ; aaa_Adlm_KI_fonipa +aaa_Adlm_300_fonipa ; aaa_Adlm_GR_fonipa +aaa_Adlm_304_fonipa ; aaa_Adlm_GL_fonipa +aaa_Adlm_308_fonipa ; aaa_Adlm_GD_fonipa +aaa_Adlm_312_fonipa ; aaa_Adlm_GP_fonipa +aaa_Adlm_316_fonipa ; aaa_Adlm_GU_fonipa +aaa_Adlm_320_fonipa ; aaa_Adlm_GT_fonipa +aaa_Adlm_324_fonipa ; aaa_Adlm_GN_fonipa +aaa_Adlm_328_fonipa ; aaa_Adlm_GY_fonipa +aaa_Adlm_332_fonipa ; aaa_Adlm_HT_fonipa +aaa_Adlm_334_fonipa ; aaa_Adlm_HM_fonipa +aaa_Adlm_336_fonipa ; aaa_Adlm_VA_fonipa +aaa_Adlm_340_fonipa ; aaa_Adlm_HN_fonipa +aaa_Adlm_344_fonipa ; aaa_Adlm_HK_fonipa +aaa_Adlm_348_fonipa ; aaa_Adlm_HU_fonipa +aaa_Adlm_352_fonipa ; aaa_Adlm_IS_fonipa +aaa_Adlm_356_fonipa ; aaa_Adlm_IN_fonipa +aaa_Adlm_360_fonipa ; aaa_Adlm_ID_fonipa +aaa_Adlm_364_fonipa ; aaa_Adlm_IR_fonipa +aaa_Adlm_368_fonipa ; aaa_Adlm_IQ_fonipa +aaa_Adlm_372_fonipa ; aaa_Adlm_IE_fonipa +aaa_Adlm_376_fonipa ; aaa_Adlm_IL_fonipa +aaa_Adlm_380_fonipa ; aaa_Adlm_IT_fonipa +aaa_Adlm_384_fonipa ; aaa_Adlm_CI_fonipa +aaa_Adlm_388_fonipa ; aaa_Adlm_JM_fonipa +aaa_Adlm_392_fonipa ; aaa_Adlm_JP_fonipa +aaa_Adlm_398_fonipa ; aaa_Adlm_KZ_fonipa +aaa_Adlm_400_fonipa ; aaa_Adlm_JO_fonipa +aaa_Adlm_404_fonipa ; aaa_Adlm_KE_fonipa +aaa_Adlm_408_fonipa ; aaa_Adlm_KP_fonipa +aaa_Adlm_410_fonipa ; 
aaa_Adlm_KR_fonipa +aaa_Adlm_414_fonipa ; aaa_Adlm_KW_fonipa +aaa_Adlm_417_fonipa ; aaa_Adlm_KG_fonipa +aaa_Adlm_418_fonipa ; aaa_Adlm_LA_fonipa +aaa_Adlm_422_fonipa ; aaa_Adlm_LB_fonipa +aaa_Adlm_426_fonipa ; aaa_Adlm_LS_fonipa +aaa_Adlm_428_fonipa ; aaa_Adlm_LV_fonipa +aaa_Adlm_430_fonipa ; aaa_Adlm_LR_fonipa +aaa_Adlm_434_fonipa ; aaa_Adlm_LY_fonipa +aaa_Adlm_438_fonipa ; aaa_Adlm_LI_fonipa +aaa_Adlm_440_fonipa ; aaa_Adlm_LT_fonipa +aaa_Adlm_442_fonipa ; aaa_Adlm_LU_fonipa +aaa_Adlm_446_fonipa ; aaa_Adlm_MO_fonipa +aaa_Adlm_450_fonipa ; aaa_Adlm_MG_fonipa +aaa_Adlm_454_fonipa ; aaa_Adlm_MW_fonipa +aaa_Adlm_458_fonipa ; aaa_Adlm_MY_fonipa +aaa_Adlm_462_fonipa ; aaa_Adlm_MV_fonipa +aaa_Adlm_466_fonipa ; aaa_Adlm_ML_fonipa +aaa_Adlm_470_fonipa ; aaa_Adlm_MT_fonipa +aaa_Adlm_474_fonipa ; aaa_Adlm_MQ_fonipa +aaa_Adlm_478_fonipa ; aaa_Adlm_MR_fonipa +aaa_Adlm_480_fonipa ; aaa_Adlm_MU_fonipa +aaa_Adlm_484_fonipa ; aaa_Adlm_MX_fonipa +aaa_Adlm_492_fonipa ; aaa_Adlm_MC_fonipa +aaa_Adlm_496_fonipa ; aaa_Adlm_MN_fonipa +aaa_Adlm_498_fonipa ; aaa_Adlm_MD_fonipa +aaa_Adlm_499_fonipa ; aaa_Adlm_ME_fonipa +aaa_Adlm_500_fonipa ; aaa_Adlm_MS_fonipa +aaa_Adlm_504_fonipa ; aaa_Adlm_MA_fonipa +aaa_Adlm_508_fonipa ; aaa_Adlm_MZ_fonipa +aaa_Adlm_512_fonipa ; aaa_Adlm_OM_fonipa +aaa_Adlm_516_fonipa ; aaa_Adlm_NA_fonipa +aaa_Adlm_520_fonipa ; aaa_Adlm_NR_fonipa +aaa_Adlm_524_fonipa ; aaa_Adlm_NP_fonipa +aaa_Adlm_528_fonipa ; aaa_Adlm_NL_fonipa +aaa_Adlm_530_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_531_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_532_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_533_fonipa ; aaa_Adlm_AW_fonipa +aaa_Adlm_534_fonipa ; aaa_Adlm_SX_fonipa +aaa_Adlm_535_fonipa ; aaa_Adlm_BQ_fonipa +aaa_Adlm_536_fonipa ; aaa_Adlm_SA_fonipa +aaa_Adlm_540_fonipa ; aaa_Adlm_NC_fonipa +aaa_Adlm_548_fonipa ; aaa_Adlm_VU_fonipa +aaa_Adlm_554_fonipa ; aaa_Adlm_NZ_fonipa +aaa_Adlm_558_fonipa ; aaa_Adlm_NI_fonipa +aaa_Adlm_562_fonipa ; aaa_Adlm_NE_fonipa +aaa_Adlm_566_fonipa ; aaa_Adlm_NG_fonipa 
+aaa_Adlm_570_fonipa ; aaa_Adlm_NU_fonipa +aaa_Adlm_574_fonipa ; aaa_Adlm_NF_fonipa +aaa_Adlm_578_fonipa ; aaa_Adlm_NO_fonipa +aaa_Adlm_580_fonipa ; aaa_Adlm_MP_fonipa +aaa_Adlm_581_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_582_fonipa ; aaa_Adlm_FM_fonipa +aaa_Adlm_583_fonipa ; aaa_Adlm_FM_fonipa +aaa_Adlm_584_fonipa ; aaa_Adlm_MH_fonipa +aaa_Adlm_585_fonipa ; aaa_Adlm_PW_fonipa +aaa_Adlm_586_fonipa ; aaa_Adlm_PK_fonipa +aaa_Adlm_591_fonipa ; aaa_Adlm_PA_fonipa +aaa_Adlm_598_fonipa ; aaa_Adlm_PG_fonipa +aaa_Adlm_600_fonipa ; aaa_Adlm_PY_fonipa +aaa_Adlm_604_fonipa ; aaa_Adlm_PE_fonipa +aaa_Adlm_608_fonipa ; aaa_Adlm_PH_fonipa +aaa_Adlm_612_fonipa ; aaa_Adlm_PN_fonipa +aaa_Adlm_616_fonipa ; aaa_Adlm_PL_fonipa +aaa_Adlm_620_fonipa ; aaa_Adlm_PT_fonipa +aaa_Adlm_624_fonipa ; aaa_Adlm_GW_fonipa +aaa_Adlm_626_fonipa ; aaa_Adlm_TL_fonipa +aaa_Adlm_630_fonipa ; aaa_Adlm_PR_fonipa +aaa_Adlm_634_fonipa ; aaa_Adlm_QA_fonipa +aaa_Adlm_638_fonipa ; aaa_Adlm_RE_fonipa +aaa_Adlm_642_fonipa ; aaa_Adlm_RO_fonipa +aaa_Adlm_643_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_646_fonipa ; aaa_Adlm_RW_fonipa +aaa_Adlm_652_fonipa ; aaa_Adlm_BL_fonipa +aaa_Adlm_654_fonipa ; aaa_Adlm_SH_fonipa +aaa_Adlm_659_fonipa ; aaa_Adlm_KN_fonipa +aaa_Adlm_660_fonipa ; aaa_Adlm_AI_fonipa +aaa_Adlm_662_fonipa ; aaa_Adlm_LC_fonipa +aaa_Adlm_663_fonipa ; aaa_Adlm_MF_fonipa +aaa_Adlm_666_fonipa ; aaa_Adlm_PM_fonipa +aaa_Adlm_670_fonipa ; aaa_Adlm_VC_fonipa +aaa_Adlm_674_fonipa ; aaa_Adlm_SM_fonipa +aaa_Adlm_678_fonipa ; aaa_Adlm_ST_fonipa +aaa_Adlm_682_fonipa ; aaa_Adlm_SA_fonipa +aaa_Adlm_686_fonipa ; aaa_Adlm_SN_fonipa +aaa_Adlm_688_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_690_fonipa ; aaa_Adlm_SC_fonipa +aaa_Adlm_694_fonipa ; aaa_Adlm_SL_fonipa +aaa_Adlm_702_fonipa ; aaa_Adlm_SG_fonipa +aaa_Adlm_703_fonipa ; aaa_Adlm_SK_fonipa +aaa_Adlm_704_fonipa ; aaa_Adlm_VN_fonipa +aaa_Adlm_705_fonipa ; aaa_Adlm_SI_fonipa +aaa_Adlm_706_fonipa ; aaa_Adlm_SO_fonipa +aaa_Adlm_710_fonipa ; aaa_Adlm_ZA_fonipa +aaa_Adlm_716_fonipa ; 
aaa_Adlm_ZW_fonipa +aaa_Adlm_720_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_724_fonipa ; aaa_Adlm_ES_fonipa +aaa_Adlm_728_fonipa ; aaa_Adlm_SS_fonipa +aaa_Adlm_729_fonipa ; aaa_Adlm_SD_fonipa +aaa_Adlm_732_fonipa ; aaa_Adlm_EH_fonipa +aaa_Adlm_736_fonipa ; aaa_Adlm_SD_fonipa +aaa_Adlm_740_fonipa ; aaa_Adlm_SR_fonipa +aaa_Adlm_744_fonipa ; aaa_Adlm_SJ_fonipa +aaa_Adlm_748_fonipa ; aaa_Adlm_SZ_fonipa +aaa_Adlm_752_fonipa ; aaa_Adlm_SE_fonipa +aaa_Adlm_756_fonipa ; aaa_Adlm_CH_fonipa +aaa_Adlm_760_fonipa ; aaa_Adlm_SY_fonipa +aaa_Adlm_762_fonipa ; aaa_Adlm_TJ_fonipa +aaa_Adlm_764_fonipa ; aaa_Adlm_TH_fonipa +aaa_Adlm_768_fonipa ; aaa_Adlm_TG_fonipa +aaa_Adlm_772_fonipa ; aaa_Adlm_TK_fonipa +aaa_Adlm_776_fonipa ; aaa_Adlm_TO_fonipa +aaa_Adlm_780_fonipa ; aaa_Adlm_TT_fonipa +aaa_Adlm_784_fonipa ; aaa_Adlm_AE_fonipa +aaa_Adlm_788_fonipa ; aaa_Adlm_TN_fonipa +aaa_Adlm_792_fonipa ; aaa_Adlm_TR_fonipa +aaa_Adlm_795_fonipa ; aaa_Adlm_TM_fonipa +aaa_Adlm_796_fonipa ; aaa_Adlm_TC_fonipa +aaa_Adlm_798_fonipa ; aaa_Adlm_TV_fonipa +aaa_Adlm_800_fonipa ; aaa_Adlm_UG_fonipa +aaa_Adlm_804_fonipa ; aaa_Adlm_UA_fonipa +aaa_Adlm_807_fonipa ; aaa_Adlm_MK_fonipa +aaa_Adlm_810_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_818_fonipa ; aaa_Adlm_EG_fonipa +aaa_Adlm_826_fonipa ; aaa_Adlm_GB_fonipa +aaa_Adlm_830_fonipa ; aaa_Adlm_JE_fonipa +aaa_Adlm_831_fonipa ; aaa_Adlm_GG_fonipa +aaa_Adlm_832_fonipa ; aaa_Adlm_JE_fonipa +aaa_Adlm_833_fonipa ; aaa_Adlm_IM_fonipa +aaa_Adlm_834_fonipa ; aaa_Adlm_TZ_fonipa +aaa_Adlm_840_fonipa ; aaa_Adlm_US_fonipa +aaa_Adlm_850_fonipa ; aaa_Adlm_VI_fonipa +aaa_Adlm_854_fonipa ; aaa_Adlm_BF_fonipa +aaa_Adlm_858_fonipa ; aaa_Adlm_UY_fonipa +aaa_Adlm_860_fonipa ; aaa_Adlm_UZ_fonipa +aaa_Adlm_862_fonipa ; aaa_Adlm_VE_fonipa +aaa_Adlm_876_fonipa ; aaa_Adlm_WF_fonipa +aaa_Adlm_882_fonipa ; aaa_Adlm_WS_fonipa +aaa_Adlm_886_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_887_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_890_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_891_fonipa ; aaa_Adlm_RS_fonipa 
+aaa_Adlm_894_fonipa ; aaa_Adlm_ZM_fonipa +aaa_Adlm_958_fonipa ; aaa_Adlm_AA_fonipa +aaa_Adlm_959_fonipa ; aaa_Adlm_QM_fonipa +aaa_Adlm_960_fonipa ; aaa_Adlm_QN_fonipa +aaa_Adlm_962_fonipa ; aaa_Adlm_QP_fonipa +aaa_Adlm_963_fonipa ; aaa_Adlm_QQ_fonipa +aaa_Adlm_964_fonipa ; aaa_Adlm_QR_fonipa +aaa_Adlm_965_fonipa ; aaa_Adlm_QS_fonipa +aaa_Adlm_966_fonipa ; aaa_Adlm_QT_fonipa +aaa_Adlm_967_fonipa ; aaa_Adlm_EU_fonipa +aaa_Adlm_968_fonipa ; aaa_Adlm_QV_fonipa +aaa_Adlm_969_fonipa ; aaa_Adlm_QW_fonipa +aaa_Adlm_970_fonipa ; aaa_Adlm_QX_fonipa +aaa_Adlm_971_fonipa ; aaa_Adlm_QY_fonipa +aaa_Adlm_972_fonipa ; aaa_Adlm_QZ_fonipa +aaa_Adlm_973_fonipa ; aaa_Adlm_XA_fonipa +aaa_Adlm_974_fonipa ; aaa_Adlm_XB_fonipa +aaa_Adlm_975_fonipa ; aaa_Adlm_XC_fonipa +aaa_Adlm_976_fonipa ; aaa_Adlm_XD_fonipa +aaa_Adlm_977_fonipa ; aaa_Adlm_XE_fonipa +aaa_Adlm_978_fonipa ; aaa_Adlm_XF_fonipa +aaa_Adlm_979_fonipa ; aaa_Adlm_XG_fonipa +aaa_Adlm_980_fonipa ; aaa_Adlm_XH_fonipa +aaa_Adlm_981_fonipa ; aaa_Adlm_XI_fonipa +aaa_Adlm_982_fonipa ; aaa_Adlm_XJ_fonipa +aaa_Adlm_983_fonipa ; aaa_Adlm_XK_fonipa +aaa_Adlm_984_fonipa ; aaa_Adlm_XL_fonipa +aaa_Adlm_985_fonipa ; aaa_Adlm_XM_fonipa +aaa_Adlm_986_fonipa ; aaa_Adlm_XN_fonipa +aaa_Adlm_987_fonipa ; aaa_Adlm_XO_fonipa +aaa_Adlm_988_fonipa ; aaa_Adlm_XP_fonipa +aaa_Adlm_989_fonipa ; aaa_Adlm_XQ_fonipa +aaa_Adlm_990_fonipa ; aaa_Adlm_XR_fonipa +aaa_Adlm_991_fonipa ; aaa_Adlm_XS_fonipa +aaa_Adlm_992_fonipa ; aaa_Adlm_XT_fonipa +aaa_Adlm_993_fonipa ; aaa_Adlm_XU_fonipa +aaa_Adlm_994_fonipa ; aaa_Adlm_XV_fonipa +aaa_Adlm_995_fonipa ; aaa_Adlm_XW_fonipa +aaa_Adlm_996_fonipa ; aaa_Adlm_XX_fonipa +aaa_Adlm_997_fonipa ; aaa_Adlm_XY_fonipa +aaa_Adlm_998_fonipa ; aaa_Adlm_XZ_fonipa +aaa_Adlm_999_fonipa ; aaa_Adlm_ZZ_fonipa +aaa_Adlm_AC_aaland_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_arevela_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_arevmda_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_bokmal_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_hakka ; 
aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_heploc ; aaa_Adlm_AC_alalc97_fonipa +aaa_Adlm_AC_fonipa_lojban ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_nynorsk ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_polytoni ; aaa_Adlm_AC_fonipa_polyton +aaa_Adlm_AC_fonipa_saaho ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_xiang ; aaa_Adlm_AC_fonipa +aaa_Adlm_AN_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_BU_fonipa ; aaa_Adlm_MM_fonipa +aaa_Adlm_CS_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_CT_fonipa ; aaa_Adlm_KI_fonipa +aaa_Adlm_DD_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_DY_fonipa ; aaa_Adlm_BJ_fonipa +aaa_Adlm_FQ_fonipa ; aaa_Adlm_AQ_fonipa +aaa_Adlm_FX_fonipa ; aaa_Adlm_FR_fonipa +aaa_Adlm_HV_fonipa ; aaa_Adlm_BF_fonipa +aaa_Adlm_JT_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_MI_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_NH_fonipa ; aaa_Adlm_VU_fonipa +aaa_Adlm_NQ_fonipa ; aaa_Adlm_AQ_fonipa +aaa_Adlm_NT_fonipa ; aaa_Adlm_SA_fonipa +aaa_Adlm_PC_fonipa ; aaa_Adlm_FM_fonipa +aaa_Adlm_PU_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_PZ_fonipa ; aaa_Adlm_PA_fonipa +aaa_Adlm_QU_fonipa ; aaa_Adlm_EU_fonipa +aaa_Adlm_RH_fonipa ; aaa_Adlm_ZW_fonipa +aaa_Adlm_SU_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_TP_fonipa ; aaa_Adlm_TL_fonipa +aaa_Adlm_UK_fonipa ; aaa_Adlm_GB_fonipa +aaa_Adlm_VD_fonipa ; aaa_Adlm_VN_fonipa +aaa_Adlm_WK_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_YD_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_YU_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_ZR_fonipa ; aaa_Adlm_CD_fonipa +aaa_Qaai_AC_fonipa ; aaa_Zinh_AC_fonipa +aam_Adlm_AC_fonipa ; aas_Adlm_AC_fonipa +aar_Adlm_AC_fonipa ; aa_Adlm_AC_fonipa +aar_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa +abk_Adlm_AC_fonipa ; ab_Adlm_AC_fonipa +adp_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa +afr_Adlm_AC_fonipa ; af_Adlm_AC_fonipa +aju_Adlm_AC_fonipa ; jrb_Adlm_AC_fonipa +aka_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +alb_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa +als_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa +amh_Adlm_AC_fonipa ; am_Adlm_AC_fonipa +ara_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa +arb_Adlm_AC_fonipa ; 
ar_Adlm_AC_fonipa +arg_Adlm_AC_fonipa ; an_Adlm_AC_fonipa +arm_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa +arm_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa +arm_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa +arm_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa +art_Adlm_AC_fonipa_lojban ; jbo_Adlm_AC_fonipa +asd_Adlm_AC_fonipa ; snz_Adlm_AC_fonipa +asm_Adlm_AC_fonipa ; as_Adlm_AC_fonipa +aue_Adlm_AC_fonipa ; ktz_Adlm_AC_fonipa +ava_Adlm_AC_fonipa ; av_Adlm_AC_fonipa +ave_Adlm_AC_fonipa ; ae_Adlm_AC_fonipa +aym_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa +ayr_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa +ayx_Adlm_AC_fonipa ; nun_Adlm_AC_fonipa +aze_Adlm_AC_fonipa ; az_Adlm_AC_fonipa +azj_Adlm_AC_fonipa ; az_Adlm_AC_fonipa +bak_Adlm_AC_fonipa ; ba_Adlm_AC_fonipa +bam_Adlm_AC_fonipa ; bm_Adlm_AC_fonipa +baq_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa +bcc_Adlm_AC_fonipa ; bal_Adlm_AC_fonipa +bcl_Adlm_AC_fonipa ; bik_Adlm_AC_fonipa +bel_Adlm_AC_fonipa ; be_Adlm_AC_fonipa +ben_Adlm_AC_fonipa ; bn_Adlm_AC_fonipa +bgm_Adlm_AC_fonipa ; bcg_Adlm_AC_fonipa +bh_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa +bih_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa +bis_Adlm_AC_fonipa ; bi_Adlm_AC_fonipa +bjd_Adlm_AC_fonipa ; drl_Adlm_AC_fonipa +bod_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa +bos_Adlm_AC_fonipa ; bs_Adlm_AC_fonipa +bre_Adlm_AC_fonipa ; br_Adlm_AC_fonipa +bul_Adlm_AC_fonipa ; bg_Adlm_AC_fonipa +bur_Adlm_AC_fonipa ; my_Adlm_AC_fonipa +bxk_Adlm_AC_fonipa ; luy_Adlm_AC_fonipa +bxr_Adlm_AC_fonipa ; bua_Adlm_AC_fonipa +cat_Adlm_AC_fonipa ; ca_Adlm_AC_fonipa +ccq_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa +cel_Adlm_AC_fonipa_gaulish ; xtg_Adlm_AC_fonipa +ces_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa +cha_Adlm_AC_fonipa ; ch_Adlm_AC_fonipa +che_Adlm_AC_fonipa ; ce_Adlm_AC_fonipa +chi_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_xiang ; 
hsn_Adlm_AC_fonipa +chu_Adlm_AC_fonipa ; cu_Adlm_AC_fonipa +chv_Adlm_AC_fonipa ; cv_Adlm_AC_fonipa +cjr_Adlm_AC_fonipa ; mom_Adlm_AC_fonipa +cka_Adlm_AC_fonipa ; cmr_Adlm_AC_fonipa +cld_Adlm_AC_fonipa ; syr_Adlm_AC_fonipa +cmk_Adlm_AC_fonipa ; xch_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +cnr_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +cor_Adlm_AC_fonipa ; kw_Adlm_AC_fonipa +cos_Adlm_AC_fonipa ; co_Adlm_AC_fonipa +coy_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa +cqu_Adlm_AC_fonipa ; quh_Adlm_AC_fonipa +cre_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa +cwd_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa +cym_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa +cze_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa +dan_Adlm_AC_fonipa ; da_Adlm_AC_fonipa +deu_Adlm_AC_fonipa ; de_Adlm_AC_fonipa +dgo_Adlm_AC_fonipa ; doi_Adlm_AC_fonipa +dhd_Adlm_AC_fonipa ; mwr_Adlm_AC_fonipa +dik_Adlm_AC_fonipa ; din_Adlm_AC_fonipa +diq_Adlm_AC_fonipa ; zza_Adlm_AC_fonipa +dit_Adlm_AC_fonipa ; dif_Adlm_AC_fonipa +div_Adlm_AC_fonipa ; dv_Adlm_AC_fonipa +drh_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa +drw_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +dut_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa +dzo_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa +ekk_Adlm_AC_fonipa ; et_Adlm_AC_fonipa +ell_Adlm_AC_fonipa ; el_Adlm_AC_fonipa +emk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa +en_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa +en_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa +en_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +en_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa +en_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa +en_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa +en_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa +en_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa +en_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa 
+en_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +eng_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa +eng_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa +eng_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +eng_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa +eng_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa +eng_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa +eng_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +epo_Adlm_AC_fonipa ; eo_Adlm_AC_fonipa +esk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa +est_Adlm_AC_fonipa ; et_Adlm_AC_fonipa +eus_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa +ewe_Adlm_AC_fonipa ; ee_Adlm_AC_fonipa +fao_Adlm_AC_fonipa ; fo_Adlm_AC_fonipa +fas_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +fat_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +fij_Adlm_AC_fonipa ; fj_Adlm_AC_fonipa +fin_Adlm_AC_fonipa ; fi_Adlm_AC_fonipa +fra_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa +fre_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa +fry_Adlm_AC_fonipa ; fy_Adlm_AC_fonipa +fuc_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa +ful_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa +gav_Adlm_AC_fonipa ; dev_Adlm_AC_fonipa +gaz_Adlm_AC_fonipa ; om_Adlm_AC_fonipa +gbo_Adlm_AC_fonipa ; grb_Adlm_AC_fonipa +geo_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa +ger_Adlm_AC_fonipa ; de_Adlm_AC_fonipa +gfx_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa +ggn_Adlm_AC_fonipa ; gvr_Adlm_AC_fonipa +gla_Adlm_AC_fonipa ; gd_Adlm_AC_fonipa +gle_Adlm_AC_fonipa ; ga_Adlm_AC_fonipa +glg_Adlm_AC_fonipa ; gl_Adlm_AC_fonipa +glv_Adlm_AC_fonipa ; gv_Adlm_AC_fonipa +gno_Adlm_AC_fonipa ; gon_Adlm_AC_fonipa +gre_Adlm_AC_fonipa ; el_Adlm_AC_fonipa +grn_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa +gti_Adlm_AC_fonipa ; nyc_Adlm_AC_fonipa +gug_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa +guj_Adlm_AC_fonipa ; gu_Adlm_AC_fonipa +guv_Adlm_AC_fonipa ; duz_Adlm_AC_fonipa +gya_Adlm_AC_fonipa ; gba_Adlm_AC_fonipa 
+hat_Adlm_AC_fonipa ; ht_Adlm_AC_fonipa +hau_Adlm_AC_fonipa ; ha_Adlm_AC_fonipa +hbs_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +hdn_Adlm_AC_fonipa ; hai_Adlm_AC_fonipa +hea_Adlm_AC_fonipa ; hmn_Adlm_AC_fonipa +heb_Adlm_AC_fonipa ; he_Adlm_AC_fonipa +her_Adlm_AC_fonipa ; hz_Adlm_AC_fonipa +him_Adlm_AC_fonipa ; srx_Adlm_AC_fonipa +hin_Adlm_AC_fonipa ; hi_Adlm_AC_fonipa +hmo_Adlm_AC_fonipa ; ho_Adlm_AC_fonipa +hrr_Adlm_AC_fonipa ; jal_Adlm_AC_fonipa +hrv_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa +hun_Adlm_AC_fonipa ; hu_Adlm_AC_fonipa +hy_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hy_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa +hy_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hye_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hye_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa +hye_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hye_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa +ibi_Adlm_AC_fonipa ; opa_Adlm_AC_fonipa +ibo_Adlm_AC_fonipa ; ig_Adlm_AC_fonipa +ice_Adlm_AC_fonipa ; is_Adlm_AC_fonipa +ido_Adlm_AC_fonipa ; io_Adlm_AC_fonipa +iii_Adlm_AC_fonipa ; ii_Adlm_AC_fonipa +ike_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa +iku_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa +ile_Adlm_AC_fonipa ; ie_Adlm_AC_fonipa +ilw_Adlm_AC_fonipa ; gal_Adlm_AC_fonipa +in_Adlm_AC_fonipa ; id_Adlm_AC_fonipa +ina_Adlm_AC_fonipa ; ia_Adlm_AC_fonipa +ind_Adlm_AC_fonipa ; id_Adlm_AC_fonipa +ipk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa +isl_Adlm_AC_fonipa ; is_Adlm_AC_fonipa +ita_Adlm_AC_fonipa ; it_Adlm_AC_fonipa +iw_Adlm_AC_fonipa ; he_Adlm_AC_fonipa +jav_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa +jeg_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa +ji_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa +jpn_Adlm_AC_fonipa ; ja_Adlm_AC_fonipa +jw_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa +kal_Adlm_AC_fonipa ; kl_Adlm_AC_fonipa +kan_Adlm_AC_fonipa ; kn_Adlm_AC_fonipa +kas_Adlm_AC_fonipa ; ks_Adlm_AC_fonipa +kat_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa +kau_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa +kaz_Adlm_AC_fonipa ; kk_Adlm_AC_fonipa +kgc_Adlm_AC_fonipa ; tdf_Adlm_AC_fonipa 
+kgh_Adlm_AC_fonipa ; kml_Adlm_AC_fonipa +khk_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa +khm_Adlm_AC_fonipa ; km_Adlm_AC_fonipa +kik_Adlm_AC_fonipa ; ki_Adlm_AC_fonipa +kin_Adlm_AC_fonipa ; rw_Adlm_AC_fonipa +kir_Adlm_AC_fonipa ; ky_Adlm_AC_fonipa +kmr_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa +knc_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa +kng_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa +knn_Adlm_AC_fonipa ; kok_Adlm_AC_fonipa +koj_Adlm_AC_fonipa ; kwv_Adlm_AC_fonipa +kom_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa +kon_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa +kor_Adlm_AC_fonipa ; ko_Adlm_AC_fonipa +kpv_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa +krm_Adlm_AC_fonipa ; bmf_Adlm_AC_fonipa +ktr_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +kua_Adlm_AC_fonipa ; kj_Adlm_AC_fonipa +kur_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa +kvs_Adlm_AC_fonipa ; gdj_Adlm_AC_fonipa +kwq_Adlm_AC_fonipa ; yam_Adlm_AC_fonipa +kxe_Adlm_AC_fonipa ; tvd_Adlm_AC_fonipa +kzj_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +kzt_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +lao_Adlm_AC_fonipa ; lo_Adlm_AC_fonipa +lat_Adlm_AC_fonipa ; la_Adlm_AC_fonipa +lav_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa +lbk_Adlm_AC_fonipa ; bnc_Adlm_AC_fonipa +lii_Adlm_AC_fonipa ; raq_Adlm_AC_fonipa +lim_Adlm_AC_fonipa ; li_Adlm_AC_fonipa +lin_Adlm_AC_fonipa ; ln_Adlm_AC_fonipa +lit_Adlm_AC_fonipa ; lt_Adlm_AC_fonipa +llo_Adlm_AC_fonipa ; ngt_Adlm_AC_fonipa +lmm_Adlm_AC_fonipa ; rmx_Adlm_AC_fonipa +ltz_Adlm_AC_fonipa ; lb_Adlm_AC_fonipa +lub_Adlm_AC_fonipa ; lu_Adlm_AC_fonipa +lug_Adlm_AC_fonipa ; lg_Adlm_AC_fonipa +lvs_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa +mac_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa +mah_Adlm_AC_fonipa ; mh_Adlm_AC_fonipa +mal_Adlm_AC_fonipa ; ml_Adlm_AC_fonipa +mao_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa +mar_Adlm_AC_fonipa ; mr_Adlm_AC_fonipa +may_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa +meg_Adlm_AC_fonipa ; cir_Adlm_AC_fonipa +mhr_Adlm_AC_fonipa ; chm_Adlm_AC_fonipa +mkd_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa +mlg_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa +mlt_Adlm_AC_fonipa ; mt_Adlm_AC_fonipa +mnk_Adlm_AC_fonipa ; 
man_Adlm_AC_fonipa +mo_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +mol_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +mon_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa +mri_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa +msa_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa +mst_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa +mup_Adlm_AC_fonipa ; raj_Adlm_AC_fonipa +mwj_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa +mya_Adlm_AC_fonipa ; my_Adlm_AC_fonipa +myd_Adlm_AC_fonipa ; aog_Adlm_AC_fonipa +myt_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa +nad_Adlm_AC_fonipa ; xny_Adlm_AC_fonipa +nau_Adlm_AC_fonipa ; na_Adlm_AC_fonipa +nav_Adlm_AC_fonipa ; nv_Adlm_AC_fonipa +nbl_Adlm_AC_fonipa ; nr_Adlm_AC_fonipa +ncp_Adlm_AC_fonipa ; kdz_Adlm_AC_fonipa +nde_Adlm_AC_fonipa ; nd_Adlm_AC_fonipa +ndo_Adlm_AC_fonipa ; ng_Adlm_AC_fonipa +nep_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa +nld_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa +nno_Adlm_AC_fonipa ; nn_Adlm_AC_fonipa +nns_Adlm_AC_fonipa ; nbr_Adlm_AC_fonipa +nnx_Adlm_AC_fonipa ; ngv_Adlm_AC_fonipa +no_Adlm_AC_bokmal_fonipa ; nb_Adlm_AC_fonipa +no_Adlm_AC_bokmal_fonipa_nynorsk ; nb_Adlm_AC_fonipa +no_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa +no_Adlm_AC_fonipa_nynorsk ; nn_Adlm_AC_fonipa +nob_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa +nor_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa +npi_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa +nts_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa +nya_Adlm_AC_fonipa ; ny_Adlm_AC_fonipa +oci_Adlm_AC_fonipa ; oc_Adlm_AC_fonipa +ojg_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa +oji_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa +ori_Adlm_AC_fonipa ; or_Adlm_AC_fonipa +orm_Adlm_AC_fonipa ; om_Adlm_AC_fonipa +ory_Adlm_AC_fonipa ; or_Adlm_AC_fonipa +oss_Adlm_AC_fonipa ; os_Adlm_AC_fonipa +oun_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa +pan_Adlm_AC_fonipa ; pa_Adlm_AC_fonipa +pbu_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa +pcr_Adlm_AC_fonipa ; adx_Adlm_AC_fonipa +per_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +pes_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +pli_Adlm_AC_fonipa ; pi_Adlm_AC_fonipa +plt_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa +pmc_Adlm_AC_fonipa ; huw_Adlm_AC_fonipa +pmu_Adlm_AC_fonipa ; 
phr_Adlm_AC_fonipa +pnb_Adlm_AC_fonipa ; lah_Adlm_AC_fonipa +pol_Adlm_AC_fonipa ; pl_Adlm_AC_fonipa +por_Adlm_AC_fonipa ; pt_Adlm_AC_fonipa +ppa_Adlm_AC_fonipa ; bfy_Adlm_AC_fonipa +ppr_Adlm_AC_fonipa ; lcq_Adlm_AC_fonipa +prs_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +pry_Adlm_AC_fonipa ; prt_Adlm_AC_fonipa +pus_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa +puz_Adlm_AC_fonipa ; pub_Adlm_AC_fonipa +que_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa +quz_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa +rmy_Adlm_AC_fonipa ; rom_Adlm_AC_fonipa +roh_Adlm_AC_fonipa ; rm_Adlm_AC_fonipa +ron_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +rum_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +run_Adlm_AC_fonipa ; rn_Adlm_AC_fonipa +rus_Adlm_AC_fonipa ; ru_Adlm_AC_fonipa +sag_Adlm_AC_fonipa ; sg_Adlm_AC_fonipa +san_Adlm_AC_fonipa ; sa_Adlm_AC_fonipa +sca_Adlm_AC_fonipa ; hle_Adlm_AC_fonipa +scc_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +scr_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa +sgn_Adlm_076_fonipa ; bzs_Adlm_fonipa +sgn_Adlm_170_fonipa ; csn_Adlm_fonipa +sgn_Adlm_208_fonipa ; dsl_Adlm_fonipa +sgn_Adlm_249_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_250_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_276_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_278_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_280_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_300_fonipa ; gss_Adlm_fonipa +sgn_Adlm_372_fonipa ; isg_Adlm_fonipa +sgn_Adlm_380_fonipa ; ise_Adlm_fonipa +sgn_Adlm_392_fonipa ; jsl_Adlm_fonipa +sgn_Adlm_484_fonipa ; mfs_Adlm_fonipa +sgn_Adlm_528_fonipa ; dse_Adlm_fonipa +sgn_Adlm_558_fonipa ; ncs_Adlm_fonipa +sgn_Adlm_578_fonipa ; nsi_Adlm_fonipa +sgn_Adlm_620_fonipa ; psr_Adlm_fonipa +sgn_Adlm_710_fonipa ; sfs_Adlm_fonipa +sgn_Adlm_752_fonipa ; swl_Adlm_fonipa +sgn_Adlm_826_fonipa ; bfi_Adlm_fonipa +sgn_Adlm_840_fonipa ; ase_Adlm_fonipa +sgn_Adlm_BR_fonipa ; bzs_Adlm_fonipa +sgn_Adlm_CO_fonipa ; csn_Adlm_fonipa +sgn_Adlm_DD_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_DE_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_DK_fonipa ; dsl_Adlm_fonipa +sgn_Adlm_FR_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_FX_fonipa ; fsl_Adlm_fonipa 
+sgn_Adlm_GB_fonipa ; bfi_Adlm_fonipa +sgn_Adlm_GR_fonipa ; gss_Adlm_fonipa +sgn_Adlm_IE_fonipa ; isg_Adlm_fonipa +sgn_Adlm_IT_fonipa ; ise_Adlm_fonipa +sgn_Adlm_JP_fonipa ; jsl_Adlm_fonipa +sgn_Adlm_MX_fonipa ; mfs_Adlm_fonipa +sgn_Adlm_NI_fonipa ; ncs_Adlm_fonipa +sgn_Adlm_NL_fonipa ; dse_Adlm_fonipa +sgn_Adlm_NO_fonipa ; nsi_Adlm_fonipa +sgn_Adlm_PT_fonipa ; psr_Adlm_fonipa +sgn_Adlm_SE_fonipa ; swl_Adlm_fonipa +sgn_Adlm_UK_fonipa ; bfi_Adlm_fonipa +sgn_Adlm_US_fonipa ; ase_Adlm_fonipa +sgn_Adlm_ZA_fonipa ; sfs_Adlm_fonipa +sh_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +sin_Adlm_AC_fonipa ; si_Adlm_AC_fonipa +skk_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa +slk_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa +slo_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa +slv_Adlm_AC_fonipa ; sl_Adlm_AC_fonipa +sme_Adlm_AC_fonipa ; se_Adlm_AC_fonipa +smo_Adlm_AC_fonipa ; sm_Adlm_AC_fonipa +sna_Adlm_AC_fonipa ; sn_Adlm_AC_fonipa +snd_Adlm_AC_fonipa ; sd_Adlm_AC_fonipa +som_Adlm_AC_fonipa ; so_Adlm_AC_fonipa +sot_Adlm_AC_fonipa ; st_Adlm_AC_fonipa +spa_Adlm_AC_fonipa ; es_Adlm_AC_fonipa +spy_Adlm_AC_fonipa ; kln_Adlm_AC_fonipa +sqi_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa +src_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa +srd_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa +srp_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +ssw_Adlm_AC_fonipa ; ss_Adlm_AC_fonipa +sun_Adlm_AC_fonipa ; su_Adlm_AC_fonipa +swa_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa +swc_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa +swe_Adlm_AC_fonipa ; sv_Adlm_AC_fonipa +swh_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa +tah_Adlm_AC_fonipa ; ty_Adlm_AC_fonipa +tam_Adlm_AC_fonipa ; ta_Adlm_AC_fonipa +tat_Adlm_AC_fonipa ; tt_Adlm_AC_fonipa +tdu_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +tel_Adlm_AC_fonipa ; te_Adlm_AC_fonipa +tgk_Adlm_AC_fonipa ; tg_Adlm_AC_fonipa +tgl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa +tha_Adlm_AC_fonipa ; th_Adlm_AC_fonipa +thc_Adlm_AC_fonipa ; tpo_Adlm_AC_fonipa +thx_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa +tib_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa +tie_Adlm_AC_fonipa ; ras_Adlm_AC_fonipa +tir_Adlm_AC_fonipa ; 
ti_Adlm_AC_fonipa +tkk_Adlm_AC_fonipa ; twm_Adlm_AC_fonipa +tl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa +tlw_Adlm_AC_fonipa ; weo_Adlm_AC_fonipa +tmp_Adlm_AC_fonipa ; tyj_Adlm_AC_fonipa +tne_Adlm_AC_fonipa ; kak_Adlm_AC_fonipa +tnf_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +ton_Adlm_AC_fonipa ; to_Adlm_AC_fonipa +tsf_Adlm_AC_fonipa ; taj_Adlm_AC_fonipa +tsn_Adlm_AC_fonipa ; tn_Adlm_AC_fonipa +tso_Adlm_AC_fonipa ; ts_Adlm_AC_fonipa +ttq_Adlm_AC_fonipa ; tmh_Adlm_AC_fonipa +tuk_Adlm_AC_fonipa ; tk_Adlm_AC_fonipa +tur_Adlm_AC_fonipa ; tr_Adlm_AC_fonipa +tw_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +twi_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +uig_Adlm_AC_fonipa ; ug_Adlm_AC_fonipa +ukr_Adlm_AC_fonipa ; uk_Adlm_AC_fonipa +umu_Adlm_AC_fonipa ; del_Adlm_AC_fonipa +uok_Adlm_AC_fonipa ; ema_Adlm_AC_fonipa +urd_Adlm_AC_fonipa ; ur_Adlm_AC_fonipa +uzb_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa +uzn_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa +ven_Adlm_AC_fonipa ; ve_Adlm_AC_fonipa +vie_Adlm_AC_fonipa ; vi_Adlm_AC_fonipa +vol_Adlm_AC_fonipa ; vo_Adlm_AC_fonipa +wel_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa +wln_Adlm_AC_fonipa ; wa_Adlm_AC_fonipa +wol_Adlm_AC_fonipa ; wo_Adlm_AC_fonipa +xba_Adlm_AC_fonipa ; cax_Adlm_AC_fonipa +xho_Adlm_AC_fonipa ; xh_Adlm_AC_fonipa +xia_Adlm_AC_fonipa ; acn_Adlm_AC_fonipa +xkh_Adlm_AC_fonipa ; waw_Adlm_AC_fonipa +xpe_Adlm_AC_fonipa ; kpe_Adlm_AC_fonipa +xsj_Adlm_AC_fonipa ; suj_Adlm_AC_fonipa +xsl_Adlm_AC_fonipa ; den_Adlm_AC_fonipa +ybd_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa +ydd_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa +yid_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa +yma_Adlm_AC_fonipa ; lrr_Adlm_AC_fonipa +ymt_Adlm_AC_fonipa ; mtm_Adlm_AC_fonipa +yor_Adlm_AC_fonipa ; yo_Adlm_AC_fonipa +yos_Adlm_AC_fonipa ; zom_Adlm_AC_fonipa +yuu_Adlm_AC_fonipa ; yug_Adlm_AC_fonipa +zai_Adlm_AC_fonipa ; zap_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa 
+zh_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +zha_Adlm_AC_fonipa ; za_Adlm_AC_fonipa +zho_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +zsm_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa +zul_Adlm_AC_fonipa ; zu_Adlm_AC_fonipa +zyb_Adlm_AC_fonipa ; za_Adlm_AC_fonipa diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java index 428a68f4a5c..ef2d6e6f23f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java @@ -78,7 +78,7 @@ public class LanguageTag { final String[][] entries = { //{"tag", "preferred"}, {"art-lojban", "jbo"}, - {"cel-gaulish", "xtg-x-cel-gaulish"}, // fallback + {"cel-gaulish", "xtg"}, // fallback {"en-GB-oed", "en-GB-x-oed"}, // fallback {"i-ami", "ami"}, {"i-bnn", "bnn"}, diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java index 109c495ecfd..2a3fce3ff20 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java @@ -16,12 +16,15 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.MissingResourceException; +import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -1203,6 +1206,396 @@ public final class ULocale implements Serializable, Comparable { return new 
LocaleIDParser(localeID).getKeywordValue(keywordName); } + static private class AliasReplacer { + /** + * @param language language subtag to be replaced. Cannot be null but could be empty. + * @param script script subtag to be replaced. Cannot be null but could be empty. + * @param region region subtag to be replaced. Cannot be null but could be empty. + * @param variants variant subtags to be replaced. Cannot be null but could be empty. + * @param extensions extensions in string to be replaced. Cannot be null but could be empty. + */ + public AliasReplacer(String language, String script, String region, + String variants, String extensions) { + + assert language != null; + assert script != null; + assert region != null; + assert variants != null; + assert extensions != null; + this.language = language; + this.script = script; + this.region = region; + if (!variants.isEmpty()) { + this.variants = + new ArrayList(Arrays.asList(variants.split("_"))); + } + this.extensions = extensions; + } + + private String language; + private String script; + private String region; + private List variants; + private String extensions; + + public String replace() { + boolean changed = false; + loadAliasData(); + int count = 0; + while (true) { + if (count++ > 10) { + // Throw exception when we loop through too many time + // stop to avoid infinity loop cauesd by incorrect data + // in resource. + throw new IllegalArgumentException( + "Have problem to resolve locale alias of " + + lscvToID(language, script, region, + ((variants == null) ? "" : String.join("_", variants))) + + extensions); + } + // Anytime we replace something, we need to start over again. 
+ // lang REGION variant + if ( replaceLanguage(true, true, true) || + replaceLanguage(true, true, false) || + replaceLanguage(true, false, true) || + replaceLanguage(true, false, false) || + replaceLanguage(false, false, true) || + replaceRegion() || + replaceScript() || + replaceVariant()) { + // Some values in data is changed, try to match from the + // beginning again. + changed = true; + continue; + } + // Nothing changed in this iteration, break out the loop + break; + } // while(1) + if (changed) { + String result = lscvToID(language, script, region, + ((variants == null) ? "" : String.join("_", variants))); + if (extensions != null) { + result += extensions; + } + return result; + } + // Nothing changed in any iteration of the loop. + return null; + }; + + private static boolean aliasDataIsLoaded = false; + private static Map languageAliasMap = null; + private static Map scriptAliasMap = null; + private static Map> territoryAliasMap = null; + private static Map variantAliasMap = null; + + /* + * Initializes the alias data from the ICU resource bundles. The alias + * data contains alias of language, country, script and variants. + * + * If the alias data has already loaded, then this method simply + * returns without doing anything meaningful. 
+ * + */ + private static synchronized void loadAliasData() { + if (aliasDataIsLoaded) { + return; + } + languageAliasMap = new HashMap(); + scriptAliasMap = new HashMap(); + territoryAliasMap = new HashMap>(); + variantAliasMap = new HashMap(); + + UResourceBundle metadata = UResourceBundle.getBundleInstance( + ICUData.ICU_BASE_NAME, "metadata", + ICUResourceBundle.ICU_DATA_CLASS_LOADER); + UResourceBundle metadataAlias = metadata.get("alias"); + UResourceBundle languageAlias = metadataAlias.get("language"); + UResourceBundle scriptAlias = metadataAlias.get("script"); + UResourceBundle territoryAlias = metadataAlias.get("territory"); + UResourceBundle variantAlias = metadataAlias.get("variant"); + + for (int i = 0 ; i < languageAlias.getSize(); i++) { + UResourceBundle res = languageAlias.get(i); + String aliasFrom = res.getKey(); + String aliasTo = res.get("replacement").getString(); + Locale testLocale = new Locale(aliasFrom); + // if there are script in the aliasFrom + // or we have both a und as language and a region code. + if ( ! testLocale.getScript().isEmpty() || + (aliasFrom.startsWith("und") && ! 
testLocale.getCountry().isEmpty())) { + throw new IllegalArgumentException( + "key [" + aliasFrom + + "] in alias:language contains unsupported fields combination."); + } + languageAliasMap.put(aliasFrom, aliasTo); + } + for (int i = 0 ; i < scriptAlias.getSize(); i++) { + UResourceBundle res = scriptAlias.get(i); + String aliasFrom = res.getKey(); + String aliasTo = res.get("replacement").getString(); + if (aliasFrom.length() != 4) { + throw new IllegalArgumentException( + "Incorrect key [" + aliasFrom + "] in alias:script."); + } + scriptAliasMap.put(aliasFrom, aliasTo); + } + for (int i = 0 ; i < territoryAlias.getSize(); i++) { + UResourceBundle res = territoryAlias.get(i); + String aliasFrom = res.getKey(); + String aliasTo = res.get("replacement").getString(); + if (aliasFrom.length() < 2 || aliasFrom.length() > 3) { + throw new IllegalArgumentException( + "Incorrect key [" + aliasFrom + "] in alias:territory."); + } + territoryAliasMap.put(aliasFrom, + new ArrayList(Arrays.asList(aliasTo.split(" ")))); + } + for (int i = 0 ; i < variantAlias.getSize(); i++) { + UResourceBundle res = variantAlias.get(i); + String aliasFrom = res.getKey(); + String aliasTo = res.get("replacement").getString(); + if ( aliasFrom.length() < 4 || + aliasFrom.length() > 8 || + (aliasFrom.length() == 4 && + (aliasFrom.charAt(0) < '0' || aliasFrom.charAt(0) > '9'))) { + throw new IllegalArgumentException( + "Incorrect key [" + aliasFrom + "] in alias:variant."); + } + if ( aliasTo.length() < 4 || + aliasTo.length() > 8 || + (aliasTo.length() == 4 && + (aliasTo.charAt(0) < '0' || aliasTo.charAt(0) > '9'))) { + throw new IllegalArgumentException( + "Incorrect variant [" + aliasTo + "] for the key [" + aliasFrom + + "] in alias:variant."); + } + variantAliasMap.put(aliasFrom, aliasTo); + } + + aliasDataIsLoaded = true; + } + + private static String generateKey( + String language, String region, String variant) { + assert variant == null || variant.length() >= 4; + StringBuilder buf = 
new StringBuilder(); + buf.append(language); + if (region != null && !region.isEmpty()) { + buf.append(UNDERSCORE); + buf.append(region); + } + if (variant != null && !variant.isEmpty()) { + buf.append(UNDERSCORE); + buf.append(variant); + } + return buf.toString(); + } + + /** + * If replacement is neither null nor empty and input is either null or empty, + * return replacement. + * If replacement is neither null nor empty but input is not empty, return input. + * If replacement is either null or empty and type is either null or empty, + * return input. + * Otherwise return null. + * replacement input type return + * AAA "" * AAA + * AAA BBB * BBB + * "" CCC "" CCC + * "" * i DDD "" + */ + private static String deleteOrReplace( + String input, String type, String replacement) { + return (replacement != null && !replacement.isEmpty()) ? + ((input == null || input.isEmpty()) ? replacement : input) : + ((type == null || type.isEmpty()) ? input : null); + } + + private boolean replaceLanguage(boolean checkLanguage, + boolean checkRegion, boolean checkVariants) { + if ( (checkRegion && (region == null || region.isEmpty())) || + (checkVariants && (variants == null))) { + // Nothing to search + return false; + } + int variantSize = checkVariants ? variants.size() : 1; + // Since we may have more than one variant, we need to loop through + // them. + String searchLanguage = checkLanguage ? language : UNDEFINED_LANGUAGE; + String searchRegion = checkRegion ? region : null; + String searchVariant = null; + for (int variantIndex = 0; variantIndex < variantSize; ++variantIndex) { + if (checkVariants) { + searchVariant = variants.get(variantIndex); + } + if (searchVariant != null && searchVariant.length() < 4) { + // Do not consider ill-formed variant subtag. 
+ searchVariant = null; + } + String typeKey = generateKey( + searchLanguage, searchRegion, searchVariant); + String replacement = languageAliasMap.get(typeKey); + if (replacement == null) { + // Found no replacement data. + continue; + } + String replacedScript = null; + String replacedRegion = null; + String replacedVariant = null; + String replacedExtensions = null; + String replacedLanguage = null; + + if (replacement.indexOf('_') < 0) { + replacedLanguage = replacement.equals(UNDEFINED_LANGUAGE) ? + language : replacement; + } else { + String[] replacementFields = replacement.split("_"); + replacedLanguage = replacementFields[0]; + int index = 1; + + if (replacedLanguage.equals(UNDEFINED_LANGUAGE)) { + replacedLanguage = language; + } + int consumed = replacementFields[0].length() + 1; + while (replacementFields.length > index) { + String field = replacementFields[index]; + int len = field.length(); + if (1 == len) { + replacedExtensions = replacement.substring(consumed); + break; + } else if (len >= 2 && len <= 3) { + assert replacedRegion == null; + replacedRegion = field; + } else if (len >= 5 && len <= 8) { + assert replacedVariant == null; + replacedVariant = field; + } else if (len == 4) { + if (field.charAt(0) >= '0' && field.charAt(0) <= '9') { + assert replacedVariant == null; + replacedVariant = field; + } else { + assert replacedScript == null; + replacedScript = field; + } + } + index++; + consumed += len + 1; + } + } + + replacedScript = deleteOrReplace(script, null, replacedScript); + replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion); + replacedVariant = deleteOrReplace(searchVariant, searchVariant, replacedVariant); + + if ( this.language.equals(replacedLanguage) && + this.script.equals(replacedScript) && + this.region.equals(replacedRegion) && + Objects.equals(searchVariant, replacedVariant) && + replacedExtensions == null) { + // Replacement produce no changes on search. 
+ // For example, apply pa_IN=> pa_Guru_IN on pa_Guru_IN. + continue; + } + this.language = replacedLanguage; + this.script = replacedScript; + this.region = replacedRegion; + if (searchVariant != null && !searchVariant.isEmpty()) { + if (replacedVariant != null && !replacedVariant.isEmpty()) { + this.variants.set(variantIndex, replacedVariant); + } else { + this.variants.remove(variantIndex); + if (this.variants.isEmpty()) { + this.variants = null; + } + } + } + if (replacedExtensions != null && !replacedExtensions.isEmpty()) { + // TODO(ICU-21292) + // DO NOTHING + // UTS35 does not specifiy what should we do if we have extensions in the + // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have + // extensions in them languageAlias: + // i_default => en_x_i_default + // i_enochian => und_x_i_enochian + // i_mingo => see_x_i_mingo + // zh_min => nan_x_zh_min + // But all of them are already changed by code inside LanguageTag before + // hitting this code. + } + // Something in search changed by language alias data. + return true; + } + // Nothing changed in search by language alias data. + return false; + } + + private boolean replaceRegion() { + if (region == null || region.isEmpty()) return false; + List replacement = territoryAliasMap.get(region); + if (replacement == null) { + // Found no replacement data for this region. + return false; + } + String replacedRegion; + if (replacement.size() > 1) { + String regionOfLanguageAndScript = + ULocale.addLikelySubtags( + new ULocale(this.language, this.script, null)) + .getCountry(); + replacedRegion = replacement.contains(regionOfLanguageAndScript) ? + regionOfLanguageAndScript : replacement.get(0); + } else { + replacedRegion = replacement.get(0); + } + assert this.region != replacedRegion; + this.region = replacedRegion; + // The region is changed by data in territory alias. 
+ return true; + } + + private boolean replaceScript() { + if (script == null || script.isEmpty()) return false; + String replacement = scriptAliasMap.get(script); + if (replacement == null) { + // Found no replacement data for this script. + return false; + } + assert this.script != replacement; + this.script = replacement; + // The script is changed by data in script alias. + return true; + } + + private boolean replaceVariant() { + if (variants == null) return false; + for (int i = 0; i < variants.size(); i++) { + String variant = variants.get(i); + String replacement = variantAliasMap.get(variant); + if (replacement == null) { + // Found no replacement data for this variant. + continue; + } + assert replacement.length() >= 4; + assert replacement.length() <= 8; + assert replacement.length() != 4 || + ( replacement.charAt(0) >= '0' && replacement.charAt(0) <= '9'); + if (!variant.equals(replacement)) { + variants.set(i, replacement); + // Special hack to handle hepburn-heploc => alalc97 + if (variant.equals("heploc")) { + variants.remove("hepburn"); + if (variants.isEmpty()) { + variants = null; + } + } + return true; + } + } + return false; + } + }; + /** * {@icu} Returns the canonical name according to CLDR for the specified locale ID. * This is used to convert POSIX and other legacy IDs to standard ICU form. @@ -1239,147 +1632,55 @@ public final class ULocale implements Serializable, Comparable { } } - // If the BCP 47 primary language subtag matches the type attribute of a languageAlias - // element in Supplemental Data, replace the language subtag with the replacement value. - // If there are additional subtags in the replacement value, add them to the result, but - // only if there is no corresponding subtag already in the tag. - // Five special deprecated codes (such as i-default) are in type attributes, and are also replaced. 
- try { - UResourceBundle languageAlias = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, - "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER) - .get("alias") - .get("language"); - // language _ variant - if (!parser.getVariant().isEmpty()) { - String [] variants = parser.getVariant().split("_"); - for (String variant : variants) { - try { - // Note the key in the metadata.txt is formatted as language_variant - // instead of language__variant but lscvToID will generate - // language__variant so we have to build the string ourselves. - ULocale replaceLocale = new ULocale(languageAlias.get( - (new StringBuilder(parser.getLanguage().length() + 1 + parser.getVariant().length())) - .append(parser.getLanguage()) - .append("_") - .append(variant) - .toString()) - .get("replacement") - .getString()); - StringBuilder replacedVariant = new StringBuilder(parser.getVariant().length()); - for (String current : variants) { - if (current.equals(variant)) continue; - if (replacedVariant.length() > 0) replacedVariant.append("_"); - replacedVariant.append(current); - } - parser = new LocaleIDParser( - (new StringBuilder(localeID.length())) - .append(lscvToID(replaceLocale.getLanguage(), - !parser.getScript().isEmpty() ? parser.getScript() : replaceLocale.getScript(), - !parser.getCountry().isEmpty() ? 
parser.getCountry() : replaceLocale.getCountry(), - replacedVariant.toString())) - .append(parser.getName().substring(parser.getBaseName().length())) - .toString()); - } catch (MissingResourceException e) { - } - } - } - - // language _ script _ country - // ug_Arab_CN -> ug_CN - if (!parser.getScript().isEmpty() && !parser.getCountry().isEmpty()) { - try { - ULocale replaceLocale = new ULocale(languageAlias.get( - lscvToID(parser.getLanguage(), parser.getScript(), parser.getCountry(), null)) - .get("replacement") - .getString()); - parser = new LocaleIDParser((new StringBuilder(localeID.length())) - .append(lscvToID(replaceLocale.getLanguage(), - replaceLocale.getScript(), - replaceLocale.getCountry(), - parser.getVariant())) - .append(parser.getName().substring(parser.getBaseName().length())) - .toString()); - } catch (MissingResourceException e) { - } - } - // language _ country - // eg. az_AZ -> az_Latn_AZ - if (!parser.getCountry().isEmpty()) { - try { - ULocale replaceLocale = new ULocale(languageAlias.get( - lscvToID(parser.getLanguage(), null, parser.getCountry(), null)) - .get("replacement") - .getString()); - parser = new LocaleIDParser((new StringBuilder(localeID.length())) - .append(lscvToID(replaceLocale.getLanguage(), - parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript(), - replaceLocale.getCountry(), - parser.getVariant())) - .append(parser.getName().substring(parser.getBaseName().length())) - .toString()); - } catch (MissingResourceException e) { - } - } - // only language - // e.g. twi -> ak - try { - ULocale replaceLocale = new ULocale(languageAlias.get(parser.getLanguage()) - .get("replacement") - .getString()); - parser = new LocaleIDParser((new StringBuilder(localeID.length())) - .append(lscvToID(replaceLocale.getLanguage(), - parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript() , - parser.getCountry().isEmpty() ? 
replaceLocale.getCountry() : parser.getCountry() , - parser.getVariant())) - .append(parser.getName().substring(parser.getBaseName().length())) - .toString()); - } catch (MissingResourceException e) { - } - } catch (MissingResourceException e) { - } - - // If the BCP 47 region subtag matches the type attribute of a - // territoryAlias element in Supplemental Data, replace the language - // subtag with the replacement value, as follows: - if (!parser.getCountry().isEmpty()) { - try { - String replacements[] = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, - "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER) - .get("alias") - .get("territory") - .get(parser.getCountry()) - .get("replacement") - .getString() - .split(" "); - String replacement = replacements[0]; - // If there is a single territory in the replacement, use it. - // If there are multiple territories: - // Look up the most likely territory for the base language code (and script, if there is one). - // If that likely territory is in the list, use it. - // Otherwise, use the first territory in the list. 
- if (replacements.length > 1) { - String likelyCountry = ULocale.addLikelySubtags( - new ULocale(lscvToID(parser.getLanguage(), parser.getScript(), null, parser.getVariant()))) - .getCountry(); - for (String country : replacements) { - if (country.equals(likelyCountry)) { - replacement = likelyCountry; - break; - } - } - } - parser = new LocaleIDParser( - (new StringBuilder(localeID.length())) - .append(lscvToID(parser.getLanguage(), parser.getScript(), replacement, parser.getVariant())) - .append(parser.getName().substring(parser.getBaseName().length())) - .toString()); - } catch (MissingResourceException e) { + boolean knownCanonicalized = false; + String name = parser.getName(); + if (!isKnownCanonicalizedLocale(name)) { + AliasReplacer replacer = new AliasReplacer( + parser.getLanguage(), parser.getScript(), parser.getCountry(), + AsciiUtil.toLowerString(parser.getVariant()), + parser.getName().substring(parser.getBaseName().length())); + String replaced = replacer.replace(); + if (replaced != null) { + parser = new LocaleIDParser(replaced); } } return parser.getName(); } + private static synchronized boolean isKnownCanonicalizedLocale(String name) { + if (name.equals("c") || name.equals("en") || name.equals("en_US")) { + return true; + } + if (gKnownCanonicalizedCases == null) { + List items = Arrays.asList( + "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ", + "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES", + "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR", + "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu", + "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR", + "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN", + "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS", + "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ", + "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", 
"ky_KG", "lo", "lo_LA", + "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN", + "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP", + "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", + "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si", + "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr", + "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta", + "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk", + "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant", + "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", + "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"); + gKnownCanonicalizedCases = new HashSet(items); + + } + return gKnownCanonicalizedCases.contains(name); + } + + private static Set gKnownCanonicalizedCases = null; + /** * {@icu} Given a keyword and a value, return a new locale with an updated * keyword and value. If the keyword is null, this removes all keywords from the locale id. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt new file mode 100644 index 00000000000..e41eaac05a7 --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt @@ -0,0 +1,1648 @@ +# File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt +# Test data for locale identifier canonicalization +# Copyright © 1991-2020 Unicode, Inc. +# For terms of use, see http://www.unicode.org/copyright.html +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/) +# +# Format: +# <source locale identifier> ; <expected canonicalized locale identifier> +# +# The data lines are divided into 4 sets: +# explicit: a short list of explicit test cases. +# fromAliases: test cases generated from the alias data. +# decanonicalized: test cases generated by reversing the normalization process. +# withIrrelevants: test cases generated from the others by adding irrelevant fields where possible, +# to ensure that the canonicalization implementation is not sensitive to irrelevant fields. These include: +# Language: aaa +# Script: Adlm +# Region: AC +# Variant: fonipa +###### + + + +# explicit + +art_lojban ; jbo +en_US_aaland ; en_US +en_US_heploc ; en_US_alalc97 +en_US_polytoni ; en_US_polyton +en_aaland ; en_AX +en_arevela ; en +en_arevmda_arevela ; en +en_lojban ; en +hy_arevela ; hy +hy_arevmda ; hyw +hy_arevmda_arevela ; hyw +hye_arevmda ; hyw +no_bokmal_nynorsk ; nb +no_nynorsk_bokmal ; nb +zh_guoyu_hakka_xiang ; hak +zh_hakka_xiang ; hak + +# fromAliases + +aa_saaho ; ssy +aam ; aas +aar ; aa +abk ; ab +adp ; dz +afr ; af +aju ; jrb +aka ; ak +alb ; sq +als ; sq +amh ; am +ara ; ar +arb ; ar +arg ; an +arm ; hy +asd ; snz +asm ; as +aue ; ktz +ava ; av +ave ; ae +aym ; ay +ayr ; ay +ayx ; nun +aze ; az +azj ; az +bak ; ba +bam ; bm +baq ; eu +bcc ; bal +bcl ; bik +bel ; be +ben ; bn +bgm ; bcg +bh ; bho +bih ; bho +bis ; bi +bjd ; drl +bod ; bo +bos ; bs +bre ; br +bul ; bg +bur ; my +bxk ; luy +bxr ; bua +cat ; ca +ccq ; rki +cel_gaulish ; xtg +ces ; cs +cha ; ch +che ; ce +chi ; zh +chu ; cu +chv ; cv +cjr ; mom +cka ; cmr +cld ; syr +cmk ; xch +cmn ; zh +cnr ; sr_ME +cor ; kw +cos ; co +coy ; pij +cqu ; quh +cre ; cr +cwd ; cr +cym ; cy +cze ; cs +dan ; da +deu ; de +dgo ; doi +dhd ; mwr +dik ; din +diq ; zza +dit ; dif +div ; dv +drh ; mn +drw ; fa_AF +dut ; nl +dzo ; dz +ekk ; et +ell ; el +emk ; man +eng ; en +epo ; eo +esk ; ik +est ; et +eus ; eu +ewe ; ee +fao ; fo +fas ; fa +fat ; ak +fij
; fj +fin ; fi +fra ; fr +fre ; fr +fry ; fy +fuc ; ff +ful ; ff +gav ; dev +gaz ; om +gbo ; grb +geo ; ka +ger ; de +gfx ; vaj +ggn ; gvr +gla ; gd +gle ; ga +glg ; gl +glv ; gv +gno ; gon +gre ; el +grn ; gn +gti ; nyc +gug ; gn +guj ; gu +guv ; duz +gya ; gba +hat ; ht +hau ; ha +hbs ; sr_Latn +hdn ; hai +hea ; hmn +heb ; he +her ; hz +him ; srx +hin ; hi +hmo ; ho +hrr ; jal +hrv ; hr +hun ; hu +hye ; hy +ibi ; opa +ibo ; ig +ice ; is +ido ; io +iii ; ii +ike ; iu +iku ; iu +ile ; ie +ilw ; gal +in ; id +ina ; ia +ind ; id +ipk ; ik +isl ; is +ita ; it +iw ; he +jav ; jv +jeg ; oyb +ji ; yi +jpn ; ja +jw ; jv +kal ; kl +kan ; kn +kas ; ks +kat ; ka +kau ; kr +kaz ; kk +kgc ; tdf +kgh ; kml +khk ; mn +khm ; km +kik ; ki +kin ; rw +kir ; ky +kmr ; ku +knc ; kr +kng ; kg +knn ; kok +koj ; kwv +kom ; kv +kon ; kg +kor ; ko +kpv ; kv +krm ; bmf +ktr ; dtp +kua ; kj +kur ; ku +kvs ; gdj +kwq ; yam +kxe ; tvd +kzj ; dtp +kzt ; dtp +lao ; lo +lat ; la +lav ; lv +lbk ; bnc +lii ; raq +lim ; li +lin ; ln +lit ; lt +llo ; ngt +lmm ; rmx +ltz ; lb +lub ; lu +lug ; lg +lvs ; lv +mac ; mk +mah ; mh +mal ; ml +mao ; mi +mar ; mr +may ; ms +meg ; cir +mhr ; chm +mkd ; mk +mlg ; mg +mlt ; mt +mnk ; man +mo ; ro +mol ; ro +mon ; mn +mri ; mi +msa ; ms +mst ; mry +mup ; raj +mwj ; vaj +mya ; my +myd ; aog +myt ; mry +nad ; xny +nau ; na +nav ; nv +nbl ; nr +ncp ; kdz +nde ; nd +ndo ; ng +nep ; ne +nld ; nl +nno ; nn +nns ; nbr +nnx ; ngv +no ; nb +no_bokmal ; nb +no_nynorsk ; nn +nob ; nb +nor ; nb +npi ; ne +nts ; pij +nya ; ny +oci ; oc +ojg ; oj +oji ; oj +ori ; or +orm ; om +ory ; or +oss ; os +oun ; vaj +pan ; pa +pbu ; ps +pcr ; adx +per ; fa +pes ; fa +pli ; pi +plt ; mg +pmc ; huw +pmu ; phr +pnb ; lah +pol ; pl +por ; pt +ppa ; bfy +ppr ; lcq +prs ; fa_AF +pry ; prt +pus ; ps +puz ; pub +que ; qu +quz ; qu +rmy ; rom +roh ; rm +ron ; ro +rum ; ro +run ; rn +rus ; ru +sag ; sg +san ; sa +sca ; hle +scc ; sr +scr ; hr +sgn_BR ; bzs +sgn_CO ; csn +sgn_DE ; gsg +sgn_DK ; dsl 
+sgn_FR ; fsl +sgn_GB ; bfi +sgn_GR ; gss +sgn_IE ; isg +sgn_IT ; ise +sgn_JP ; jsl +sgn_MX ; mfs +sgn_NI ; ncs +sgn_NL ; dse +sgn_NO ; nsi +sgn_PT ; psr +sgn_SE ; swl +sgn_US ; ase +sgn_ZA ; sfs +sh ; sr_Latn +sin ; si +skk ; oyb +slk ; sk +slo ; sk +slv ; sl +sme ; se +smo ; sm +sna ; sn +snd ; sd +som ; so +sot ; st +spa ; es +spy ; kln +sqi ; sq +src ; sc +srd ; sc +srp ; sr +ssw ; ss +sun ; su +swa ; sw +swc ; sw_CD +swe ; sv +swh ; sw +tah ; ty +tam ; ta +tat ; tt +tdu ; dtp +tel ; te +tgk ; tg +tgl ; fil +tha ; th +thc ; tpo +thx ; oyb +tib ; bo +tie ; ras +tir ; ti +tkk ; twm +tl ; fil +tlw ; weo +tmp ; tyj +tne ; kak +tnf ; fa_AF +ton ; to +tsf ; taj +tsn ; tn +tso ; ts +ttq ; tmh +tuk ; tk +tur ; tr +tw ; ak +twi ; ak +uig ; ug +ukr ; uk +umu ; del +und_004 ; und_AF +und_008 ; und_AL +und_010 ; und_AQ +und_012 ; und_DZ +und_016 ; und_AS +und_020 ; und_AD +und_024 ; und_AO +und_028 ; und_AG +und_031 ; und_AZ +und_032 ; und_AR +und_036 ; und_AU +und_040 ; und_AT +und_044 ; und_BS +und_048 ; und_BH +und_050 ; und_BD +und_051 ; und_AM +und_052 ; und_BB +und_056 ; und_BE +und_060 ; und_BM +und_062 ; und_034 +und_064 ; und_BT +und_068 ; und_BO +und_070 ; und_BA +und_072 ; und_BW +und_074 ; und_BV +und_076 ; und_BR +und_084 ; und_BZ +und_086 ; und_IO +und_090 ; und_SB +und_092 ; und_VG +und_096 ; und_BN +und_100 ; und_BG +und_104 ; und_MM +und_108 ; und_BI +und_112 ; und_BY +und_116 ; und_KH +und_120 ; und_CM +und_124 ; und_CA +und_132 ; und_CV +und_136 ; und_KY +und_140 ; und_CF +und_144 ; und_LK +und_148 ; und_TD +und_152 ; und_CL +und_156 ; und_CN +und_158 ; und_TW +und_162 ; und_CX +und_166 ; und_CC +und_170 ; und_CO +und_172 ; und_RU +und_174 ; und_KM +und_175 ; und_YT +und_178 ; und_CG +und_180 ; und_CD +und_184 ; und_CK +und_188 ; und_CR +und_191 ; und_HR +und_192 ; und_CU +und_196 ; und_CY +und_200 ; und_CZ +und_203 ; und_CZ +und_204 ; und_BJ +und_208 ; und_DK +und_212 ; und_DM +und_214 ; und_DO +und_218 ; und_EC +und_222 ; und_SV +und_226 ; und_GQ 
+und_230 ; und_ET +und_231 ; und_ET +und_232 ; und_ER +und_233 ; und_EE +und_234 ; und_FO +und_238 ; und_FK +und_239 ; und_GS +und_242 ; und_FJ +und_246 ; und_FI +und_248 ; und_AX +und_249 ; und_FR +und_250 ; und_FR +und_254 ; und_GF +und_258 ; und_PF +und_260 ; und_TF +und_262 ; und_DJ +und_266 ; und_GA +und_268 ; und_GE +und_270 ; und_GM +und_275 ; und_PS +und_276 ; und_DE +und_278 ; und_DE +und_280 ; und_DE +und_288 ; und_GH +und_292 ; und_GI +und_296 ; und_KI +und_300 ; und_GR +und_304 ; und_GL +und_308 ; und_GD +und_312 ; und_GP +und_316 ; und_GU +und_320 ; und_GT +und_324 ; und_GN +und_328 ; und_GY +und_332 ; und_HT +und_334 ; und_HM +und_336 ; und_VA +und_340 ; und_HN +und_344 ; und_HK +und_348 ; und_HU +und_352 ; und_IS +und_356 ; und_IN +und_360 ; und_ID +und_364 ; und_IR +und_368 ; und_IQ +und_372 ; und_IE +und_376 ; und_IL +und_380 ; und_IT +und_384 ; und_CI +und_388 ; und_JM +und_392 ; und_JP +und_398 ; und_KZ +und_400 ; und_JO +und_404 ; und_KE +und_408 ; und_KP +und_410 ; und_KR +und_414 ; und_KW +und_417 ; und_KG +und_418 ; und_LA +und_422 ; und_LB +und_426 ; und_LS +und_428 ; und_LV +und_430 ; und_LR +und_434 ; und_LY +und_438 ; und_LI +und_440 ; und_LT +und_442 ; und_LU +und_446 ; und_MO +und_450 ; und_MG +und_454 ; und_MW +und_458 ; und_MY +und_462 ; und_MV +und_466 ; und_ML +und_470 ; und_MT +und_474 ; und_MQ +und_478 ; und_MR +und_480 ; und_MU +und_484 ; und_MX +und_492 ; und_MC +und_496 ; und_MN +und_498 ; und_MD +und_499 ; und_ME +und_500 ; und_MS +und_504 ; und_MA +und_508 ; und_MZ +und_512 ; und_OM +und_516 ; und_NA +und_520 ; und_NR +und_524 ; und_NP +und_528 ; und_NL +und_530 ; und_CW +und_531 ; und_CW +und_532 ; und_CW +und_533 ; und_AW +und_534 ; und_SX +und_535 ; und_BQ +und_536 ; und_SA +und_540 ; und_NC +und_548 ; und_VU +und_554 ; und_NZ +und_558 ; und_NI +und_562 ; und_NE +und_566 ; und_NG +und_570 ; und_NU +und_574 ; und_NF +und_578 ; und_NO +und_580 ; und_MP +und_581 ; und_UM +und_582 ; und_FM +und_583 ; und_FM +und_584 ; und_MH 
+und_585 ; und_PW +und_586 ; und_PK +und_591 ; und_PA +und_598 ; und_PG +und_600 ; und_PY +und_604 ; und_PE +und_608 ; und_PH +und_612 ; und_PN +und_616 ; und_PL +und_620 ; und_PT +und_624 ; und_GW +und_626 ; und_TL +und_630 ; und_PR +und_634 ; und_QA +und_638 ; und_RE +und_642 ; und_RO +und_643 ; und_RU +und_646 ; und_RW +und_652 ; und_BL +und_654 ; und_SH +und_659 ; und_KN +und_660 ; und_AI +und_662 ; und_LC +und_663 ; und_MF +und_666 ; und_PM +und_670 ; und_VC +und_674 ; und_SM +und_678 ; und_ST +und_682 ; und_SA +und_686 ; und_SN +und_688 ; und_RS +und_690 ; und_SC +und_694 ; und_SL +und_702 ; und_SG +und_703 ; und_SK +und_704 ; und_VN +und_705 ; und_SI +und_706 ; und_SO +und_710 ; und_ZA +und_716 ; und_ZW +und_720 ; und_YE +und_724 ; und_ES +und_728 ; und_SS +und_729 ; und_SD +und_732 ; und_EH +und_736 ; und_SD +und_740 ; und_SR +und_744 ; und_SJ +und_748 ; und_SZ +und_752 ; und_SE +und_756 ; und_CH +und_760 ; und_SY +und_762 ; und_TJ +und_764 ; und_TH +und_768 ; und_TG +und_772 ; und_TK +und_776 ; und_TO +und_780 ; und_TT +und_784 ; und_AE +und_788 ; und_TN +und_792 ; und_TR +und_795 ; und_TM +und_796 ; und_TC +und_798 ; und_TV +und_800 ; und_UG +und_804 ; und_UA +und_807 ; und_MK +und_810 ; und_RU +und_818 ; und_EG +und_826 ; und_GB +und_830 ; und_JE +und_831 ; und_GG +und_832 ; und_JE +und_833 ; und_IM +und_834 ; und_TZ +und_840 ; und_US +und_850 ; und_VI +und_854 ; und_BF +und_858 ; und_UY +und_860 ; und_UZ +und_862 ; und_VE +und_876 ; und_WF +und_882 ; und_WS +und_886 ; und_YE +und_887 ; und_YE +und_890 ; und_RS +und_891 ; und_RS +und_894 ; und_ZM +und_958 ; und_AA +und_959 ; und_QM +und_960 ; und_QN +und_962 ; und_QP +und_963 ; und_QQ +und_964 ; und_QR +und_965 ; und_QS +und_966 ; und_QT +und_967 ; und_EU +und_968 ; und_QV +und_969 ; und_QW +und_970 ; und_QX +und_971 ; und_QY +und_972 ; und_QZ +und_973 ; und_XA +und_974 ; und_XB +und_975 ; und_XC +und_976 ; und_XD +und_977 ; und_XE +und_978 ; und_XF +und_979 ; und_XG +und_980 ; und_XH +und_981 ; und_XI 
+und_982 ; und_XJ +und_983 ; und_XK +und_984 ; und_XL +und_985 ; und_XM +und_986 ; und_XN +und_987 ; und_XO +und_988 ; und_XP +und_989 ; und_XQ +und_990 ; und_XR +und_991 ; und_XS +und_992 ; und_XT +und_993 ; und_XU +und_994 ; und_XV +und_995 ; und_XW +und_996 ; und_XX +und_997 ; und_XY +und_998 ; und_XZ +und_999 ; und_ZZ +und_AN ; und_CW +und_BU ; und_MM +und_CS ; und_RS +und_CT ; und_KI +und_DD ; und_DE +und_DY ; und_BJ +und_FQ ; und_AQ +und_FX ; und_FR +und_HV ; und_BF +und_JT ; und_UM +und_MI ; und_UM +und_NH ; und_VU +und_NQ ; und_AQ +und_NT ; und_SA +und_PC ; und_FM +und_PU ; und_UM +und_PZ ; und_PA +und_QU ; und_EU +und_Qaai ; und_Zinh +und_RH ; und_ZW +und_SU ; und_RU +und_TP ; und_TL +und_UK ; und_GB +und_VD ; und_VN +und_WK ; und_UM +und_YD ; und_YE +und_YU ; und_RS +und_ZR ; und_CD +und_aaland ; und_AX +und_arevela ; und +und_arevmda ; und +und_bokmal ; und +und_hakka ; und +und_heploc ; und_alalc97 +und_lojban ; und +und_nynorsk ; und +und_polytoni ; und_polyton +und_saaho ; und +und_xiang ; und +uok ; ema +urd ; ur +uzb ; uz +uzn ; uz +ven ; ve +vie ; vi +vol ; vo +wel ; cy +wln ; wa +wol ; wo +xba ; cax +xho ; xh +xia ; acn +xkh ; waw +xpe ; kpe +xsj ; suj +xsl ; den +ybd ; rki +ydd ; yi +yid ; yi +yma ; lrr +ymt ; mtm +yor ; yo +yos ; zom +yuu ; yug +zai ; zap +zh_guoyu ; zh +zh_hakka ; hak +zh_xiang ; hsn +zha ; za +zho ; zh +zsm ; ms +zul ; zu +zyb ; za + +# decanonicalized + +aar_saaho ; ssy +arm_arevela ; hy +arm_arevela_arevmda ; hyw +arm_arevmda ; hyw +chi_guoyu ; zh +chi_guoyu_hakka_xiang ; hak +chi_hakka ; hak +chi_hakka_xiang ; hak +chi_xiang ; hsn +cmn_guoyu ; zh +cmn_guoyu_hakka_xiang ; hak +cmn_hakka ; hak +cmn_hakka_xiang ; hak +cmn_xiang ; hsn +en_840_aaland ; en_US +en_840_heploc ; en_US_alalc97 +en_840_polytoni ; en_US_polyton +eng_840_aaland ; en_US +eng_840_heploc ; en_US_alalc97 +eng_840_polytoni ; en_US_polyton +eng_US_aaland ; en_US +eng_US_heploc ; en_US_alalc97 +eng_US_polytoni ; en_US_polyton +eng_aaland ; en_AX +eng_arevela ; 
en +eng_arevela_arevmda ; en +eng_lojban ; en +hye_arevela ; hy +hye_arevela_arevmda ; hyw +sgn_076 ; bzs +sgn_170 ; csn +sgn_208 ; dsl +sgn_249 ; fsl +sgn_250 ; fsl +sgn_276 ; gsg +sgn_278 ; gsg +sgn_280 ; gsg +sgn_300 ; gss +sgn_372 ; isg +sgn_380 ; ise +sgn_392 ; jsl +sgn_484 ; mfs +sgn_528 ; dse +sgn_558 ; ncs +sgn_578 ; nsi +sgn_620 ; psr +sgn_710 ; sfs +sgn_752 ; swl +sgn_826 ; bfi +sgn_840 ; ase +sgn_DD ; gsg +sgn_FX ; fsl +sgn_UK ; bfi +zho_guoyu ; zh +zho_guoyu_hakka_xiang ; hak +zho_hakka ; hak +zho_hakka_xiang ; hak +zho_xiang ; hsn + +# withIrrelevants + +aa_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa +aaa_Adlm_004_fonipa ; aaa_Adlm_AF_fonipa +aaa_Adlm_008_fonipa ; aaa_Adlm_AL_fonipa +aaa_Adlm_010_fonipa ; aaa_Adlm_AQ_fonipa +aaa_Adlm_012_fonipa ; aaa_Adlm_DZ_fonipa +aaa_Adlm_016_fonipa ; aaa_Adlm_AS_fonipa +aaa_Adlm_020_fonipa ; aaa_Adlm_AD_fonipa +aaa_Adlm_024_fonipa ; aaa_Adlm_AO_fonipa +aaa_Adlm_028_fonipa ; aaa_Adlm_AG_fonipa +aaa_Adlm_031_fonipa ; aaa_Adlm_AZ_fonipa +aaa_Adlm_032_fonipa ; aaa_Adlm_AR_fonipa +aaa_Adlm_036_fonipa ; aaa_Adlm_AU_fonipa +aaa_Adlm_040_fonipa ; aaa_Adlm_AT_fonipa +aaa_Adlm_044_fonipa ; aaa_Adlm_BS_fonipa +aaa_Adlm_048_fonipa ; aaa_Adlm_BH_fonipa +aaa_Adlm_050_fonipa ; aaa_Adlm_BD_fonipa +aaa_Adlm_051_fonipa ; aaa_Adlm_AM_fonipa +aaa_Adlm_052_fonipa ; aaa_Adlm_BB_fonipa +aaa_Adlm_056_fonipa ; aaa_Adlm_BE_fonipa +aaa_Adlm_060_fonipa ; aaa_Adlm_BM_fonipa +aaa_Adlm_062_fonipa ; aaa_Adlm_034_fonipa +aaa_Adlm_064_fonipa ; aaa_Adlm_BT_fonipa +aaa_Adlm_068_fonipa ; aaa_Adlm_BO_fonipa +aaa_Adlm_070_fonipa ; aaa_Adlm_BA_fonipa +aaa_Adlm_072_fonipa ; aaa_Adlm_BW_fonipa +aaa_Adlm_074_fonipa ; aaa_Adlm_BV_fonipa +aaa_Adlm_076_fonipa ; aaa_Adlm_BR_fonipa +aaa_Adlm_084_fonipa ; aaa_Adlm_BZ_fonipa +aaa_Adlm_086_fonipa ; aaa_Adlm_IO_fonipa +aaa_Adlm_090_fonipa ; aaa_Adlm_SB_fonipa +aaa_Adlm_092_fonipa ; aaa_Adlm_VG_fonipa +aaa_Adlm_096_fonipa ; aaa_Adlm_BN_fonipa +aaa_Adlm_100_fonipa ; aaa_Adlm_BG_fonipa +aaa_Adlm_104_fonipa ; 
aaa_Adlm_MM_fonipa +aaa_Adlm_108_fonipa ; aaa_Adlm_BI_fonipa +aaa_Adlm_112_fonipa ; aaa_Adlm_BY_fonipa +aaa_Adlm_116_fonipa ; aaa_Adlm_KH_fonipa +aaa_Adlm_120_fonipa ; aaa_Adlm_CM_fonipa +aaa_Adlm_124_fonipa ; aaa_Adlm_CA_fonipa +aaa_Adlm_132_fonipa ; aaa_Adlm_CV_fonipa +aaa_Adlm_136_fonipa ; aaa_Adlm_KY_fonipa +aaa_Adlm_140_fonipa ; aaa_Adlm_CF_fonipa +aaa_Adlm_144_fonipa ; aaa_Adlm_LK_fonipa +aaa_Adlm_148_fonipa ; aaa_Adlm_TD_fonipa +aaa_Adlm_152_fonipa ; aaa_Adlm_CL_fonipa +aaa_Adlm_156_fonipa ; aaa_Adlm_CN_fonipa +aaa_Adlm_158_fonipa ; aaa_Adlm_TW_fonipa +aaa_Adlm_162_fonipa ; aaa_Adlm_CX_fonipa +aaa_Adlm_166_fonipa ; aaa_Adlm_CC_fonipa +aaa_Adlm_170_fonipa ; aaa_Adlm_CO_fonipa +aaa_Adlm_172_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_174_fonipa ; aaa_Adlm_KM_fonipa +aaa_Adlm_175_fonipa ; aaa_Adlm_YT_fonipa +aaa_Adlm_178_fonipa ; aaa_Adlm_CG_fonipa +aaa_Adlm_180_fonipa ; aaa_Adlm_CD_fonipa +aaa_Adlm_184_fonipa ; aaa_Adlm_CK_fonipa +aaa_Adlm_188_fonipa ; aaa_Adlm_CR_fonipa +aaa_Adlm_191_fonipa ; aaa_Adlm_HR_fonipa +aaa_Adlm_192_fonipa ; aaa_Adlm_CU_fonipa +aaa_Adlm_196_fonipa ; aaa_Adlm_CY_fonipa +aaa_Adlm_200_fonipa ; aaa_Adlm_CZ_fonipa +aaa_Adlm_203_fonipa ; aaa_Adlm_CZ_fonipa +aaa_Adlm_204_fonipa ; aaa_Adlm_BJ_fonipa +aaa_Adlm_208_fonipa ; aaa_Adlm_DK_fonipa +aaa_Adlm_212_fonipa ; aaa_Adlm_DM_fonipa +aaa_Adlm_214_fonipa ; aaa_Adlm_DO_fonipa +aaa_Adlm_218_fonipa ; aaa_Adlm_EC_fonipa +aaa_Adlm_222_fonipa ; aaa_Adlm_SV_fonipa +aaa_Adlm_226_fonipa ; aaa_Adlm_GQ_fonipa +aaa_Adlm_230_fonipa ; aaa_Adlm_ET_fonipa +aaa_Adlm_231_fonipa ; aaa_Adlm_ET_fonipa +aaa_Adlm_232_fonipa ; aaa_Adlm_ER_fonipa +aaa_Adlm_233_fonipa ; aaa_Adlm_EE_fonipa +aaa_Adlm_234_fonipa ; aaa_Adlm_FO_fonipa +aaa_Adlm_238_fonipa ; aaa_Adlm_FK_fonipa +aaa_Adlm_239_fonipa ; aaa_Adlm_GS_fonipa +aaa_Adlm_242_fonipa ; aaa_Adlm_FJ_fonipa +aaa_Adlm_246_fonipa ; aaa_Adlm_FI_fonipa +aaa_Adlm_248_fonipa ; aaa_Adlm_AX_fonipa +aaa_Adlm_249_fonipa ; aaa_Adlm_FR_fonipa +aaa_Adlm_250_fonipa ; aaa_Adlm_FR_fonipa 
+aaa_Adlm_254_fonipa ; aaa_Adlm_GF_fonipa +aaa_Adlm_258_fonipa ; aaa_Adlm_PF_fonipa +aaa_Adlm_260_fonipa ; aaa_Adlm_TF_fonipa +aaa_Adlm_262_fonipa ; aaa_Adlm_DJ_fonipa +aaa_Adlm_266_fonipa ; aaa_Adlm_GA_fonipa +aaa_Adlm_268_fonipa ; aaa_Adlm_GE_fonipa +aaa_Adlm_270_fonipa ; aaa_Adlm_GM_fonipa +aaa_Adlm_275_fonipa ; aaa_Adlm_PS_fonipa +aaa_Adlm_276_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_278_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_280_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_288_fonipa ; aaa_Adlm_GH_fonipa +aaa_Adlm_292_fonipa ; aaa_Adlm_GI_fonipa +aaa_Adlm_296_fonipa ; aaa_Adlm_KI_fonipa +aaa_Adlm_300_fonipa ; aaa_Adlm_GR_fonipa +aaa_Adlm_304_fonipa ; aaa_Adlm_GL_fonipa +aaa_Adlm_308_fonipa ; aaa_Adlm_GD_fonipa +aaa_Adlm_312_fonipa ; aaa_Adlm_GP_fonipa +aaa_Adlm_316_fonipa ; aaa_Adlm_GU_fonipa +aaa_Adlm_320_fonipa ; aaa_Adlm_GT_fonipa +aaa_Adlm_324_fonipa ; aaa_Adlm_GN_fonipa +aaa_Adlm_328_fonipa ; aaa_Adlm_GY_fonipa +aaa_Adlm_332_fonipa ; aaa_Adlm_HT_fonipa +aaa_Adlm_334_fonipa ; aaa_Adlm_HM_fonipa +aaa_Adlm_336_fonipa ; aaa_Adlm_VA_fonipa +aaa_Adlm_340_fonipa ; aaa_Adlm_HN_fonipa +aaa_Adlm_344_fonipa ; aaa_Adlm_HK_fonipa +aaa_Adlm_348_fonipa ; aaa_Adlm_HU_fonipa +aaa_Adlm_352_fonipa ; aaa_Adlm_IS_fonipa +aaa_Adlm_356_fonipa ; aaa_Adlm_IN_fonipa +aaa_Adlm_360_fonipa ; aaa_Adlm_ID_fonipa +aaa_Adlm_364_fonipa ; aaa_Adlm_IR_fonipa +aaa_Adlm_368_fonipa ; aaa_Adlm_IQ_fonipa +aaa_Adlm_372_fonipa ; aaa_Adlm_IE_fonipa +aaa_Adlm_376_fonipa ; aaa_Adlm_IL_fonipa +aaa_Adlm_380_fonipa ; aaa_Adlm_IT_fonipa +aaa_Adlm_384_fonipa ; aaa_Adlm_CI_fonipa +aaa_Adlm_388_fonipa ; aaa_Adlm_JM_fonipa +aaa_Adlm_392_fonipa ; aaa_Adlm_JP_fonipa +aaa_Adlm_398_fonipa ; aaa_Adlm_KZ_fonipa +aaa_Adlm_400_fonipa ; aaa_Adlm_JO_fonipa +aaa_Adlm_404_fonipa ; aaa_Adlm_KE_fonipa +aaa_Adlm_408_fonipa ; aaa_Adlm_KP_fonipa +aaa_Adlm_410_fonipa ; aaa_Adlm_KR_fonipa +aaa_Adlm_414_fonipa ; aaa_Adlm_KW_fonipa +aaa_Adlm_417_fonipa ; aaa_Adlm_KG_fonipa +aaa_Adlm_418_fonipa ; aaa_Adlm_LA_fonipa +aaa_Adlm_422_fonipa ; 
aaa_Adlm_LB_fonipa +aaa_Adlm_426_fonipa ; aaa_Adlm_LS_fonipa +aaa_Adlm_428_fonipa ; aaa_Adlm_LV_fonipa +aaa_Adlm_430_fonipa ; aaa_Adlm_LR_fonipa +aaa_Adlm_434_fonipa ; aaa_Adlm_LY_fonipa +aaa_Adlm_438_fonipa ; aaa_Adlm_LI_fonipa +aaa_Adlm_440_fonipa ; aaa_Adlm_LT_fonipa +aaa_Adlm_442_fonipa ; aaa_Adlm_LU_fonipa +aaa_Adlm_446_fonipa ; aaa_Adlm_MO_fonipa +aaa_Adlm_450_fonipa ; aaa_Adlm_MG_fonipa +aaa_Adlm_454_fonipa ; aaa_Adlm_MW_fonipa +aaa_Adlm_458_fonipa ; aaa_Adlm_MY_fonipa +aaa_Adlm_462_fonipa ; aaa_Adlm_MV_fonipa +aaa_Adlm_466_fonipa ; aaa_Adlm_ML_fonipa +aaa_Adlm_470_fonipa ; aaa_Adlm_MT_fonipa +aaa_Adlm_474_fonipa ; aaa_Adlm_MQ_fonipa +aaa_Adlm_478_fonipa ; aaa_Adlm_MR_fonipa +aaa_Adlm_480_fonipa ; aaa_Adlm_MU_fonipa +aaa_Adlm_484_fonipa ; aaa_Adlm_MX_fonipa +aaa_Adlm_492_fonipa ; aaa_Adlm_MC_fonipa +aaa_Adlm_496_fonipa ; aaa_Adlm_MN_fonipa +aaa_Adlm_498_fonipa ; aaa_Adlm_MD_fonipa +aaa_Adlm_499_fonipa ; aaa_Adlm_ME_fonipa +aaa_Adlm_500_fonipa ; aaa_Adlm_MS_fonipa +aaa_Adlm_504_fonipa ; aaa_Adlm_MA_fonipa +aaa_Adlm_508_fonipa ; aaa_Adlm_MZ_fonipa +aaa_Adlm_512_fonipa ; aaa_Adlm_OM_fonipa +aaa_Adlm_516_fonipa ; aaa_Adlm_NA_fonipa +aaa_Adlm_520_fonipa ; aaa_Adlm_NR_fonipa +aaa_Adlm_524_fonipa ; aaa_Adlm_NP_fonipa +aaa_Adlm_528_fonipa ; aaa_Adlm_NL_fonipa +aaa_Adlm_530_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_531_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_532_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_533_fonipa ; aaa_Adlm_AW_fonipa +aaa_Adlm_534_fonipa ; aaa_Adlm_SX_fonipa +aaa_Adlm_535_fonipa ; aaa_Adlm_BQ_fonipa +aaa_Adlm_536_fonipa ; aaa_Adlm_SA_fonipa +aaa_Adlm_540_fonipa ; aaa_Adlm_NC_fonipa +aaa_Adlm_548_fonipa ; aaa_Adlm_VU_fonipa +aaa_Adlm_554_fonipa ; aaa_Adlm_NZ_fonipa +aaa_Adlm_558_fonipa ; aaa_Adlm_NI_fonipa +aaa_Adlm_562_fonipa ; aaa_Adlm_NE_fonipa +aaa_Adlm_566_fonipa ; aaa_Adlm_NG_fonipa +aaa_Adlm_570_fonipa ; aaa_Adlm_NU_fonipa +aaa_Adlm_574_fonipa ; aaa_Adlm_NF_fonipa +aaa_Adlm_578_fonipa ; aaa_Adlm_NO_fonipa +aaa_Adlm_580_fonipa ; aaa_Adlm_MP_fonipa 
+aaa_Adlm_581_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_582_fonipa ; aaa_Adlm_FM_fonipa +aaa_Adlm_583_fonipa ; aaa_Adlm_FM_fonipa +aaa_Adlm_584_fonipa ; aaa_Adlm_MH_fonipa +aaa_Adlm_585_fonipa ; aaa_Adlm_PW_fonipa +aaa_Adlm_586_fonipa ; aaa_Adlm_PK_fonipa +aaa_Adlm_591_fonipa ; aaa_Adlm_PA_fonipa +aaa_Adlm_598_fonipa ; aaa_Adlm_PG_fonipa +aaa_Adlm_600_fonipa ; aaa_Adlm_PY_fonipa +aaa_Adlm_604_fonipa ; aaa_Adlm_PE_fonipa +aaa_Adlm_608_fonipa ; aaa_Adlm_PH_fonipa +aaa_Adlm_612_fonipa ; aaa_Adlm_PN_fonipa +aaa_Adlm_616_fonipa ; aaa_Adlm_PL_fonipa +aaa_Adlm_620_fonipa ; aaa_Adlm_PT_fonipa +aaa_Adlm_624_fonipa ; aaa_Adlm_GW_fonipa +aaa_Adlm_626_fonipa ; aaa_Adlm_TL_fonipa +aaa_Adlm_630_fonipa ; aaa_Adlm_PR_fonipa +aaa_Adlm_634_fonipa ; aaa_Adlm_QA_fonipa +aaa_Adlm_638_fonipa ; aaa_Adlm_RE_fonipa +aaa_Adlm_642_fonipa ; aaa_Adlm_RO_fonipa +aaa_Adlm_643_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_646_fonipa ; aaa_Adlm_RW_fonipa +aaa_Adlm_652_fonipa ; aaa_Adlm_BL_fonipa +aaa_Adlm_654_fonipa ; aaa_Adlm_SH_fonipa +aaa_Adlm_659_fonipa ; aaa_Adlm_KN_fonipa +aaa_Adlm_660_fonipa ; aaa_Adlm_AI_fonipa +aaa_Adlm_662_fonipa ; aaa_Adlm_LC_fonipa +aaa_Adlm_663_fonipa ; aaa_Adlm_MF_fonipa +aaa_Adlm_666_fonipa ; aaa_Adlm_PM_fonipa +aaa_Adlm_670_fonipa ; aaa_Adlm_VC_fonipa +aaa_Adlm_674_fonipa ; aaa_Adlm_SM_fonipa +aaa_Adlm_678_fonipa ; aaa_Adlm_ST_fonipa +aaa_Adlm_682_fonipa ; aaa_Adlm_SA_fonipa +aaa_Adlm_686_fonipa ; aaa_Adlm_SN_fonipa +aaa_Adlm_688_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_690_fonipa ; aaa_Adlm_SC_fonipa +aaa_Adlm_694_fonipa ; aaa_Adlm_SL_fonipa +aaa_Adlm_702_fonipa ; aaa_Adlm_SG_fonipa +aaa_Adlm_703_fonipa ; aaa_Adlm_SK_fonipa +aaa_Adlm_704_fonipa ; aaa_Adlm_VN_fonipa +aaa_Adlm_705_fonipa ; aaa_Adlm_SI_fonipa +aaa_Adlm_706_fonipa ; aaa_Adlm_SO_fonipa +aaa_Adlm_710_fonipa ; aaa_Adlm_ZA_fonipa +aaa_Adlm_716_fonipa ; aaa_Adlm_ZW_fonipa +aaa_Adlm_720_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_724_fonipa ; aaa_Adlm_ES_fonipa +aaa_Adlm_728_fonipa ; aaa_Adlm_SS_fonipa +aaa_Adlm_729_fonipa ; 
aaa_Adlm_SD_fonipa +aaa_Adlm_732_fonipa ; aaa_Adlm_EH_fonipa +aaa_Adlm_736_fonipa ; aaa_Adlm_SD_fonipa +aaa_Adlm_740_fonipa ; aaa_Adlm_SR_fonipa +aaa_Adlm_744_fonipa ; aaa_Adlm_SJ_fonipa +aaa_Adlm_748_fonipa ; aaa_Adlm_SZ_fonipa +aaa_Adlm_752_fonipa ; aaa_Adlm_SE_fonipa +aaa_Adlm_756_fonipa ; aaa_Adlm_CH_fonipa +aaa_Adlm_760_fonipa ; aaa_Adlm_SY_fonipa +aaa_Adlm_762_fonipa ; aaa_Adlm_TJ_fonipa +aaa_Adlm_764_fonipa ; aaa_Adlm_TH_fonipa +aaa_Adlm_768_fonipa ; aaa_Adlm_TG_fonipa +aaa_Adlm_772_fonipa ; aaa_Adlm_TK_fonipa +aaa_Adlm_776_fonipa ; aaa_Adlm_TO_fonipa +aaa_Adlm_780_fonipa ; aaa_Adlm_TT_fonipa +aaa_Adlm_784_fonipa ; aaa_Adlm_AE_fonipa +aaa_Adlm_788_fonipa ; aaa_Adlm_TN_fonipa +aaa_Adlm_792_fonipa ; aaa_Adlm_TR_fonipa +aaa_Adlm_795_fonipa ; aaa_Adlm_TM_fonipa +aaa_Adlm_796_fonipa ; aaa_Adlm_TC_fonipa +aaa_Adlm_798_fonipa ; aaa_Adlm_TV_fonipa +aaa_Adlm_800_fonipa ; aaa_Adlm_UG_fonipa +aaa_Adlm_804_fonipa ; aaa_Adlm_UA_fonipa +aaa_Adlm_807_fonipa ; aaa_Adlm_MK_fonipa +aaa_Adlm_810_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_818_fonipa ; aaa_Adlm_EG_fonipa +aaa_Adlm_826_fonipa ; aaa_Adlm_GB_fonipa +aaa_Adlm_830_fonipa ; aaa_Adlm_JE_fonipa +aaa_Adlm_831_fonipa ; aaa_Adlm_GG_fonipa +aaa_Adlm_832_fonipa ; aaa_Adlm_JE_fonipa +aaa_Adlm_833_fonipa ; aaa_Adlm_IM_fonipa +aaa_Adlm_834_fonipa ; aaa_Adlm_TZ_fonipa +aaa_Adlm_840_fonipa ; aaa_Adlm_US_fonipa +aaa_Adlm_850_fonipa ; aaa_Adlm_VI_fonipa +aaa_Adlm_854_fonipa ; aaa_Adlm_BF_fonipa +aaa_Adlm_858_fonipa ; aaa_Adlm_UY_fonipa +aaa_Adlm_860_fonipa ; aaa_Adlm_UZ_fonipa +aaa_Adlm_862_fonipa ; aaa_Adlm_VE_fonipa +aaa_Adlm_876_fonipa ; aaa_Adlm_WF_fonipa +aaa_Adlm_882_fonipa ; aaa_Adlm_WS_fonipa +aaa_Adlm_886_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_887_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_890_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_891_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_894_fonipa ; aaa_Adlm_ZM_fonipa +aaa_Adlm_958_fonipa ; aaa_Adlm_AA_fonipa +aaa_Adlm_959_fonipa ; aaa_Adlm_QM_fonipa +aaa_Adlm_960_fonipa ; aaa_Adlm_QN_fonipa 
+aaa_Adlm_962_fonipa ; aaa_Adlm_QP_fonipa +aaa_Adlm_963_fonipa ; aaa_Adlm_QQ_fonipa +aaa_Adlm_964_fonipa ; aaa_Adlm_QR_fonipa +aaa_Adlm_965_fonipa ; aaa_Adlm_QS_fonipa +aaa_Adlm_966_fonipa ; aaa_Adlm_QT_fonipa +aaa_Adlm_967_fonipa ; aaa_Adlm_EU_fonipa +aaa_Adlm_968_fonipa ; aaa_Adlm_QV_fonipa +aaa_Adlm_969_fonipa ; aaa_Adlm_QW_fonipa +aaa_Adlm_970_fonipa ; aaa_Adlm_QX_fonipa +aaa_Adlm_971_fonipa ; aaa_Adlm_QY_fonipa +aaa_Adlm_972_fonipa ; aaa_Adlm_QZ_fonipa +aaa_Adlm_973_fonipa ; aaa_Adlm_XA_fonipa +aaa_Adlm_974_fonipa ; aaa_Adlm_XB_fonipa +aaa_Adlm_975_fonipa ; aaa_Adlm_XC_fonipa +aaa_Adlm_976_fonipa ; aaa_Adlm_XD_fonipa +aaa_Adlm_977_fonipa ; aaa_Adlm_XE_fonipa +aaa_Adlm_978_fonipa ; aaa_Adlm_XF_fonipa +aaa_Adlm_979_fonipa ; aaa_Adlm_XG_fonipa +aaa_Adlm_980_fonipa ; aaa_Adlm_XH_fonipa +aaa_Adlm_981_fonipa ; aaa_Adlm_XI_fonipa +aaa_Adlm_982_fonipa ; aaa_Adlm_XJ_fonipa +aaa_Adlm_983_fonipa ; aaa_Adlm_XK_fonipa +aaa_Adlm_984_fonipa ; aaa_Adlm_XL_fonipa +aaa_Adlm_985_fonipa ; aaa_Adlm_XM_fonipa +aaa_Adlm_986_fonipa ; aaa_Adlm_XN_fonipa +aaa_Adlm_987_fonipa ; aaa_Adlm_XO_fonipa +aaa_Adlm_988_fonipa ; aaa_Adlm_XP_fonipa +aaa_Adlm_989_fonipa ; aaa_Adlm_XQ_fonipa +aaa_Adlm_990_fonipa ; aaa_Adlm_XR_fonipa +aaa_Adlm_991_fonipa ; aaa_Adlm_XS_fonipa +aaa_Adlm_992_fonipa ; aaa_Adlm_XT_fonipa +aaa_Adlm_993_fonipa ; aaa_Adlm_XU_fonipa +aaa_Adlm_994_fonipa ; aaa_Adlm_XV_fonipa +aaa_Adlm_995_fonipa ; aaa_Adlm_XW_fonipa +aaa_Adlm_996_fonipa ; aaa_Adlm_XX_fonipa +aaa_Adlm_997_fonipa ; aaa_Adlm_XY_fonipa +aaa_Adlm_998_fonipa ; aaa_Adlm_XZ_fonipa +aaa_Adlm_999_fonipa ; aaa_Adlm_ZZ_fonipa +aaa_Adlm_AC_aaland_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_arevela_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_arevmda_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_bokmal_fonipa ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_hakka ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_heploc ; aaa_Adlm_AC_alalc97_fonipa +aaa_Adlm_AC_fonipa_lojban ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_nynorsk ; aaa_Adlm_AC_fonipa 
+aaa_Adlm_AC_fonipa_polytoni ; aaa_Adlm_AC_fonipa_polyton +aaa_Adlm_AC_fonipa_saaho ; aaa_Adlm_AC_fonipa +aaa_Adlm_AC_fonipa_xiang ; aaa_Adlm_AC_fonipa +aaa_Adlm_AN_fonipa ; aaa_Adlm_CW_fonipa +aaa_Adlm_BU_fonipa ; aaa_Adlm_MM_fonipa +aaa_Adlm_CS_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_CT_fonipa ; aaa_Adlm_KI_fonipa +aaa_Adlm_DD_fonipa ; aaa_Adlm_DE_fonipa +aaa_Adlm_DY_fonipa ; aaa_Adlm_BJ_fonipa +aaa_Adlm_FQ_fonipa ; aaa_Adlm_AQ_fonipa +aaa_Adlm_FX_fonipa ; aaa_Adlm_FR_fonipa +aaa_Adlm_HV_fonipa ; aaa_Adlm_BF_fonipa +aaa_Adlm_JT_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_MI_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_NH_fonipa ; aaa_Adlm_VU_fonipa +aaa_Adlm_NQ_fonipa ; aaa_Adlm_AQ_fonipa +aaa_Adlm_NT_fonipa ; aaa_Adlm_SA_fonipa +aaa_Adlm_PC_fonipa ; aaa_Adlm_FM_fonipa +aaa_Adlm_PU_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_PZ_fonipa ; aaa_Adlm_PA_fonipa +aaa_Adlm_QU_fonipa ; aaa_Adlm_EU_fonipa +aaa_Adlm_RH_fonipa ; aaa_Adlm_ZW_fonipa +aaa_Adlm_SU_fonipa ; aaa_Adlm_RU_fonipa +aaa_Adlm_TP_fonipa ; aaa_Adlm_TL_fonipa +aaa_Adlm_UK_fonipa ; aaa_Adlm_GB_fonipa +aaa_Adlm_VD_fonipa ; aaa_Adlm_VN_fonipa +aaa_Adlm_WK_fonipa ; aaa_Adlm_UM_fonipa +aaa_Adlm_YD_fonipa ; aaa_Adlm_YE_fonipa +aaa_Adlm_YU_fonipa ; aaa_Adlm_RS_fonipa +aaa_Adlm_ZR_fonipa ; aaa_Adlm_CD_fonipa +aaa_Qaai_AC_fonipa ; aaa_Zinh_AC_fonipa +aam_Adlm_AC_fonipa ; aas_Adlm_AC_fonipa +aar_Adlm_AC_fonipa ; aa_Adlm_AC_fonipa +aar_Adlm_AC_fonipa_saaho ; ssy_Adlm_AC_fonipa +abk_Adlm_AC_fonipa ; ab_Adlm_AC_fonipa +adp_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa +afr_Adlm_AC_fonipa ; af_Adlm_AC_fonipa +aju_Adlm_AC_fonipa ; jrb_Adlm_AC_fonipa +aka_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +alb_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa +als_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa +amh_Adlm_AC_fonipa ; am_Adlm_AC_fonipa +ara_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa +arb_Adlm_AC_fonipa ; ar_Adlm_AC_fonipa +arg_Adlm_AC_fonipa ; an_Adlm_AC_fonipa +arm_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa +arm_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa 
+arm_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa +arm_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa +art_Adlm_AC_fonipa_lojban ; jbo_Adlm_AC_fonipa +asd_Adlm_AC_fonipa ; snz_Adlm_AC_fonipa +asm_Adlm_AC_fonipa ; as_Adlm_AC_fonipa +aue_Adlm_AC_fonipa ; ktz_Adlm_AC_fonipa +ava_Adlm_AC_fonipa ; av_Adlm_AC_fonipa +ave_Adlm_AC_fonipa ; ae_Adlm_AC_fonipa +aym_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa +ayr_Adlm_AC_fonipa ; ay_Adlm_AC_fonipa +ayx_Adlm_AC_fonipa ; nun_Adlm_AC_fonipa +aze_Adlm_AC_fonipa ; az_Adlm_AC_fonipa +azj_Adlm_AC_fonipa ; az_Adlm_AC_fonipa +bak_Adlm_AC_fonipa ; ba_Adlm_AC_fonipa +bam_Adlm_AC_fonipa ; bm_Adlm_AC_fonipa +baq_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa +bcc_Adlm_AC_fonipa ; bal_Adlm_AC_fonipa +bcl_Adlm_AC_fonipa ; bik_Adlm_AC_fonipa +bel_Adlm_AC_fonipa ; be_Adlm_AC_fonipa +ben_Adlm_AC_fonipa ; bn_Adlm_AC_fonipa +bgm_Adlm_AC_fonipa ; bcg_Adlm_AC_fonipa +bh_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa +bih_Adlm_AC_fonipa ; bho_Adlm_AC_fonipa +bis_Adlm_AC_fonipa ; bi_Adlm_AC_fonipa +bjd_Adlm_AC_fonipa ; drl_Adlm_AC_fonipa +bod_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa +bos_Adlm_AC_fonipa ; bs_Adlm_AC_fonipa +bre_Adlm_AC_fonipa ; br_Adlm_AC_fonipa +bul_Adlm_AC_fonipa ; bg_Adlm_AC_fonipa +bur_Adlm_AC_fonipa ; my_Adlm_AC_fonipa +bxk_Adlm_AC_fonipa ; luy_Adlm_AC_fonipa +bxr_Adlm_AC_fonipa ; bua_Adlm_AC_fonipa +cat_Adlm_AC_fonipa ; ca_Adlm_AC_fonipa +ccq_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa +cel_Adlm_AC_fonipa_gaulish ; xtg_Adlm_AC_fonipa +ces_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa +cha_Adlm_AC_fonipa ; ch_Adlm_AC_fonipa +che_Adlm_AC_fonipa ; ce_Adlm_AC_fonipa +chi_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +chi_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +chu_Adlm_AC_fonipa ; cu_Adlm_AC_fonipa +chv_Adlm_AC_fonipa ; cv_Adlm_AC_fonipa +cjr_Adlm_AC_fonipa ; mom_Adlm_AC_fonipa +cka_Adlm_AC_fonipa ; 
cmr_Adlm_AC_fonipa +cld_Adlm_AC_fonipa ; syr_Adlm_AC_fonipa +cmk_Adlm_AC_fonipa ; xch_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +cmn_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +cnr_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +cor_Adlm_AC_fonipa ; kw_Adlm_AC_fonipa +cos_Adlm_AC_fonipa ; co_Adlm_AC_fonipa +coy_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa +cqu_Adlm_AC_fonipa ; quh_Adlm_AC_fonipa +cre_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa +cwd_Adlm_AC_fonipa ; cr_Adlm_AC_fonipa +cym_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa +cze_Adlm_AC_fonipa ; cs_Adlm_AC_fonipa +dan_Adlm_AC_fonipa ; da_Adlm_AC_fonipa +deu_Adlm_AC_fonipa ; de_Adlm_AC_fonipa +dgo_Adlm_AC_fonipa ; doi_Adlm_AC_fonipa +dhd_Adlm_AC_fonipa ; mwr_Adlm_AC_fonipa +dik_Adlm_AC_fonipa ; din_Adlm_AC_fonipa +diq_Adlm_AC_fonipa ; zza_Adlm_AC_fonipa +dit_Adlm_AC_fonipa ; dif_Adlm_AC_fonipa +div_Adlm_AC_fonipa ; dv_Adlm_AC_fonipa +drh_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa +drw_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +dut_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa +dzo_Adlm_AC_fonipa ; dz_Adlm_AC_fonipa +ekk_Adlm_AC_fonipa ; et_Adlm_AC_fonipa +ell_Adlm_AC_fonipa ; el_Adlm_AC_fonipa +emk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa +en_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa +en_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa +en_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +en_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa +en_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa +en_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa +en_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa +en_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa +en_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa +en_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +eng_Adlm_840_aaland_fonipa ; en_Adlm_US_fonipa +eng_Adlm_840_fonipa_heploc ; en_Adlm_US_alalc97_fonipa 
+eng_Adlm_840_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +eng_Adlm_AC_aaland_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_arevela_arevmda_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_arevela_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_fonipa ; en_Adlm_AC_fonipa +eng_Adlm_AC_fonipa_lojban ; en_Adlm_AC_fonipa +eng_Adlm_US_aaland_fonipa ; en_Adlm_US_fonipa +eng_Adlm_US_fonipa_heploc ; en_Adlm_US_alalc97_fonipa +eng_Adlm_US_fonipa_polytoni ; en_Adlm_US_fonipa_polyton +epo_Adlm_AC_fonipa ; eo_Adlm_AC_fonipa +esk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa +est_Adlm_AC_fonipa ; et_Adlm_AC_fonipa +eus_Adlm_AC_fonipa ; eu_Adlm_AC_fonipa +ewe_Adlm_AC_fonipa ; ee_Adlm_AC_fonipa +fao_Adlm_AC_fonipa ; fo_Adlm_AC_fonipa +fas_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +fat_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +fij_Adlm_AC_fonipa ; fj_Adlm_AC_fonipa +fin_Adlm_AC_fonipa ; fi_Adlm_AC_fonipa +fra_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa +fre_Adlm_AC_fonipa ; fr_Adlm_AC_fonipa +fry_Adlm_AC_fonipa ; fy_Adlm_AC_fonipa +fuc_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa +ful_Adlm_AC_fonipa ; ff_Adlm_AC_fonipa +gav_Adlm_AC_fonipa ; dev_Adlm_AC_fonipa +gaz_Adlm_AC_fonipa ; om_Adlm_AC_fonipa +gbo_Adlm_AC_fonipa ; grb_Adlm_AC_fonipa +geo_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa +ger_Adlm_AC_fonipa ; de_Adlm_AC_fonipa +gfx_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa +ggn_Adlm_AC_fonipa ; gvr_Adlm_AC_fonipa +gla_Adlm_AC_fonipa ; gd_Adlm_AC_fonipa +gle_Adlm_AC_fonipa ; ga_Adlm_AC_fonipa +glg_Adlm_AC_fonipa ; gl_Adlm_AC_fonipa +glv_Adlm_AC_fonipa ; gv_Adlm_AC_fonipa +gno_Adlm_AC_fonipa ; gon_Adlm_AC_fonipa +gre_Adlm_AC_fonipa ; el_Adlm_AC_fonipa +grn_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa +gti_Adlm_AC_fonipa ; nyc_Adlm_AC_fonipa +gug_Adlm_AC_fonipa ; gn_Adlm_AC_fonipa +guj_Adlm_AC_fonipa ; gu_Adlm_AC_fonipa +guv_Adlm_AC_fonipa ; duz_Adlm_AC_fonipa +gya_Adlm_AC_fonipa ; gba_Adlm_AC_fonipa +hat_Adlm_AC_fonipa ; ht_Adlm_AC_fonipa +hau_Adlm_AC_fonipa ; ha_Adlm_AC_fonipa +hbs_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +hdn_Adlm_AC_fonipa ; hai_Adlm_AC_fonipa 
+hea_Adlm_AC_fonipa ; hmn_Adlm_AC_fonipa +heb_Adlm_AC_fonipa ; he_Adlm_AC_fonipa +her_Adlm_AC_fonipa ; hz_Adlm_AC_fonipa +him_Adlm_AC_fonipa ; srx_Adlm_AC_fonipa +hin_Adlm_AC_fonipa ; hi_Adlm_AC_fonipa +hmo_Adlm_AC_fonipa ; ho_Adlm_AC_fonipa +hrr_Adlm_AC_fonipa ; jal_Adlm_AC_fonipa +hrv_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa +hun_Adlm_AC_fonipa ; hu_Adlm_AC_fonipa +hy_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hy_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa +hy_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hye_Adlm_AC_arevela_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hye_Adlm_AC_arevela_fonipa ; hy_Adlm_AC_fonipa +hye_Adlm_AC_arevmda_fonipa ; hyw_Adlm_AC_fonipa +hye_Adlm_AC_fonipa ; hy_Adlm_AC_fonipa +ibi_Adlm_AC_fonipa ; opa_Adlm_AC_fonipa +ibo_Adlm_AC_fonipa ; ig_Adlm_AC_fonipa +ice_Adlm_AC_fonipa ; is_Adlm_AC_fonipa +ido_Adlm_AC_fonipa ; io_Adlm_AC_fonipa +iii_Adlm_AC_fonipa ; ii_Adlm_AC_fonipa +ike_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa +iku_Adlm_AC_fonipa ; iu_Adlm_AC_fonipa +ile_Adlm_AC_fonipa ; ie_Adlm_AC_fonipa +ilw_Adlm_AC_fonipa ; gal_Adlm_AC_fonipa +in_Adlm_AC_fonipa ; id_Adlm_AC_fonipa +ina_Adlm_AC_fonipa ; ia_Adlm_AC_fonipa +ind_Adlm_AC_fonipa ; id_Adlm_AC_fonipa +ipk_Adlm_AC_fonipa ; ik_Adlm_AC_fonipa +isl_Adlm_AC_fonipa ; is_Adlm_AC_fonipa +ita_Adlm_AC_fonipa ; it_Adlm_AC_fonipa +iw_Adlm_AC_fonipa ; he_Adlm_AC_fonipa +jav_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa +jeg_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa +ji_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa +jpn_Adlm_AC_fonipa ; ja_Adlm_AC_fonipa +jw_Adlm_AC_fonipa ; jv_Adlm_AC_fonipa +kal_Adlm_AC_fonipa ; kl_Adlm_AC_fonipa +kan_Adlm_AC_fonipa ; kn_Adlm_AC_fonipa +kas_Adlm_AC_fonipa ; ks_Adlm_AC_fonipa +kat_Adlm_AC_fonipa ; ka_Adlm_AC_fonipa +kau_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa +kaz_Adlm_AC_fonipa ; kk_Adlm_AC_fonipa +kgc_Adlm_AC_fonipa ; tdf_Adlm_AC_fonipa +kgh_Adlm_AC_fonipa ; kml_Adlm_AC_fonipa +khk_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa +khm_Adlm_AC_fonipa ; km_Adlm_AC_fonipa +kik_Adlm_AC_fonipa ; ki_Adlm_AC_fonipa 
+kin_Adlm_AC_fonipa ; rw_Adlm_AC_fonipa +kir_Adlm_AC_fonipa ; ky_Adlm_AC_fonipa +kmr_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa +knc_Adlm_AC_fonipa ; kr_Adlm_AC_fonipa +kng_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa +knn_Adlm_AC_fonipa ; kok_Adlm_AC_fonipa +koj_Adlm_AC_fonipa ; kwv_Adlm_AC_fonipa +kom_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa +kon_Adlm_AC_fonipa ; kg_Adlm_AC_fonipa +kor_Adlm_AC_fonipa ; ko_Adlm_AC_fonipa +kpv_Adlm_AC_fonipa ; kv_Adlm_AC_fonipa +krm_Adlm_AC_fonipa ; bmf_Adlm_AC_fonipa +ktr_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +kua_Adlm_AC_fonipa ; kj_Adlm_AC_fonipa +kur_Adlm_AC_fonipa ; ku_Adlm_AC_fonipa +kvs_Adlm_AC_fonipa ; gdj_Adlm_AC_fonipa +kwq_Adlm_AC_fonipa ; yam_Adlm_AC_fonipa +kxe_Adlm_AC_fonipa ; tvd_Adlm_AC_fonipa +kzj_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +kzt_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +lao_Adlm_AC_fonipa ; lo_Adlm_AC_fonipa +lat_Adlm_AC_fonipa ; la_Adlm_AC_fonipa +lav_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa +lbk_Adlm_AC_fonipa ; bnc_Adlm_AC_fonipa +lii_Adlm_AC_fonipa ; raq_Adlm_AC_fonipa +lim_Adlm_AC_fonipa ; li_Adlm_AC_fonipa +lin_Adlm_AC_fonipa ; ln_Adlm_AC_fonipa +lit_Adlm_AC_fonipa ; lt_Adlm_AC_fonipa +llo_Adlm_AC_fonipa ; ngt_Adlm_AC_fonipa +lmm_Adlm_AC_fonipa ; rmx_Adlm_AC_fonipa +ltz_Adlm_AC_fonipa ; lb_Adlm_AC_fonipa +lub_Adlm_AC_fonipa ; lu_Adlm_AC_fonipa +lug_Adlm_AC_fonipa ; lg_Adlm_AC_fonipa +lvs_Adlm_AC_fonipa ; lv_Adlm_AC_fonipa +mac_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa +mah_Adlm_AC_fonipa ; mh_Adlm_AC_fonipa +mal_Adlm_AC_fonipa ; ml_Adlm_AC_fonipa +mao_Adlm_AC_fonipa ; mi_Adlm_AC_fonipa +mar_Adlm_AC_fonipa ; mr_Adlm_AC_fonipa +may_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa +meg_Adlm_AC_fonipa ; cir_Adlm_AC_fonipa +mhr_Adlm_AC_fonipa ; chm_Adlm_AC_fonipa +mkd_Adlm_AC_fonipa ; mk_Adlm_AC_fonipa +mlg_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa +mlt_Adlm_AC_fonipa ; mt_Adlm_AC_fonipa +mnk_Adlm_AC_fonipa ; man_Adlm_AC_fonipa +mo_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +mol_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +mon_Adlm_AC_fonipa ; mn_Adlm_AC_fonipa +mri_Adlm_AC_fonipa ; 
mi_Adlm_AC_fonipa +msa_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa +mst_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa +mup_Adlm_AC_fonipa ; raj_Adlm_AC_fonipa +mwj_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa +mya_Adlm_AC_fonipa ; my_Adlm_AC_fonipa +myd_Adlm_AC_fonipa ; aog_Adlm_AC_fonipa +myt_Adlm_AC_fonipa ; mry_Adlm_AC_fonipa +nad_Adlm_AC_fonipa ; xny_Adlm_AC_fonipa +nau_Adlm_AC_fonipa ; na_Adlm_AC_fonipa +nav_Adlm_AC_fonipa ; nv_Adlm_AC_fonipa +nbl_Adlm_AC_fonipa ; nr_Adlm_AC_fonipa +ncp_Adlm_AC_fonipa ; kdz_Adlm_AC_fonipa +nde_Adlm_AC_fonipa ; nd_Adlm_AC_fonipa +ndo_Adlm_AC_fonipa ; ng_Adlm_AC_fonipa +nep_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa +nld_Adlm_AC_fonipa ; nl_Adlm_AC_fonipa +nno_Adlm_AC_fonipa ; nn_Adlm_AC_fonipa +nns_Adlm_AC_fonipa ; nbr_Adlm_AC_fonipa +nnx_Adlm_AC_fonipa ; ngv_Adlm_AC_fonipa +no_Adlm_AC_bokmal_fonipa ; nb_Adlm_AC_fonipa +no_Adlm_AC_bokmal_fonipa_nynorsk ; nb_Adlm_AC_fonipa +no_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa +no_Adlm_AC_fonipa_nynorsk ; nn_Adlm_AC_fonipa +nob_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa +nor_Adlm_AC_fonipa ; nb_Adlm_AC_fonipa +npi_Adlm_AC_fonipa ; ne_Adlm_AC_fonipa +nts_Adlm_AC_fonipa ; pij_Adlm_AC_fonipa +nya_Adlm_AC_fonipa ; ny_Adlm_AC_fonipa +oci_Adlm_AC_fonipa ; oc_Adlm_AC_fonipa +ojg_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa +oji_Adlm_AC_fonipa ; oj_Adlm_AC_fonipa +ori_Adlm_AC_fonipa ; or_Adlm_AC_fonipa +orm_Adlm_AC_fonipa ; om_Adlm_AC_fonipa +ory_Adlm_AC_fonipa ; or_Adlm_AC_fonipa +oss_Adlm_AC_fonipa ; os_Adlm_AC_fonipa +oun_Adlm_AC_fonipa ; vaj_Adlm_AC_fonipa +pan_Adlm_AC_fonipa ; pa_Adlm_AC_fonipa +pbu_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa +pcr_Adlm_AC_fonipa ; adx_Adlm_AC_fonipa +per_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +pes_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +pli_Adlm_AC_fonipa ; pi_Adlm_AC_fonipa +plt_Adlm_AC_fonipa ; mg_Adlm_AC_fonipa +pmc_Adlm_AC_fonipa ; huw_Adlm_AC_fonipa +pmu_Adlm_AC_fonipa ; phr_Adlm_AC_fonipa +pnb_Adlm_AC_fonipa ; lah_Adlm_AC_fonipa +pol_Adlm_AC_fonipa ; pl_Adlm_AC_fonipa +por_Adlm_AC_fonipa ; pt_Adlm_AC_fonipa +ppa_Adlm_AC_fonipa 
; bfy_Adlm_AC_fonipa +ppr_Adlm_AC_fonipa ; lcq_Adlm_AC_fonipa +prs_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +pry_Adlm_AC_fonipa ; prt_Adlm_AC_fonipa +pus_Adlm_AC_fonipa ; ps_Adlm_AC_fonipa +puz_Adlm_AC_fonipa ; pub_Adlm_AC_fonipa +que_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa +quz_Adlm_AC_fonipa ; qu_Adlm_AC_fonipa +rmy_Adlm_AC_fonipa ; rom_Adlm_AC_fonipa +roh_Adlm_AC_fonipa ; rm_Adlm_AC_fonipa +ron_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +rum_Adlm_AC_fonipa ; ro_Adlm_AC_fonipa +run_Adlm_AC_fonipa ; rn_Adlm_AC_fonipa +rus_Adlm_AC_fonipa ; ru_Adlm_AC_fonipa +sag_Adlm_AC_fonipa ; sg_Adlm_AC_fonipa +san_Adlm_AC_fonipa ; sa_Adlm_AC_fonipa +sca_Adlm_AC_fonipa ; hle_Adlm_AC_fonipa +scc_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +scr_Adlm_AC_fonipa ; hr_Adlm_AC_fonipa +sgn_Adlm_076_fonipa ; bzs_Adlm_fonipa +sgn_Adlm_170_fonipa ; csn_Adlm_fonipa +sgn_Adlm_208_fonipa ; dsl_Adlm_fonipa +sgn_Adlm_249_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_250_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_276_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_278_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_280_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_300_fonipa ; gss_Adlm_fonipa +sgn_Adlm_372_fonipa ; isg_Adlm_fonipa +sgn_Adlm_380_fonipa ; ise_Adlm_fonipa +sgn_Adlm_392_fonipa ; jsl_Adlm_fonipa +sgn_Adlm_484_fonipa ; mfs_Adlm_fonipa +sgn_Adlm_528_fonipa ; dse_Adlm_fonipa +sgn_Adlm_558_fonipa ; ncs_Adlm_fonipa +sgn_Adlm_578_fonipa ; nsi_Adlm_fonipa +sgn_Adlm_620_fonipa ; psr_Adlm_fonipa +sgn_Adlm_710_fonipa ; sfs_Adlm_fonipa +sgn_Adlm_752_fonipa ; swl_Adlm_fonipa +sgn_Adlm_826_fonipa ; bfi_Adlm_fonipa +sgn_Adlm_840_fonipa ; ase_Adlm_fonipa +sgn_Adlm_BR_fonipa ; bzs_Adlm_fonipa +sgn_Adlm_CO_fonipa ; csn_Adlm_fonipa +sgn_Adlm_DD_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_DE_fonipa ; gsg_Adlm_fonipa +sgn_Adlm_DK_fonipa ; dsl_Adlm_fonipa +sgn_Adlm_FR_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_FX_fonipa ; fsl_Adlm_fonipa +sgn_Adlm_GB_fonipa ; bfi_Adlm_fonipa +sgn_Adlm_GR_fonipa ; gss_Adlm_fonipa +sgn_Adlm_IE_fonipa ; isg_Adlm_fonipa +sgn_Adlm_IT_fonipa ; ise_Adlm_fonipa 
+sgn_Adlm_JP_fonipa ; jsl_Adlm_fonipa +sgn_Adlm_MX_fonipa ; mfs_Adlm_fonipa +sgn_Adlm_NI_fonipa ; ncs_Adlm_fonipa +sgn_Adlm_NL_fonipa ; dse_Adlm_fonipa +sgn_Adlm_NO_fonipa ; nsi_Adlm_fonipa +sgn_Adlm_PT_fonipa ; psr_Adlm_fonipa +sgn_Adlm_SE_fonipa ; swl_Adlm_fonipa +sgn_Adlm_UK_fonipa ; bfi_Adlm_fonipa +sgn_Adlm_US_fonipa ; ase_Adlm_fonipa +sgn_Adlm_ZA_fonipa ; sfs_Adlm_fonipa +sh_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +sin_Adlm_AC_fonipa ; si_Adlm_AC_fonipa +skk_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa +slk_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa +slo_Adlm_AC_fonipa ; sk_Adlm_AC_fonipa +slv_Adlm_AC_fonipa ; sl_Adlm_AC_fonipa +sme_Adlm_AC_fonipa ; se_Adlm_AC_fonipa +smo_Adlm_AC_fonipa ; sm_Adlm_AC_fonipa +sna_Adlm_AC_fonipa ; sn_Adlm_AC_fonipa +snd_Adlm_AC_fonipa ; sd_Adlm_AC_fonipa +som_Adlm_AC_fonipa ; so_Adlm_AC_fonipa +sot_Adlm_AC_fonipa ; st_Adlm_AC_fonipa +spa_Adlm_AC_fonipa ; es_Adlm_AC_fonipa +spy_Adlm_AC_fonipa ; kln_Adlm_AC_fonipa +sqi_Adlm_AC_fonipa ; sq_Adlm_AC_fonipa +src_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa +srd_Adlm_AC_fonipa ; sc_Adlm_AC_fonipa +srp_Adlm_AC_fonipa ; sr_Adlm_AC_fonipa +ssw_Adlm_AC_fonipa ; ss_Adlm_AC_fonipa +sun_Adlm_AC_fonipa ; su_Adlm_AC_fonipa +swa_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa +swc_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa +swe_Adlm_AC_fonipa ; sv_Adlm_AC_fonipa +swh_Adlm_AC_fonipa ; sw_Adlm_AC_fonipa +tah_Adlm_AC_fonipa ; ty_Adlm_AC_fonipa +tam_Adlm_AC_fonipa ; ta_Adlm_AC_fonipa +tat_Adlm_AC_fonipa ; tt_Adlm_AC_fonipa +tdu_Adlm_AC_fonipa ; dtp_Adlm_AC_fonipa +tel_Adlm_AC_fonipa ; te_Adlm_AC_fonipa +tgk_Adlm_AC_fonipa ; tg_Adlm_AC_fonipa +tgl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa +tha_Adlm_AC_fonipa ; th_Adlm_AC_fonipa +thc_Adlm_AC_fonipa ; tpo_Adlm_AC_fonipa +thx_Adlm_AC_fonipa ; oyb_Adlm_AC_fonipa +tib_Adlm_AC_fonipa ; bo_Adlm_AC_fonipa +tie_Adlm_AC_fonipa ; ras_Adlm_AC_fonipa +tir_Adlm_AC_fonipa ; ti_Adlm_AC_fonipa +tkk_Adlm_AC_fonipa ; twm_Adlm_AC_fonipa +tl_Adlm_AC_fonipa ; fil_Adlm_AC_fonipa +tlw_Adlm_AC_fonipa ; weo_Adlm_AC_fonipa 
+tmp_Adlm_AC_fonipa ; tyj_Adlm_AC_fonipa +tne_Adlm_AC_fonipa ; kak_Adlm_AC_fonipa +tnf_Adlm_AC_fonipa ; fa_Adlm_AC_fonipa +ton_Adlm_AC_fonipa ; to_Adlm_AC_fonipa +tsf_Adlm_AC_fonipa ; taj_Adlm_AC_fonipa +tsn_Adlm_AC_fonipa ; tn_Adlm_AC_fonipa +tso_Adlm_AC_fonipa ; ts_Adlm_AC_fonipa +ttq_Adlm_AC_fonipa ; tmh_Adlm_AC_fonipa +tuk_Adlm_AC_fonipa ; tk_Adlm_AC_fonipa +tur_Adlm_AC_fonipa ; tr_Adlm_AC_fonipa +tw_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +twi_Adlm_AC_fonipa ; ak_Adlm_AC_fonipa +uig_Adlm_AC_fonipa ; ug_Adlm_AC_fonipa +ukr_Adlm_AC_fonipa ; uk_Adlm_AC_fonipa +umu_Adlm_AC_fonipa ; del_Adlm_AC_fonipa +uok_Adlm_AC_fonipa ; ema_Adlm_AC_fonipa +urd_Adlm_AC_fonipa ; ur_Adlm_AC_fonipa +uzb_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa +uzn_Adlm_AC_fonipa ; uz_Adlm_AC_fonipa +ven_Adlm_AC_fonipa ; ve_Adlm_AC_fonipa +vie_Adlm_AC_fonipa ; vi_Adlm_AC_fonipa +vol_Adlm_AC_fonipa ; vo_Adlm_AC_fonipa +wel_Adlm_AC_fonipa ; cy_Adlm_AC_fonipa +wln_Adlm_AC_fonipa ; wa_Adlm_AC_fonipa +wol_Adlm_AC_fonipa ; wo_Adlm_AC_fonipa +xba_Adlm_AC_fonipa ; cax_Adlm_AC_fonipa +xho_Adlm_AC_fonipa ; xh_Adlm_AC_fonipa +xia_Adlm_AC_fonipa ; acn_Adlm_AC_fonipa +xkh_Adlm_AC_fonipa ; waw_Adlm_AC_fonipa +xpe_Adlm_AC_fonipa ; kpe_Adlm_AC_fonipa +xsj_Adlm_AC_fonipa ; suj_Adlm_AC_fonipa +xsl_Adlm_AC_fonipa ; den_Adlm_AC_fonipa +ybd_Adlm_AC_fonipa ; rki_Adlm_AC_fonipa +ydd_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa +yid_Adlm_AC_fonipa ; yi_Adlm_AC_fonipa +yma_Adlm_AC_fonipa ; lrr_Adlm_AC_fonipa +ymt_Adlm_AC_fonipa ; mtm_Adlm_AC_fonipa +yor_Adlm_AC_fonipa ; yo_Adlm_AC_fonipa +yos_Adlm_AC_fonipa ; zom_Adlm_AC_fonipa +yuu_Adlm_AC_fonipa ; yug_Adlm_AC_fonipa +zai_Adlm_AC_fonipa ; zap_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +zh_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +zha_Adlm_AC_fonipa ; za_Adlm_AC_fonipa +zho_Adlm_AC_fonipa ; zh_Adlm_AC_fonipa 
+zho_Adlm_AC_fonipa_guoyu ; zh_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_guoyu_hakka_xiang ; hak_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_hakka ; hak_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_hakka_xiang ; hak_Adlm_AC_fonipa +zho_Adlm_AC_fonipa_xiang ; hsn_Adlm_AC_fonipa +zsm_Adlm_AC_fonipa ; ms_Adlm_AC_fonipa +zul_Adlm_AC_fonipa ; zu_Adlm_AC_fonipa +zyb_Adlm_AC_fonipa ; za_Adlm_AC_fonipa diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java index 296d92dbb2b..42e771647ac 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java @@ -14,6 +14,8 @@ package com.ibm.icu.dev.test.util; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.io.BufferedReader; +import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -892,8 +894,8 @@ public class ULocaleTest extends TestFmwk { public void TestCanonicalization(){ final String[][]testCases = new String[][]{ { "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" }, - { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" }, - { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" }, + { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" }, + { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" }, { "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" }, { "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" }, { "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ }, @@ -956,14 +958,14 @@ public class ULocaleTest extends TestFmwk { { "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" }, { "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" }, { "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" }, - { 
"zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" }, + { "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" }, { "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" }, { "qz-qz@Euro", null, "qz_QZ_EURO" }, /* qz-qz uses private use iso codes */ { "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */ { "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */ { "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */ - { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */ - { "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */ + { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */ + { "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */ { "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */ { "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */ /* PRE_EURO and EURO conversions don't affect other keywords */ @@ -5175,21 +5177,18 @@ public class ULocaleTest extends TestFmwk { // also test with script, variants and extensions Assert.assertEquals("fa-Cyrl-AF-1009-u-ca-roc", canonicalTag("prs-Cyrl-1009-u-ca-roc")); - if (!logKnownIssue("21236", "skip some canonicalization tests until code fixed")) { - // language _ country -> language _ script _ country - Assert.assertEquals("pa-Guru-IN", canonicalTag("pa-IN")); - } + Assert.assertEquals("pa-IN", canonicalTag("pa-IN")); // also test with script Assert.assertEquals("pa-Latn-IN", canonicalTag("pa-Latn-IN")); - if (!logKnownIssue("21236", "skip some canonicalization tests until code fixed")) { - // also test with variants and extensions - Assert.assertEquals("pa-Guru-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi")); + // also test with variants and extensions + Assert.assertEquals("pa-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi")); - // language _ script _ country -> language _ country - Assert.assertEquals("ky-KG", canonicalTag("ky-Cyrl-KG")); - // also test with variants and extensions - 
Assert.assertEquals("ky-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc")); - } + Assert.assertEquals("ky-Cyrl-KG", canonicalTag("ky-Cyrl-KG")); + // also test with variants and extensions + Assert.assertEquals("ky-Cyrl-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc")); + + // Test replacement of scriptAlias + Assert.assertEquals("en-Zinh", canonicalTag("en-Qaai")); // Test replacement of territoryAlias // 554 has one replacement @@ -5209,5 +5208,35 @@ public class ULocaleTest extends TestFmwk { Assert.assertEquals("uz-Cyrl-UZ-5678-u-nu-latn", canonicalTag("uz-Cyrl-172-5678-u-nu-latn")); // a language not used in this region Assert.assertEquals("fr-RU", canonicalTag("fr-172")); + + Assert.assertEquals("ja-Latn-alalc97", canonicalTag("ja-Latn-hepburn-heploc")); + + Assert.assertEquals("aaa-Fooo-RU", canonicalTag("aaa-Fooo-SU")); + } + + @Test + public void TestLocaleCanonicalizationFromFile() throws IOException { + BufferedReader testFile = TestUtil.getDataReader("unicode/localeCanonicalization.txt"); + try { + String line; + while ((line = testFile.readLine()) != null) { + if (line.startsWith("#")) { + // ignore any lines start with # + continue; + } + String[] fields = line.split("\t;\t"); + if (fields.length != 2) { + // ignore any lines without TAB ; TAB + continue; + } + String from = fields[0].replace("_", "-"); + String to = fields[1].replace("_", "-"); + Assert.assertEquals("canonicalTag(" + from + ")", + to, canonicalTag(from)); + } + } finally { + testFile.close(); + } + } }