diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py index cb612b982..f8fb05f11 100755 --- a/src/gen-tag-table.py +++ b/src/gen-tag-table.py @@ -329,6 +329,10 @@ class OpenTypeRegistryParser (HTMLParser): from_bcp_47 (DefaultDict[str, AbstractSet[str]]): ``to_bcp_47`` inverted. Its values start as unsorted sets; ``sort_languages`` converts them to sorted lists. + from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]): + A copy of ``from_bcp_47``. It starts as ``None`` and is + populated at the beginning of the first call to + ``inherit_from_macrolanguages``. """ def __init__ (self): @@ -338,6 +342,7 @@ class OpenTypeRegistryParser (HTMLParser): self.ranks = collections.defaultdict (int) self.to_bcp_47 = collections.defaultdict (set) self.from_bcp_47 = collections.defaultdict (set) + self.from_bcp_47_uninherited = None # Whether the parser is in a element self._td = False # Whether the parser is after a
element within the current element @@ -462,30 +467,51 @@ class OpenTypeRegistryParser (HTMLParser): explicit mapping, so it inherits from sq (Albanian) the mapping to SQI. + However, if an OpenType tag maps to a BCP 47 macrolanguage and + some but not all of its individual languages, the mapping is not + inherited from the macrolanguage to the missing individual + languages. For example, INUK (Nunavik Inuktitut) is mapped to + ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to + ikt (Inuinnaqtun, which is an individual language of iu), so + this method does not add a mapping from ikt to INUK. + If a BCP 47 tag for a macrolanguage has no OpenType mapping but - all of its individual languages do and they all map to the same - tags, the mapping is copied to the macrolanguage. + some of its individual languages do, their mappings are copied + to the macrolanguage. """ global bcp_47 - original_ot_from_bcp_47 = dict (self.from_bcp_47) + first_time = self.from_bcp_47_uninherited is None + if first_time: + self.from_bcp_47_uninherited = dict (self.from_bcp_47) for macrolanguage, languages in dict (bcp_47.macrolanguages).items (): - ot_macrolanguages = set (original_ot_from_bcp_47.get (macrolanguage, set ())) + ot_macrolanguages = { + ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get (macrolanguage, set ()) + } + blocked_ot_macrolanguages = set () + if 'retired code' not in bcp_47.scopes.get (macrolanguage, ''): + for ot_macrolanguage in ot_macrolanguages: + round_trip_macrolanguages = { + l for l in self.to_bcp_47[ot_macrolanguage] + if 'retired code' not in bcp_47.scopes.get (l, '') + } + round_trip_languages = { + l for l in languages + if 'retired code' not in bcp_47.scopes.get (l, '') + } + intersection = round_trip_macrolanguages & round_trip_languages + if intersection and intersection != round_trip_languages: + blocked_ot_macrolanguages.add (ot_macrolanguage) if ot_macrolanguages: for ot_macrolanguage in ot_macrolanguages: - for language in languages: - self.add_language (language, ot_macrolanguage) - self.ranks[ot_macrolanguage] += 1 - else: + if ot_macrolanguage not in blocked_ot_macrolanguages: + for language in languages: + self.add_language (language, ot_macrolanguage) + if not blocked_ot_macrolanguages: + self.ranks[ot_macrolanguage] += 1 + elif first_time: for language in languages: - if language in original_ot_from_bcp_47: - if ot_macrolanguages: - ml = original_ot_from_bcp_47[language] - if ml: - ot_macrolanguages &= ml - else: - pass - else: - ot_macrolanguages |= original_ot_from_bcp_47[language] + if language in self.from_bcp_47_uninherited: + ot_macrolanguages |= self.from_bcp_47_uninherited[language] else: ot_macrolanguages.clear () if not ot_macrolanguages: @@ -570,7 +596,7 @@ class BCP47Parser (object): if scope == 'macrolanguage': scope = ' [macrolanguage]' elif scope == 'collection': - scope = ' [family]' + scope = ' [collection]' else: continue self.scopes[subtag] = scope @@ -715,6 +741,7 @@ ot.add_language ('no', 'NOR') ot.add_language ('oc-provenc', 'PRO') +ot.remove_language_ot ('QUZ') ot.add_language ('qu', 'QUZ') ot.add_language ('qub', 'QWH') ot.add_language ('qud', 'QVI') @@ -747,7 +774,6 @@ ot.add_language ('qxr', 'QVI') ot.add_language ('qxt', 'QWH') ot.add_language ('qxw', 'QWH') -bcp_47.macrolanguages['ro'].remove ('mo') bcp_47.macrolanguages['ro-MD'].add ('mo') ot.remove_language_ot ('SYRE') @@ -987,6 +1013,8 @@ for initial, items in sorted (complex_tags.items ()): if initial != 'und': continue for lt, tags in items: + if not tags: + continue if lt.variant in bcp_47.prefixes: expect (next (iter (bcp_47.prefixes[lt.variant])) == lt.language, '%s is not a valid prefix of %s' % (lt.language, lt.variant)) @@ -1021,6 +1049,8 @@ for initial, items in sorted (complex_tags.items ()): continue print (" case '%s':" % initial) for lt, tags in items: + if not tags: + continue print (' if (', end='') script = lt.script region = lt.region @@ -1121,9 +1151,13 @@ def verify_disambiguation_dict (): elif len (primary_tags) == 0: expect (ot_tag not in disambiguation, 'There is no possible valid disambiguation for %s' % ot_tag) else: - macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]') + original_languages = [t for t in primary_tags if t in ot.from_bcp_47_uninherited and 'retired code' not in bcp_47.scopes.get (t, '')] + if len (original_languages) == 1: + macrolanguages = original_languages + else: + macrolanguages = [t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]'] if len (macrolanguages) != 1: - macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [family]') + macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [collection]') if len (macrolanguages) != 1: macrolanguages = list (t for t in primary_tags if 'retired code' not in bcp_47.scopes.get (t, '')) if len (macrolanguages) != 1: diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh index 2c6316df4..61d2814e9 100644 --- a/src/hb-ot-tag-table.hh +++ b/src/hb-ot-tag-table.hh @@ -6,8 +6,8 @@ * * on files with these headers: * - * - * File-Date: 2021-08-06 + * + * File-Date: 2021-12-29 */ #ifndef HB_OT_TAG_TABLE_HH @@ -66,7 +66,7 @@ static const LangTag ot_languages[] = { {"an", HB_TAG('A','R','G',' ')}, /* Aragonese */ /*{"ang", HB_TAG('A','N','G',' ')},*/ /* Old English (ca. 450-1100) -> Anglo-Saxon */ {"aoa", HB_TAG('C','P','P',' ')}, /* Angolar -> Creoles */ - {"apa", HB_TAG('A','T','H',' ')}, /* Apache [family] -> Athapaskan */ + {"apa", HB_TAG('A','T','H',' ')}, /* Apache [collection] -> Athapaskan */ {"apc", HB_TAG('A','R','A',' ')}, /* North Levantine Arabic -> Arabic */ {"apd", HB_TAG('A','R','A',' ')}, /* Sudanese Arabic -> Arabic */ {"apj", HB_TAG('A','T','H',' ')}, /* Jicarilla Apache -> Athapaskan */ @@ -86,7 +86,7 @@ static const LangTag ot_languages[] = { {"arz", HB_TAG('A','R','A',' ')}, /* Egyptian Arabic -> Arabic */ {"as", HB_TAG('A','S','M',' ')}, /* Assamese */ /*{"ast", HB_TAG('A','S','T',' ')},*/ /* Asturian */ -/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [family] -> Athapaskan */ +/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [collection] -> Athapaskan */ {"atj", HB_TAG('R','C','R',' ')}, /* Atikamekw -> R-Cree */ {"atv", HB_TAG('A','L','T',' ')}, /* Northern Altai -> Altai */ {"auj", HB_TAG('B','B','R',' ')}, /* Awjilah -> Berber */ @@ -110,10 +110,10 @@ static const LangTag ot_languages[] = { {"azn", HB_TAG('N','A','H',' ')}, /* Western Durango Nahuatl -> Nahuatl */ {"azz", HB_TAG('N','A','H',' ')}, /* Highland Puebla Nahuatl -> Nahuatl */ {"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */ - {"bad", HB_TAG('B','A','D','0')}, /* Banda [family] */ + {"bad", HB_TAG('B','A','D','0')}, /* Banda [collection] */ {"bag", HB_TAG_NONE }, /* Tuki != Baghelkhandi */ {"bah", HB_TAG('C','P','P',' ')}, /* Bahamas Creole English -> Creoles */ - {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */ + {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [collection] */ {"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolanguage] */ /*{"ban", HB_TAG('B','A','N',' ')},*/ /* Balinese */ /*{"bar", HB_TAG('B','A','R',' ')},*/ /* Bavarian */ @@ -135,7 +135,7 @@ static const LangTag ot_languages[] = { {"bea", HB_TAG('A','T','H',' ')}, /* Beaver -> Athapaskan */ {"beb", HB_TAG('B','T','I',' ')}, /* Bebele -> Beti */ /*{"bem", HB_TAG('B','E','M',' ')},*/ /* Bemba (Zambia) */ - {"ber", HB_TAG('B','B','R',' ')}, /* Berber [family] */ + {"ber", HB_TAG('B','B','R',' ')}, /* Berber [collection] */ {"bew", HB_TAG('C','P','P',' ')}, /* Betawi -> Creoles */ {"bfl", HB_TAG('B','A','D','0')}, /* Banda-Ndélé -> Banda */ {"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */ @@ -203,7 +203,7 @@ static const LangTag ot_languages[] = { {"btd", HB_TAG('B','T','K',' ')}, /* Batak Dairi -> Batak */ {"bti", HB_TAG_NONE }, /* Burate != Beti */ {"btj", HB_TAG('M','L','Y',' ')}, /* Bacanese Malay -> Malay */ -/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [family] */ +/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [collection] */ {"btm", HB_TAG('B','T','M',' ')}, /* Batak Mandailing */ {"btm", HB_TAG('B','T','K',' ')}, /* Batak Mandailing -> Batak */ {"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol -> Bikol */ @@ -256,6 +256,8 @@ static const LangTag ot_languages[] = { {"chh", HB_TAG_NONE }, /* Chinook != Chattisgarhi */ {"chj", HB_TAG('C','C','H','N')}, /* Ojitlán Chinantec -> Chinantec */ {"chk", HB_TAG('C','H','K','0')}, /* Chuukese */ + {"chm", HB_TAG('H','M','A',' ')}, /* Mari (Russia) [macrolanguage] -> High Mari */ + {"chm", HB_TAG('L','M','A',' ')}, /* Mari (Russia) [macrolanguage] -> Low Mari */ {"chn", HB_TAG('C','P','P',' ')}, /* Chinook jargon -> Creoles */ /*{"cho", HB_TAG('C','H','O',' ')},*/ /* Choctaw */ {"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */ @@ -297,10 +299,10 @@ static const LangTag ot_languages[] = { /*{"cop", HB_TAG('C','O','P',' ')},*/ /* Coptic */ {"coq", HB_TAG('A','T','H',' ')}, /* Coquille -> Athapaskan */ {"cpa", HB_TAG('C','C','H','N')}, /* Palantla Chinantec -> Chinantec */ - {"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [family] -> Creoles */ - {"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [family] -> Creoles */ + {"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [collection] -> Creoles */ + {"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [collection] -> Creoles */ {"cpi", HB_TAG('C','P','P',' ')}, /* Chinese Pidgin English -> Creoles */ -/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [family] -> Creoles */ +/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [collection] -> Creoles */ {"cpx", HB_TAG('Z','H','S',' ')}, /* Pu-Xian Chinese -> Chinese, Simplified */ {"cqd", HB_TAG('H','M','N',' ')}, /* Chuanqiandian Cluster Miao -> Hmong */ {"cqu", HB_TAG('Q','U','H',' ')}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */ @@ -320,7 +322,7 @@ static const LangTag ot_languages[] = { {"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */ {"crm", HB_TAG('L','C','R',' ')}, /* Moose Cree -> L-Cree */ {"crm", HB_TAG('C','R','E',' ')}, /* Moose Cree -> Cree */ - {"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [family] -> Creoles */ + {"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [collection] -> Creoles */ {"crr", HB_TAG_NONE }, /* Carolina Algonquian != Carrier */ {"crs", HB_TAG('C','P','P',' ')}, /* Seselwa Creole French -> Creoles */ {"crt", HB_TAG_NONE }, /* Iyojwa'ja Chorote != Crimean Tatar */ @@ -431,7 +433,7 @@ static const LangTag ot_languages[] = { {"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */ {"eto", HB_TAG('B','T','I',' ')}, /* Eton (Cameroon) -> Beti */ {"eu", HB_TAG('E','U','Q',' ')}, /* Basque */ - {"euq", HB_TAG_NONE }, /* Basque [family] != Basque */ + {"euq", HB_TAG_NONE }, /* Basque [collection] != Basque */ {"eve", HB_TAG('E','V','N',' ')}, /* Even */ {"evn", HB_TAG('E','V','K',' ')}, /* Evenki */ {"ewo", HB_TAG('B','T','I',' ')}, /* Ewondo -> Beti */ @@ -620,10 +622,11 @@ static const LangTag ot_languages[] = { {"ijc", HB_TAG('I','J','O',' ')}, /* Izon -> Ijo */ {"ije", HB_TAG('I','J','O',' ')}, /* Biseni -> Ijo */ {"ijn", HB_TAG('I','J','O',' ')}, /* Kalabari -> Ijo */ -/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [family] */ +/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [collection] */ {"ijs", HB_TAG('I','J','O',' ')}, /* Southeast Ijo -> Ijo */ {"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] -> Inupiat */ {"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */ + {"ike", HB_TAG('I','N','U','K')}, /* Eastern Canadian Inuktitut -> Nunavik Inuktitut */ {"ikt", HB_TAG('I','N','U',' ')}, /* Inuinnaqtun -> Inuktitut */ /*{"ilo", HB_TAG('I','L','O',' ')},*/ /* Iloko -> Ilokano */ {"in", HB_TAG('I','N','D',' ')}, /* Indonesian (retired code) */ @@ -638,6 +641,7 @@ static const LangTag ot_languages[] = { {"it", HB_TAG('I','T','A',' ')}, /* Italian */ {"itz", HB_TAG('M','Y','N',' ')}, /* Itzá -> Mayan */ {"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut [macrolanguage] */ + {"iu", HB_TAG('I','N','U','K')}, /* Inuktitut [macrolanguage] -> Nunavik Inuktitut */ {"iw", HB_TAG('I','W','R',' ')}, /* Hebrew (retired code) */ {"ixl", HB_TAG('M','Y','N',' ')}, /* Ixil -> Mayan */ {"ja", HB_TAG('J','A','N',' ')}, /* Japanese */ @@ -667,7 +671,7 @@ static const LangTag ot_languages[] = { {"kab", HB_TAG('B','B','R',' ')}, /* Kabyle -> Berber */ {"kac", HB_TAG_NONE }, /* Kachin != Kachchi */ {"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */ - {"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */ + {"kar", HB_TAG('K','R','N',' ')}, /* Karen [collection] */ /*{"kaw", HB_TAG('K','A','W',' ')},*/ /* Kawi (Old Javanese) */ {"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */ {"kby", HB_TAG('K','N','R',' ')}, /* Manga Kanuri -> Kanuri */ @@ -876,7 +880,7 @@ static const LangTag ot_languages[] = { {"mam", HB_TAG('M','A','M',' ')}, /* Mam */ {"mam", HB_TAG('M','Y','N',' ')}, /* Mam -> Mayan */ {"man", HB_TAG('M','N','K',' ')}, /* Mandingo [macrolanguage] -> Maninka */ - {"map", HB_TAG_NONE }, /* Austronesian [family] != Mapudungun */ + {"map", HB_TAG_NONE }, /* Austronesian [collection] != Mapudungun */ {"maw", HB_TAG_NONE }, /* Mampruli != Marwari */ {"max", HB_TAG('M','L','Y',' ')}, /* North Moluccan Malay -> Malay */ {"max", HB_TAG('C','P','P',' ')}, /* North Moluccan Malay -> Creoles */ @@ -936,6 +940,7 @@ static const LangTag ot_languages[] = { {"mnw", HB_TAG('M','O','N','T')}, /* Mon -> Thailand Mon */ {"mnx", HB_TAG_NONE }, /* Manikion != Manx */ {"mo", HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */ + {"mo", HB_TAG('R','O','M',' ')}, /* Moldavian (retired code) -> Romanian */ {"mod", HB_TAG('C','P','P',' ')}, /* Mobilian -> Creoles */ /*{"moh", HB_TAG('M','O','H',' ')},*/ /* Mohawk */ {"mok", HB_TAG_NONE }, /* Morori != Moksha */ @@ -958,7 +963,7 @@ static const LangTag ot_languages[] = { {"mts", HB_TAG_NONE }, /* Yora != Maltese */ {"mud", HB_TAG('C','P','P',' ')}, /* Mednyj Aleut -> Creoles */ {"mui", HB_TAG('M','L','Y',' ')}, /* Musi -> Malay */ - {"mun", HB_TAG_NONE }, /* Munda [family] != Mundari */ + {"mun", HB_TAG_NONE }, /* Munda [collection] != Mundari */ {"mup", HB_TAG('R','A','J',' ')}, /* Malvi -> Rajasthani */ {"muq", HB_TAG('H','M','N',' ')}, /* Eastern Xiangxi Miao -> Hmong */ /*{"mus", HB_TAG('M','U','S',' ')},*/ /* Creek -> Muscogee */ @@ -973,7 +978,7 @@ static const LangTag ot_languages[] = { {"mww", HB_TAG('H','M','N',' ')}, /* Hmong Daw -> Hmong */ {"my", HB_TAG('B','R','M',' ')}, /* Burmese */ {"mym", HB_TAG('M','E','N',' ')}, /* Me’en */ -/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [family] */ +/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [collection] */ {"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) -> Maninka */ {"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */ {"mzb", HB_TAG('B','B','R',' ')}, /* Tumzabt -> Berber */ @@ -982,7 +987,7 @@ static const LangTag ot_languages[] = { {"na", HB_TAG('N','A','U',' ')}, /* Nauru -> Nauruan */ {"nag", HB_TAG('N','A','G',' ')}, /* Naga Pidgin -> Naga-Assamese */ {"nag", HB_TAG('C','P','P',' ')}, /* Naga Pidgin -> Creoles */ -/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [family] */ +/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [collection] */ {"nan", HB_TAG('Z','H','S',' ')}, /* Min Nan Chinese -> Chinese, Simplified */ /*{"nap", HB_TAG('N','A','P',' ')},*/ /* Neapolitan */ {"nas", HB_TAG_NONE }, /* Naasioi != Naskapi */ @@ -1039,7 +1044,6 @@ static const LangTag ot_languages[] = { {"nln", HB_TAG('N','A','H',' ')}, /* Durango Nahuatl (retired code) -> Nahuatl */ {"nlv", HB_TAG('N','A','H',' ')}, /* Orizaba Nahuatl -> Nahuatl */ {"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */ - {"nn", HB_TAG('N','O','R',' ')}, /* Norwegian Nynorsk -> Norwegian */ {"nnh", HB_TAG('B','M','L',' ')}, /* Ngiemboon -> Bamileke */ {"nnz", HB_TAG('B','M','L',' ')}, /* Nda'nda' -> Bamileke */ {"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */ @@ -1093,7 +1097,7 @@ static const LangTag ot_languages[] = { {"otw", HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */ {"oua", HB_TAG('B','B','R',' ')}, /* Tagargrent -> Berber */ {"pa", HB_TAG('P','A','N',' ')}, /* Punjabi */ - {"paa", HB_TAG_NONE }, /* Papuan [family] != Palestinian Aramaic */ + {"paa", HB_TAG_NONE }, /* Papuan [collection] != Palestinian Aramaic */ /*{"pag", HB_TAG('P','A','G',' ')},*/ /* Pangasinan */ {"pal", HB_TAG_NONE }, /* Pahlavi != Pali */ /*{"pam", HB_TAG('P','A','M',' ')},*/ /* Pampanga -> Pampangan */ @@ -1308,6 +1312,9 @@ static const LangTag ot_languages[] = { {"sgo", HB_TAG_NONE }, /* Songa (retired code) != Sango */ /*{"sgs", HB_TAG('S','G','S',' ')},*/ /* Samogitian */ {"sgw", HB_TAG('C','H','G',' ')}, /* Sebat Bet Gurage -> Chaha Gurage */ + {"sh", HB_TAG('B','O','S',' ')}, /* Serbo-Croatian [macrolanguage] -> Bosnian */ + {"sh", HB_TAG('H','R','V',' ')}, /* Serbo-Croatian [macrolanguage] -> Croatian */ + {"sh", HB_TAG('S','R','B',' ')}, /* Serbo-Croatian [macrolanguage] -> Serbian */ {"shi", HB_TAG('S','H','I',' ')}, /* Tachelhit */ {"shi", HB_TAG('B','B','R',' ')}, /* Tachelhit -> Berber */ {"shl", HB_TAG('Q','I','N',' ')}, /* Shendu -> Chin */ @@ -1329,7 +1336,7 @@ static const LangTag ot_languages[] = { {"skw", HB_TAG('C','P','P',' ')}, /* Skepi Creole Dutch -> Creoles */ {"sky", HB_TAG_NONE }, /* Sikaiana != Slovak */ {"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */ - {"sla", HB_TAG_NONE }, /* Slavic [family] != Slavey */ + {"sla", HB_TAG_NONE }, /* Slavic [collection] != Slavey */ {"sm", HB_TAG('S','M','O',' ')}, /* Samoan */ {"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */ {"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */ @@ -1451,7 +1458,7 @@ static const LangTag ot_languages[] = { {"tpi", HB_TAG('C','P','P',' ')}, /* Tok Pisin -> Creoles */ {"tr", HB_TAG('T','R','K',' ')}, /* Turkish */ {"trf", HB_TAG('C','P','P',' ')}, /* Trinidadian Creole English -> Creoles */ - {"trk", HB_TAG_NONE }, /* Turkic [family] != Turkish */ + {"trk", HB_TAG_NONE }, /* Turkic [collection] != Turkish */ {"tru", HB_TAG('T','U','A',' ')}, /* Turoyo -> Turoyo Aramaic */ {"tru", HB_TAG('S','Y','R',' ')}, /* Turoyo -> Syriac */ {"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */ @@ -1593,7 +1600,7 @@ static const LangTag ot_languages[] = { {"zlq", HB_TAG('Z','H','A',' ')}, /* Liuqian Zhuang -> Zhuang */ {"zmi", HB_TAG('M','L','Y',' ')}, /* Negeri Sembilan Malay -> Malay */ {"zmz", HB_TAG('B','A','D','0')}, /* Mbandja -> Banda */ - {"znd", HB_TAG_NONE }, /* Zande [family] != Zande */ + {"znd", HB_TAG_NONE }, /* Zande [collection] != Zande */ {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ {"zom", HB_TAG('Q','I','N',' ')}, /* Zou -> Chin */ {"zqe", HB_TAG('Z','H','A',' ')}, /* Qiubei Zhuang -> Zhuang */ @@ -2607,14 +2614,8 @@ hb_ot_tags_from_complex_language (const char *lang_str, if (0 == strcmp (&lang_str[1], "o-nyn")) { /* Norwegian Nynorsk (retired code) */ - unsigned int i; - hb_tag_t possible_tags[] = { - HB_TAG('N','Y','N',' '), /* Norwegian Nynorsk (Nynorsk, Norwegian) */ - HB_TAG('N','O','R',' '), /* Norwegian */ - }; - for (i = 0; i < 2 && i < *count; i++) - tags[i] = possible_tags[i]; - *count = i; + tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */ + *count = 1; return true; } break; @@ -2623,8 +2624,14 @@ hb_ot_tags_from_complex_language (const char *lang_str, && subtag_matches (lang_str, limit, "-md")) { /* Romanian; Moldova */ - tags[0] = HB_TAG('M','O','L',' '); /* Moldavian */ - *count = 1; + unsigned int i; + hb_tag_t possible_tags[] = { + HB_TAG('M','O','L',' '), /* Moldavian */ + HB_TAG('R','O','M',' '), /* Romanian */ + }; + for (i = 0; i < 2 && i < *count; i++) + tags[i] = possible_tags[i]; + *count = i; return true; } break; @@ -2813,15 +2820,15 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('A','R','K',' '): /* Rakhine */ return hb_language_from_string ("rki", -1); /* Rakhine */ case HB_TAG('A','T','H',' '): /* Athapaskan */ - return hb_language_from_string ("ath", -1); /* Athapascan [family] */ + return hb_language_from_string ("ath", -1); /* Athapascan [collection] */ case HB_TAG('B','B','R',' '): /* Berber */ - return hb_language_from_string ("ber", -1); /* Berber [family] */ + return hb_language_from_string ("ber", -1); /* Berber [collection] */ case HB_TAG('B','I','K',' '): /* Bikol */ return hb_language_from_string ("bik", -1); /* Bikol [macrolanguage] */ case HB_TAG('B','T','K',' '): /* Batak */ - return hb_language_from_string ("btk", -1); /* Batak [family] */ + return hb_language_from_string ("btk", -1); /* Batak [collection] */ case HB_TAG('C','P','P',' '): /* Creoles */ - return hb_language_from_string ("crp", -1); /* Creoles and pidgins [family] */ + return hb_language_from_string ("crp", -1); /* Creoles and pidgins [collection] */ case HB_TAG('C','R','R',' '): /* Carrier */ return hb_language_from_string ("crx", -1); /* Carrier */ case HB_TAG('D','G','R',' '): /* Dogri (macrolanguage) */ @@ -2838,6 +2845,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) return hb_language_from_string ("fa", -1); /* Persian [macrolanguage] */ case HB_TAG('G','O','N',' '): /* Gondi */ return hb_language_from_string ("gon", -1); /* Gondi [macrolanguage] */ + case HB_TAG('H','M','A',' '): /* High Mari */ + return hb_language_from_string ("mrj", -1); /* Western Mari */ case HB_TAG('H','M','N',' '): /* Hmong */ return hb_language_from_string ("hmn", -1); /* Hmong [macrolanguage] */ case HB_TAG('H','N','D',' '): /* Hindko */ @@ -2847,7 +2856,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('I','B','A',' '): /* Iban */ return hb_language_from_string ("iba", -1); /* Iban */ case HB_TAG('I','J','O',' '): /* Ijo */ - return hb_language_from_string ("ijo", -1); /* Ijo [family] */ + return hb_language_from_string ("ijo", -1); /* Ijo [collection] */ case HB_TAG('I','N','U',' '): /* Inuktitut */ return hb_language_from_string ("iu", -1); /* Inuktitut [macrolanguage] */ case HB_TAG('I','P','K',' '): /* Inupiat */ @@ -2873,11 +2882,13 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('K','P','L',' '): /* Kpelle */ return hb_language_from_string ("kpe", -1); /* Kpelle [macrolanguage] */ case HB_TAG('K','R','N',' '): /* Karen */ - return hb_language_from_string ("kar", -1); /* Karen [family] */ + return hb_language_from_string ("kar", -1); /* Karen [collection] */ case HB_TAG('K','U','I',' '): /* Kui */ return hb_language_from_string ("uki", -1); /* Kui (India) */ case HB_TAG('K','U','R',' '): /* Kurdish */ return hb_language_from_string ("ku", -1); /* Kurdish [macrolanguage] */ + case HB_TAG('L','M','A',' '): /* Low Mari */ + return hb_language_from_string ("mhr", -1); /* Eastern Mari */ case HB_TAG('L','U','H',' '): /* Luyia */ return hb_language_from_string ("luy", -1); /* Luyia [macrolanguage] */ case HB_TAG('L','V','I',' '): /* Latvian */ @@ -2897,9 +2908,9 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('M','O','N','T'): /* Thailand Mon */ return hb_language_from_string ("mnw-TH", -1); /* Mon; Thailand */ case HB_TAG('M','Y','N',' '): /* Mayan */ - return hb_language_from_string ("myn", -1); /* Mayan [family] */ + return hb_language_from_string ("myn", -1); /* Mayan [collection] */ case HB_TAG('N','A','H',' '): /* Nahuatl */ - return hb_language_from_string ("nah", -1); /* Nahuatl [family] */ + return hb_language_from_string ("nah", -1); /* Nahuatl [collection] */ case HB_TAG('N','E','P',' '): /* Nepali */ return hb_language_from_string ("ne", -1); /* Nepali [macrolanguage] */ case HB_TAG('N','I','S',' '): /* Nisi */ @@ -2926,6 +2937,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) return hb_language_from_string ("qwh", -1); /* Huaylas Ancash Quechua */ case HB_TAG('R','A','J',' '): /* Rajasthani */ return hb_language_from_string ("raj", -1); /* Rajasthani [macrolanguage] */ + case HB_TAG('R','O','M',' '): /* Romanian */ + return hb_language_from_string ("ro", -1); /* Romanian */ case HB_TAG('R','O','Y',' '): /* Romany */ return hb_language_from_string ("rom", -1); /* Romany [macrolanguage] */ case HB_TAG('S','Q','I',' '): /* Albanian */