Update IANA Language Subtag Registry to 2024-03-07

This commit is contained in:
David Corbett 2024-03-07 17:30:56 -05:00 committed by Behdad Esfahbod
parent d6431dc7e6
commit 0692d23ccc
2 changed files with 17 additions and 13 deletions

View file

@ -584,7 +584,7 @@ class BCP47Parser (object):
self.grandfathered.add (subtag.lower ())
elif line.startswith ('Description: '):
description = line.split (' ', 1)[1].replace (' (individual language)', '')
description = re.sub (' (\(family\)|\((individual |macro)language\)|languages)$', '',
description = re.sub (r' (\(family\)|\((individual |macro)language\)|languages)$', '',
description)
if subtag in self.names:
self.names[subtag] += '\n' + description

View file

@ -6,8 +6,8 @@
*
* on files with these headers:
*
* <meta name="updated_at" content="2022-09-30 11:47 PM" />
* File-Date: 2023-08-02
* <meta name="updated_at" content="2023-09-30 01:21 AM" />
* File-Date: 2024-03-07
*/
#ifndef HB_OT_TAG_TABLE_HH
@ -31,7 +31,7 @@ static const LangTag ot_languages2[] = {
{HB_TAG('b','i',' ',' '), HB_TAG('B','I','S',' ')}, /* Bislama */
{HB_TAG('b','i',' ',' '), HB_TAG('C','P','P',' ')}, /* Bislama -> Creoles */
{HB_TAG('b','m',' ',' '), HB_TAG('B','M','B',' ')}, /* Bambara (Bamanankan) */
{HB_TAG('b','n',' ',' '), HB_TAG('B','E','N',' ')}, /* Bengali */
{HB_TAG('b','n',' ',' '), HB_TAG('B','E','N',' ')}, /* Bangla */
{HB_TAG('b','o',' ',' '), HB_TAG('T','I','B',' ')}, /* Tibetan */
{HB_TAG('b','r',' ',' '), HB_TAG('B','R','E',' ')}, /* Breton */
{HB_TAG('b','s',' ',' '), HB_TAG('B','O','S',' ')}, /* Bosnian */
@ -64,7 +64,7 @@ static const LangTag ot_languages2[] = {
{HB_TAG('f','r',' ',' '), HB_TAG('F','R','A',' ')}, /* French */
{HB_TAG('f','y',' ',' '), HB_TAG('F','R','I',' ')}, /* Western Frisian -> Frisian */
{HB_TAG('g','a',' ',' '), HB_TAG('I','R','I',' ')}, /* Irish */
{HB_TAG('g','d',' ',' '), HB_TAG('G','A','E',' ')}, /* Scottish Gaelic (Gaelic) */
{HB_TAG('g','d',' ',' '), HB_TAG('G','A','E',' ')}, /* Scottish Gaelic */
{HB_TAG('g','l',' ',' '), HB_TAG('G','A','L',' ')}, /* Galician */
{HB_TAG('g','n',' ',' '), HB_TAG('G','U','A',' ')}, /* Guarani [macrolanguage] */
{HB_TAG('g','u',' ',' '), HB_TAG('G','U','J',' ')}, /* Gujarati */
@ -132,7 +132,7 @@ static const LangTag ot_languages2[] = {
{HB_TAG('m','l',' ',' '), HB_TAG('M','A','L',' ')}, /* Malayalam -> Malayalam Traditional */
{HB_TAG('m','l',' ',' '), HB_TAG('M','L','R',' ')}, /* Malayalam -> Malayalam Reformed */
{HB_TAG('m','n',' ',' '), HB_TAG('M','N','G',' ')}, /* Mongolian [macrolanguage] */
{HB_TAG('m','o',' ',' '), HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */
{HB_TAG('m','o',' ',' '), HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) -> Romanian (Moldova) */
{HB_TAG('m','o',' ',' '), HB_TAG('R','O','M',' ')}, /* Moldavian (retired code) -> Romanian */
{HB_TAG('m','r',' ',' '), HB_TAG('M','A','R',' ')}, /* Marathi */
{HB_TAG('m','s',' ',' '), HB_TAG('M','L','Y',' ')}, /* Malay [macrolanguage] */
@ -153,7 +153,7 @@ static const LangTag ot_languages2[] = {
{HB_TAG('o','c',' ',' '), HB_TAG('O','C','I',' ')}, /* Occitan (post 1500) */
{HB_TAG('o','j',' ',' '), HB_TAG('O','J','B',' ')}, /* Ojibwa [macrolanguage] -> Ojibway */
{HB_TAG('o','m',' ',' '), HB_TAG('O','R','O',' ')}, /* Oromo [macrolanguage] */
{HB_TAG('o','r',' ',' '), HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) [macrolanguage] */
{HB_TAG('o','r',' ',' '), HB_TAG('O','R','I',' ')}, /* Odia [macrolanguage] */
{HB_TAG('o','s',' ',' '), HB_TAG('O','S','S',' ')}, /* Ossetian */
{HB_TAG('p','a',' ',' '), HB_TAG('P','A','N',' ')}, /* Punjabi */
{HB_TAG('p','i',' ',' '), HB_TAG('P','A','L',' ')}, /* Pali */
@ -166,7 +166,7 @@ static const LangTag ot_languages2[] = {
{HB_TAG('r','o',' ',' '), HB_TAG('R','O','M',' ')}, /* Romanian */
{HB_TAG('r','u',' ',' '), HB_TAG('R','U','S',' ')}, /* Russian */
{HB_TAG('r','w',' ',' '), HB_TAG('R','U','A',' ')}, /* Kinyarwanda */
{HB_TAG('s','a',' ',' '), HB_TAG('S','A','N',' ')}, /* Sanskrit */
{HB_TAG('s','a',' ',' '), HB_TAG('S','A','N',' ')}, /* Sanskrit [macrolanguage] */
{HB_TAG('s','c',' ',' '), HB_TAG('S','R','D',' ')}, /* Sardinian [macrolanguage] */
{HB_TAG('s','d',' ',' '), HB_TAG('S','N','D',' ')}, /* Sindhi */
{HB_TAG('s','e',' ',' '), HB_TAG('N','S','M',' ')}, /* Northern Sami */
@ -465,6 +465,7 @@ static const LangTag ot_languages3[] = {
{HB_TAG('c','l','d',' '), HB_TAG('S','Y','R',' ')}, /* Chaldean Neo-Aramaic -> Syriac */
{HB_TAG('c','l','e',' '), HB_TAG('C','C','H','N')}, /* Lealao Chinantec -> Chinantec */
{HB_TAG('c','l','j',' '), HB_TAG('Q','I','N',' ')}, /* Laitu Chin -> Chin */
{HB_TAG('c','l','s',' '), HB_TAG('S','A','N',' ')}, /* Classical Sanskrit -> Sanskrit */
{HB_TAG('c','l','t',' '), HB_TAG('Q','I','N',' ')}, /* Lautu Chin -> Chin */
{HB_TAG('c','m','n',' '), HB_TAG('Z','H','S',' ')}, /* Mandarin Chinese -> Chinese, Simplified */
{HB_TAG('c','m','r',' '), HB_TAG('Q','I','N',' ')}, /* Mro-Khimi Chin -> Chin */
@ -637,7 +638,7 @@ static const LangTag ot_languages3[] = {
{HB_TAG('g','a','a',' '), HB_TAG('G','A','D',' ')}, /* Ga */
{HB_TAG('g','a','c',' '), HB_TAG('C','P','P',' ')}, /* Mixed Great Andamanese -> Creoles */
{HB_TAG('g','a','d',' '), HB_TAG_NONE }, /* Gaddang != Ga */
{HB_TAG('g','a','e',' '), HB_TAG_NONE }, /* Guarequena != Scottish Gaelic (Gaelic) */
{HB_TAG('g','a','e',' '), HB_TAG_NONE }, /* Guarequena != Scottish Gaelic */
/*{HB_TAG('g','a','g',' '), HB_TAG('G','A','G',' ')},*/ /* Gagauz */
{HB_TAG('g','a','l',' '), HB_TAG_NONE }, /* Galolen != Galician */
{HB_TAG('g','a','n',' '), HB_TAG('Z','H','S',' ')}, /* Gan Chinese -> Chinese, Simplified */
@ -1160,7 +1161,7 @@ static const LangTag ot_languages3[] = {
{HB_TAG('o','r','o',' '), HB_TAG_NONE }, /* Orokolo != Oromo */
{HB_TAG('o','r','r',' '), HB_TAG('I','J','O',' ')}, /* Oruma -> Ijo */
{HB_TAG('o','r','s',' '), HB_TAG('M','L','Y',' ')}, /* Orang Seletar -> Malay */
{HB_TAG('o','r','y',' '), HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) */
{HB_TAG('o','r','y',' '), HB_TAG('O','R','I',' ')}, /* Odia */
{HB_TAG('o','t','w',' '), HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */
{HB_TAG('o','u','a',' '), HB_TAG('B','B','R',' ')}, /* Tagargrent -> Berber */
{HB_TAG('p','a','a',' '), HB_TAG_NONE }, /* Papuan [collection] != Palestinian Aramaic */
@ -1395,7 +1396,7 @@ static const LangTag ot_languages3[] = {
/*{HB_TAG('s','n','k',' '), HB_TAG('S','N','K',' ')},*/ /* Soninke */
{HB_TAG('s','o','g',' '), HB_TAG_NONE }, /* Sogdian != Sodo Gurage */
/*{HB_TAG('s','o','p',' '), HB_TAG('S','O','P',' ')},*/ /* Songe */
{HB_TAG('s','p','v',' '), HB_TAG('O','R','I',' ')}, /* Sambalpuri -> Odia (formerly Oriya) */
{HB_TAG('s','p','v',' '), HB_TAG('O','R','I',' ')}, /* Sambalpuri -> Odia */
{HB_TAG('s','p','y',' '), HB_TAG('K','A','L',' ')}, /* Sabaot -> Kalenjin */
{HB_TAG('s','r','b',' '), HB_TAG_NONE }, /* Sora != Serbian */
{HB_TAG('s','r','c',' '), HB_TAG('S','R','D',' ')}, /* Logudorese Sardinian -> Sardinian */
@ -1533,6 +1534,7 @@ static const LangTag ot_languages3[] = {
{HB_TAG('v','l','s',' '), HB_TAG('F','L','E',' ')}, /* Vlaams -> Dutch (Flemish) */
{HB_TAG('v','m','w',' '), HB_TAG('M','A','K',' ')}, /* Makhuwa */
/*{HB_TAG('v','r','o',' '), HB_TAG('V','R','O',' ')},*/ /* Võro */
{HB_TAG('v','s','n',' '), HB_TAG('S','A','N',' ')}, /* Vedic Sanskrit -> Sanskrit */
{HB_TAG('w','a','g',' '), HB_TAG_NONE }, /* Wa'ema != Wagdi */
/*{HB_TAG('w','a','r',' '), HB_TAG('W','A','R',' ')},*/ /* Waray (Philippines) -> Waray-Waray */
{HB_TAG('w','b','m',' '), HB_TAG('W','A',' ',' ')}, /* Wa */
@ -2643,7 +2645,7 @@ out:
/* Romanian; Moldova */
unsigned int i;
hb_tag_t possible_tags[] = {
HB_TAG('M','O','L',' '), /* Moldavian */
HB_TAG('M','O','L',' '), /* Romanian (Moldova) */
HB_TAG('R','O','M',' '), /* Romanian */
};
for (i = 0; i < 2 && i < *count; i++)
@ -2920,7 +2922,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
return hb_language_from_string ("mn", -1); /* Mongolian [macrolanguage] */
case HB_TAG('M','N','K',' '): /* Maninka */
return hb_language_from_string ("man", -1); /* Mandingo [macrolanguage] */
case HB_TAG('M','O','L',' '): /* Moldavian */
case HB_TAG('M','O','L',' '): /* Romanian (Moldova) */
return hb_language_from_string ("ro-MD", -1); /* Romanian; Moldova */
case HB_TAG('M','O','N','T'): /* Thailand Mon */
return hb_language_from_string ("mnw-TH", -1); /* Mon; Thailand */
@ -2958,6 +2960,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
return hb_language_from_string ("ro", -1); /* Romanian */
case HB_TAG('R','O','Y',' '): /* Romany */
return hb_language_from_string ("rom", -1); /* Romany [macrolanguage] */
case HB_TAG('S','A','N',' '): /* Sanskrit */
return hb_language_from_string ("sa", -1); /* Sanskrit [macrolanguage] */
case HB_TAG('S','Q','I',' '): /* Albanian */
return hb_language_from_string ("sq", -1); /* Albanian [macrolanguage] */
case HB_TAG('S','R','B',' '): /* Serbian */