From 0692d23ccc4f253b5e69591433a3c3990687b4a9 Mon Sep 17 00:00:00 2001 From: David Corbett Date: Thu, 7 Mar 2024 17:30:56 -0500 Subject: [PATCH] Update IANA Language Subtag Registry to 2024-03-07 --- src/gen-tag-table.py | 2 +- src/hb-ot-tag-table.hh | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py index 7e15c08c5..86913c3ef 100755 --- a/src/gen-tag-table.py +++ b/src/gen-tag-table.py @@ -584,7 +584,7 @@ class BCP47Parser (object): self.grandfathered.add (subtag.lower ()) elif line.startswith ('Description: '): description = line.split (' ', 1)[1].replace (' (individual language)', '') - description = re.sub (' (\(family\)|\((individual |macro)language\)|languages)$', '', + description = re.sub (r' (\(family\)|\((individual |macro)language\)|languages)$', '', description) if subtag in self.names: self.names[subtag] += '\n' + description diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh index 032a7c866..db92f4664 100644 --- a/src/hb-ot-tag-table.hh +++ b/src/hb-ot-tag-table.hh @@ -6,8 +6,8 @@ * * on files with these headers: * - * - * File-Date: 2023-08-02 + * + * File-Date: 2024-03-07 */ #ifndef HB_OT_TAG_TABLE_HH @@ -31,7 +31,7 @@ static const LangTag ot_languages2[] = { {HB_TAG('b','i',' ',' '), HB_TAG('B','I','S',' ')}, /* Bislama */ {HB_TAG('b','i',' ',' '), HB_TAG('C','P','P',' ')}, /* Bislama -> Creoles */ {HB_TAG('b','m',' ',' '), HB_TAG('B','M','B',' ')}, /* Bambara (Bamanankan) */ - {HB_TAG('b','n',' ',' '), HB_TAG('B','E','N',' ')}, /* Bengali */ + {HB_TAG('b','n',' ',' '), HB_TAG('B','E','N',' ')}, /* Bangla */ {HB_TAG('b','o',' ',' '), HB_TAG('T','I','B',' ')}, /* Tibetan */ {HB_TAG('b','r',' ',' '), HB_TAG('B','R','E',' ')}, /* Breton */ {HB_TAG('b','s',' ',' '), HB_TAG('B','O','S',' ')}, /* Bosnian */ @@ -64,7 +64,7 @@ static const LangTag ot_languages2[] = { {HB_TAG('f','r',' ',' '), HB_TAG('F','R','A',' ')}, /* French */ {HB_TAG('f','y',' ',' '), HB_TAG('F','R','I',' ')}, /* Western Frisian -> Frisian */ {HB_TAG('g','a',' ',' '), HB_TAG('I','R','I',' ')}, /* Irish */ - {HB_TAG('g','d',' ',' '), HB_TAG('G','A','E',' ')}, /* Scottish Gaelic (Gaelic) */ + {HB_TAG('g','d',' ',' '), HB_TAG('G','A','E',' ')}, /* Scottish Gaelic */ {HB_TAG('g','l',' ',' '), HB_TAG('G','A','L',' ')}, /* Galician */ {HB_TAG('g','n',' ',' '), HB_TAG('G','U','A',' ')}, /* Guarani [macrolanguage] */ {HB_TAG('g','u',' ',' '), HB_TAG('G','U','J',' ')}, /* Gujarati */ @@ -132,7 +132,7 @@ static const LangTag ot_languages2[] = { {HB_TAG('m','l',' ',' '), HB_TAG('M','A','L',' ')}, /* Malayalam -> Malayalam Traditional */ {HB_TAG('m','l',' ',' '), HB_TAG('M','L','R',' ')}, /* Malayalam -> Malayalam Reformed */ {HB_TAG('m','n',' ',' '), HB_TAG('M','N','G',' ')}, /* Mongolian [macrolanguage] */ - {HB_TAG('m','o',' ',' '), HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */ + {HB_TAG('m','o',' ',' '), HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) -> Romanian (Moldova) */ {HB_TAG('m','o',' ',' '), HB_TAG('R','O','M',' ')}, /* Moldavian (retired code) -> Romanian */ {HB_TAG('m','r',' ',' '), HB_TAG('M','A','R',' ')}, /* Marathi */ {HB_TAG('m','s',' ',' '), HB_TAG('M','L','Y',' ')}, /* Malay [macrolanguage] */ @@ -153,7 +153,7 @@ static const LangTag ot_languages2[] = { {HB_TAG('o','c',' ',' '), HB_TAG('O','C','I',' ')}, /* Occitan (post 1500) */ {HB_TAG('o','j',' ',' '), HB_TAG('O','J','B',' ')}, /* Ojibwa [macrolanguage] -> Ojibway */ {HB_TAG('o','m',' ',' '), HB_TAG('O','R','O',' ')}, /* Oromo [macrolanguage] */ - {HB_TAG('o','r',' ',' '), HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) [macrolanguage] */ + {HB_TAG('o','r',' ',' '), HB_TAG('O','R','I',' ')}, /* Odia [macrolanguage] */ {HB_TAG('o','s',' ',' '), HB_TAG('O','S','S',' ')}, /* Ossetian */ {HB_TAG('p','a',' ',' '), HB_TAG('P','A','N',' ')}, /* Punjabi */ {HB_TAG('p','i',' ',' '), HB_TAG('P','A','L',' ')}, /* Pali */ @@ -166,7 +166,7 @@ static const LangTag ot_languages2[] = { {HB_TAG('r','o',' ',' '), HB_TAG('R','O','M',' ')}, /* Romanian */ {HB_TAG('r','u',' ',' '), HB_TAG('R','U','S',' ')}, /* Russian */ {HB_TAG('r','w',' ',' '), HB_TAG('R','U','A',' ')}, /* Kinyarwanda */ - {HB_TAG('s','a',' ',' '), HB_TAG('S','A','N',' ')}, /* Sanskrit */ + {HB_TAG('s','a',' ',' '), HB_TAG('S','A','N',' ')}, /* Sanskrit [macrolanguage] */ {HB_TAG('s','c',' ',' '), HB_TAG('S','R','D',' ')}, /* Sardinian [macrolanguage] */ {HB_TAG('s','d',' ',' '), HB_TAG('S','N','D',' ')}, /* Sindhi */ {HB_TAG('s','e',' ',' '), HB_TAG('N','S','M',' ')}, /* Northern Sami */ @@ -465,6 +465,7 @@ static const LangTag ot_languages3[] = { {HB_TAG('c','l','d',' '), HB_TAG('S','Y','R',' ')}, /* Chaldean Neo-Aramaic -> Syriac */ {HB_TAG('c','l','e',' '), HB_TAG('C','C','H','N')}, /* Lealao Chinantec -> Chinantec */ {HB_TAG('c','l','j',' '), HB_TAG('Q','I','N',' ')}, /* Laitu Chin -> Chin */ + {HB_TAG('c','l','s',' '), HB_TAG('S','A','N',' ')}, /* Classical Sanskrit -> Sanskrit */ {HB_TAG('c','l','t',' '), HB_TAG('Q','I','N',' ')}, /* Lautu Chin -> Chin */ {HB_TAG('c','m','n',' '), HB_TAG('Z','H','S',' ')}, /* Mandarin Chinese -> Chinese, Simplified */ {HB_TAG('c','m','r',' '), HB_TAG('Q','I','N',' ')}, /* Mro-Khimi Chin -> Chin */ @@ -637,7 +638,7 @@ static const LangTag ot_languages3[] = { {HB_TAG('g','a','a',' '), HB_TAG('G','A','D',' ')}, /* Ga */ {HB_TAG('g','a','c',' '), HB_TAG('C','P','P',' ')}, /* Mixed Great Andamanese -> Creoles */ {HB_TAG('g','a','d',' '), HB_TAG_NONE }, /* Gaddang != Ga */ - {HB_TAG('g','a','e',' '), HB_TAG_NONE }, /* Guarequena != Scottish Gaelic (Gaelic) */ + {HB_TAG('g','a','e',' '), HB_TAG_NONE }, /* Guarequena != Scottish Gaelic */ /*{HB_TAG('g','a','g',' '), HB_TAG('G','A','G',' ')},*/ /* Gagauz */ {HB_TAG('g','a','l',' '), HB_TAG_NONE }, /* Galolen != Galician */ {HB_TAG('g','a','n',' '), HB_TAG('Z','H','S',' ')}, /* Gan Chinese -> Chinese, Simplified */ @@ -1160,7 +1161,7 @@ static const LangTag ot_languages3[] = { {HB_TAG('o','r','o',' '), HB_TAG_NONE }, /* Orokolo != Oromo */ {HB_TAG('o','r','r',' '), HB_TAG('I','J','O',' ')}, /* Oruma -> Ijo */ {HB_TAG('o','r','s',' '), HB_TAG('M','L','Y',' ')}, /* Orang Seletar -> Malay */ - {HB_TAG('o','r','y',' '), HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) */ + {HB_TAG('o','r','y',' '), HB_TAG('O','R','I',' ')}, /* Odia */ {HB_TAG('o','t','w',' '), HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */ {HB_TAG('o','u','a',' '), HB_TAG('B','B','R',' ')}, /* Tagargrent -> Berber */ {HB_TAG('p','a','a',' '), HB_TAG_NONE }, /* Papuan [collection] != Palestinian Aramaic */ @@ -1395,7 +1396,7 @@ static const LangTag ot_languages3[] = { /*{HB_TAG('s','n','k',' '), HB_TAG('S','N','K',' ')},*/ /* Soninke */ {HB_TAG('s','o','g',' '), HB_TAG_NONE }, /* Sogdian != Sodo Gurage */ /*{HB_TAG('s','o','p',' '), HB_TAG('S','O','P',' ')},*/ /* Songe */ - {HB_TAG('s','p','v',' '), HB_TAG('O','R','I',' ')}, /* Sambalpuri -> Odia (formerly Oriya) */ + {HB_TAG('s','p','v',' '), HB_TAG('O','R','I',' ')}, /* Sambalpuri -> Odia */ {HB_TAG('s','p','y',' '), HB_TAG('K','A','L',' ')}, /* Sabaot -> Kalenjin */ {HB_TAG('s','r','b',' '), HB_TAG_NONE }, /* Sora != Serbian */ {HB_TAG('s','r','c',' '), HB_TAG('S','R','D',' ')}, /* Logudorese Sardinian -> Sardinian */ @@ -1533,6 +1534,7 @@ static const LangTag ot_languages3[] = { {HB_TAG('v','l','s',' '), HB_TAG('F','L','E',' ')}, /* Vlaams -> Dutch (Flemish) */ {HB_TAG('v','m','w',' '), HB_TAG('M','A','K',' ')}, /* Makhuwa */ /*{HB_TAG('v','r','o',' '), HB_TAG('V','R','O',' ')},*/ /* Võro */ + {HB_TAG('v','s','n',' '), HB_TAG('S','A','N',' ')}, /* Vedic Sanskrit -> Sanskrit */ {HB_TAG('w','a','g',' '), HB_TAG_NONE }, /* Wa'ema != Wagdi */ /*{HB_TAG('w','a','r',' '), HB_TAG('W','A','R',' ')},*/ /* Waray (Philippines) -> Waray-Waray */ {HB_TAG('w','b','m',' '), HB_TAG('W','A',' ',' ')}, /* Wa */ @@ -2643,7 +2645,7 @@ out: /* Romanian; Moldova */ unsigned int i; hb_tag_t possible_tags[] = { - HB_TAG('M','O','L',' '), /* Moldavian */ + HB_TAG('M','O','L',' '), /* Romanian (Moldova) */ HB_TAG('R','O','M',' '), /* Romanian */ }; for (i = 0; i < 2 && i < *count; i++) @@ -2920,7 +2922,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) return hb_language_from_string ("mn", -1); /* Mongolian [macrolanguage] */ case HB_TAG('M','N','K',' '): /* Maninka */ return hb_language_from_string ("man", -1); /* Mandingo [macrolanguage] */ - case HB_TAG('M','O','L',' '): /* Moldavian */ + case HB_TAG('M','O','L',' '): /* Romanian (Moldova) */ return hb_language_from_string ("ro-MD", -1); /* Romanian; Moldova */ case HB_TAG('M','O','N','T'): /* Thailand Mon */ return hb_language_from_string ("mnw-TH", -1); /* Mon; Thailand */ @@ -2958,6 +2960,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) return hb_language_from_string ("ro", -1); /* Romanian */ case HB_TAG('R','O','Y',' '): /* Romany */ return hb_language_from_string ("rom", -1); /* Romany [macrolanguage] */ + case HB_TAG('S','A','N',' '): /* Sanskrit */ + return hb_language_from_string ("sa", -1); /* Sanskrit [macrolanguage] */ case HB_TAG('S','Q','I',' '): /* Albanian */ return hb_language_from_string ("sq", -1); /* Albanian [macrolanguage] */ case HB_TAG('S','R','B',' '): /* Serbian */