From f8ebb8a65778745e43c560270d198c77c9d55220 Mon Sep 17 00:00:00 2001 From: David Corbett Date: Wed, 29 Sep 2021 19:18:25 -0400 Subject: [PATCH] [USE] Update the data files This uses the data files from . --- src/hb-ot-shape-complex-use-table.hh | 185 ++++++++++++++---- .../IndicPositionalCategory-Additional.txt | 97 ++++----- .../IndicSyllabicCategory-Additional.txt | 164 +++++++++------- 3 files changed, 283 insertions(+), 163 deletions(-) diff --git a/src/hb-ot-shape-complex-use-table.hh b/src/hb-ot-shape-complex-use-table.hh index 943462775..fbff07865 100644 --- a/src/hb-ot-shape-complex-use-table.hh +++ b/src/hb-ot-shape-complex-use-table.hh @@ -17,17 +17,19 @@ * # Override values For Indic_Syllabic_Category * # Not derivable * # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 - * # Updated for Unicode 10.0 by Andrew Glass 2017-07-25 - * # Updated for Unicode 12.1 by Andrew Glass 2019-05-24 - * # Updated for Unicode 13.0 by Andrew Glass 2020-07-28 + * # Updated for Unicode 10.0 by Andrew Glass 2017-07-25 + * # Updated for Unicode 12.1 by Andrew Glass 2019-05-24 + * # Updated for Unicode 13.0 by Andrew Glass 2020-07-28 + * # Updated for Unicode 14.0 by Andrew Glass 2021-09-25 * # Override values For Indic_Positional_Category * # Not derivable * # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 - * # Updated for Unicode 10.0 by Andrew Glass 2017-07-25 + * # Updated for Unicode 10.0 by Andrew Glass 2017-07-25 * # Ammended for Unicode 10.0 by Andrew Glass 2018-09-21 - * # Updated for L2/19-083 by Andrew Glass 2019-05-06 - * # Updated for Unicode 12.1 by Andrew Glass 2019-05-30 - * # Updated for Unicode 13.0 by Andrew Glass 2020-07-28 + * # Updated for L2/19-083 by Andrew Glass 2019-05-06 + * # Updated for Unicode 12.1 by Andrew Glass 2019-05-30 + * # Updated for Unicode 13.0 by Andrew Glass 2020-07-28 + * # Updated for Unicode 14.0 by Andrew Glass 2021-09-28 * UnicodeData.txt does not have a header. */ @@ -552,7 +554,18 @@ static const uint8_t use_table[] = { /* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O, /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x10a00u 4008 +#define use_offset_0x10570u 4008 + + + /* Vithkuqi */ + + /* 10570 */ B, B, B, B, B, B, B, B, B, B, B, O, B, B, B, B, + /* 10580 */ B, B, B, B, B, B, B, B, B, B, B, O, B, B, B, B, + /* 10590 */ B, B, B, O, B, B, O, B, B, B, B, B, B, B, B, B, + /* 105A0 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 105B0 */ B, B, O, B, B, B, B, B, B, B, O, B, B, O, O, O, + +#define use_offset_0x10a00u 4088 /* Kharoshthi */ @@ -563,7 +576,7 @@ static const uint8_t use_table[] = { /* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H, /* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, -#define use_offset_0x10ac0u 4088 +#define use_offset_0x10ac0u 4168 /* Manichaean */ @@ -572,7 +585,7 @@ static const uint8_t use_table[] = { /* 10AD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 10AE0 */ B, B, B, B, B, CMBlw, CMBlw, O, -#define use_offset_0x10b80u 4128 +#define use_offset_0x10b80u 4208 /* Psalter Pahlavi */ @@ -581,7 +594,7 @@ static const uint8_t use_table[] = { /* 10B90 */ B, B, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 10BA0 */ O, O, O, O, O, O, O, O, O, B, B, B, B, B, B, O, -#define use_offset_0x10d00u 4176 +#define use_offset_0x10d00u 4256 /* Hanifi Rohingya */ @@ -591,7 +604,7 @@ static const uint8_t use_table[] = { /* 10D20 */ B, B, B, B, VMAbv, VMAbv, VMAbv, CMAbv, O, O, O, O, O, O, O, O, /* 10D30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x10e80u 4240 +#define use_offset_0x10e80u 4320 /* Yezidi */ @@ -601,17 +614,22 @@ static const uint8_t use_table[] = { /* 10EA0 */ B, B, B, B, B, B, B, B, B, B, O, VAbv, VAbv, O, O, O, /* 10EB0 */ B, B, O, O, O, O, O, O, -#define use_offset_0x10f30u 4296 +#define use_offset_0x10f30u 4376 /* Sogdian */ /* 10F30 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 10F40 */ B, B, B, B, B, B, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, - /* 10F50 */ VMBlw, B, B, B, B, O, O, O, + /* 10F50 */ VMBlw, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O, + /* 10F60 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 10F70 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, -#define use_offset_0x10fb0u 4336 + /* Old Uyghur */ + /* 10F80 */ O, O, CMBlw, CMBlw, CMBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, + /* 10F90 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 10FA0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* Chorasmian */ @@ -640,7 +658,7 @@ static const uint8_t use_table[] = { /* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O, /* 110C0 */ O, O, VBlw, O, O, O, O, O, -#define use_offset_0x11100u 4616 +#define use_offset_0x11100u 4784 /* Chakma */ @@ -678,7 +696,7 @@ static const uint8_t use_table[] = { /* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw, /* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O, -#define use_offset_0x11280u 4936 +#define use_offset_0x11280u 5104 /* Multani */ @@ -706,7 +724,7 @@ static const uint8_t use_table[] = { /* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, /* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, -#define use_offset_0x11400u 5184 +#define use_offset_0x11400u 5352 /* Newa */ @@ -729,7 +747,7 @@ static const uint8_t use_table[] = { /* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O, /* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11580u 5408 +#define use_offset_0x11580u 5576 /* Siddham */ @@ -773,7 +791,7 @@ static const uint8_t use_table[] = { /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, /* 11740 */ B, B, B, B, B, B, B, O, -#define use_offset_0x11800u 5864 +#define use_offset_0x11800u 6032 /* Dogra */ @@ -783,7 +801,7 @@ static const uint8_t use_table[] = { /* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw, /* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O, -#define use_offset_0x11900u 5928 +#define use_offset_0x11900u 6096 /* Dives Akuru */ @@ -795,7 +813,7 @@ static const uint8_t use_table[] = { /* 11940 */ MPst, R, MPst, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, /* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x119a0u 6024 +#define use_offset_0x119a0u 6192 /* Nandinagari */ @@ -823,7 +841,7 @@ static const uint8_t use_table[] = { /* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, /* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O, -#define use_offset_0x11c00u 6280 +#define use_offset_0x11c00u 6448 /* Bhaiksuki */ @@ -844,7 +862,7 @@ static const uint8_t use_table[] = { /* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, /* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O, -#define use_offset_0x11d00u 6464 +#define use_offset_0x11d00u 6632 /* Masaram Gondi */ @@ -864,7 +882,7 @@ static const uint8_t use_table[] = { /* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O, /* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11ee0u 6640 +#define use_offset_0x11ee0u 6808 /* Makasar */ @@ -872,16 +890,93 @@ static const uint8_t use_table[] = { /* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O, -#define use_offset_0x13430u 6664 +#define use_offset_0x13000u 6832 + /* Egyptian Hieroglyphs */ + + /* 13000 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13020 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13030 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13040 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13050 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13060 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13070 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13080 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13100 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13120 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13130 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13140 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13150 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13160 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13170 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13180 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13190 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13200 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13210 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13220 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13230 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13240 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13250 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13260 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13270 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13280 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13300 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13310 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13320 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13330 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13340 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13350 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13360 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13370 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13380 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13390 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13400 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13410 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13420 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, + /* Egyptian Hieroglyph Format Controls */ - /* 13430 */ H, H, H, H, H, H, H, O, + /* 13430 */ H, H, H, H, H, H, H, B, B, O, O, O, O, O, O, O, -#define use_offset_0x16b00u 6672 +#define use_offset_0x16ac0u 7920 + /* Tangsa */ + + /* 16AC0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* 16AD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 16AE0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 16AF0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* Pahawh Hmong */ /* 16B00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, @@ -889,7 +984,7 @@ static const uint8_t use_table[] = { /* 16B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 16B30 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, -#define use_offset_0x16f00u 6728 +#define use_offset_0x16f00u 8040 /* Miao */ @@ -905,14 +1000,14 @@ static const uint8_t use_table[] = { /* 16F80 */ VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, O, O, O, O, O, O, O, VMBlw, /* 16F90 */ VMBlw, VMBlw, VMBlw, O, O, O, O, O, -#define use_offset_0x16fe0u 6880 +#define use_offset_0x16fe0u 8192 /* Ideographic Symbols and Punctuation */ /* 16FE0 */ O, O, O, O, B, O, O, O, -#define use_offset_0x18b00u 6888 +#define use_offset_0x18b00u 8200 /* Khitan Small Script */ @@ -948,7 +1043,7 @@ static const uint8_t use_table[] = { /* 18CC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 18CD0 */ B, B, B, B, B, B, O, O, -#define use_offset_0x1bc00u 7360 +#define use_offset_0x1bc00u 8672 /* Duployan */ @@ -964,7 +1059,7 @@ static const uint8_t use_table[] = { /* 1BC80 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, /* 1BC90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, CMBlw, CMBlw, O, -#define use_offset_0x1e100u 7520 +#define use_offset_0x1e100u 8832 /* Nyiakeng Puachue Hmong */ @@ -975,9 +1070,15 @@ static const uint8_t use_table[] = { /* 1E130 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, O, O, /* 1E140 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, B, B, -#define use_offset_0x1e2c0u 7600 +#define use_offset_0x1e290u 8912 + /* Toto */ + + /* 1E290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1E2A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, O, + /* 1E2B0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* Wancho */ /* 1E2C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, @@ -985,7 +1086,7 @@ static const uint8_t use_table[] = { /* 1E2E0 */ B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMAbv, VMAbv, VMAbv, /* 1E2F0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1e900u 7664 +#define use_offset_0x1e900u 9024 /* Adlam */ @@ -997,7 +1098,7 @@ static const uint8_t use_table[] = { /* 1E940 */ B, B, B, B, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, B, O, O, O, O, /* 1E950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -}; /* Table items: 7760; occupancy: 76% */ +}; /* Table items: 9120; occupancy: 78% */ static inline uint8_t hb_use_get_category (hb_codepoint_t u) @@ -1037,17 +1138,17 @@ hb_use_get_category (hb_codepoint_t u) break; case 0x10u: + if (hb_in_range (u, 0x10570u, 0x105BFu)) return use_table[u - 0x10570u + use_offset_0x10570u]; if (hb_in_range (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u]; if (hb_in_range (u, 0x10AC0u, 0x10AE7u)) return use_table[u - 0x10AC0u + use_offset_0x10ac0u]; if (hb_in_range (u, 0x10B80u, 0x10BAFu)) return use_table[u - 0x10B80u + use_offset_0x10b80u]; if (hb_in_range (u, 0x10D00u, 0x10D3Fu)) return use_table[u - 0x10D00u + use_offset_0x10d00u]; if (hb_in_range (u, 0x10E80u, 0x10EB7u)) return use_table[u - 0x10E80u + use_offset_0x10e80u]; - if (hb_in_range (u, 0x10F30u, 0x10F57u)) return use_table[u - 0x10F30u + use_offset_0x10f30u]; - if (hb_in_range (u, 0x10FB0u, 0x110C7u)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u]; + if (hb_in_range (u, 0x10F30u, 0x110C7u)) return use_table[u - 0x10F30u + use_offset_0x10f30u]; break; case 0x11u: - if (hb_in_range (u, 0x10FB0u, 0x110C7u)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u]; + if (hb_in_range (u, 0x10F30u, 0x110C7u)) return use_table[u - 0x10F30u + use_offset_0x10f30u]; if (hb_in_range (u, 0x11100u, 0x1123Fu)) return use_table[u - 0x11100u + use_offset_0x11100u]; if (hb_in_range (u, 0x11280u, 0x11377u)) return use_table[u - 0x11280u + use_offset_0x11280u]; if (hb_in_range (u, 0x11400u, 0x114DFu)) return use_table[u - 0x11400u + use_offset_0x11400u]; @@ -1061,11 +1162,11 @@ hb_use_get_category (hb_codepoint_t u) break; case 0x13u: - if (hb_in_range (u, 0x13430u, 0x13437u)) return use_table[u - 0x13430u + use_offset_0x13430u]; + if (hb_in_range (u, 0x13000u, 0x1343Fu)) return use_table[u - 0x13000u + use_offset_0x13000u]; break; case 0x16u: - if (hb_in_range (u, 0x16B00u, 0x16B37u)) return use_table[u - 0x16B00u + use_offset_0x16b00u]; + if (hb_in_range (u, 0x16AC0u, 0x16B37u)) return use_table[u - 0x16AC0u + use_offset_0x16ac0u]; if (hb_in_range (u, 0x16F00u, 0x16F97u)) return use_table[u - 0x16F00u + use_offset_0x16f00u]; if (hb_in_range (u, 0x16FE0u, 0x16FE7u)) return use_table[u - 0x16FE0u + use_offset_0x16fe0u]; break; @@ -1080,7 +1181,7 @@ hb_use_get_category (hb_codepoint_t u) case 0x1Eu: if (hb_in_range (u, 0x1E100u, 0x1E14Fu)) return use_table[u - 0x1E100u + use_offset_0x1e100u]; - if (hb_in_range (u, 0x1E2C0u, 0x1E2FFu)) return use_table[u - 0x1E2C0u + use_offset_0x1e2c0u]; + if (hb_in_range (u, 0x1E290u, 0x1E2FFu)) return use_table[u - 0x1E290u + use_offset_0x1e290u]; if (hb_in_range (u, 0x1E900u, 0x1E95Fu)) return use_table[u - 0x1E900u + use_offset_0x1e900u]; break; diff --git a/src/ms-use/IndicPositionalCategory-Additional.txt b/src/ms-use/IndicPositionalCategory-Additional.txt index 8d325adbd..83a164e49 100644 --- a/src/ms-use/IndicPositionalCategory-Additional.txt +++ b/src/ms-use/IndicPositionalCategory-Additional.txt @@ -1,11 +1,12 @@ # Override values For Indic_Positional_Category # Not derivable # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 -# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 +# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 # Ammended for Unicode 10.0 by Andrew Glass 2018-09-21 -# Updated for L2/19-083 by Andrew Glass 2019-05-06 -# Updated for Unicode 12.1 by Andrew Glass 2019-05-30 -# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for L2/19-083 by Andrew Glass 2019-05-06 +# Updated for Unicode 12.1 by Andrew Glass 2019-05-30 +# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 14.0 by Andrew Glass 2021-09-28 # ================================================ # ================================================ @@ -14,39 +15,39 @@ # ================================================ # Indic_Positional_Category=Bottom -0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model -0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model -0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model -A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA -11127..11129; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II -1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI -11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI +0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model +0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model +0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model +A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA +11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II +1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI +11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI # ================================================ # Indic_Positional_Category=Left -1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left +1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left # ================================================ # Indic_Positional_Category=Right -A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right -10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers -11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right +A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right +10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers +11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right # ================================================ # Indic_Positional_Category=Top -0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model -1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above -AA35   ; Top # Mn       CHAM CONSONANT SIGN +0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model +1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above +AA35   ; Top # Mn       CHAM CONSONANT SIGN # ================================================ # Indic_Positional_Category=Top_And_Right -0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake. -0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake. +0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake. +0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake. # ================================================ # ================================================ @@ -55,41 +56,46 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN # ================================================ # Indic_Positional_Category=Bottom -0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA -10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Not really bottom, but here for ccc to control -10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW -10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW -10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden to below because ccc-based Normalization controls order -10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW -10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden to below because ccc-based Normalization controls order -10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW -16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR -16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA +10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order +10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW +10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW +10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order +10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW +10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order +10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW +10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order +10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW +10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order +10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW +16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW # ================================================ # Indic_Positional_Category=Left -103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA +103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA # ================================================ # Indic_Positional_Category=Top -07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order -1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA -10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI -16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order +1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Top # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA # ================================================ # Indic_Positional_Category=Overstruck -1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK # ================================================ # ================================================ @@ -98,5 +104,6 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN # ================================================ # Indic_Positional_Category=NA -180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -2D7F ; NA # Mn TIFINAGH CONSONANT JOINER +180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; NA # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +2D7F ; NA # Mn TIFINAGH CONSONANT JOINER diff --git a/src/ms-use/IndicSyllabicCategory-Additional.txt b/src/ms-use/IndicSyllabicCategory-Additional.txt index 091205520..8bcada38a 100644 --- a/src/ms-use/IndicSyllabicCategory-Additional.txt +++ b/src/ms-use/IndicSyllabicCategory-Additional.txt @@ -1,9 +1,10 @@ # Override values For Indic_Syllabic_Category # Not derivable # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 -# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 -# Updated for Unicode 12.1 by Andrew Glass 2019-05-24 -# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 +# Updated for Unicode 12.1 by Andrew Glass 2019-05-24 +# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 14.0 by Andrew Glass 2021-09-25 # ================================================ # OVERRIDES TO ASSIGNED VALUES @@ -17,28 +18,28 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA # ================================================ # Indic_Syllabic_Category=Consonant -0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN -0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED -0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA -19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant -25CC ; Consonant # So DOTTED CIRCLE +0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED +0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA +19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant +25CC ; Consonant # So DOTTED CIRCLE # ================================================ # Indic_Syllabic_Category=Consonant_Dead -0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster +0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster # ================================================ # Indic_Syllabic_Category=Consonant_Final -0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN +0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN # ================================================ # Indic_Syllabic_Category=Consonant_Final_Modifier -1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN +1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN # ================================================ @@ -54,9 +55,8 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA # ================================================ # Indic_Syllabic_Category=Tone_Mark -A982 ; Tone_Mark # Mn JAVANESE SIGN LAYAR# Not a repha, because it does not reorder to front of cluster -1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN -1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT # ================================================ @@ -72,41 +72,50 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN # ================================================ # Indic_Syllabic_Category=Consonant -0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF -1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work -1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER -180A ; Consonant # Po MONGOLIAN NIRUGU -1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS -1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN -2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO -2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK -10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW -10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW -10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA -10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET -10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN -111DA ; Consonant # Lo SHARADA EKAM -#HIEROGLYPHS moved to new category -#13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work +1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER +180A ; Consonant # Po MONGOLIAN NIRUGU +1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS +1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Consonant # Lo [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Consonant # Lo [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Consonant # Lo [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Consonant # Lo [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Consonant # Lo [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Consonant # Lo [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Consonant # Lo [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +111DA ; Consonant # Lo SHARADA EKAM +#HIEROGLYPHS to be moved to new category +13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 #For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified. -#13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT -16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU -16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE -16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER -18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M -1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK -1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL -1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW -1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W -1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER -1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ -1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA -1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH -1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -1E94B ; Consonant # Lm ADLAM NASALIZATION MARK +13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT +16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class +18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA +1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; Consonant # Lm ADLAM NASALIZATION MARK # ================================================ @@ -116,13 +125,13 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN # ================================================ # Indic_Syllabic_Category=Gemination_Mark -10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI +10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI # ================================================ # Indic_Syllabic_Category=Modifying_Letter -FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios -16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION +FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios +16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION # ================================================ @@ -136,49 +145,52 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELE 16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR 1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1E944..1E94A ; Nukta # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +10F82..10F85 ; Nukta # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW # ================================================ # Indic_Syllabic_Category=Number -10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE -10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED -1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE -1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE -1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE # ================================================ # Indic_Syllabic_Category=Tone_Mark -07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07FD ; Tone_Mark # Mn NKO DANTAYALAN -0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -17CF ; Tone_Mark # Mn KHMER SIGN AHSDA -10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA -10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW -16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Tone_Mark # Mn NKO DANTAYALAN +0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +17CF ; Tone_Mark # Mn KHMER SIGN AHSDA +10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA +10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Tone_Mark # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI # ================================================ # Indic_Syllabic_Category=Virama -2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER -13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE +2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER +13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE # ================================================ # Indic_Syllabic_Category=Vowel_Independent -AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA -AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA -AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN +AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA +AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA +AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN # ================================================ # Indic_Syllabic_Category=Vowel_Dependent -0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE -10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE +10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI # ================================================ # ================================================